]> git.ipfire.org Git - thirdparty/glibc.git/blame - posix/fnmatch_loop.c
regex: __builtin_expect → __glibc_unlikely
[thirdparty/glibc.git] / posix / fnmatch_loop.c
CommitLineData
/* Copyright (C) 1991-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
1fc82a56 17
e054f494
RA
18#include <stdint.h>
19
9700b039
UD
20struct STRUCT
21{
22 const CHAR *pattern;
23 const CHAR *string;
24 int no_leading_period;
25};
26
1fc82a56
UD
27/* Match STRING against the filename pattern PATTERN, returning zero if
28 it matches, nonzero if not. */
29static int FCT (const CHAR *pattern, const CHAR *string,
9700b039 30 const CHAR *string_end, int no_leading_period, int flags,
b41bd5bc 31 struct STRUCT *ends, size_t alloca_used);
955994e1 32static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
f15ce4d8 33 const CHAR *string_end, int no_leading_period, int flags,
b41bd5bc
FW
34 size_t alloca_used);
35static const CHAR *END (const CHAR *patternp);
1fc82a56
UD
36
37static int
9dd346ff
JM
38FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
39 int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
1fc82a56 40{
2e09a79a
JM
41 const CHAR *p = pattern, *n = string;
42 UCHAR c;
acb5ee2e 43#ifdef _LIBC
4c7d276e
UD
44# if WIDE_CHAR_VERSION
45 const char *collseq = (const char *)
46 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
47# else
acb5ee2e 48 const UCHAR *collseq = (const UCHAR *)
4c7d276e 49 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
acb5ee2e
UD
50# endif
51#endif
1fc82a56
UD
52
53 while ((c = *p++) != L('\0'))
54 {
955994e1 55 int new_no_leading_period = 0;
1fc82a56
UD
56 c = FOLD (c);
57
58 switch (c)
59 {
60 case L('?'):
955994e1
UD
61 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
62 {
f15ce4d8
UD
63 int res = EXT (c, p, n, string_end, no_leading_period,
64 flags, alloca_used);
955994e1
UD
65 if (res != -1)
66 return res;
67 }
68
69 if (n == string_end)
1fc82a56
UD
70 return FNM_NOMATCH;
71 else if (*n == L('/') && (flags & FNM_FILE_NAME))
72 return FNM_NOMATCH;
955994e1 73 else if (*n == L('.') && no_leading_period)
1fc82a56
UD
74 return FNM_NOMATCH;
75 break;
76
77 case L('\\'):
78 if (!(flags & FNM_NOESCAPE))
79 {
80 c = *p++;
81 if (c == L('\0'))
82 /* Trailing \ loses. */
83 return FNM_NOMATCH;
84 c = FOLD (c);
85 }
955994e1 86 if (n == string_end || FOLD ((UCHAR) *n) != c)
1fc82a56
UD
87 return FNM_NOMATCH;
88 break;
89
90 case L('*'):
955994e1
UD
91 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
92 {
f15ce4d8
UD
93 int res = EXT (c, p, n, string_end, no_leading_period,
94 flags, alloca_used);
955994e1
UD
95 if (res != -1)
96 return res;
97 }
9700b039
UD
98 else if (ends != NULL)
99 {
100 ends->pattern = p - 1;
101 ends->string = n;
102 ends->no_leading_period = no_leading_period;
103 return 0;
104 }
955994e1
UD
105
106 if (n != string_end && *n == L('.') && no_leading_period)
1fc82a56
UD
107 return FNM_NOMATCH;
108
109 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
110 {
955994e1
UD
111 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
112 {
113 const CHAR *endp = END (p);
114 if (endp != p)
115 {
116 /* This is a pattern. Skip over it. */
117 p = endp;
118 continue;
119 }
120 }
121
122 if (c == L('?'))
1fc82a56
UD
123 {
124 /* A ? needs to match one character. */
955994e1 125 if (n == string_end)
1fc82a56
UD
126 /* There isn't another character; no match. */
127 return FNM_NOMATCH;
955994e1
UD
128 else if (*n == L('/')
129 && __builtin_expect (flags & FNM_FILE_NAME, 0))
130 /* A slash does not match a wildcard under
131 FNM_FILE_NAME. */
132 return FNM_NOMATCH;
1fc82a56
UD
133 else
134 /* One character of the string is consumed in matching
135 this ? wildcard, so *??? won't match if there are
136 less than three characters. */
137 ++n;
138 }
139 }
140
141 if (c == L('\0'))
142 /* The wildcard(s) is/are the last element of the pattern.
143 If the name is a file name and contains another slash
a4607c1f
UD
144 this means it cannot match, unless the FNM_LEADING_DIR
145 flag is set. */
47924894
UD
146 {
147 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
148
149 if (flags & FNM_FILE_NAME)
150 {
47924894 151 if (flags & FNM_LEADING_DIR)
a4607c1f 152 result = 0;
47924894
UD
153 else
154 {
955994e1 155 if (MEMCHR (n, L('/'), string_end - n) == NULL)
47924894
UD
156 result = 0;
157 }
158 }
159
160 return result;
161 }
1fc82a56
UD
162 else
163 {
164 const CHAR *endp;
9700b039 165 struct STRUCT end;
1fc82a56 166
9700b039 167 end.pattern = NULL;
955994e1
UD
168 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
169 string_end - n);
170 if (endp == NULL)
171 endp = string_end;
1fc82a56 172
955994e1
UD
173 if (c == L('[')
174 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
228293b5
UD
175 && (c == L('@') || c == L('+') || c == L('!'))
176 && *p == L('(')))
1fc82a56
UD
177 {
178 int flags2 = ((flags & FNM_FILE_NAME)
179 ? flags : (flags & ~FNM_PERIOD));
180
9700b039
UD
181 for (--p; n < endp; ++n, no_leading_period = 0)
182 if (FCT (p, n, string_end, no_leading_period, flags2,
f15ce4d8 183 &end, alloca_used) == 0)
9700b039 184 goto found;
1fc82a56
UD
185 }
186 else if (c == L('/') && (flags & FNM_FILE_NAME))
187 {
955994e1 188 while (n < string_end && *n != L('/'))
1fc82a56 189 ++n;
955994e1 190 if (n < string_end && *n == L('/')
9700b039 191 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
f15ce4d8 192 NULL, alloca_used) == 0))
1fc82a56
UD
193 return 0;
194 }
195 else
196 {
197 int flags2 = ((flags & FNM_FILE_NAME)
198 ? flags : (flags & ~FNM_PERIOD));
199
200 if (c == L('\\') && !(flags & FNM_NOESCAPE))
201 c = *p;
202 c = FOLD (c);
9700b039 203 for (--p; n < endp; ++n, no_leading_period = 0)
1fc82a56 204 if (FOLD ((UCHAR) *n) == c
9700b039 205 && (FCT (p, n, string_end, no_leading_period, flags2,
f15ce4d8 206 &end, alloca_used) == 0))
9700b039
UD
207 {
208 found:
209 if (end.pattern == NULL)
210 return 0;
211 break;
212 }
213 if (end.pattern != NULL)
214 {
215 p = end.pattern;
216 n = end.string;
217 no_leading_period = end.no_leading_period;
218 continue;
219 }
1fc82a56
UD
220 }
221 }
222
223 /* If we come here no match is possible with the wildcard. */
224 return FNM_NOMATCH;
225
226 case L('['):
227 {
83b1b6d8 228 /* Nonzero if the sense of the character class is inverted. */
a0bf67cc
RM
229 const CHAR *p_init = p;
230 const CHAR *n_init = n;
2e09a79a 231 int not;
1fc82a56 232 CHAR cold;
955994e1 233 UCHAR fn;
1fc82a56
UD
234
235 if (posixly_correct == 0)
236 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
237
955994e1 238 if (n == string_end)
1fc82a56
UD
239 return FNM_NOMATCH;
240
955994e1 241 if (*n == L('.') && no_leading_period)
1fc82a56
UD
242 return FNM_NOMATCH;
243
244 if (*n == L('/') && (flags & FNM_FILE_NAME))
245 /* `/' cannot be matched. */
246 return FNM_NOMATCH;
247
248 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
249 if (not)
250 ++p;
251
955994e1
UD
252 fn = FOLD ((UCHAR) *n);
253
1fc82a56
UD
254 c = *p++;
255 for (;;)
256 {
1fc82a56
UD
257 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
258 {
259 if (*p == L('\0'))
260 return FNM_NOMATCH;
261 c = FOLD ((UCHAR) *p);
262 ++p;
263
aae95a1b 264 goto normal_bracket;
1fc82a56
UD
265 }
266 else if (c == L('[') && *p == L(':'))
267 {
268 /* Leave room for the null. */
269 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
270 size_t c1 = 0;
acb5ee2e 271#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1fc82a56 272 wctype_t wt;
acb5ee2e 273#endif
1fc82a56
UD
274 const CHAR *startp = p;
275
276 for (;;)
277 {
278 if (c1 == CHAR_CLASS_MAX_LENGTH)
279 /* The name is too long and therefore the pattern
280 is ill-formed. */
281 return FNM_NOMATCH;
282
283 c = *++p;
284 if (c == L(':') && p[1] == L(']'))
285 {
286 p += 2;
287 break;
288 }
289 if (c < L('a') || c >= L('z'))
290 {
291 /* This cannot possibly be a character class name.
292 Match it as a normal range. */
293 p = startp;
294 c = L('[');
295 goto normal_bracket;
296 }
297 str[c1++] = c;
298 }
299 str[c1] = L('\0');
300
acb5ee2e 301#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1fc82a56
UD
302 wt = IS_CHAR_CLASS (str);
303 if (wt == 0)
304 /* Invalid character class name. */
305 return FNM_NOMATCH;
306
7239b3a4 307# if defined _LIBC && ! WIDE_CHAR_VERSION
8fb81470
UD
308 /* The following code is glibc specific but does
309 there a good job in speeding up the code since
310 we can avoid the btowc() call. */
4c7d276e
UD
311 if (_ISCTYPE ((UCHAR) *n, wt))
312 goto matched;
8fb81470
UD
313# else
314 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
315 goto matched;
7239b3a4 316# endif
acb5ee2e 317#else
1fc82a56
UD
318 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
319 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
320 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
321 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
322 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
323 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
324 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
325 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
326 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
327 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
328 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
329 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
330 goto matched;
acb5ee2e 331#endif
83b1b6d8 332 c = *p++;
1fc82a56 333 }
ecce00a9
UD
334#ifdef _LIBC
335 else if (c == L('[') && *p == L('='))
336 {
b1eda10e
PE
337 /* It's important that STR be a scalar variable rather
338 than a one-element array, because GCC (at least 4.9.2
339 -O2 on x86-64) can be confused by the array and
340 diagnose a "used initialized" in a dead branch in the
341 findidx function. */
342 UCHAR str;
ecce00a9
UD
343 uint32_t nrules =
344 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
345 const CHAR *startp = p;
346
347 c = *++p;
348 if (c == L('\0'))
349 {
350 p = startp;
351 c = L('[');
352 goto normal_bracket;
353 }
b1eda10e 354 str = c;
ecce00a9
UD
355
356 c = *++p;
357 if (c != L('=') || p[1] != L(']'))
358 {
359 p = startp;
360 c = L('[');
361 goto normal_bracket;
362 }
363 p += 2;
364
365 if (nrules == 0)
366 {
b1eda10e 367 if ((UCHAR) *n == str)
ecce00a9
UD
368 goto matched;
369 }
370 else
371 {
372 const int32_t *table;
373# if WIDE_CHAR_VERSION
374 const int32_t *weights;
8c0ab919 375 const wint_t *extra;
ecce00a9
UD
376# else
377 const unsigned char *weights;
378 const unsigned char *extra;
379# endif
380 const int32_t *indirect;
381 int32_t idx;
b1eda10e 382 const UCHAR *cp = (const UCHAR *) &str;
ecce00a9 383
ecce00a9
UD
384# if WIDE_CHAR_VERSION
385 table = (const int32_t *)
386 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
387 weights = (const int32_t *)
388 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
8c0ab919 389 extra = (const wint_t *)
ecce00a9
UD
390 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
391 indirect = (const int32_t *)
392 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
393# else
394 table = (const int32_t *)
395 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
396 weights = (const unsigned char *)
397 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
398 extra = (const unsigned char *)
399 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
400 indirect = (const int32_t *)
401 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
402# endif
403
8c0ab919 404 idx = FINDIDX (table, indirect, extra, &cp, 1);
ecce00a9
UD
405 if (idx != 0)
406 {
407 /* We found a table entry. Now see whether the
408 character we are currently at has the same
409 equivalance class value. */
b7d1c5fa 410 int len = weights[idx & 0xffffff];
ecce00a9
UD
411 int32_t idx2;
412 const UCHAR *np = (const UCHAR *) n;
413
8c0ab919
RM
414 idx2 = FINDIDX (table, indirect, extra,
415 &np, string_end - n);
b7d1c5fa
UD
416 if (idx2 != 0
417 && (idx >> 24) == (idx2 >> 24)
418 && len == weights[idx2 & 0xffffff])
ecce00a9
UD
419 {
420 int cnt = 0;
421
b7d1c5fa
UD
422 idx &= 0xffffff;
423 idx2 &= 0xffffff;
424
ecce00a9
UD
425 while (cnt < len
426 && (weights[idx + 1 + cnt]
427 == weights[idx2 + 1 + cnt]))
428 ++cnt;
429
430 if (cnt == len)
431 goto matched;
432 }
ecce00a9
UD
433 }
434 }
435
436 c = *p++;
437 }
438#endif
1fc82a56 439 else if (c == L('\0'))
794c3ad3
UD
440 {
441 /* [ unterminated, treat as normal character. */
442 p = p_init;
443 n = n_init;
444 c = L('[');
445 goto normal_match;
446 }
1fc82a56
UD
447 else
448 {
f3e29a1a 449 int is_range = 0;
1fc82a56 450
f3e29a1a 451#ifdef _LIBC
c1ac11df
UD
452 int is_seqval = 0;
453
f3e29a1a
UD
454 if (c == L('[') && *p == L('.'))
455 {
456 uint32_t nrules =
457 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
458 const CHAR *startp = p;
459 size_t c1 = 0;
460
461 while (1)
462 {
463 c = *++p;
464 if (c == L('.') && p[1] == L(']'))
465 {
466 p += 2;
467 break;
468 }
469 if (c == '\0')
470 return FNM_NOMATCH;
471 ++c1;
472 }
473
474 /* We have to handling the symbols differently in
475 ranges since then the collation sequence is
476 important. */
477 is_range = *p == L('-') && p[1] != L('\0');
478
479 if (nrules == 0)
480 {
481 /* There are no names defined in the collation
482 data. Therefore we only accept the trivial
483 names consisting of the character itself. */
484 if (c1 != 1)
485 return FNM_NOMATCH;
486
487 if (!is_range && *n == startp[1])
488 goto matched;
489
490 cold = startp[1];
491 c = *p++;
492 }
493 else
494 {
495 int32_t table_size;
496 const int32_t *symb_table;
059bf913 497# if WIDE_CHAR_VERSION
f3e29a1a 498 char str[c1];
2e47aff5 499 unsigned int strcnt;
f3e29a1a
UD
500# else
501# define str (startp + 1)
502# endif
503 const unsigned char *extra;
504 int32_t idx;
505 int32_t elem;
506 int32_t second;
507 int32_t hash;
508
059bf913 509# if WIDE_CHAR_VERSION
f3e29a1a
UD
510 /* We have to convert the name to a single-byte
511 string. This is possible since the names
512 consist of ASCII characters and the internal
513 representation is UCS4. */
514 for (strcnt = 0; strcnt < c1; ++strcnt)
515 str[strcnt] = startp[1 + strcnt];
516#endif
517
518 table_size =
519 _NL_CURRENT_WORD (LC_COLLATE,
520 _NL_COLLATE_SYMB_HASH_SIZEMB);
521 symb_table = (const int32_t *)
522 _NL_CURRENT (LC_COLLATE,
523 _NL_COLLATE_SYMB_TABLEMB);
524 extra = (const unsigned char *)
525 _NL_CURRENT (LC_COLLATE,
526 _NL_COLLATE_SYMB_EXTRAMB);
527
528 /* Locate the character in the hashing table. */
529 hash = elem_hash (str, c1);
530
531 idx = 0;
532 elem = hash % table_size;
7d4722e3 533 if (symb_table[2 * elem] != 0)
f3e29a1a 534 {
7d4722e3
UD
535 second = hash % (table_size - 2) + 1;
536
537 do
f3e29a1a 538 {
7d4722e3
UD
539 /* First compare the hashing value. */
540 if (symb_table[2 * elem] == hash
541 && (c1
542 == extra[symb_table[2 * elem + 1]])
543 && memcmp (str,
544 &extra[symb_table[2 * elem
545 + 1]
546 + 1], c1) == 0)
547 {
548 /* Yep, this is the entry. */
549 idx = symb_table[2 * elem + 1];
550 idx += 1 + extra[idx];
551 break;
552 }
a334319f 553
7d4722e3
UD
554 /* Next entry. */
555 elem += second;
556 }
557 while (symb_table[2 * elem] != 0);
f3e29a1a
UD
558 }
559
560 if (symb_table[2 * elem] != 0)
561 {
562 /* Compare the byte sequence but only if
563 this is not part of a range. */
059bf913 564# if WIDE_CHAR_VERSION
f3e29a1a
UD
565 int32_t *wextra;
566
567 idx += 1 + extra[idx];
568 /* Adjust for the alignment. */
2f76d88d 569 idx = (idx + 3) & ~3;
f3e29a1a
UD
570
571 wextra = (int32_t *) &extra[idx + 4];
572# endif
573
574 if (! is_range)
575 {
059bf913 576# if WIDE_CHAR_VERSION
eb64f8cb
RM
577 for (c1 = 0;
578 (int32_t) c1 < wextra[idx];
579 ++c1)
f3e29a1a
UD
580 if (n[c1] != wextra[1 + c1])
581 break;
582
eb64f8cb 583 if ((int32_t) c1 == wextra[idx])
f3e29a1a
UD
584 goto matched;
585# else
586 for (c1 = 0; c1 < extra[idx]; ++c1)
587 if (n[c1] != extra[1 + c1])
588 break;
589
590 if (c1 == extra[idx])
591 goto matched;
592# endif
593 }
594
595 /* Get the collation sequence value. */
596 is_seqval = 1;
059bf913 597# if WIDE_CHAR_VERSION
f3e29a1a
UD
598 cold = wextra[1 + wextra[idx]];
599# else
600 /* Adjust for the alignment. */
601 idx += 1 + extra[idx];
602 idx = (idx + 3) & ~4;
603 cold = *((int32_t *) &extra[idx]);
604# endif
605
606 c = *p++;
607 }
70dc5068 608 else if (c1 == 1)
f3e29a1a
UD
609 {
610 /* No valid character. Match it as a
611 single byte. */
612 if (!is_range && *n == str[0])
613 goto matched;
614
615 cold = str[0];
616 c = *p++;
617 }
618 else
619 return FNM_NOMATCH;
620 }
621 }
622 else
623# undef str
624#endif
625 {
626 c = FOLD (c);
627 normal_bracket:
628
629 /* We have to handling the symbols differently in
630 ranges since then the collation sequence is
631 important. */
be29c482
UD
632 is_range = (*p == L('-') && p[1] != L('\0')
633 && p[1] != L(']'));
f3e29a1a
UD
634
635 if (!is_range && c == fn)
636 goto matched;
637
469dcb0d
RM
638 /* This is needed if we goto normal_bracket; from
639 outside of is_seqval's scope. */
640 is_seqval = 0;
f3e29a1a
UD
641 cold = c;
642 c = *p++;
643 }
1fc82a56
UD
644
645 if (c == L('-') && *p != L(']'))
646 {
acb5ee2e
UD
647#if _LIBC
648 /* We have to find the collation sequence
649 value for C. Collation sequence is nothing
650 we can regularly access. The sequence
651 value is defined by the order in which the
652 definitions of the collation values for the
653 various characters appear in the source
654 file. A strange concept, nowhere
655 documented. */
f3e29a1a
UD
656 uint32_t fcollseq;
657 uint32_t lcollseq;
1fc82a56 658 UCHAR cend = *p++;
acb5ee2e 659
059bf913 660# if WIDE_CHAR_VERSION
acb5ee2e 661 /* Search in the `names' array for the characters. */
25337753 662 fcollseq = __collseq_table_lookup (collseq, fn);
4c7d276e
UD
663 if (fcollseq == ~((uint32_t) 0))
664 /* XXX We don't know anything about the character
665 we are supposed to match. This means we are
666 failing. */
667 goto range_not_matched;
f3e29a1a
UD
668
669 if (is_seqval)
670 lcollseq = cold;
671 else
25337753 672 lcollseq = __collseq_table_lookup (collseq, cold);
acb5ee2e 673# else
f3e29a1a
UD
674 fcollseq = collseq[fn];
675 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
676# endif
677
678 is_seqval = 0;
679 if (cend == L('[') && *p == L('.'))
680 {
681 uint32_t nrules =
682 _NL_CURRENT_WORD (LC_COLLATE,
683 _NL_COLLATE_NRULES);
684 const CHAR *startp = p;
685 size_t c1 = 0;
686
687 while (1)
688 {
689 c = *++p;
690 if (c == L('.') && p[1] == L(']'))
691 {
692 p += 2;
693 break;
694 }
695 if (c == '\0')
696 return FNM_NOMATCH;
697 ++c1;
698 }
699
700 if (nrules == 0)
701 {
702 /* There are no names defined in the
703 collation data. Therefore we only
704 accept the trivial names consisting
705 of the character itself. */
706 if (c1 != 1)
707 return FNM_NOMATCH;
708
709 cend = startp[1];
710 }
711 else
712 {
713 int32_t table_size;
714 const int32_t *symb_table;
059bf913 715# if WIDE_CHAR_VERSION
f3e29a1a 716 char str[c1];
2e47aff5 717 unsigned int strcnt;
f3e29a1a
UD
718# else
719# define str (startp + 1)
acb5ee2e 720# endif
f3e29a1a
UD
721 const unsigned char *extra;
722 int32_t idx;
723 int32_t elem;
724 int32_t second;
725 int32_t hash;
726
059bf913 727# if WIDE_CHAR_VERSION
f3e29a1a
UD
728 /* We have to convert the name to a single-byte
729 string. This is possible since the names
730 consist of ASCII characters and the internal
731 representation is UCS4. */
732 for (strcnt = 0; strcnt < c1; ++strcnt)
733 str[strcnt] = startp[1 + strcnt];
82eafaf7 734# endif
f3e29a1a
UD
735
736 table_size =
737 _NL_CURRENT_WORD (LC_COLLATE,
738 _NL_COLLATE_SYMB_HASH_SIZEMB);
739 symb_table = (const int32_t *)
740 _NL_CURRENT (LC_COLLATE,
741 _NL_COLLATE_SYMB_TABLEMB);
742 extra = (const unsigned char *)
743 _NL_CURRENT (LC_COLLATE,
744 _NL_COLLATE_SYMB_EXTRAMB);
745
746 /* Locate the character in the hashing
f15ce4d8 747 table. */
f3e29a1a
UD
748 hash = elem_hash (str, c1);
749
750 idx = 0;
751 elem = hash % table_size;
7d4722e3 752 if (symb_table[2 * elem] != 0)
f3e29a1a 753 {
7d4722e3
UD
754 second = hash % (table_size - 2) + 1;
755
756 do
f3e29a1a 757 {
7d4722e3
UD
758 /* First compare the hashing value. */
759 if (symb_table[2 * elem] == hash
760 && (c1
761 == extra[symb_table[2 * elem + 1]])
762 && memcmp (str,
763 &extra[symb_table[2 * elem + 1]
764 + 1], c1) == 0)
765 {
766 /* Yep, this is the entry. */
767 idx = symb_table[2 * elem + 1];
768 idx += 1 + extra[idx];
769 break;
770 }
771
772 /* Next entry. */
773 elem += second;
f3e29a1a 774 }
7d4722e3 775 while (symb_table[2 * elem] != 0);
f3e29a1a
UD
776 }
777
778 if (symb_table[2 * elem] != 0)
779 {
780 /* Compare the byte sequence but only if
781 this is not part of a range. */
059bf913 782# if WIDE_CHAR_VERSION
f3e29a1a
UD
783 int32_t *wextra;
784
785 idx += 1 + extra[idx];
786 /* Adjust for the alignment. */
787 idx = (idx + 3) & ~4;
788
789 wextra = (int32_t *) &extra[idx + 4];
790# endif
791 /* Get the collation sequence value. */
792 is_seqval = 1;
059bf913 793# if WIDE_CHAR_VERSION
f3e29a1a
UD
794 cend = wextra[1 + wextra[idx]];
795# else
796 /* Adjust for the alignment. */
797 idx += 1 + extra[idx];
798 idx = (idx + 3) & ~4;
799 cend = *((int32_t *) &extra[idx]);
800# endif
801 }
802 else if (symb_table[2 * elem] != 0 && c1 == 1)
803 {
804 cend = str[0];
805 c = *p++;
806 }
807 else
808 return FNM_NOMATCH;
809 }
810# undef str
811 }
812 else
813 {
814 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
815 cend = *p++;
816 if (cend == L('\0'))
817 return FNM_NOMATCH;
818 cend = FOLD (cend);
819 }
acb5ee2e
UD
820
821 /* XXX It is not entirely clear to me how to handle
822 characters which are not mentioned in the
823 collation specification. */
824 if (
059bf913 825# if WIDE_CHAR_VERSION
f3e29a1a 826 lcollseq == 0xffffffff ||
acb5ee2e 827# endif
f3e29a1a 828 lcollseq <= fcollseq)
acb5ee2e
UD
829 {
830 /* We have to look at the upper bound. */
f3e29a1a 831 uint32_t hcollseq;
acb5ee2e 832
f3e29a1a
UD
833 if (is_seqval)
834 hcollseq = cend;
835 else
acb5ee2e 836 {
059bf913 837# if WIDE_CHAR_VERSION
4c7d276e 838 hcollseq =
25337753 839 __collseq_table_lookup (collseq, cend);
4c7d276e 840 if (hcollseq == ~((uint32_t) 0))
acb5ee2e 841 {
4c7d276e
UD
842 /* Hum, no information about the upper
843 bound. The matching succeeds if the
844 lower bound is matched exactly. */
845 if (lcollseq != fcollseq)
846 goto range_not_matched;
04ea3b0f 847
4c7d276e 848 goto matched;
acb5ee2e 849 }
acb5ee2e 850# else
f3e29a1a 851 hcollseq = collseq[cend];
acb5ee2e 852# endif
f3e29a1a 853 }
acb5ee2e 854
f3e29a1a 855 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
9de4e203
UD
856 goto matched;
857 }
059bf913 858# if WIDE_CHAR_VERSION
acb5ee2e
UD
859 range_not_matched:
860# endif
861#else
862 /* We use a boring value comparison of the character
863 values. This is better than comparing using
864 `strcoll' since the latter would have surprising
865 and sometimes fatal consequences. */
866 UCHAR cend = *p++;
867
868 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
869 cend = *p++;
870 if (cend == L('\0'))
871 return FNM_NOMATCH;
872
873 /* It is a range. */
bd8fbd57 874 if (cold <= fn && fn <= cend)
acb5ee2e
UD
875 goto matched;
876#endif
1fc82a56
UD
877
878 c = *p++;
879 }
880 }
881
882 if (c == L(']'))
883 break;
884 }
885
886 if (!not)
887 return FNM_NOMATCH;
888 break;
889
890 matched:
891 /* Skip the rest of the [...] that already matched. */
b3a9f56b 892 while ((c = *p++) != L (']'))
1fc82a56
UD
893 {
894 if (c == L('\0'))
895 /* [... (unterminated) loses. */
896 return FNM_NOMATCH;
897
1fc82a56
UD
898 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
899 {
900 if (*p == L('\0'))
901 return FNM_NOMATCH;
902 /* XXX 1003.2d11 is unclear if this is right. */
903 ++p;
904 }
905 else if (c == L('[') && *p == L(':'))
906 {
f3e29a1a
UD
907 int c1 = 0;
908 const CHAR *startp = p;
909
910 while (1)
911 {
912 c = *++p;
913 if (++c1 == CHAR_CLASS_MAX_LENGTH)
914 return FNM_NOMATCH;
915
916 if (*p == L(':') && p[1] == L(']'))
917 break;
918
919 if (c < L('a') || c >= L('z'))
920 {
b3a9f56b
AS
921 p = startp - 2;
922 break;
f3e29a1a
UD
923 }
924 }
1fc82a56 925 p += 2;
f3e29a1a
UD
926 }
927 else if (c == L('[') && *p == L('='))
928 {
929 c = *++p;
930 if (c == L('\0'))
931 return FNM_NOMATCH;
932 c = *++p;
933 if (c != L('=') || p[1] != L(']'))
934 return FNM_NOMATCH;
935 p += 2;
f3e29a1a
UD
936 }
937 else if (c == L('[') && *p == L('.'))
938 {
f3e29a1a
UD
939 while (1)
940 {
941 c = *++p;
4a28f4d5 942 if (c == L('\0'))
f3e29a1a
UD
943 return FNM_NOMATCH;
944
4a28f4d5 945 if (c == L('.') && p[1] == L(']'))
f3e29a1a
UD
946 break;
947 }
948 p += 2;
1fc82a56
UD
949 }
950 }
951 if (not)
952 return FNM_NOMATCH;
953 }
954 break;
955
955994e1
UD
956 case L('+'):
957 case L('@'):
958 case L('!'):
959 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
960 {
f15ce4d8
UD
961 int res = EXT (c, p, n, string_end, no_leading_period, flags,
962 alloca_used);
955994e1
UD
963 if (res != -1)
964 return res;
965 }
966 goto normal_match;
967
968 case L('/'):
969 if (NO_LEADING_PERIOD (flags))
970 {
eb64f8cb 971 if (n == string_end || c != (UCHAR) *n)
955994e1
UD
972 return FNM_NOMATCH;
973
974 new_no_leading_period = 1;
975 break;
976 }
977 /* FALLTHROUGH */
1fc82a56 978 default:
955994e1
UD
979 normal_match:
980 if (n == string_end || c != FOLD ((UCHAR) *n))
1fc82a56
UD
981 return FNM_NOMATCH;
982 }
983
955994e1 984 no_leading_period = new_no_leading_period;
1fc82a56
UD
985 ++n;
986 }
987
955994e1 988 if (n == string_end)
1fc82a56
UD
989 return 0;
990
955994e1 991 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
1fc82a56
UD
992 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
993 return 0;
994
995 return FNM_NOMATCH;
ea6eb383 996}
1fc82a56 997
955994e1
UD
998
999static const CHAR *
955994e1
UD
1000END (const CHAR *pattern)
1001{
1002 const CHAR *p = pattern;
1003
1004 while (1)
1005 if (*++p == L('\0'))
1006 /* This is an invalid pattern. */
1007 return pattern;
1008 else if (*p == L('['))
1009 {
1010 /* Handle brackets special. */
1011 if (posixly_correct == 0)
1012 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1013
1014 /* Skip the not sign. We have to recognize it because of a possibly
1015 following ']'. */
1016 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1017 ++p;
1018 /* A leading ']' is recognized as such. */
1019 if (*p == L(']'))
1020 ++p;
1021 /* Skip over all characters of the list. */
1022 while (*p != L(']'))
1023 if (*p++ == L('\0'))
1024 /* This is no valid pattern. */
1025 return pattern;
1026 }
1027 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1028 || *p == L('!')) && p[1] == L('('))
c2c6d39f
PP
1029 {
1030 p = END (p + 1);
1031 if (*p == L('\0'))
1032 /* This is an invalid pattern. */
1033 return pattern;
1034 }
955994e1
UD
1035 else if (*p == L(')'))
1036 break;
1037
1038 return p + 1;
1039}
1040
1041
1042static int
955994e1 1043EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
f15ce4d8 1044 int no_leading_period, int flags, size_t alloca_used)
955994e1
UD
1045{
1046 const CHAR *startp;
1047 int level;
1048 struct patternlist
1049 {
1050 struct patternlist *next;
f15ce4d8 1051 CHAR malloced;
955994e1
UD
1052 CHAR str[0];
1053 } *list = NULL;
1054 struct patternlist **lastp = &list;
821a6bb4 1055 size_t pattern_len = STRLEN (pattern);
f15ce4d8 1056 int any_malloced = 0;
955994e1
UD
1057 const CHAR *p;
1058 const CHAR *rs;
f15ce4d8 1059 int retval = 0;
955994e1
UD
1060
1061 /* Parse the pattern. Store the individual parts in the list. */
1062 level = 0;
1063 for (startp = p = pattern + 1; level >= 0; ++p)
1064 if (*p == L('\0'))
f15ce4d8
UD
1065 {
1066 /* This is an invalid pattern. */
1067 retval = -1;
1068 goto out;
1069 }
955994e1
UD
1070 else if (*p == L('['))
1071 {
1072 /* Handle brackets special. */
1073 if (posixly_correct == 0)
1074 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1075
1076 /* Skip the not sign. We have to recognize it because of a possibly
1077 following ']'. */
1078 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1079 ++p;
1080 /* A leading ']' is recognized as such. */
1081 if (*p == L(']'))
1082 ++p;
1083 /* Skip over all characters of the list. */
1084 while (*p != L(']'))
1085 if (*p++ == L('\0'))
f15ce4d8
UD
1086 {
1087 /* This is no valid pattern. */
1088 retval = -1;
1089 goto out;
1090 }
955994e1
UD
1091 }
1092 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1093 || *p == L('!')) && p[1] == L('('))
1094 /* Remember the nesting level. */
1095 ++level;
1096 else if (*p == L(')'))
1097 {
1098 if (level-- == 0)
1099 {
1100 /* This means we found the end of the pattern. */
1101#define NEW_PATTERN \
821a6bb4 1102 struct patternlist *newp; \
f15ce4d8
UD
1103 size_t slen = (opt == L('?') || opt == L('@') \
1104 ? pattern_len : (p - startp + 1)); \
1105 slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \
1106 int malloced = ! __libc_use_alloca (alloca_used + slen); \
1107 if (__builtin_expect (malloced, 0)) \
f15ce4d8
UD
1108 { \
1109 newp = malloc (slen); \
1110 if (newp == NULL) \
1111 { \
1112 retval = -2; \
1113 goto out; \
1114 } \
3540d66b 1115 any_malloced = 1; \
f15ce4d8 1116 } \
3540d66b
AS
1117 else \
1118 newp = alloca_account (slen, alloca_used); \
955994e1 1119 newp->next = NULL; \
f15ce4d8
UD
1120 newp->malloced = malloced; \
1121 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
955994e1
UD
1122 *lastp = newp; \
1123 lastp = &newp->next
1124 NEW_PATTERN;
1125 }
1126 }
1127 else if (*p == L('|'))
1128 {
1129 if (level == 0)
1130 {
1131 NEW_PATTERN;
1132 startp = p + 1;
1133 }
1134 }
1135 assert (list != NULL);
1136 assert (p[-1] == L(')'));
817a51e2 1137#undef NEW_PATTERN
955994e1
UD
1138
1139 switch (opt)
1140 {
1141 case L('*'):
f15ce4d8
UD
1142 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1143 alloca_used) == 0)
1144 goto success;
955994e1
UD
1145 /* FALLTHROUGH */
1146
1147 case L('+'):
1148 do
1149 {
1150 for (rs = string; rs <= string_end; ++rs)
1151 /* First match the prefix with the current pattern with the
1152 current pattern. */
1153 if (FCT (list->str, string, rs, no_leading_period,
9700b039 1154 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
f15ce4d8 1155 NULL, alloca_used) == 0
955994e1
UD
1156 /* This was successful. Now match the rest with the rest
1157 of the pattern. */
1158 && (FCT (p, rs, string_end,
1159 rs == string
1160 ? no_leading_period
1161 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1162 flags & FNM_FILE_NAME
f15ce4d8 1163 ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
955994e1
UD
1164 /* This didn't work. Try the whole pattern. */
1165 || (rs != string
1166 && FCT (pattern - 1, rs, string_end,
1167 rs == string
1168 ? no_leading_period
1169 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1170 ? 1 : 0),
1171 flags & FNM_FILE_NAME
f15ce4d8
UD
1172 ? flags : flags & ~FNM_PERIOD, NULL,
1173 alloca_used) == 0)))
955994e1 1174 /* It worked. Signal success. */
f15ce4d8 1175 goto success;
955994e1
UD
1176 }
1177 while ((list = list->next) != NULL);
1178
1179 /* None of the patterns lead to a match. */
f15ce4d8
UD
1180 retval = FNM_NOMATCH;
1181 break;
955994e1
UD
1182
1183 case L('?'):
f15ce4d8
UD
1184 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1185 alloca_used) == 0)
1186 goto success;
955994e1
UD
1187 /* FALLTHROUGH */
1188
1189 case L('@'):
1190 do
821a6bb4
UD
1191 /* I cannot believe it but `strcat' is actually acceptable
1192 here. Match the entire string with the prefix from the
1193 pattern list and the rest of the pattern following the
1194 pattern list. */
1195 if (FCT (STRCAT (list->str, p), string, string_end,
1196 no_leading_period,
9700b039 1197 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
f15ce4d8 1198 NULL, alloca_used) == 0)
821a6bb4 1199 /* It worked. Signal success. */
f15ce4d8 1200 goto success;
955994e1
UD
1201 while ((list = list->next) != NULL);
1202
1203 /* None of the patterns lead to a match. */
f15ce4d8
UD
1204 retval = FNM_NOMATCH;
1205 break;
955994e1
UD
1206
1207 case L('!'):
1208 for (rs = string; rs <= string_end; ++rs)
1209 {
1210 struct patternlist *runp;
1211
1212 for (runp = list; runp != NULL; runp = runp->next)
1213 if (FCT (runp->str, string, rs, no_leading_period,
9700b039 1214 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
f15ce4d8 1215 NULL, alloca_used) == 0)
955994e1
UD
1216 break;
1217
1218 /* If none of the patterns matched see whether the rest does. */
1219 if (runp == NULL
1220 && (FCT (p, rs, string_end,
1221 rs == string
1222 ? no_leading_period
1223 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
9700b039 1224 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
f15ce4d8 1225 NULL, alloca_used) == 0))
955994e1 1226 /* This is successful. */
f15ce4d8 1227 goto success;
955994e1
UD
1228 }
1229
1230 /* None of the patterns together with the rest of the pattern
1231 lead to a match. */
f15ce4d8
UD
1232 retval = FNM_NOMATCH;
1233 break;
955994e1
UD
1234
1235 default:
1236 assert (! "Invalid extended matching operator");
f15ce4d8 1237 retval = -1;
955994e1
UD
1238 break;
1239 }
1240
f15ce4d8
UD
1241 success:
1242 out:
1243 if (any_malloced)
1244 while (list != NULL)
1245 {
1246 struct patternlist *old = list;
1247 list = list->next;
1248 if (old->malloced)
1249 free (old);
1250 }
1251
1252 return retval;
955994e1
UD
1253}
1254
1255
1fc82a56
UD
/* Drop the template parameters so that this file can be included again
   with a different set of definitions.  */

/* Character types and literal/folding helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L
#undef FOLD
#undef BTOWC
#undef WIDE_CHAR_VERSION

/* Names of the functions and the structure defined above.  */
#undef FCT
#undef EXT
#undef END
#undef STRUCT

/* String and collation helpers.  */
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef FINDIDX