]>
Commit | Line | Data |
---|---|---|
1 | /* Copyright (C) 1991-2025 Free Software Foundation, Inc. | |
2 | This file is part of the GNU C Library. | |
3 | ||
4 | The GNU C Library is free software; you can redistribute it and/or | |
5 | modify it under the terms of the GNU Lesser General Public | |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
8 | ||
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public | |
15 | License along with the GNU C Library; if not, see | |
16 | <https://www.gnu.org/licenses/>. */ | |
17 | ||
18 | #ifdef _LIBC | |
19 | # include <stdint.h> | |
20 | #endif | |
21 | ||
22 | struct STRUCT | |
23 | { | |
24 | const CHAR *pattern; | |
25 | const CHAR *string; | |
26 | bool no_leading_period; | |
27 | }; | |
28 | ||
29 | /* Match STRING against the file name pattern PATTERN, returning zero if | |
30 | it matches, nonzero if not. */ | |
31 | static int FCT (const CHAR *pattern, const CHAR *string, | |
32 | const CHAR *string_end, bool no_leading_period, int flags, | |
33 | struct STRUCT *ends); | |
34 | static int EXT (INT opt, const CHAR *pattern, const CHAR *string, | |
35 | const CHAR *string_end, bool no_leading_period, int flags); | |
36 | static const CHAR *END (const CHAR *patternp); | |
37 | ||
38 | static int | |
39 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, | |
40 | bool no_leading_period, int flags, struct STRUCT *ends) | |
41 | { | |
42 | const CHAR *p = pattern, *n = string; | |
43 | UCHAR c; | |
44 | #ifdef _LIBC | |
45 | # if WIDE_CHAR_VERSION | |
46 | const char *collseq = (const char *) | |
47 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); | |
48 | # else | |
49 | const UCHAR *collseq = (const UCHAR *) | |
50 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); | |
51 | # endif | |
52 | #endif | |
53 | ||
54 | while ((c = *p++) != L_('\0')) | |
55 | { | |
56 | bool new_no_leading_period = false; | |
57 | c = FOLD (c); | |
58 | ||
59 | switch (c) | |
60 | { | |
61 | case L_('?'): | |
62 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') | |
63 | { | |
64 | int res = EXT (c, p, n, string_end, no_leading_period, flags); | |
65 | if (res != -1) | |
66 | return res; | |
67 | } | |
68 | ||
69 | if (n == string_end) | |
70 | return FNM_NOMATCH; | |
71 | else if (*n == L_('/') && (flags & FNM_FILE_NAME)) | |
72 | return FNM_NOMATCH; | |
73 | else if (*n == L_('.') && no_leading_period) | |
74 | return FNM_NOMATCH; | |
75 | break; | |
76 | ||
77 | case L_('\\'): | |
78 | if (!(flags & FNM_NOESCAPE)) | |
79 | { | |
80 | c = *p++; | |
81 | if (c == L_('\0')) | |
82 | /* Trailing \ loses. */ | |
83 | return FNM_NOMATCH; | |
84 | c = FOLD (c); | |
85 | } | |
86 | if (n == string_end || FOLD ((UCHAR) *n) != c) | |
87 | return FNM_NOMATCH; | |
88 | break; | |
89 | ||
90 | case L_('*'): | |
91 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') | |
92 | { | |
93 | int res = EXT (c, p, n, string_end, no_leading_period, flags); | |
94 | if (res != -1) | |
95 | return res; | |
96 | } | |
97 | else if (ends != NULL) | |
98 | { | |
99 | ends->pattern = p - 1; | |
100 | ends->string = n; | |
101 | ends->no_leading_period = no_leading_period; | |
102 | return 0; | |
103 | } | |
104 | ||
105 | if (n != string_end && *n == L_('.') && no_leading_period) | |
106 | return FNM_NOMATCH; | |
107 | ||
108 | for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) | |
109 | { | |
110 | if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) | |
111 | { | |
112 | const CHAR *endp = END (p); | |
113 | if (endp != p) | |
114 | { | |
115 | /* This is a pattern. Skip over it. */ | |
116 | p = endp; | |
117 | continue; | |
118 | } | |
119 | } | |
120 | ||
121 | if (c == L_('?')) | |
122 | { | |
123 | /* A ? needs to match one character. */ | |
124 | if (n == string_end) | |
125 | /* There isn't another character; no match. */ | |
126 | return FNM_NOMATCH; | |
127 | else if (*n == L_('/') | |
128 | && __glibc_unlikely (flags & FNM_FILE_NAME)) | |
129 | /* A slash does not match a wildcard under | |
130 | FNM_FILE_NAME. */ | |
131 | return FNM_NOMATCH; | |
132 | else | |
133 | /* One character of the string is consumed in matching | |
134 | this ? wildcard, so *??? won't match if there are | |
135 | less than three characters. */ | |
136 | ++n; | |
137 | } | |
138 | } | |
139 | ||
140 | if (c == L_('\0')) | |
141 | /* The wildcard(s) is/are the last element of the pattern. | |
142 | If the name is a file name and contains another slash | |
143 | this means it cannot match, unless the FNM_LEADING_DIR | |
144 | flag is set. */ | |
145 | { | |
146 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; | |
147 | ||
148 | if (flags & FNM_FILE_NAME) | |
149 | { | |
150 | if (flags & FNM_LEADING_DIR) | |
151 | result = 0; | |
152 | else | |
153 | { | |
154 | if (MEMCHR (n, L_('/'), string_end - n) == NULL) | |
155 | result = 0; | |
156 | } | |
157 | } | |
158 | ||
159 | return result; | |
160 | } | |
161 | else | |
162 | { | |
163 | const CHAR *endp; | |
164 | struct STRUCT end; | |
165 | ||
166 | end.pattern = NULL; | |
167 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), | |
168 | string_end - n); | |
169 | if (endp == NULL) | |
170 | endp = string_end; | |
171 | ||
172 | if (c == L_('[') | |
173 | || (__glibc_unlikely (flags & FNM_EXTMATCH) | |
174 | && (c == L_('@') || c == L_('+') || c == L_('!')) | |
175 | && *p == L_('('))) | |
176 | { | |
177 | int flags2 = ((flags & FNM_FILE_NAME) | |
178 | ? flags : (flags & ~FNM_PERIOD)); | |
179 | ||
180 | for (--p; n < endp; ++n, no_leading_period = false) | |
181 | if (FCT (p, n, string_end, no_leading_period, flags2, | |
182 | &end) == 0) | |
183 | goto found; | |
184 | } | |
185 | else if (c == L_('/') && (flags & FNM_FILE_NAME)) | |
186 | { | |
187 | while (n < string_end && *n != L_('/')) | |
188 | ++n; | |
189 | if (n < string_end && *n == L_('/') | |
190 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, | |
191 | NULL) == 0)) | |
192 | return 0; | |
193 | } | |
194 | else | |
195 | { | |
196 | int flags2 = ((flags & FNM_FILE_NAME) | |
197 | ? flags : (flags & ~FNM_PERIOD)); | |
198 | ||
199 | if (c == L_('\\') && !(flags & FNM_NOESCAPE)) | |
200 | c = *p; | |
201 | c = FOLD (c); | |
202 | for (--p; n < endp; ++n, no_leading_period = false) | |
203 | if (FOLD ((UCHAR) *n) == c | |
204 | && (FCT (p, n, string_end, no_leading_period, flags2, | |
205 | &end) == 0)) | |
206 | { | |
207 | found: | |
208 | if (end.pattern == NULL) | |
209 | return 0; | |
210 | break; | |
211 | } | |
212 | if (end.pattern != NULL) | |
213 | { | |
214 | p = end.pattern; | |
215 | n = end.string; | |
216 | no_leading_period = end.no_leading_period; | |
217 | continue; | |
218 | } | |
219 | } | |
220 | } | |
221 | ||
222 | /* If we come here no match is possible with the wildcard. */ | |
223 | return FNM_NOMATCH; | |
224 | ||
225 | case L_('['): | |
226 | { | |
227 | /* Nonzero if the sense of the character class is inverted. */ | |
228 | const CHAR *p_init = p; | |
229 | const CHAR *n_init = n; | |
230 | bool not; | |
231 | CHAR cold; | |
232 | UCHAR fn; | |
233 | ||
234 | if (posixly_correct == 0) | |
235 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
236 | ||
237 | if (n == string_end) | |
238 | return FNM_NOMATCH; | |
239 | ||
240 | if (*n == L_('.') && no_leading_period) | |
241 | return FNM_NOMATCH; | |
242 | ||
243 | if (*n == L_('/') && (flags & FNM_FILE_NAME)) | |
244 | /* '/' cannot be matched. */ | |
245 | return FNM_NOMATCH; | |
246 | ||
247 | not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); | |
248 | if (not) | |
249 | ++p; | |
250 | ||
251 | fn = FOLD ((UCHAR) *n); | |
252 | ||
253 | c = *p++; | |
254 | for (;;) | |
255 | { | |
256 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) | |
257 | { | |
258 | if (*p == L_('\0')) | |
259 | return FNM_NOMATCH; | |
260 | c = FOLD ((UCHAR) *p); | |
261 | ++p; | |
262 | ||
263 | goto normal_bracket; | |
264 | } | |
265 | else if (c == L_('[') && *p == L_(':')) | |
266 | { | |
267 | /* Leave room for the null. */ | |
268 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; | |
269 | size_t c1 = 0; | |
270 | wctype_t wt; | |
271 | const CHAR *startp = p; | |
272 | ||
273 | for (;;) | |
274 | { | |
275 | if (c1 == CHAR_CLASS_MAX_LENGTH) | |
276 | /* The name is too long and therefore the pattern | |
277 | is ill-formed. */ | |
278 | return FNM_NOMATCH; | |
279 | ||
280 | c = *++p; | |
281 | if (c == L_(':') && p[1] == L_(']')) | |
282 | { | |
283 | p += 2; | |
284 | break; | |
285 | } | |
286 | if (c < L_('a') || c >= L_('z')) | |
287 | { | |
288 | /* This cannot possibly be a character class name. | |
289 | Match it as a normal range. */ | |
290 | p = startp; | |
291 | c = L_('['); | |
292 | goto normal_bracket; | |
293 | } | |
294 | str[c1++] = c; | |
295 | } | |
296 | str[c1] = L_('\0'); | |
297 | ||
298 | wt = IS_CHAR_CLASS (str); | |
299 | if (wt == 0) | |
300 | /* Invalid character class name. */ | |
301 | return FNM_NOMATCH; | |
302 | ||
303 | #if defined _LIBC && ! WIDE_CHAR_VERSION | |
304 | /* The following code is glibc specific but does | |
305 | there a good job in speeding up the code since | |
306 | we can avoid the btowc() call. */ | |
307 | if (_ISCTYPE ((UCHAR) *n, wt)) | |
308 | goto matched; | |
309 | #else | |
310 | if (iswctype (BTOWC ((UCHAR) *n), wt)) | |
311 | goto matched; | |
312 | #endif | |
313 | c = *p++; | |
314 | } | |
315 | #ifdef _LIBC | |
316 | else if (c == L_('[') && *p == L_('=')) | |
317 | { | |
318 | /* It's important that STR be a scalar variable rather | |
319 | than a one-element array, because GCC (at least 4.9.2 | |
320 | -O2 on x86-64) can be confused by the array and | |
321 | diagnose a "used initialized" in a dead branch in the | |
322 | findidx function. */ | |
323 | UCHAR str; | |
324 | uint32_t nrules = | |
325 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | |
326 | const CHAR *startp = p; | |
327 | ||
328 | c = *++p; | |
329 | if (c == L_('\0')) | |
330 | { | |
331 | p = startp; | |
332 | c = L_('['); | |
333 | goto normal_bracket; | |
334 | } | |
335 | str = c; | |
336 | ||
337 | c = *++p; | |
338 | if (c != L_('=') || p[1] != L_(']')) | |
339 | { | |
340 | p = startp; | |
341 | c = L_('['); | |
342 | goto normal_bracket; | |
343 | } | |
344 | p += 2; | |
345 | ||
346 | if (nrules == 0) | |
347 | { | |
348 | if ((UCHAR) *n == str) | |
349 | goto matched; | |
350 | } | |
351 | else | |
352 | { | |
353 | const int32_t *table; | |
354 | # if WIDE_CHAR_VERSION | |
355 | const int32_t *weights; | |
356 | const wint_t *extra; | |
357 | # else | |
358 | const unsigned char *weights; | |
359 | const unsigned char *extra; | |
360 | # endif | |
361 | const int32_t *indirect; | |
362 | int32_t idx; | |
363 | const UCHAR *cp = (const UCHAR *) &str; | |
364 | ||
365 | # if WIDE_CHAR_VERSION | |
366 | table = (const int32_t *) | |
367 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); | |
368 | weights = (const int32_t *) | |
369 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); | |
370 | extra = (const wint_t *) | |
371 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); | |
372 | indirect = (const int32_t *) | |
373 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); | |
374 | # else | |
375 | table = (const int32_t *) | |
376 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); | |
377 | weights = (const unsigned char *) | |
378 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); | |
379 | extra = (const unsigned char *) | |
380 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); | |
381 | indirect = (const int32_t *) | |
382 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); | |
383 | # endif | |
384 | ||
385 | idx = FINDIDX (table, indirect, extra, &cp, 1); | |
386 | if (idx != 0) | |
387 | { | |
388 | /* We found a table entry. Now see whether the | |
389 | character we are currently at has the same | |
390 | equivalence class value. */ | |
391 | int len = weights[idx & 0xffffff]; | |
392 | int32_t idx2; | |
393 | const UCHAR *np = (const UCHAR *) n; | |
394 | ||
395 | idx2 = FINDIDX (table, indirect, extra, | |
396 | &np, string_end - n); | |
397 | if (idx2 != 0 | |
398 | && (idx >> 24) == (idx2 >> 24) | |
399 | && len == weights[idx2 & 0xffffff]) | |
400 | { | |
401 | int cnt = 0; | |
402 | ||
403 | idx &= 0xffffff; | |
404 | idx2 &= 0xffffff; | |
405 | ||
406 | while (cnt < len | |
407 | && (weights[idx + 1 + cnt] | |
408 | == weights[idx2 + 1 + cnt])) | |
409 | ++cnt; | |
410 | ||
411 | if (cnt == len) | |
412 | goto matched; | |
413 | } | |
414 | } | |
415 | } | |
416 | ||
417 | c = *p++; | |
418 | } | |
419 | #endif | |
420 | else if (c == L_('\0')) | |
421 | { | |
422 | /* [ unterminated, treat as normal character. */ | |
423 | p = p_init; | |
424 | n = n_init; | |
425 | c = L_('['); | |
426 | goto normal_match; | |
427 | } | |
428 | else | |
429 | { | |
430 | bool is_range = false; | |
431 | ||
432 | #ifdef _LIBC | |
433 | bool is_seqval = false; | |
434 | ||
435 | if (c == L_('[') && *p == L_('.')) | |
436 | { | |
437 | uint32_t nrules = | |
438 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | |
439 | const CHAR *startp = p; | |
440 | size_t c1 = 0; | |
441 | ||
442 | while (1) | |
443 | { | |
444 | c = *++p; | |
445 | if (c == L_('.') && p[1] == L_(']')) | |
446 | { | |
447 | p += 2; | |
448 | break; | |
449 | } | |
450 | if (c == '\0') | |
451 | return FNM_NOMATCH; | |
452 | ++c1; | |
453 | } | |
454 | ||
455 | /* We have to handling the symbols differently in | |
456 | ranges since then the collation sequence is | |
457 | important. */ | |
458 | is_range = *p == L_('-') && p[1] != L_('\0'); | |
459 | ||
460 | if (nrules == 0) | |
461 | { | |
462 | /* There are no names defined in the collation | |
463 | data. Therefore we only accept the trivial | |
464 | names consisting of the character itself. */ | |
465 | if (c1 != 1) | |
466 | return FNM_NOMATCH; | |
467 | ||
468 | if (!is_range && *n == startp[1]) | |
469 | goto matched; | |
470 | ||
471 | cold = startp[1]; | |
472 | c = *p++; | |
473 | } | |
474 | else | |
475 | { | |
476 | int32_t table_size; | |
477 | const int32_t *symb_table; | |
478 | const unsigned char *extra; | |
479 | int32_t idx; | |
480 | int32_t elem; | |
481 | # if WIDE_CHAR_VERSION | |
482 | CHAR *wextra; | |
483 | # endif | |
484 | ||
485 | table_size = | |
486 | _NL_CURRENT_WORD (LC_COLLATE, | |
487 | _NL_COLLATE_SYMB_HASH_SIZEMB); | |
488 | symb_table = (const int32_t *) | |
489 | _NL_CURRENT (LC_COLLATE, | |
490 | _NL_COLLATE_SYMB_TABLEMB); | |
491 | extra = (const unsigned char *) | |
492 | _NL_CURRENT (LC_COLLATE, | |
493 | _NL_COLLATE_SYMB_EXTRAMB); | |
494 | ||
495 | for (elem = 0; elem < table_size; elem++) | |
496 | if (symb_table[2 * elem] != 0) | |
497 | { | |
498 | idx = symb_table[2 * elem + 1]; | |
499 | /* Skip the name of collating element. */ | |
500 | idx += 1 + extra[idx]; | |
501 | # if WIDE_CHAR_VERSION | |
502 | /* Skip the byte sequence of the | |
503 | collating element. */ | |
504 | idx += 1 + extra[idx]; | |
505 | /* Adjust for the alignment. */ | |
506 | idx = (idx + 3) & ~3; | |
507 | ||
508 | wextra = (CHAR *) &extra[idx + 4]; | |
509 | ||
510 | if (/* Compare the length of the sequence. */ | |
511 | c1 == wextra[0] | |
512 | /* Compare the wide char sequence. */ | |
513 | && (__wmemcmp (startp + 1, &wextra[1], | |
514 | c1) | |
515 | == 0)) | |
516 | /* Yep, this is the entry. */ | |
517 | break; | |
518 | # else | |
519 | if (/* Compare the length of the sequence. */ | |
520 | c1 == extra[idx] | |
521 | /* Compare the byte sequence. */ | |
522 | && memcmp (startp + 1, | |
523 | &extra[idx + 1], c1) == 0) | |
524 | /* Yep, this is the entry. */ | |
525 | break; | |
526 | # endif | |
527 | } | |
528 | ||
529 | if (elem < table_size) | |
530 | { | |
531 | /* Compare the byte sequence but only if | |
532 | this is not part of a range. */ | |
533 | ||
534 | /* The compiler might warn that idx may be | |
535 | used uninitialized, however it will be | |
536 | reached iff elem < table_size which means | |
537 | that it was properly set in the loop | |
538 | above. */ | |
539 | DIAG_PUSH_NEEDS_COMMENT; | |
540 | DIAG_IGNORE_NEEDS_COMMENT (16, "-Wmaybe-uninitialized"); | |
541 | if (! is_range | |
542 | ||
543 | # if WIDE_CHAR_VERSION | |
544 | && __wmemcmp (n, &wextra[1], c1) == 0 | |
545 | # else | |
546 | && memcmp (n, &extra[idx + 1], c1) == 0 | |
547 | # endif | |
548 | ) | |
549 | { | |
550 | n += c1 - 1; | |
551 | goto matched; | |
552 | } | |
553 | DIAG_POP_NEEDS_COMMENT; | |
554 | ||
555 | /* Get the collation sequence value. */ | |
556 | is_seqval = true; | |
557 | # if WIDE_CHAR_VERSION | |
558 | /* The compile might warn that wextra may be | |
559 | used uninitialized and similar to 'idx' | |
560 | above it will be properly set by the loop. | |
561 | */ | |
562 | DIAG_PUSH_NEEDS_COMMENT; | |
563 | DIAG_IGNORE_NEEDS_COMMENT (16, "-Wmaybe-uninitialized"); | |
564 | cold = wextra[1 + wextra[0]]; | |
565 | DIAG_POP_NEEDS_COMMENT; | |
566 | # else | |
567 | idx += 1 + extra[idx]; | |
568 | /* Adjust for the alignment. */ | |
569 | idx = (idx + 3) & ~3; | |
570 | cold = *((int32_t *) &extra[idx]); | |
571 | # endif | |
572 | ||
573 | c = *p++; | |
574 | } | |
575 | else if (c1 == 1) | |
576 | { | |
577 | /* No valid character. Match it as a | |
578 | single byte. */ | |
579 | if (!is_range && *n == startp[1]) | |
580 | goto matched; | |
581 | ||
582 | cold = startp[1]; | |
583 | c = *p++; | |
584 | } | |
585 | else | |
586 | return FNM_NOMATCH; | |
587 | } | |
588 | } | |
589 | else | |
590 | #endif | |
591 | { | |
592 | c = FOLD (c); | |
593 | normal_bracket: | |
594 | ||
595 | /* We have to handling the symbols differently in | |
596 | ranges since then the collation sequence is | |
597 | important. */ | |
598 | is_range = (*p == L_('-') && p[1] != L_('\0') | |
599 | && p[1] != L_(']')); | |
600 | ||
601 | if (!is_range && c == fn) | |
602 | goto matched; | |
603 | ||
604 | #if _LIBC | |
605 | /* This is needed if we goto normal_bracket; from | |
606 | outside of is_seqval's scope. */ | |
607 | is_seqval = false; | |
608 | #endif | |
609 | cold = c; | |
610 | c = *p++; | |
611 | } | |
612 | ||
613 | if (c == L_('-') && *p != L_(']')) | |
614 | { | |
615 | #if _LIBC | |
616 | /* We have to find the collation sequence | |
617 | value for C. Collation sequence is nothing | |
618 | we can regularly access. The sequence | |
619 | value is defined by the order in which the | |
620 | definitions of the collation values for the | |
621 | various characters appear in the source | |
622 | file. A strange concept, nowhere | |
623 | documented. */ | |
624 | uint32_t fcollseq; | |
625 | uint32_t lcollseq; | |
626 | UCHAR cend = *p++; | |
627 | ||
628 | # if WIDE_CHAR_VERSION | |
629 | /* Search in the 'names' array for the characters. */ | |
630 | fcollseq = __collseq_table_lookup (collseq, fn); | |
631 | if (fcollseq == ~((uint32_t) 0)) | |
632 | /* XXX We don't know anything about the character | |
633 | we are supposed to match. This means we are | |
634 | failing. */ | |
635 | goto range_not_matched; | |
636 | ||
637 | if (is_seqval) | |
638 | lcollseq = cold; | |
639 | else | |
640 | lcollseq = __collseq_table_lookup (collseq, cold); | |
641 | # else | |
642 | fcollseq = collseq[fn]; | |
643 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; | |
644 | # endif | |
645 | ||
646 | is_seqval = false; | |
647 | if (cend == L_('[') && *p == L_('.')) | |
648 | { | |
649 | uint32_t nrules = | |
650 | _NL_CURRENT_WORD (LC_COLLATE, | |
651 | _NL_COLLATE_NRULES); | |
652 | const CHAR *startp = p; | |
653 | size_t c1 = 0; | |
654 | ||
655 | while (1) | |
656 | { | |
657 | c = *++p; | |
658 | if (c == L_('.') && p[1] == L_(']')) | |
659 | { | |
660 | p += 2; | |
661 | break; | |
662 | } | |
663 | if (c == '\0') | |
664 | return FNM_NOMATCH; | |
665 | ++c1; | |
666 | } | |
667 | ||
668 | if (nrules == 0) | |
669 | { | |
670 | /* There are no names defined in the | |
671 | collation data. Therefore we only | |
672 | accept the trivial names consisting | |
673 | of the character itself. */ | |
674 | if (c1 != 1) | |
675 | return FNM_NOMATCH; | |
676 | ||
677 | cend = startp[1]; | |
678 | } | |
679 | else | |
680 | { | |
681 | int32_t table_size; | |
682 | const int32_t *symb_table; | |
683 | const unsigned char *extra; | |
684 | int32_t idx; | |
685 | int32_t elem; | |
686 | # if WIDE_CHAR_VERSION | |
687 | CHAR *wextra; | |
688 | # endif | |
689 | ||
690 | table_size = | |
691 | _NL_CURRENT_WORD (LC_COLLATE, | |
692 | _NL_COLLATE_SYMB_HASH_SIZEMB); | |
693 | symb_table = (const int32_t *) | |
694 | _NL_CURRENT (LC_COLLATE, | |
695 | _NL_COLLATE_SYMB_TABLEMB); | |
696 | extra = (const unsigned char *) | |
697 | _NL_CURRENT (LC_COLLATE, | |
698 | _NL_COLLATE_SYMB_EXTRAMB); | |
699 | ||
700 | for (elem = 0; elem < table_size; elem++) | |
701 | if (symb_table[2 * elem] != 0) | |
702 | { | |
703 | idx = symb_table[2 * elem + 1]; | |
704 | /* Skip the name of collating | |
705 | element. */ | |
706 | idx += 1 + extra[idx]; | |
707 | # if WIDE_CHAR_VERSION | |
708 | /* Skip the byte sequence of the | |
709 | collating element. */ | |
710 | idx += 1 + extra[idx]; | |
711 | /* Adjust for the alignment. */ | |
712 | idx = (idx + 3) & ~3; | |
713 | ||
714 | wextra = (CHAR *) &extra[idx + 4]; | |
715 | ||
716 | if (/* Compare the length of the | |
717 | sequence. */ | |
718 | c1 == wextra[0] | |
719 | /* Compare the wide char sequence. */ | |
720 | && (__wmemcmp (startp + 1, | |
721 | &wextra[1], c1) | |
722 | == 0)) | |
723 | /* Yep, this is the entry. */ | |
724 | break; | |
725 | # else | |
726 | if (/* Compare the length of the | |
727 | sequence. */ | |
728 | c1 == extra[idx] | |
729 | /* Compare the byte sequence. */ | |
730 | && memcmp (startp + 1, | |
731 | &extra[idx + 1], c1) == 0) | |
732 | /* Yep, this is the entry. */ | |
733 | break; | |
734 | # endif | |
735 | } | |
736 | ||
737 | if (elem < table_size) | |
738 | { | |
739 | /* Get the collation sequence value. */ | |
740 | is_seqval = true; | |
741 | # if WIDE_CHAR_VERSION | |
742 | /* The compiler might warn that wextra may | |
743 | be used uninitialized, however it will | |
744 | be reached iff elem < table_size which | |
745 | means that it was properly set in the | |
746 | loop above. */ | |
747 | DIAG_PUSH_NEEDS_COMMENT; | |
748 | DIAG_IGNORE_NEEDS_COMMENT (16, "-Wmaybe-uninitialized"); | |
749 | cend = wextra[1 + wextra[0]]; | |
750 | DIAG_POP_NEEDS_COMMENT; | |
751 | # else | |
752 | /* The compile might warn that idx may | |
753 | be used uninitialized and similar to | |
754 | wextra above it will be properly set by | |
755 | the loop. */ | |
756 | DIAG_PUSH_NEEDS_COMMENT; | |
757 | DIAG_IGNORE_NEEDS_COMMENT (16, "-Wmaybe-uninitialized"); | |
758 | idx += 1 + extra[idx]; | |
759 | DIAG_POP_NEEDS_COMMENT; | |
760 | /* Adjust for the alignment. */ | |
761 | idx = (idx + 3) & ~3; | |
762 | cend = *((int32_t *) &extra[idx]); | |
763 | # endif | |
764 | } | |
765 | else if (c1 == 1) | |
766 | { | |
767 | cend = startp[1]; | |
768 | c = *p++; | |
769 | } | |
770 | else | |
771 | return FNM_NOMATCH; | |
772 | } | |
773 | } | |
774 | else | |
775 | { | |
776 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) | |
777 | cend = *p++; | |
778 | if (cend == L_('\0')) | |
779 | return FNM_NOMATCH; | |
780 | cend = FOLD (cend); | |
781 | } | |
782 | ||
783 | /* XXX It is not entirely clear to me how to handle | |
784 | characters which are not mentioned in the | |
785 | collation specification. */ | |
786 | if ( | |
787 | # if WIDE_CHAR_VERSION | |
788 | lcollseq == 0xffffffff || | |
789 | # endif | |
790 | lcollseq <= fcollseq) | |
791 | { | |
792 | /* We have to look at the upper bound. */ | |
793 | uint32_t hcollseq; | |
794 | ||
795 | if (is_seqval) | |
796 | hcollseq = cend; | |
797 | else | |
798 | { | |
799 | # if WIDE_CHAR_VERSION | |
800 | hcollseq = | |
801 | __collseq_table_lookup (collseq, cend); | |
802 | if (hcollseq == ~((uint32_t) 0)) | |
803 | { | |
804 | /* Hum, no information about the upper | |
805 | bound. The matching succeeds if the | |
806 | lower bound is matched exactly. */ | |
807 | if (lcollseq != fcollseq) | |
808 | goto range_not_matched; | |
809 | ||
810 | goto matched; | |
811 | } | |
812 | # else | |
813 | hcollseq = collseq[cend]; | |
814 | # endif | |
815 | } | |
816 | ||
817 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) | |
818 | goto matched; | |
819 | } | |
820 | # if WIDE_CHAR_VERSION | |
821 | range_not_matched: | |
822 | # endif | |
823 | #else | |
824 | /* We use a boring value comparison of the character | |
825 | values. This is better than comparing using | |
826 | 'strcoll' since the latter would have surprising | |
827 | and sometimes fatal consequences. */ | |
828 | UCHAR cend = *p++; | |
829 | ||
830 | if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) | |
831 | cend = *p++; | |
832 | if (cend == L_('\0')) | |
833 | return FNM_NOMATCH; | |
834 | ||
835 | /* It is a range. */ | |
836 | if ((UCHAR) cold <= fn && fn <= cend) | |
837 | goto matched; | |
838 | #endif | |
839 | ||
840 | c = *p++; | |
841 | } | |
842 | } | |
843 | ||
844 | if (c == L_(']')) | |
845 | break; | |
846 | } | |
847 | ||
848 | if (!not) | |
849 | return FNM_NOMATCH; | |
850 | break; | |
851 | ||
852 | matched: | |
853 | /* Skip the rest of the [...] that already matched. */ | |
854 | while ((c = *p++) != L_(']')) | |
855 | { | |
856 | if (c == L_('\0')) | |
857 | { | |
858 | /* [ unterminated, treat as normal character. */ | |
859 | p = p_init; | |
860 | n = n_init; | |
861 | c = L_('['); | |
862 | goto normal_match; | |
863 | } | |
864 | ||
865 | if (!(flags & FNM_NOESCAPE) && c == L_('\\')) | |
866 | { | |
867 | if (*p == L_('\0')) | |
868 | return FNM_NOMATCH; | |
869 | /* XXX 1003.2d11 is unclear if this is right. */ | |
870 | ++p; | |
871 | } | |
872 | else if (c == L_('[') && *p == L_(':')) | |
873 | { | |
874 | int c1 = 0; | |
875 | const CHAR *startp = p; | |
876 | ||
877 | while (1) | |
878 | { | |
879 | c = *++p; | |
880 | if (++c1 == CHAR_CLASS_MAX_LENGTH) | |
881 | return FNM_NOMATCH; | |
882 | ||
883 | if (*p == L_(':') && p[1] == L_(']')) | |
884 | break; | |
885 | ||
886 | if (c < L_('a') || c >= L_('z')) | |
887 | { | |
888 | p = startp - 2; | |
889 | break; | |
890 | } | |
891 | } | |
892 | p += 2; | |
893 | } | |
894 | else if (c == L_('[') && *p == L_('=')) | |
895 | { | |
896 | c = *++p; | |
897 | if (c == L_('\0')) | |
898 | return FNM_NOMATCH; | |
899 | c = *++p; | |
900 | if (c != L_('=') || p[1] != L_(']')) | |
901 | return FNM_NOMATCH; | |
902 | p += 2; | |
903 | } | |
904 | else if (c == L_('[') && *p == L_('.')) | |
905 | { | |
906 | while (1) | |
907 | { | |
908 | c = *++p; | |
909 | if (c == L_('\0')) | |
910 | return FNM_NOMATCH; | |
911 | ||
912 | if (c == L_('.') && p[1] == L_(']')) | |
913 | break; | |
914 | } | |
915 | p += 2; | |
916 | } | |
917 | } | |
918 | if (not) | |
919 | return FNM_NOMATCH; | |
920 | } | |
921 | break; | |
922 | ||
923 | case L_('+'): | |
924 | case L_('@'): | |
925 | case L_('!'): | |
926 | if (__glibc_unlikely (flags & FNM_EXTMATCH) && *p == '(') | |
927 | { | |
928 | int res = EXT (c, p, n, string_end, no_leading_period, flags); | |
929 | if (res != -1) | |
930 | return res; | |
931 | } | |
932 | goto normal_match; | |
933 | ||
934 | case L_('/'): | |
935 | if (NO_LEADING_PERIOD (flags)) | |
936 | { | |
937 | if (n == string_end || c != (UCHAR) *n) | |
938 | return FNM_NOMATCH; | |
939 | ||
940 | new_no_leading_period = true; | |
941 | break; | |
942 | } | |
943 | FALLTHROUGH; | |
944 | default: | |
945 | normal_match: | |
946 | if (n == string_end || c != FOLD ((UCHAR) *n)) | |
947 | return FNM_NOMATCH; | |
948 | } | |
949 | ||
950 | no_leading_period = new_no_leading_period; | |
951 | ++n; | |
952 | } | |
953 | ||
954 | if (n == string_end) | |
955 | return 0; | |
956 | ||
957 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) | |
958 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ | |
959 | return 0; | |
960 | ||
961 | return FNM_NOMATCH; | |
962 | } | |
963 | ||
964 | ||
965 | static const CHAR * | |
966 | END (const CHAR *pattern) | |
967 | { | |
968 | const CHAR *p = pattern; | |
969 | ||
970 | while (1) | |
971 | if (*++p == L_('\0')) | |
972 | /* This is an invalid pattern. */ | |
973 | return pattern; | |
974 | else if (*p == L_('[')) | |
975 | { | |
976 | /* Handle brackets special. */ | |
977 | if (posixly_correct == 0) | |
978 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
979 | ||
980 | /* Skip the not sign. We have to recognize it because of a possibly | |
981 | following ']'. */ | |
982 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) | |
983 | ++p; | |
984 | /* A leading ']' is recognized as such. */ | |
985 | if (*p == L_(']')) | |
986 | ++p; | |
987 | /* Skip over all characters of the list. */ | |
988 | while (*p != L_(']')) | |
989 | if (*p++ == L_('\0')) | |
990 | /* This is no valid pattern. */ | |
991 | return pattern; | |
992 | } | |
993 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') | |
994 | || *p == L_('!')) && p[1] == L_('(')) | |
995 | { | |
996 | p = END (p + 1); | |
997 | if (*p == L_('\0')) | |
998 | /* This is an invalid pattern. */ | |
999 | return pattern; | |
1000 | } | |
1001 | else if (*p == L_(')')) | |
1002 | break; | |
1003 | ||
1004 | return p + 1; | |
1005 | } | |
1006 | ||
1007 | #if WIDE_CHAR_VERSION | |
1008 | # define PATTERN_PREFIX pattern_list | |
1009 | #else | |
1010 | # define PATTERN_PREFIX wpattern_list | |
1011 | #endif | |
1012 | ||
1013 | #define PASTE(a,b) PASTE1(a,b) | |
1014 | #define PASTE1(a,b) a##b | |
1015 | ||
1016 | #define DYNARRAY_STRUCT PATTERN_PREFIX | |
1017 | #define DYNARRAY_ELEMENT_FREE(ptr) free (*ptr) | |
1018 | #define DYNARRAY_ELEMENT CHAR * | |
1019 | #define DYNARRAY_PREFIX PASTE(PATTERN_PREFIX,_) | |
1020 | #define DYNARRAY_INITIAL_SIZE 8 | |
1021 | #include <malloc/dynarray-skeleton.c> | |
1022 | ||
1023 | static int | |
1024 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, | |
1025 | bool no_leading_period, int flags) | |
1026 | { | |
1027 | const CHAR *startp; | |
1028 | ptrdiff_t level; | |
1029 | struct PATTERN_PREFIX list; | |
1030 | size_t pattern_len = STRLEN (pattern); | |
1031 | size_t pattern_i = 0; | |
1032 | const CHAR *p; | |
1033 | const CHAR *rs; | |
1034 | int retval = 0; | |
1035 | ||
1036 | PASTE (PATTERN_PREFIX, _init) (&list); | |
1037 | ||
1038 | /* Parse the pattern. Store the individual parts in the list. */ | |
1039 | level = 0; | |
1040 | for (startp = p = pattern + 1; level >= 0; ++p) | |
1041 | if (*p == L_('\0')) | |
1042 | { | |
1043 | /* This is an invalid pattern. */ | |
1044 | retval = -1; | |
1045 | goto out; | |
1046 | } | |
1047 | else if (*p == L_('[')) | |
1048 | { | |
1049 | /* Handle brackets special. */ | |
1050 | if (posixly_correct == 0) | |
1051 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
1052 | ||
1053 | /* Skip the not sign. We have to recognize it because of a possibly | |
1054 | following ']'. */ | |
1055 | if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) | |
1056 | ++p; | |
1057 | /* A leading ']' is recognized as such. */ | |
1058 | if (*p == L_(']')) | |
1059 | ++p; | |
1060 | /* Skip over all characters of the list. */ | |
1061 | while (*p != L_(']')) | |
1062 | if (*p++ == L_('\0')) | |
1063 | { | |
1064 | /* This is no valid pattern. */ | |
1065 | retval = -1; | |
1066 | goto out; | |
1067 | } | |
1068 | } | |
1069 | else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') | |
1070 | || *p == L_('!')) && p[1] == L_('(')) | |
1071 | /* Remember the nesting level. */ | |
1072 | ++level; | |
1073 | else if (*p == L_(')') || *p == L_('|')) | |
1074 | { | |
1075 | if (level == 0) | |
1076 | { | |
1077 | size_t slen = opt == L_('?') || opt == L_('@') | |
1078 | ? pattern_len : p - startp + 1; | |
1079 | CHAR *newp = malloc (slen * sizeof (CHAR)); | |
1080 | if (newp != NULL) | |
1081 | { | |
1082 | *((CHAR *) MEMPCPY (newp, startp, p - startp)) = L_('\0'); | |
1083 | PASTE (PATTERN_PREFIX,_add) (&list, newp); | |
1084 | } | |
1085 | if (newp == NULL || PASTE (PATTERN_PREFIX, _has_failed) (&list)) | |
1086 | { | |
1087 | retval = -2; | |
1088 | goto out; | |
1089 | } | |
1090 | ||
1091 | if (*p == L_('|')) | |
1092 | startp = p + 1; | |
1093 | } | |
1094 | if (*p == L_(')')) | |
1095 | level--; | |
1096 | } | |
1097 | assert (p[-1] == L_(')')); | |
1098 | ||
1099 | switch (opt) | |
1100 | { | |
1101 | case L_('*'): | |
1102 | if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0) | |
1103 | goto success; | |
1104 | FALLTHROUGH; | |
1105 | case L_('+'): | |
1106 | for (; pattern_i < PASTE (PATTERN_PREFIX, _size)(&list); pattern_i++) | |
1107 | { | |
1108 | for (rs = string; rs <= string_end; ++rs) | |
1109 | /* First match the prefix with the current pattern with the | |
1110 | current pattern. */ | |
1111 | if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), string, | |
1112 | rs, no_leading_period, | |
1113 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, | |
1114 | NULL) == 0 | |
1115 | /* This was successful. Now match the rest with the rest | |
1116 | of the pattern. */ | |
1117 | && (FCT (p, rs, string_end, | |
1118 | rs == string | |
1119 | ? no_leading_period | |
1120 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), | |
1121 | flags & FNM_FILE_NAME | |
1122 | ? flags : flags & ~FNM_PERIOD, NULL) == 0 | |
1123 | /* This didn't work. Try the whole pattern. */ | |
1124 | || (rs != string | |
1125 | && FCT (pattern - 1, rs, string_end, | |
1126 | rs == string | |
1127 | ? no_leading_period | |
1128 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), | |
1129 | flags & FNM_FILE_NAME | |
1130 | ? flags : flags & ~FNM_PERIOD, NULL) == 0))) | |
1131 | /* It worked. Signal success. */ | |
1132 | goto success; | |
1133 | } | |
1134 | ||
1135 | /* None of the patterns lead to a match. */ | |
1136 | retval = FNM_NOMATCH; | |
1137 | break; | |
1138 | ||
1139 | case L_('?'): | |
1140 | if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0) | |
1141 | goto success; | |
1142 | FALLTHROUGH; | |
1143 | case L_('@'): | |
1144 | for (; pattern_i < PASTE (PATTERN_PREFIX, _size) (&list); pattern_i++) | |
1145 | { | |
1146 | /* I cannot believe it but `strcat' is actually acceptable | |
1147 | here. Match the entire string with the prefix from the | |
1148 | pattern list and the rest of the pattern following the | |
1149 | pattern list. */ | |
1150 | if (FCT (STRCAT (*PASTE (PATTERN_PREFIX, _at) (&list, pattern_i), p), | |
1151 | string, string_end, no_leading_period, | |
1152 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, | |
1153 | NULL) == 0) | |
1154 | /* It worked. Signal success. */ | |
1155 | goto success; | |
1156 | } | |
1157 | ||
1158 | /* None of the patterns lead to a match. */ | |
1159 | retval = FNM_NOMATCH; | |
1160 | break; | |
1161 | ||
1162 | case L_('!'): | |
1163 | for (rs = string; rs <= string_end; ++rs) | |
1164 | { | |
1165 | size_t runp_i; | |
1166 | ||
1167 | for (runp_i = pattern_i; | |
1168 | runp_i != PASTE (PATTERN_PREFIX, _size) (&list); | |
1169 | runp_i++) | |
1170 | { | |
1171 | if (FCT (*PASTE (PATTERN_PREFIX, _at) (&list, runp_i), string, rs, | |
1172 | no_leading_period, | |
1173 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, | |
1174 | NULL) == 0) | |
1175 | break; | |
1176 | } | |
1177 | ||
1178 | /* If none of the patterns matched see whether the rest does. */ | |
1179 | if (runp_i == PASTE (PATTERN_PREFIX, _size) (&list) | |
1180 | && (FCT (p, rs, string_end, | |
1181 | rs == string | |
1182 | ? no_leading_period | |
1183 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags), | |
1184 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, | |
1185 | NULL) == 0)) | |
1186 | /* This is successful. */ | |
1187 | goto success; | |
1188 | } | |
1189 | ||
1190 | /* None of the patterns together with the rest of the pattern | |
1191 | lead to a match. */ | |
1192 | retval = FNM_NOMATCH; | |
1193 | break; | |
1194 | ||
1195 | default: | |
1196 | assert (! "Invalid extended matching operator"); | |
1197 | retval = -1; | |
1198 | break; | |
1199 | } | |
1200 | ||
1201 | success: | |
1202 | out: | |
1203 | PASTE (PATTERN_PREFIX, _free) (&list); | |
1204 | ||
1205 | return retval; | |
1206 | } | |
1207 | ||
/* Remove the helper macros used by EXT and the macros that parameterize
   this file.  NOTE(review): presumably this lets the file be included
   again with a different character type -- confirm against the
   including file.  */
1208 | #undef PATTERN_PREFIX | |
1209 | #undef PASTE | |
1210 | #undef PASTE1 | |
1211 | |
1212 | #undef FOLD | |
1213 | #undef CHAR | |
1214 | #undef UCHAR | |
1215 | #undef INT | |
1216 | #undef FCT | |
1217 | #undef EXT | |
1218 | #undef END | |
1219 | #undef STRUCT | |
1220 | #undef MEMPCPY | |
1221 | #undef MEMCHR | |
1222 | #undef STRLEN | |
1223 | #undef STRCAT | |
1224 | #undef L_ | |
1225 | #undef BTOWC | |
1226 | #undef WIDE_CHAR_VERSION | |
1227 | #undef FINDIDX |