]>
Commit | Line | Data |
---|---|---|
1 | /* Copyright (C) 1991-2019 Free Software Foundation, Inc. | |
2 | This file is part of the GNU C Library. | |
3 | ||
4 | The GNU C Library is free software; you can redistribute it and/or | |
5 | modify it under the terms of the GNU Lesser General Public | |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
8 | ||
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public | |
15 | License along with the GNU C Library; if not, see | |
16 | <https://www.gnu.org/licenses/>. */ | |
17 | ||
18 | #include <stdint.h> | |
19 | ||
20 | struct STRUCT /* Position recorded by FCT when it stops at a '*' on behalf of its caller (see the ENDS argument).  */ | |
21 | { | |
22 | const CHAR *pattern; /* Points at the '*' that was reached in the pattern.  */ | |
23 | const CHAR *string; /* String position at the moment the '*' was reached.  */ | |
24 | int no_leading_period; /* Value of no_leading_period at that moment.  */ | |
25 | }; | |
26 | ||
27 | /* Match STRING (which ends at STRING_END) against the filename pattern | |
28 | PATTERN, returning zero if it matches, nonzero if not. */ | |
29 | static int FCT (const CHAR *pattern, const CHAR *string, | |
30 | const CHAR *string_end, int no_leading_period, int flags, | |
31 | struct STRUCT *ends, size_t alloca_used); | |
32 | static int EXT (INT opt, const CHAR *pattern, const CHAR *string, | |
33 | const CHAR *string_end, int no_leading_period, int flags, | |
34 | size_t alloca_used); /* Handles the extended pattern group introduced by OPT; FCT treats a result of -1 as "not an extended pattern" and continues with ordinary matching.  */ | |
35 | static const CHAR *END (const CHAR *patternp); /* Returns the position just past the ')' that closes the group starting at PATTERNP, or PATTERNP itself if the group is unterminated.  */ | |
36 | ||
37 | static int | |
38 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, | |
39 | int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used) | |
40 | { | |
41 | const CHAR *p = pattern, *n = string; | |
42 | UCHAR c; | |
43 | #ifdef _LIBC | |
44 | # if WIDE_CHAR_VERSION | |
45 | const char *collseq = (const char *) | |
46 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); | |
47 | # else | |
48 | const UCHAR *collseq = (const UCHAR *) | |
49 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); | |
50 | # endif | |
51 | #endif | |
52 | ||
53 | while ((c = *p++) != L('\0')) | |
54 | { | |
55 | int new_no_leading_period = 0; | |
56 | c = FOLD (c); | |
57 | ||
58 | switch (c) | |
59 | { | |
60 | case L('?'): | |
61 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
62 | { | |
63 | int res = EXT (c, p, n, string_end, no_leading_period, | |
64 | flags, alloca_used); | |
65 | if (res != -1) | |
66 | return res; | |
67 | } | |
68 | ||
69 | if (n == string_end) | |
70 | return FNM_NOMATCH; | |
71 | else if (*n == L('/') && (flags & FNM_FILE_NAME)) | |
72 | return FNM_NOMATCH; | |
73 | else if (*n == L('.') && no_leading_period) | |
74 | return FNM_NOMATCH; | |
75 | break; | |
76 | ||
77 | case L('\\'): | |
78 | if (!(flags & FNM_NOESCAPE)) | |
79 | { | |
80 | c = *p++; | |
81 | if (c == L('\0')) | |
82 | /* Trailing \ loses. */ | |
83 | return FNM_NOMATCH; | |
84 | c = FOLD (c); | |
85 | } | |
86 | if (n == string_end || FOLD ((UCHAR) *n) != c) | |
87 | return FNM_NOMATCH; | |
88 | break; | |
89 | ||
90 | case L('*'): | |
91 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
92 | { | |
93 | int res = EXT (c, p, n, string_end, no_leading_period, | |
94 | flags, alloca_used); | |
95 | if (res != -1) | |
96 | return res; | |
97 | } | |
98 | else if (ends != NULL) | |
99 | { | |
100 | ends->pattern = p - 1; | |
101 | ends->string = n; | |
102 | ends->no_leading_period = no_leading_period; | |
103 | return 0; | |
104 | } | |
105 | ||
106 | if (n != string_end && *n == L('.') && no_leading_period) | |
107 | return FNM_NOMATCH; | |
108 | ||
109 | for (c = *p++; c == L('?') || c == L('*'); c = *p++) | |
110 | { | |
111 | if (*p == L('(') && (flags & FNM_EXTMATCH) != 0) | |
112 | { | |
113 | const CHAR *endp = END (p); | |
114 | if (endp != p) | |
115 | { | |
116 | /* This is a pattern. Skip over it. */ | |
117 | p = endp; | |
118 | continue; | |
119 | } | |
120 | } | |
121 | ||
122 | if (c == L('?')) | |
123 | { | |
124 | /* A ? needs to match one character. */ | |
125 | if (n == string_end) | |
126 | /* There isn't another character; no match. */ | |
127 | return FNM_NOMATCH; | |
128 | else if (*n == L('/') | |
129 | && __builtin_expect (flags & FNM_FILE_NAME, 0)) | |
130 | /* A slash does not match a wildcard under | |
131 | FNM_FILE_NAME. */ | |
132 | return FNM_NOMATCH; | |
133 | else | |
134 | /* One character of the string is consumed in matching | |
135 | this ? wildcard, so *??? won't match if there are | |
136 | less than three characters. */ | |
137 | ++n; | |
138 | } | |
139 | } | |
140 | ||
141 | if (c == L('\0')) | |
142 | /* The wildcard(s) is/are the last element of the pattern. | |
143 | If the name is a file name and contains another slash | |
144 | this means it cannot match, unless the FNM_LEADING_DIR | |
145 | flag is set. */ | |
146 | { | |
147 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; | |
148 | ||
149 | if (flags & FNM_FILE_NAME) | |
150 | { | |
151 | if (flags & FNM_LEADING_DIR) | |
152 | result = 0; | |
153 | else | |
154 | { | |
155 | if (MEMCHR (n, L('/'), string_end - n) == NULL) | |
156 | result = 0; | |
157 | } | |
158 | } | |
159 | ||
160 | return result; | |
161 | } | |
162 | else | |
163 | { | |
164 | const CHAR *endp; | |
165 | struct STRUCT end; | |
166 | ||
167 | end.pattern = NULL; | |
168 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'), | |
169 | string_end - n); | |
170 | if (endp == NULL) | |
171 | endp = string_end; | |
172 | ||
173 | if (c == L('[') | |
174 | || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 | |
175 | && (c == L('@') || c == L('+') || c == L('!')) | |
176 | && *p == L('('))) | |
177 | { | |
178 | int flags2 = ((flags & FNM_FILE_NAME) | |
179 | ? flags : (flags & ~FNM_PERIOD)); | |
180 | ||
181 | for (--p; n < endp; ++n, no_leading_period = 0) | |
182 | if (FCT (p, n, string_end, no_leading_period, flags2, | |
183 | &end, alloca_used) == 0) | |
184 | goto found; | |
185 | } | |
186 | else if (c == L('/') && (flags & FNM_FILE_NAME)) | |
187 | { | |
188 | while (n < string_end && *n != L('/')) | |
189 | ++n; | |
190 | if (n < string_end && *n == L('/') | |
191 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, | |
192 | NULL, alloca_used) == 0)) | |
193 | return 0; | |
194 | } | |
195 | else | |
196 | { | |
197 | int flags2 = ((flags & FNM_FILE_NAME) | |
198 | ? flags : (flags & ~FNM_PERIOD)); | |
199 | ||
200 | if (c == L('\\') && !(flags & FNM_NOESCAPE)) | |
201 | c = *p; | |
202 | c = FOLD (c); | |
203 | for (--p; n < endp; ++n, no_leading_period = 0) | |
204 | if (FOLD ((UCHAR) *n) == c | |
205 | && (FCT (p, n, string_end, no_leading_period, flags2, | |
206 | &end, alloca_used) == 0)) | |
207 | { | |
208 | found: | |
209 | if (end.pattern == NULL) | |
210 | return 0; | |
211 | break; | |
212 | } | |
213 | if (end.pattern != NULL) | |
214 | { | |
215 | p = end.pattern; | |
216 | n = end.string; | |
217 | no_leading_period = end.no_leading_period; | |
218 | continue; | |
219 | } | |
220 | } | |
221 | } | |
222 | ||
223 | /* If we come here no match is possible with the wildcard. */ | |
224 | return FNM_NOMATCH; | |
225 | ||
226 | case L('['): | |
227 | { | |
228 | /* Nonzero if the sense of the character class is inverted. */ | |
229 | const CHAR *p_init = p; | |
230 | const CHAR *n_init = n; | |
231 | int not; | |
232 | CHAR cold; | |
233 | UCHAR fn; | |
234 | ||
235 | if (posixly_correct == 0) | |
236 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
237 | ||
238 | if (n == string_end) | |
239 | return FNM_NOMATCH; | |
240 | ||
241 | if (*n == L('.') && no_leading_period) | |
242 | return FNM_NOMATCH; | |
243 | ||
244 | if (*n == L('/') && (flags & FNM_FILE_NAME)) | |
245 | /* `/' cannot be matched. */ | |
246 | return FNM_NOMATCH; | |
247 | ||
248 | not = (*p == L('!') || (posixly_correct < 0 && *p == L('^'))); | |
249 | if (not) | |
250 | ++p; | |
251 | ||
252 | fn = FOLD ((UCHAR) *n); | |
253 | ||
254 | c = *p++; | |
255 | for (;;) | |
256 | { | |
257 | if (!(flags & FNM_NOESCAPE) && c == L('\\')) | |
258 | { | |
259 | if (*p == L('\0')) | |
260 | return FNM_NOMATCH; | |
261 | c = FOLD ((UCHAR) *p); | |
262 | ++p; | |
263 | ||
264 | goto normal_bracket; | |
265 | } | |
266 | else if (c == L('[') && *p == L(':')) | |
267 | { | |
268 | /* Leave room for the null. */ | |
269 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; | |
270 | size_t c1 = 0; | |
271 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) | |
272 | wctype_t wt; | |
273 | #endif | |
274 | const CHAR *startp = p; | |
275 | ||
276 | for (;;) | |
277 | { | |
278 | if (c1 == CHAR_CLASS_MAX_LENGTH) | |
279 | /* The name is too long and therefore the pattern | |
280 | is ill-formed. */ | |
281 | return FNM_NOMATCH; | |
282 | ||
283 | c = *++p; | |
284 | if (c == L(':') && p[1] == L(']')) | |
285 | { | |
286 | p += 2; | |
287 | break; | |
288 | } | |
289 | if (c < L('a') || c >= L('z')) | |
290 | { | |
291 | /* This cannot possibly be a character class name. | |
292 | Match it as a normal range. */ | |
293 | p = startp; | |
294 | c = L('['); | |
295 | goto normal_bracket; | |
296 | } | |
297 | str[c1++] = c; | |
298 | } | |
299 | str[c1] = L('\0'); | |
300 | ||
301 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) | |
302 | wt = IS_CHAR_CLASS (str); | |
303 | if (wt == 0) | |
304 | /* Invalid character class name. */ | |
305 | return FNM_NOMATCH; | |
306 | ||
307 | # if defined _LIBC && ! WIDE_CHAR_VERSION | |
308 | /* The following code is glibc specific but does | |
309 | there a good job in speeding up the code since | |
310 | we can avoid the btowc() call. */ | |
311 | if (_ISCTYPE ((UCHAR) *n, wt)) | |
312 | goto matched; | |
313 | # else | |
314 | if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) | |
315 | goto matched; | |
316 | # endif | |
317 | #else | |
318 | if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n)) | |
319 | || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n)) | |
320 | || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n)) | |
321 | || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n)) | |
322 | || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n)) | |
323 | || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n)) | |
324 | || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n)) | |
325 | || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n)) | |
326 | || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n)) | |
327 | || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n)) | |
328 | || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n)) | |
329 | || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n))) | |
330 | goto matched; | |
331 | #endif | |
332 | c = *p++; | |
333 | } | |
334 | #ifdef _LIBC | |
335 | else if (c == L('[') && *p == L('=')) | |
336 | { | |
337 | /* It's important that STR be a scalar variable rather | |
338 | than a one-element array, because GCC (at least 4.9.2 | |
339 | -O2 on x86-64) can be confused by the array and | |
340 | diagnose a "used initialized" in a dead branch in the | |
341 | findidx function. */ | |
342 | UCHAR str; | |
343 | uint32_t nrules = | |
344 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | |
345 | const CHAR *startp = p; | |
346 | ||
347 | c = *++p; | |
348 | if (c == L('\0')) | |
349 | { | |
350 | p = startp; | |
351 | c = L('['); | |
352 | goto normal_bracket; | |
353 | } | |
354 | str = c; | |
355 | ||
356 | c = *++p; | |
357 | if (c != L('=') || p[1] != L(']')) | |
358 | { | |
359 | p = startp; | |
360 | c = L('['); | |
361 | goto normal_bracket; | |
362 | } | |
363 | p += 2; | |
364 | ||
365 | if (nrules == 0) | |
366 | { | |
367 | if ((UCHAR) *n == str) | |
368 | goto matched; | |
369 | } | |
370 | else | |
371 | { | |
372 | const int32_t *table; | |
373 | # if WIDE_CHAR_VERSION | |
374 | const int32_t *weights; | |
375 | const wint_t *extra; | |
376 | # else | |
377 | const unsigned char *weights; | |
378 | const unsigned char *extra; | |
379 | # endif | |
380 | const int32_t *indirect; | |
381 | int32_t idx; | |
382 | const UCHAR *cp = (const UCHAR *) &str; | |
383 | ||
384 | # if WIDE_CHAR_VERSION | |
385 | table = (const int32_t *) | |
386 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); | |
387 | weights = (const int32_t *) | |
388 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); | |
389 | extra = (const wint_t *) | |
390 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); | |
391 | indirect = (const int32_t *) | |
392 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); | |
393 | # else | |
394 | table = (const int32_t *) | |
395 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); | |
396 | weights = (const unsigned char *) | |
397 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); | |
398 | extra = (const unsigned char *) | |
399 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); | |
400 | indirect = (const int32_t *) | |
401 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); | |
402 | # endif | |
403 | ||
404 | idx = FINDIDX (table, indirect, extra, &cp, 1); | |
405 | if (idx != 0) | |
406 | { | |
407 | /* We found a table entry. Now see whether the | |
408 | character we are currently at has the same | |
409 | equivalance class value. */ | |
410 | int len = weights[idx & 0xffffff]; | |
411 | int32_t idx2; | |
412 | const UCHAR *np = (const UCHAR *) n; | |
413 | ||
414 | idx2 = FINDIDX (table, indirect, extra, | |
415 | &np, string_end - n); | |
416 | if (idx2 != 0 | |
417 | && (idx >> 24) == (idx2 >> 24) | |
418 | && len == weights[idx2 & 0xffffff]) | |
419 | { | |
420 | int cnt = 0; | |
421 | ||
422 | idx &= 0xffffff; | |
423 | idx2 &= 0xffffff; | |
424 | ||
425 | while (cnt < len | |
426 | && (weights[idx + 1 + cnt] | |
427 | == weights[idx2 + 1 + cnt])) | |
428 | ++cnt; | |
429 | ||
430 | if (cnt == len) | |
431 | goto matched; | |
432 | } | |
433 | } | |
434 | } | |
435 | ||
436 | c = *p++; | |
437 | } | |
438 | #endif | |
439 | else if (c == L('\0')) | |
440 | { | |
441 | /* [ unterminated, treat as normal character. */ | |
442 | p = p_init; | |
443 | n = n_init; | |
444 | c = L('['); | |
445 | goto normal_match; | |
446 | } | |
447 | else | |
448 | { | |
449 | int is_range = 0; | |
450 | ||
451 | #ifdef _LIBC | |
452 | int is_seqval = 0; | |
453 | ||
454 | if (c == L('[') && *p == L('.')) | |
455 | { | |
456 | uint32_t nrules = | |
457 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); | |
458 | const CHAR *startp = p; | |
459 | size_t c1 = 0; | |
460 | ||
461 | while (1) | |
462 | { | |
463 | c = *++p; | |
464 | if (c == L('.') && p[1] == L(']')) | |
465 | { | |
466 | p += 2; | |
467 | break; | |
468 | } | |
469 | if (c == '\0') | |
470 | return FNM_NOMATCH; | |
471 | ++c1; | |
472 | } | |
473 | ||
474 | /* We have to handling the symbols differently in | |
475 | ranges since then the collation sequence is | |
476 | important. */ | |
477 | is_range = *p == L('-') && p[1] != L('\0'); | |
478 | ||
479 | if (nrules == 0) | |
480 | { | |
481 | /* There are no names defined in the collation | |
482 | data. Therefore we only accept the trivial | |
483 | names consisting of the character itself. */ | |
484 | if (c1 != 1) | |
485 | return FNM_NOMATCH; | |
486 | ||
487 | if (!is_range && *n == startp[1]) | |
488 | goto matched; | |
489 | ||
490 | cold = startp[1]; | |
491 | c = *p++; | |
492 | } | |
493 | else | |
494 | { | |
495 | int32_t table_size; | |
496 | const int32_t *symb_table; | |
497 | const unsigned char *extra; | |
498 | int32_t idx; | |
499 | int32_t elem; | |
500 | # if WIDE_CHAR_VERSION | |
501 | CHAR *wextra; | |
502 | # endif | |
503 | ||
504 | table_size = | |
505 | _NL_CURRENT_WORD (LC_COLLATE, | |
506 | _NL_COLLATE_SYMB_HASH_SIZEMB); | |
507 | symb_table = (const int32_t *) | |
508 | _NL_CURRENT (LC_COLLATE, | |
509 | _NL_COLLATE_SYMB_TABLEMB); | |
510 | extra = (const unsigned char *) | |
511 | _NL_CURRENT (LC_COLLATE, | |
512 | _NL_COLLATE_SYMB_EXTRAMB); | |
513 | ||
514 | for (elem = 0; elem < table_size; elem++) | |
515 | if (symb_table[2 * elem] != 0) | |
516 | { | |
517 | idx = symb_table[2 * elem + 1]; | |
518 | /* Skip the name of collating element. */ | |
519 | idx += 1 + extra[idx]; | |
520 | # if WIDE_CHAR_VERSION | |
521 | /* Skip the byte sequence of the | |
522 | collating element. */ | |
523 | idx += 1 + extra[idx]; | |
524 | /* Adjust for the alignment. */ | |
525 | idx = (idx + 3) & ~3; | |
526 | ||
527 | wextra = (CHAR *) &extra[idx + 4]; | |
528 | ||
529 | if (/* Compare the length of the sequence. */ | |
530 | c1 == wextra[0] | |
531 | /* Compare the wide char sequence. */ | |
532 | && WMEMCMP (startp + 1, &wextra[1], | |
533 | c1) == 0) | |
534 | /* Yep, this is the entry. */ | |
535 | break; | |
536 | # else | |
537 | if (/* Compare the length of the sequence. */ | |
538 | c1 == extra[idx] | |
539 | /* Compare the byte sequence. */ | |
540 | && memcmp (startp + 1, | |
541 | &extra[idx + 1], c1) == 0) | |
542 | /* Yep, this is the entry. */ | |
543 | break; | |
544 | # endif | |
545 | } | |
546 | ||
547 | if (elem < table_size) | |
548 | { | |
549 | /* Compare the byte sequence but only if | |
550 | this is not part of a range. */ | |
551 | if (! is_range | |
552 | ||
553 | # if WIDE_CHAR_VERSION | |
554 | && WMEMCMP (n, &wextra[1], c1) == 0 | |
555 | # else | |
556 | && memcmp (n, &extra[idx + 1], c1) == 0 | |
557 | # endif | |
558 | ) | |
559 | { | |
560 | n += c1 - 1; | |
561 | goto matched; | |
562 | } | |
563 | ||
564 | /* Get the collation sequence value. */ | |
565 | is_seqval = 1; | |
566 | # if WIDE_CHAR_VERSION | |
567 | cold = wextra[1 + wextra[idx]]; | |
568 | # else | |
569 | idx += 1 + extra[idx]; | |
570 | /* Adjust for the alignment. */ | |
571 | idx = (idx + 3) & ~3; | |
572 | cold = *((int32_t *) &extra[idx]); | |
573 | # endif | |
574 | ||
575 | c = *p++; | |
576 | } | |
577 | else if (c1 == 1) | |
578 | { | |
579 | /* No valid character. Match it as a | |
580 | single byte. */ | |
581 | if (!is_range && *n == startp[1]) | |
582 | goto matched; | |
583 | ||
584 | cold = startp[1]; | |
585 | c = *p++; | |
586 | } | |
587 | else | |
588 | return FNM_NOMATCH; | |
589 | } | |
590 | } | |
591 | else | |
592 | #endif | |
593 | { | |
594 | c = FOLD (c); | |
595 | normal_bracket: | |
596 | ||
597 | /* We have to handling the symbols differently in | |
598 | ranges since then the collation sequence is | |
599 | important. */ | |
600 | is_range = (*p == L('-') && p[1] != L('\0') | |
601 | && p[1] != L(']')); | |
602 | ||
603 | if (!is_range && c == fn) | |
604 | goto matched; | |
605 | ||
606 | /* This is needed if we goto normal_bracket; from | |
607 | outside of is_seqval's scope. */ | |
608 | is_seqval = 0; | |
609 | cold = c; | |
610 | c = *p++; | |
611 | } | |
612 | ||
613 | if (c == L('-') && *p != L(']')) | |
614 | { | |
615 | #if _LIBC | |
616 | /* We have to find the collation sequence | |
617 | value for C. Collation sequence is nothing | |
618 | we can regularly access. The sequence | |
619 | value is defined by the order in which the | |
620 | definitions of the collation values for the | |
621 | various characters appear in the source | |
622 | file. A strange concept, nowhere | |
623 | documented. */ | |
624 | uint32_t fcollseq; | |
625 | uint32_t lcollseq; | |
626 | UCHAR cend = *p++; | |
627 | ||
628 | # if WIDE_CHAR_VERSION | |
629 | /* Search in the `names' array for the characters. */ | |
630 | fcollseq = __collseq_table_lookup (collseq, fn); | |
631 | if (fcollseq == ~((uint32_t) 0)) | |
632 | /* XXX We don't know anything about the character | |
633 | we are supposed to match. This means we are | |
634 | failing. */ | |
635 | goto range_not_matched; | |
636 | ||
637 | if (is_seqval) | |
638 | lcollseq = cold; | |
639 | else | |
640 | lcollseq = __collseq_table_lookup (collseq, cold); | |
641 | # else | |
642 | fcollseq = collseq[fn]; | |
643 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; | |
644 | # endif | |
645 | ||
646 | is_seqval = 0; | |
647 | if (cend == L('[') && *p == L('.')) | |
648 | { | |
649 | uint32_t nrules = | |
650 | _NL_CURRENT_WORD (LC_COLLATE, | |
651 | _NL_COLLATE_NRULES); | |
652 | const CHAR *startp = p; | |
653 | size_t c1 = 0; | |
654 | ||
655 | while (1) | |
656 | { | |
657 | c = *++p; | |
658 | if (c == L('.') && p[1] == L(']')) | |
659 | { | |
660 | p += 2; | |
661 | break; | |
662 | } | |
663 | if (c == '\0') | |
664 | return FNM_NOMATCH; | |
665 | ++c1; | |
666 | } | |
667 | ||
668 | if (nrules == 0) | |
669 | { | |
670 | /* There are no names defined in the | |
671 | collation data. Therefore we only | |
672 | accept the trivial names consisting | |
673 | of the character itself. */ | |
674 | if (c1 != 1) | |
675 | return FNM_NOMATCH; | |
676 | ||
677 | cend = startp[1]; | |
678 | } | |
679 | else | |
680 | { | |
681 | int32_t table_size; | |
682 | const int32_t *symb_table; | |
683 | const unsigned char *extra; | |
684 | int32_t idx; | |
685 | int32_t elem; | |
686 | # if WIDE_CHAR_VERSION | |
687 | CHAR *wextra; | |
688 | # endif | |
689 | ||
690 | table_size = | |
691 | _NL_CURRENT_WORD (LC_COLLATE, | |
692 | _NL_COLLATE_SYMB_HASH_SIZEMB); | |
693 | symb_table = (const int32_t *) | |
694 | _NL_CURRENT (LC_COLLATE, | |
695 | _NL_COLLATE_SYMB_TABLEMB); | |
696 | extra = (const unsigned char *) | |
697 | _NL_CURRENT (LC_COLLATE, | |
698 | _NL_COLLATE_SYMB_EXTRAMB); | |
699 | ||
700 | for (elem = 0; elem < table_size; elem++) | |
701 | if (symb_table[2 * elem] != 0) | |
702 | { | |
703 | idx = symb_table[2 * elem + 1]; | |
704 | /* Skip the name of collating | |
705 | element. */ | |
706 | idx += 1 + extra[idx]; | |
707 | # if WIDE_CHAR_VERSION | |
708 | /* Skip the byte sequence of the | |
709 | collating element. */ | |
710 | idx += 1 + extra[idx]; | |
711 | /* Adjust for the alignment. */ | |
712 | idx = (idx + 3) & ~3; | |
713 | ||
714 | wextra = (CHAR *) &extra[idx + 4]; | |
715 | ||
716 | if (/* Compare the length of the | |
717 | sequence. */ | |
718 | c1 == wextra[0] | |
719 | /* Compare the wide char sequence. */ | |
720 | && WMEMCMP (startp + 1, &wextra[1], | |
721 | c1) == 0) | |
722 | /* Yep, this is the entry. */ | |
723 | break; | |
724 | # else | |
725 | if (/* Compare the length of the | |
726 | sequence. */ | |
727 | c1 == extra[idx] | |
728 | /* Compare the byte sequence. */ | |
729 | && memcmp (startp + 1, | |
730 | &extra[idx + 1], c1) == 0) | |
731 | /* Yep, this is the entry. */ | |
732 | break; | |
733 | # endif | |
734 | } | |
735 | ||
736 | if (elem < table_size) | |
737 | { | |
738 | /* Get the collation sequence value. */ | |
739 | is_seqval = 1; | |
740 | # if WIDE_CHAR_VERSION | |
741 | cend = wextra[1 + wextra[idx]]; | |
742 | # else | |
743 | idx += 1 + extra[idx]; | |
744 | /* Adjust for the alignment. */ | |
745 | idx = (idx + 3) & ~3; | |
746 | cend = *((int32_t *) &extra[idx]); | |
747 | # endif | |
748 | } | |
749 | else if (c1 == 1) | |
750 | { | |
751 | cend = startp[1]; | |
752 | c = *p++; | |
753 | } | |
754 | else | |
755 | return FNM_NOMATCH; | |
756 | } | |
757 | } | |
758 | else | |
759 | { | |
760 | if (!(flags & FNM_NOESCAPE) && cend == L('\\')) | |
761 | cend = *p++; | |
762 | if (cend == L('\0')) | |
763 | return FNM_NOMATCH; | |
764 | cend = FOLD (cend); | |
765 | } | |
766 | ||
767 | /* XXX It is not entirely clear to me how to handle | |
768 | characters which are not mentioned in the | |
769 | collation specification. */ | |
770 | if ( | |
771 | # if WIDE_CHAR_VERSION | |
772 | lcollseq == 0xffffffff || | |
773 | # endif | |
774 | lcollseq <= fcollseq) | |
775 | { | |
776 | /* We have to look at the upper bound. */ | |
777 | uint32_t hcollseq; | |
778 | ||
779 | if (is_seqval) | |
780 | hcollseq = cend; | |
781 | else | |
782 | { | |
783 | # if WIDE_CHAR_VERSION | |
784 | hcollseq = | |
785 | __collseq_table_lookup (collseq, cend); | |
786 | if (hcollseq == ~((uint32_t) 0)) | |
787 | { | |
788 | /* Hum, no information about the upper | |
789 | bound. The matching succeeds if the | |
790 | lower bound is matched exactly. */ | |
791 | if (lcollseq != fcollseq) | |
792 | goto range_not_matched; | |
793 | ||
794 | goto matched; | |
795 | } | |
796 | # else | |
797 | hcollseq = collseq[cend]; | |
798 | # endif | |
799 | } | |
800 | ||
801 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) | |
802 | goto matched; | |
803 | } | |
804 | # if WIDE_CHAR_VERSION | |
805 | range_not_matched: | |
806 | # endif | |
807 | #else | |
808 | /* We use a boring value comparison of the character | |
809 | values. This is better than comparing using | |
810 | `strcoll' since the latter would have surprising | |
811 | and sometimes fatal consequences. */ | |
812 | UCHAR cend = *p++; | |
813 | ||
814 | if (!(flags & FNM_NOESCAPE) && cend == L('\\')) | |
815 | cend = *p++; | |
816 | if (cend == L('\0')) | |
817 | return FNM_NOMATCH; | |
818 | ||
819 | /* It is a range. */ | |
820 | if (cold <= fn && fn <= cend) | |
821 | goto matched; | |
822 | #endif | |
823 | ||
824 | c = *p++; | |
825 | } | |
826 | } | |
827 | ||
828 | if (c == L(']')) | |
829 | break; | |
830 | } | |
831 | ||
832 | if (!not) | |
833 | return FNM_NOMATCH; | |
834 | break; | |
835 | ||
836 | matched: | |
837 | /* Skip the rest of the [...] that already matched. */ | |
838 | while ((c = *p++) != L (']')) | |
839 | { | |
840 | if (c == L('\0')) | |
841 | /* [... (unterminated) loses. */ | |
842 | return FNM_NOMATCH; | |
843 | ||
844 | if (!(flags & FNM_NOESCAPE) && c == L('\\')) | |
845 | { | |
846 | if (*p == L('\0')) | |
847 | return FNM_NOMATCH; | |
848 | /* XXX 1003.2d11 is unclear if this is right. */ | |
849 | ++p; | |
850 | } | |
851 | else if (c == L('[') && *p == L(':')) | |
852 | { | |
853 | int c1 = 0; | |
854 | const CHAR *startp = p; | |
855 | ||
856 | while (1) | |
857 | { | |
858 | c = *++p; | |
859 | if (++c1 == CHAR_CLASS_MAX_LENGTH) | |
860 | return FNM_NOMATCH; | |
861 | ||
862 | if (*p == L(':') && p[1] == L(']')) | |
863 | break; | |
864 | ||
865 | if (c < L('a') || c >= L('z')) | |
866 | { | |
867 | p = startp - 2; | |
868 | break; | |
869 | } | |
870 | } | |
871 | p += 2; | |
872 | } | |
873 | else if (c == L('[') && *p == L('=')) | |
874 | { | |
875 | c = *++p; | |
876 | if (c == L('\0')) | |
877 | return FNM_NOMATCH; | |
878 | c = *++p; | |
879 | if (c != L('=') || p[1] != L(']')) | |
880 | return FNM_NOMATCH; | |
881 | p += 2; | |
882 | } | |
883 | else if (c == L('[') && *p == L('.')) | |
884 | { | |
885 | while (1) | |
886 | { | |
887 | c = *++p; | |
888 | if (c == L('\0')) | |
889 | return FNM_NOMATCH; | |
890 | ||
891 | if (c == L('.') && p[1] == L(']')) | |
892 | break; | |
893 | } | |
894 | p += 2; | |
895 | } | |
896 | } | |
897 | if (not) | |
898 | return FNM_NOMATCH; | |
899 | } | |
900 | break; | |
901 | ||
902 | case L('+'): | |
903 | case L('@'): | |
904 | case L('!'): | |
905 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') | |
906 | { | |
907 | int res = EXT (c, p, n, string_end, no_leading_period, flags, | |
908 | alloca_used); | |
909 | if (res != -1) | |
910 | return res; | |
911 | } | |
912 | goto normal_match; | |
913 | ||
914 | case L('/'): | |
915 | if (NO_LEADING_PERIOD (flags)) | |
916 | { | |
917 | if (n == string_end || c != (UCHAR) *n) | |
918 | return FNM_NOMATCH; | |
919 | ||
920 | new_no_leading_period = 1; | |
921 | break; | |
922 | } | |
923 | /* FALLTHROUGH */ | |
924 | default: | |
925 | normal_match: | |
926 | if (n == string_end || c != FOLD ((UCHAR) *n)) | |
927 | return FNM_NOMATCH; | |
928 | } | |
929 | ||
930 | no_leading_period = new_no_leading_period; | |
931 | ++n; | |
932 | } | |
933 | ||
934 | if (n == string_end) | |
935 | return 0; | |
936 | ||
937 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/')) | |
938 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ | |
939 | return 0; | |
940 | ||
941 | return FNM_NOMATCH; | |
942 | } | |
943 | ||
944 | ||
945 | static const CHAR * | |
946 | END (const CHAR *pattern) | |
947 | { | |
948 | const CHAR *p = pattern; | |
949 | ||
950 | while (1) | |
951 | if (*++p == L('\0')) | |
952 | /* This is an invalid pattern. */ | |
953 | return pattern; | |
954 | else if (*p == L('[')) | |
955 | { | |
956 | /* Handle brackets special. */ | |
957 | if (posixly_correct == 0) | |
958 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
959 | ||
960 | /* Skip the not sign. We have to recognize it because of a possibly | |
961 | following ']'. */ | |
962 | if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) | |
963 | ++p; | |
964 | /* A leading ']' is recognized as such. */ | |
965 | if (*p == L(']')) | |
966 | ++p; | |
967 | /* Skip over all characters of the list. */ | |
968 | while (*p != L(']')) | |
969 | if (*p++ == L('\0')) | |
970 | /* This is no valid pattern. */ | |
971 | return pattern; | |
972 | } | |
973 | else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') | |
974 | || *p == L('!')) && p[1] == L('(')) | |
975 | { | |
976 | p = END (p + 1); | |
977 | if (*p == L('\0')) | |
978 | /* This is an invalid pattern. */ | |
979 | return pattern; | |
980 | } | |
981 | else if (*p == L(')')) | |
982 | break; | |
983 | ||
984 | return p + 1; | |
985 | } | |
986 | ||
987 | ||
988 | static int | |
989 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, | |
990 | int no_leading_period, int flags, size_t alloca_used) | |
991 | { | |
992 | const CHAR *startp; | |
993 | int level; | |
994 | struct patternlist | |
995 | { | |
996 | struct patternlist *next; | |
997 | CHAR malloced; | |
998 | CHAR str[0]; | |
999 | } *list = NULL; | |
1000 | struct patternlist **lastp = &list; | |
1001 | size_t pattern_len = STRLEN (pattern); | |
1002 | int any_malloced = 0; | |
1003 | const CHAR *p; | |
1004 | const CHAR *rs; | |
1005 | int retval = 0; | |
1006 | ||
1007 | /* Parse the pattern. Store the individual parts in the list. */ | |
1008 | level = 0; | |
1009 | for (startp = p = pattern + 1; level >= 0; ++p) | |
1010 | if (*p == L('\0')) | |
1011 | { | |
1012 | /* This is an invalid pattern. */ | |
1013 | retval = -1; | |
1014 | goto out; | |
1015 | } | |
1016 | else if (*p == L('[')) | |
1017 | { | |
1018 | /* Handle brackets special. */ | |
1019 | if (posixly_correct == 0) | |
1020 | posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; | |
1021 | ||
1022 | /* Skip the not sign. We have to recognize it because of a possibly | |
1023 | following ']'. */ | |
1024 | if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) | |
1025 | ++p; | |
1026 | /* A leading ']' is recognized as such. */ | |
1027 | if (*p == L(']')) | |
1028 | ++p; | |
1029 | /* Skip over all characters of the list. */ | |
1030 | while (*p != L(']')) | |
1031 | if (*p++ == L('\0')) | |
1032 | { | |
1033 | /* This is no valid pattern. */ | |
1034 | retval = -1; | |
1035 | goto out; | |
1036 | } | |
1037 | } | |
1038 | else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') | |
1039 | || *p == L('!')) && p[1] == L('(')) | |
1040 | /* Remember the nesting level. */ | |
1041 | ++level; | |
1042 | else if (*p == L(')')) | |
1043 | { | |
1044 | if (level-- == 0) | |
1045 | { | |
1046 | /* This means we found the end of the pattern. */ | |
1047 | #define NEW_PATTERN \ | |
1048 | struct patternlist *newp; \ | |
1049 | size_t slen = (opt == L('?') || opt == L('@') \ | |
1050 | ? pattern_len : (p - startp + 1)); \ | |
1051 | slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \ | |
1052 | int malloced = ! __libc_use_alloca (alloca_used + slen); \ | |
1053 | if (__builtin_expect (malloced, 0)) \ | |
1054 | { \ | |
1055 | newp = malloc (slen); \ | |
1056 | if (newp == NULL) \ | |
1057 | { \ | |
1058 | retval = -2; \ | |
1059 | goto out; \ | |
1060 | } \ | |
1061 | any_malloced = 1; \ | |
1062 | } \ | |
1063 | else \ | |
1064 | newp = alloca_account (slen, alloca_used); \ | |
1065 | newp->next = NULL; \ | |
1066 | newp->malloced = malloced; \ | |
1067 | *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \ | |
1068 | *lastp = newp; \ | |
1069 | lastp = &newp->next | |
1070 | NEW_PATTERN; | |
1071 | } | |
1072 | } | |
1073 | else if (*p == L('|')) | |
1074 | { | |
1075 | if (level == 0) | |
1076 | { | |
1077 | NEW_PATTERN; | |
1078 | startp = p + 1; | |
1079 | } | |
1080 | } | |
1081 | assert (list != NULL); | |
1082 | assert (p[-1] == L(')')); | |
1083 | #undef NEW_PATTERN | |
1084 | ||
1085 | switch (opt) | |
1086 | { | |
1087 | case L('*'): | |
1088 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, | |
1089 | alloca_used) == 0) | |
1090 | goto success; | |
1091 | /* FALLTHROUGH */ | |
1092 | ||
1093 | case L('+'): | |
1094 | do | |
1095 | { | |
1096 | for (rs = string; rs <= string_end; ++rs) | |
1097 | /* First match the prefix with the current pattern with the | |
1098 | current pattern. */ | |
1099 | if (FCT (list->str, string, rs, no_leading_period, | |
1100 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, | |
1101 | NULL, alloca_used) == 0 | |
1102 | /* This was successful. Now match the rest with the rest | |
1103 | of the pattern. */ | |
1104 | && (FCT (p, rs, string_end, | |
1105 | rs == string | |
1106 | ? no_leading_period | |
1107 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, | |
1108 | flags & FNM_FILE_NAME | |
1109 | ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0 | |
1110 | /* This didn't work. Try the whole pattern. */ | |
1111 | || (rs != string | |
1112 | && FCT (pattern - 1, rs, string_end, | |
1113 | rs == string | |
1114 | ? no_leading_period | |
1115 | : (rs[-1] == '/' && NO_LEADING_PERIOD (flags) | |
1116 | ? 1 : 0), | |
1117 | flags & FNM_FILE_NAME | |
1118 | ? flags : flags & ~FNM_PERIOD, NULL, | |
1119 | alloca_used) == 0))) | |
1120 | /* It worked. Signal success. */ | |
1121 | goto success; | |
1122 | } | |
1123 | while ((list = list->next) != NULL); | |
1124 | ||
1125 | /* None of the patterns lead to a match. */ | |
1126 | retval = FNM_NOMATCH; | |
1127 | break; | |
1128 | ||
1129 | case L('?'): | |
1130 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, | |
1131 | alloca_used) == 0) | |
1132 | goto success; | |
1133 | /* FALLTHROUGH */ | |
1134 | ||
1135 | case L('@'): | |
1136 | do | |
1137 | /* I cannot believe it but `strcat' is actually acceptable | |
1138 | here. Match the entire string with the prefix from the | |
1139 | pattern list and the rest of the pattern following the | |
1140 | pattern list. */ | |
1141 | if (FCT (STRCAT (list->str, p), string, string_end, | |
1142 | no_leading_period, | |
1143 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, | |
1144 | NULL, alloca_used) == 0) | |
1145 | /* It worked. Signal success. */ | |
1146 | goto success; | |
1147 | while ((list = list->next) != NULL); | |
1148 | ||
1149 | /* None of the patterns lead to a match. */ | |
1150 | retval = FNM_NOMATCH; | |
1151 | break; | |
1152 | ||
1153 | case L('!'): | |
1154 | for (rs = string; rs <= string_end; ++rs) | |
1155 | { | |
1156 | struct patternlist *runp; | |
1157 | ||
1158 | for (runp = list; runp != NULL; runp = runp->next) | |
1159 | if (FCT (runp->str, string, rs, no_leading_period, | |
1160 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, | |
1161 | NULL, alloca_used) == 0) | |
1162 | break; | |
1163 | ||
1164 | /* If none of the patterns matched see whether the rest does. */ | |
1165 | if (runp == NULL | |
1166 | && (FCT (p, rs, string_end, | |
1167 | rs == string | |
1168 | ? no_leading_period | |
1169 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, | |
1170 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, | |
1171 | NULL, alloca_used) == 0)) | |
1172 | /* This is successful. */ | |
1173 | goto success; | |
1174 | } | |
1175 | ||
1176 | /* None of the patterns together with the rest of the pattern | |
1177 | lead to a match. */ | |
1178 | retval = FNM_NOMATCH; | |
1179 | break; | |
1180 | ||
1181 | default: | |
1182 | assert (! "Invalid extended matching operator"); | |
1183 | retval = -1; | |
1184 | break; | |
1185 | } | |
1186 | ||
1187 | success: | |
1188 | out: | |
1189 | if (any_malloced) | |
1190 | while (list != NULL) | |
1191 | { | |
1192 | struct patternlist *old = list; | |
1193 | list = list->next; | |
1194 | if (old->malloced) | |
1195 | free (old); | |
1196 | } | |
1197 | ||
1198 | return retval; | |
1199 | } | |
1200 | ||
1201 | ||
1202 | #undef FOLD /* From here to the end: undefine the helper macros (such as CHAR, FCT, EXT, STRUCT and L) that the code above is written in terms of; presumably so an includer can redefine them and include this file again -- TODO confirm against the includer.  */ | |
1203 | #undef CHAR | |
1204 | #undef UCHAR | |
1205 | #undef INT | |
1206 | #undef FCT | |
1207 | #undef EXT | |
1208 | #undef END | |
1209 | #undef STRUCT | |
1210 | #undef MEMPCPY | |
1211 | #undef MEMCHR | |
1212 | #undef STRCOLL | |
1213 | #undef STRLEN | |
1214 | #undef STRCAT | |
1215 | #undef L | |
1216 | #undef BTOWC | |
1217 | #undef WIDE_CHAR_VERSION | |
1218 | #undef FINDIDX |