]>
Commit | Line | Data |
---|---|---|
7117c2d2 JA |
1 | /* strmatch.c -- ksh-like extended pattern matching for the shell and filename |
2 | globbing. */ | |
3 | ||
495aee44 | 4 | /* Copyright (C) 1991-2011 Free Software Foundation, Inc. |
7117c2d2 JA |
5 | |
6 | This file is part of GNU Bash, the Bourne Again SHell. | |
7 | ||
3185942a JA |
8 | Bash is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation, either version 3 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | Bash is distributed in the hope that it will be useful, | |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | GNU General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with Bash. If not, see <http://www.gnu.org/licenses/>. | |
20 | */ | |
7117c2d2 JA |
21 | |
22 | #include <config.h> | |
23 | ||
24 | #include <stdio.h> /* for debugging */ | |
25 | ||
26 | #include "strmatch.h" | |
27 | #include <chartypes.h> | |
28 | ||
29 | #include "bashansi.h" | |
30 | #include "shmbutil.h" | |
31 | #include "xmalloc.h" | |
32 | ||
/* First, compile `sm_loop.c' for single-byte characters. */

/* Character-type parameters consumed by the matcher template. */
#define CHAR	unsigned char
#define U_CHAR	unsigned char
#define XCHAR	char
#define INT	int
#define L(CS)	CS
/* Parenthesized so the expansion stays a single operand wherever it is
   used in an expression. */
#define INVALID	(-1)

/* String equality helpers.  The first-character comparison is a cheap
   early-out before the library call.  Note that A and B are evaluated
   more than once, so callers must not pass expressions with side
   effects. */
#undef STREQ
#undef STREQN
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp ((a), (b)) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp ((a), (b), (n)) == 0)
45 | ||
46 | /* We use strcoll(3) for range comparisons in bracket expressions, | |
47 | even though it can have unwanted side effects in locales | |
48 | other than POSIX or US. For instance, in the de locale, [A-Z] matches | |
49 | all characters. */ | |
50 | ||
#if defined (HAVE_STRCOLL)
/* Order two single-byte characters for bracket-expression ranges.
   Only the low eight bits of C1 and C2 are considered.  Returns 0 when
   they are the same byte; otherwise the strcoll(3) ordering of the two
   one-character strings, falling back to the difference of the byte
   values when strcoll reports them as equal. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  /* One-character scratch strings handed to strcoll; static, so this
     function keeps state between calls. */
  static char lhs[2] = { ' ', '\0' };
  static char rhs[2] = { ' ', '\0' };
  int order;

  /* Eight bits only.  Period. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 != c2)
    {
      lhs[0] = c1;
      rhs[0] = c2;

      order = strcoll (lhs, rhs);
      /* Distinct bytes that collate equal are ordered by byte value. */
      return (order != 0) ? order : (c1 - c2);
    }

  return (0);
}
#else /* !HAVE_STRCOLL */
/* Without strcoll, ranges are ordered by plain character value. */
#  define rangecmp(c1, c2)	((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */
77 | ||
#if defined (HAVE_STRCOLL)
/* Return 1 if C1 and C2 compare equal under rangecmp, 0 otherwise. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  int order;

  order = rangecmp (c1, c2);
  return (order == 0) ? 1 : 0;
}
#else
/* Without strcoll, two characters are equivalent only when identical. */
#  define collequiv(c1, c2)	((c1) == (c2))
#endif
88 | ||
89 | #define _COLLSYM _collsym | |
90 | #define __COLLSYM __collsym | |
91 | #define POSIXCOLL posix_collsyms | |
92 | #include "collsyms.h" | |
93 | ||
94 | static int | |
95 | collsym (s, len) | |
95732b49 | 96 | CHAR *s; |
7117c2d2 JA |
97 | int len; |
98 | { | |
99 | register struct _collsym *csp; | |
95732b49 | 100 | char *x; |
7117c2d2 | 101 | |
95732b49 | 102 | x = (char *)s; |
7117c2d2 JA |
103 | for (csp = posix_collsyms; csp->name; csp++) |
104 | { | |
95732b49 | 105 | if (STREQN(csp->name, x, len) && csp->name[len] == '\0') |
7117c2d2 JA |
106 | return (csp->code); |
107 | } | |
108 | if (len == 1) | |
109 | return s[0]; | |
110 | return INVALID; | |
111 | } | |
112 | ||
/* unibyte character classification */
#if !defined (isascii) && !defined (HAVE_ISASCII)
#  define isascii(c)	((unsigned int)(c) <= 0177)
#endif

/* Identifiers for the bracket-expression character classes.  Each value
   is the index of the matching entry in cclass_name[] below, so the two
   lists must be kept in the same order. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII = 1,
    CC_ALNUM = 2,
    CC_ALPHA = 3,
    CC_BLANK = 4,
    CC_CNTRL = 5,
    CC_DIGIT = 6,
    CC_GRAPH = 7,
    CC_LOWER = 8,
    CC_PRINT = 9,
    CC_PUNCT = 10,
    CC_SPACE = 11,
    CC_UPPER = 12,
    CC_WORD = 13,
    CC_XDIGIT = 14
  };

/* Class names, indexed by enum char_class.  Slot 0 (CC_NO_CLASS) is an
   empty placeholder. */
static char const *const cclass_name[] =
  {
    "",
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name[], including the placeholder. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
133 | ||
134 | static int | |
135 | is_cclass (c, name) | |
136 | int c; | |
137 | const char *name; | |
138 | { | |
139 | enum char_class char_class = CC_NO_CLASS; | |
140 | int i, result; | |
141 | ||
142 | for (i = 1; i < N_CHAR_CLASS; i++) | |
143 | { | |
144 | if (STREQ (name, cclass_name[i])) | |
145 | { | |
146 | char_class = (enum char_class)i; | |
147 | break; | |
148 | } | |
149 | } | |
150 | ||
151 | if (char_class == 0) | |
152 | return -1; | |
153 | ||
154 | switch (char_class) | |
155 | { | |
156 | case CC_ASCII: | |
157 | result = isascii (c); | |
158 | break; | |
159 | case CC_ALNUM: | |
160 | result = ISALNUM (c); | |
161 | break; | |
162 | case CC_ALPHA: | |
163 | result = ISALPHA (c); | |
164 | break; | |
165 | case CC_BLANK: | |
166 | result = ISBLANK (c); | |
167 | break; | |
168 | case CC_CNTRL: | |
169 | result = ISCNTRL (c); | |
170 | break; | |
171 | case CC_DIGIT: | |
172 | result = ISDIGIT (c); | |
173 | break; | |
174 | case CC_GRAPH: | |
175 | result = ISGRAPH (c); | |
176 | break; | |
177 | case CC_LOWER: | |
178 | result = ISLOWER (c); | |
179 | break; | |
180 | case CC_PRINT: | |
181 | result = ISPRINT (c); | |
182 | break; | |
183 | case CC_PUNCT: | |
184 | result = ISPUNCT (c); | |
185 | break; | |
186 | case CC_SPACE: | |
187 | result = ISSPACE (c); | |
188 | break; | |
189 | case CC_UPPER: | |
190 | result = ISUPPER (c); | |
191 | break; | |
192 | case CC_WORD: | |
193 | result = (ISALNUM (c) || c == '_'); | |
194 | break; | |
195 | case CC_XDIGIT: | |
196 | result = ISXDIGIT (c); | |
197 | break; | |
198 | default: | |
199 | result = -1; | |
200 | break; | |
201 | } | |
202 | ||
203 | return result; | |
204 | } | |
205 | ||
206 | /* Now include `sm_loop.c' for single-byte characters. */ | |
207 | /* The result of FOLD is an `unsigned char' */ | |
208 | # define FOLD(c) ((flags & FNM_CASEFOLD) \ | |
209 | ? TOLOWER ((unsigned char)c) \ | |
210 | : ((unsigned char)c)) | |
211 | ||
212 | #define FCT internal_strmatch | |
213 | #define GMATCH gmatch | |
214 | #define COLLSYM collsym | |
215 | #define PARSE_COLLSYM parse_collsym | |
216 | #define BRACKMATCH brackmatch | |
217 | #define PATSCAN patscan | |
218 | #define STRCOMPARE strcompare | |
219 | #define EXTMATCH extmatch | |
220 | #define STRCHR(S, C) strchr((S), (C)) | |
221 | #define STRCOLL(S1, S2) strcoll((S1), (S2)) | |
222 | #define STRLEN(S) strlen(S) | |
223 | #define STRCMP(S1, S2) strcmp((S1), (S2)) | |
224 | #define RANGECMP(C1, C2) rangecmp((C1), (C2)) | |
225 | #define COLLEQUIV(C1, C2) collequiv((C1), (C2)) | |
226 | #define CTYPE_T enum char_class | |
227 | #define IS_CCLASS(C, S) is_cclass((C), (S)) | |
228 | #include "sm_loop.c" | |
229 | ||
230 | #if HANDLE_MULTIBYTE | |
231 | ||
/* Character-type parameters for the wide-character instantiation of the
   matcher template. */
#  define CHAR		wchar_t
#  define U_CHAR	wint_t
#  define XCHAR		wchar_t
#  define INT		wint_t
#  define L(CS)		L##CS
#  define INVALID	WEOF

/* Wide-string equality helpers, replacing the single-byte versions.
   STREQN compares the first characters before calling wcsncmp. */
#  undef STREQ
#  undef STREQN
#  define STREQ(s1, s2)		(wcscmp ((s1), (s2)) == 0)
#  define STREQN(a, b, n)	((a)[0] == (b)[0] && wcsncmp ((a), (b), (n)) == 0)
243 | ||
495aee44 CR |
244 | extern char *mbsmbchar __P((const char *)); |
245 | ||
7117c2d2 JA |
/* Order two wide characters for bracket-expression ranges.  Returns 0
   when C1 and C2 are the same character; otherwise the wcscoll(3)
   ordering of the two one-character wide strings. */
static int
rangecmp_wc (c1, c2)
     wint_t c1, c2;
{
  /* One-character scratch strings handed to wcscoll; static, so this
     function keeps state between calls. */
  static wchar_t lhs[2] = { L' ', L'\0' };
  static wchar_t rhs[2] = { L' ', L'\0' };

  if (c1 != c2)
    {
      lhs[0] = c1;
      rhs[0] = c2;
      return (wcscoll (lhs, rhs));
    }

  return 0;
}
261 | ||
/* Return 1 if the wide characters C and EQUIV are the same character,
   0 otherwise.  This is an identity test; it does not consult the
   locale's collation. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return (c == equiv) ? 1 : 0;
}
268 | ||
269 | /* Helper function for collating symbol. */ | |
270 | # define _COLLSYM _collwcsym | |
271 | # define __COLLSYM __collwcsym | |
272 | # define POSIXCOLL posix_collwcsyms | |
273 | # include "collsyms.h" | |
274 | ||
275 | static wint_t | |
276 | collwcsym (s, len) | |
277 | wchar_t *s; | |
278 | int len; | |
279 | { | |
280 | register struct _collwcsym *csp; | |
281 | ||
282 | for (csp = posix_collwcsyms; csp->name; csp++) | |
283 | { | |
284 | if (STREQN(csp->name, s, len) && csp->name[len] == L'\0') | |
285 | return (csp->code); | |
286 | } | |
287 | if (len == 1) | |
288 | return s[0]; | |
289 | return INVALID; | |
290 | } | |
291 | ||
/* Test whether the wide character WC belongs to the character class
   called NAME (a wide string such as L"alpha" or L"word").

   Returns -1 if the class name cannot be converted to a multibyte
   string, if memory for the conversion cannot be allocated, or if
   wctype(3) does not recognize the name.  Otherwise returns zero when
   WC is not in the class and non-zero when it is.

   Two names get special treatment:
     "ascii"  handled here when wctype(3) does not know it: true when WC
	      maps to a single byte no greater than 0x7F.
     "word"   looked up as "alnum", with `_' additionally accepted. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  mbstate_t state;
  size_t mbslength, mbssize;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      if ((c = wctob (wc)) == EOF)
	return 0;
      else
	return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* wctype(3) takes a multibyte name, so convert NAME first.  The buffer
     size is computed once, before the conversion is handed a pointer to
     NAME. */
  memset (&state, '\0', sizeof (mbstate_t));
  mbssize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbssize);
  if (mbs == 0)
    return -1;		/* out of memory: report as an unknown class */

  mbslength = wcsrtombs (mbs, (const wchar_t **)&name, mbssize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
337 | ||
338 | /* Now include `sm_loop.c' for multibyte characters. */ | |
339 | #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c)) | |
340 | #define FCT internal_wstrmatch | |
341 | #define GMATCH gmatch_wc | |
342 | #define COLLSYM collwcsym | |
343 | #define PARSE_COLLSYM parse_collwcsym | |
344 | #define BRACKMATCH brackmatch_wc | |
345 | #define PATSCAN patscan_wc | |
346 | #define STRCOMPARE wscompare | |
347 | #define EXTMATCH extmatch_wc | |
348 | #define STRCHR(S, C) wcschr((S), (C)) | |
349 | #define STRCOLL(S1, S2) wcscoll((S1), (S2)) | |
350 | #define STRLEN(S) wcslen(S) | |
351 | #define STRCMP(S1, S2) wcscmp((S1), (S2)) | |
352 | #define RANGECMP(C1, C2) rangecmp_wc((C1), (C2)) | |
353 | #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2)) | |
354 | #define CTYPE_T enum char_class | |
355 | #define IS_CCLASS(C, S) is_wcclass((C), (S)) | |
356 | #include "sm_loop.c" | |
357 | ||
358 | #endif /* HANDLE_MULTIBYTE */ | |
359 | ||
/* Match STRING against PATTERN, passing FLAGS through to the matcher
   unchanged, and return the matcher's result.

   When built with HANDLE_MULTIBYTE, the wide-character matcher is used
   only if all of the following hold; in every other case the
   single-byte matcher runs on the original strings:
     - mbsmbchar() returns non-zero for STRING or PATTERN (presumably
       meaning a multibyte character is present -- confirm against its
       definition);
     - MB_CUR_MAX is greater than 1;
     - both arguments convert to wide strings via xdupmbstowcs().
   The wide copies are freed here before returning. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  /* Conversion failure is not an error: fall back to byte-wise matching.
     NOTE(review): nothing is freed on this path, so this relies on
     xdupmbstowcs leaving no allocation behind when it fails -- confirm. */
  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}