]>
Commit | Line | Data |
---|---|---|
7117c2d2 JA |
1 | /* strmatch.c -- ksh-like extended pattern matching for the shell and filename |
2 | globbing. */ | |
3 | ||
95732b49 | 4 | /* Copyright (C) 1991-2005 Free Software Foundation, Inc. |
7117c2d2 JA |
5 | |
6 | This file is part of GNU Bash, the Bourne Again SHell. | |
7 | ||
8 | Bash is free software; you can redistribute it and/or modify it under | |
9 | the terms of the GNU General Public License as published by the Free | |
10 | Software Foundation; either version 2, or (at your option) any later | |
11 | version. | |
12 | ||
13 | Bash is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License along | |
19 | with Bash; see the file COPYING. If not, write to the Free Software | |
20 | Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ | |
21 | ||
22 | #include <config.h> | |
23 | ||
24 | #include <stdio.h> /* for debugging */ | |
25 | ||
26 | #include "strmatch.h" | |
27 | #include <chartypes.h> | |
28 | ||
29 | #include "bashansi.h" | |
30 | #include "shmbutil.h" | |
31 | #include "xmalloc.h" | |
32 | ||
33 | /* First, compile `sm_loop.c' for single-byte characters. */ | |
34 | #define CHAR unsigned char | |
35 | #define U_CHAR unsigned char | |
36 | #define XCHAR char | |
37 | #define INT int | |
38 | #define L(CS) CS | |
39 | #define INVALID -1 | |
40 | ||
41 | #undef STREQ | |
42 | #undef STREQN | |
43 | #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0) | |
44 | #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0) | |
45 | ||
46 | /* We use strcoll(3) for range comparisons in bracket expressions, | |
47 | even though it can have unwanted side effects in locales | |
48 | other than POSIX or US. For instance, in the de locale, [A-Z] matches | |
49 | all characters. */ | |
50 | ||
51 | #if defined (HAVE_STRCOLL) | |
/* Order the single-byte characters C1 and C2 for bracket-expression range
   and equivalence tests.  Only the low eight bits of each argument are
   considered.  Returns 0 when the two characters are identical.  Otherwise
   returns the strcoll(3) ordering of the two one-character strings, falling
   back to the difference of the character codes when strcoll reports the
   strings as equal. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  char left[2], right[2];
  int order;

  /* Eight bits only. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return (0);

  left[0] = c1;
  left[1] = '\0';
  right[0] = c2;
  right[1] = '\0';

  order = strcoll (left, right);
  return (order != 0 ? order : c1 - c2);
}
74 | #else /* !HAVE_STRCOLL */ | |
75 | # define rangecmp(c1, c2) ((int)(c1) - (int)(c2)) | |
76 | #endif /* !HAVE_STRCOLL */ | |
77 | ||
78 | #if defined (HAVE_STRCOLL) | |
/* Return 1 if rangecmp reports C1 and C2 as equal (a result of 0),
   and 0 otherwise. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  return (rangecmp (c1, c2) ? 0 : 1);
}
85 | #else | |
86 | # define collequiv(c1, c2) ((c1) == (c2)) | |
87 | #endif | |
88 | ||
89 | #define _COLLSYM _collsym | |
90 | #define __COLLSYM __collsym | |
91 | #define POSIXCOLL posix_collsyms | |
92 | #include "collsyms.h" | |
93 | ||
94 | static int | |
95 | collsym (s, len) | |
95732b49 | 96 | CHAR *s; |
7117c2d2 JA |
97 | int len; |
98 | { | |
99 | register struct _collsym *csp; | |
95732b49 | 100 | char *x; |
7117c2d2 | 101 | |
95732b49 | 102 | x = (char *)s; |
7117c2d2 JA |
103 | for (csp = posix_collsyms; csp->name; csp++) |
104 | { | |
95732b49 | 105 | if (STREQN(csp->name, x, len) && csp->name[len] == '\0') |
7117c2d2 JA |
106 | return (csp->code); |
107 | } | |
108 | if (len == 1) | |
109 | return s[0]; | |
110 | return INVALID; | |
111 | } | |
112 | ||
113 | /* unibyte character classification */ | |
114 | #if !defined (isascii) && !defined (HAVE_ISASCII) | |
115 | # define isascii(c) ((unsigned int)(c) <= 0177) | |
116 | #endif | |
117 | ||
/* Character classes usable inside bracket expressions ([:name:]).
   CC_NO_CLASS (0) means "not a known class name". */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, in the same order as `enum char_class' so that an index
   into this table can be converted directly to the enumeration value. */
static char const *const cclass_name[] =
  {
    "",		/* CC_NO_CLASS */
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name, including the CC_NO_CLASS slot. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
133 | ||
134 | static int | |
135 | is_cclass (c, name) | |
136 | int c; | |
137 | const char *name; | |
138 | { | |
139 | enum char_class char_class = CC_NO_CLASS; | |
140 | int i, result; | |
141 | ||
142 | for (i = 1; i < N_CHAR_CLASS; i++) | |
143 | { | |
144 | if (STREQ (name, cclass_name[i])) | |
145 | { | |
146 | char_class = (enum char_class)i; | |
147 | break; | |
148 | } | |
149 | } | |
150 | ||
151 | if (char_class == 0) | |
152 | return -1; | |
153 | ||
154 | switch (char_class) | |
155 | { | |
156 | case CC_ASCII: | |
157 | result = isascii (c); | |
158 | break; | |
159 | case CC_ALNUM: | |
160 | result = ISALNUM (c); | |
161 | break; | |
162 | case CC_ALPHA: | |
163 | result = ISALPHA (c); | |
164 | break; | |
165 | case CC_BLANK: | |
166 | result = ISBLANK (c); | |
167 | break; | |
168 | case CC_CNTRL: | |
169 | result = ISCNTRL (c); | |
170 | break; | |
171 | case CC_DIGIT: | |
172 | result = ISDIGIT (c); | |
173 | break; | |
174 | case CC_GRAPH: | |
175 | result = ISGRAPH (c); | |
176 | break; | |
177 | case CC_LOWER: | |
178 | result = ISLOWER (c); | |
179 | break; | |
180 | case CC_PRINT: | |
181 | result = ISPRINT (c); | |
182 | break; | |
183 | case CC_PUNCT: | |
184 | result = ISPUNCT (c); | |
185 | break; | |
186 | case CC_SPACE: | |
187 | result = ISSPACE (c); | |
188 | break; | |
189 | case CC_UPPER: | |
190 | result = ISUPPER (c); | |
191 | break; | |
192 | case CC_WORD: | |
193 | result = (ISALNUM (c) || c == '_'); | |
194 | break; | |
195 | case CC_XDIGIT: | |
196 | result = ISXDIGIT (c); | |
197 | break; | |
198 | default: | |
199 | result = -1; | |
200 | break; | |
201 | } | |
202 | ||
203 | return result; | |
204 | } | |
205 | ||
206 | /* Now include `sm_loop.c' for single-byte characters. */ | |
/* FOLD(c) yields C converted to `unsigned char', lowercased with TOLOWER
   when FNM_CASEFOLD is set in the `flags' variable in scope at the point
   of use.  The argument is parenthesized so the cast applies to the whole
   expression passed in rather than only to its first operand. */
# define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))
211 | ||
212 | #define FCT internal_strmatch | |
213 | #define GMATCH gmatch | |
214 | #define COLLSYM collsym | |
215 | #define PARSE_COLLSYM parse_collsym | |
216 | #define BRACKMATCH brackmatch | |
217 | #define PATSCAN patscan | |
218 | #define STRCOMPARE strcompare | |
219 | #define EXTMATCH extmatch | |
220 | #define STRCHR(S, C) strchr((S), (C)) | |
221 | #define STRCOLL(S1, S2) strcoll((S1), (S2)) | |
222 | #define STRLEN(S) strlen(S) | |
223 | #define STRCMP(S1, S2) strcmp((S1), (S2)) | |
224 | #define RANGECMP(C1, C2) rangecmp((C1), (C2)) | |
225 | #define COLLEQUIV(C1, C2) collequiv((C1), (C2)) | |
226 | #define CTYPE_T enum char_class | |
227 | #define IS_CCLASS(C, S) is_cclass((C), (S)) | |
228 | #include "sm_loop.c" | |
229 | ||
230 | #if HANDLE_MULTIBYTE | |
231 | ||
232 | # define CHAR wchar_t | |
233 | # define U_CHAR wint_t | |
234 | # define XCHAR wchar_t | |
235 | # define INT wint_t | |
236 | # define L(CS) L##CS | |
237 | # define INVALID WEOF | |
238 | ||
239 | # undef STREQ | |
240 | # undef STREQN | |
241 | # define STREQ(s1, s2) ((wcscmp (s1, s2) == 0)) | |
242 | # define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0) | |
243 | ||
/* Order the wide characters C1 and C2 for bracket-expression range tests.
   Returns 0 when the two are identical; otherwise returns the wcscoll(3)
   ordering of the two one-character wide strings. */
static int
rangecmp_wc (c1, c2)
     wint_t c1, c2;
{
  static wchar_t s1[2] = { L' ', L'\0' };
  static wchar_t s2[2] = { L' ', L'\0' };

  if (c1 == c2)
    return 0;

  s1[0] = c1;
  s2[0] = c2;

  return (wcscoll (s1, s2));
}
260 | ||
/* Return 1 if the wide character C is the same as EQUIV, 0 otherwise. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return (c == equiv);
}
267 | ||
268 | /* Helper function for collating symbol. */ | |
269 | # define _COLLSYM _collwcsym | |
270 | # define __COLLSYM __collwcsym | |
271 | # define POSIXCOLL posix_collwcsyms | |
272 | # include "collsyms.h" | |
273 | ||
274 | static wint_t | |
275 | collwcsym (s, len) | |
276 | wchar_t *s; | |
277 | int len; | |
278 | { | |
279 | register struct _collwcsym *csp; | |
280 | ||
281 | for (csp = posix_collwcsyms; csp->name; csp++) | |
282 | { | |
283 | if (STREQN(csp->name, s, len) && csp->name[len] == L'\0') | |
284 | return (csp->code); | |
285 | } | |
286 | if (len == 1) | |
287 | return s[0]; | |
288 | return INVALID; | |
289 | } | |
290 | ||
/* Test whether the wide character WC belongs to the character class called
   NAME.  Returns non-zero if it does, 0 if it does not, and -1 if NAME
   cannot be converted to a multibyte string, is not a class known to
   wctype(3), or the conversion buffer cannot be allocated.

   Two names get special treatment: `ascii' is handled here when wctype
   does not recognize it, and `word' is mapped to `alnum' plus the
   underscore character. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  const wchar_t *src;
  mbstate_t state;
  size_t mbslength, mbsize;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      /* A wide character with no single-byte equivalent is not ASCII. */
      if ((c = wctob (wc)) == EOF)
	return 0;
      else
	return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* wctype takes a multibyte class name, so convert NAME first. */
  memset (&state, '\0', sizeof (mbstate_t));
  mbsize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbsize);
  if (mbs == 0)
    return -1;

  /* wcsrtombs updates the source pointer it is given; hand it a copy so
     NAME itself is left alone. */
  src = name;
  mbslength = wcsrtombs (mbs, &src, mbsize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
336 | ||
337 | /* Now include `sm_loop.c' for multibyte characters. */ | |
338 | #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c)) | |
339 | #define FCT internal_wstrmatch | |
340 | #define GMATCH gmatch_wc | |
341 | #define COLLSYM collwcsym | |
342 | #define PARSE_COLLSYM parse_collwcsym | |
343 | #define BRACKMATCH brackmatch_wc | |
344 | #define PATSCAN patscan_wc | |
345 | #define STRCOMPARE wscompare | |
346 | #define EXTMATCH extmatch_wc | |
347 | #define STRCHR(S, C) wcschr((S), (C)) | |
348 | #define STRCOLL(S1, S2) wcscoll((S1), (S2)) | |
349 | #define STRLEN(S) wcslen(S) | |
350 | #define STRCMP(S1, S2) wcscmp((S1), (S2)) | |
351 | #define RANGECMP(C1, C2) rangecmp_wc((C1), (C2)) | |
352 | #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2)) | |
353 | #define CTYPE_T enum char_class | |
354 | #define IS_CCLASS(C, S) is_wcclass((C), (S)) | |
355 | #include "sm_loop.c" | |
356 | ||
357 | #endif /* HANDLE_MULTIBYTE */ | |
358 | ||
/* Match STRING against PATTERN, honoring FLAGS, and return the matcher's
   result.  When multibyte support is compiled in and the current locale
   uses multibyte characters, both arguments are converted to wide-character
   strings and matched with internal_wstrmatch.  In a single-byte locale, or
   if either conversion fails, the single-byte matcher internal_strmatch is
   used on the original strings. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  wchar_t *wide_pat, *wide_str;
  size_t nconv;
  int matched;

  if (MB_CUR_MAX != 1)
    {
      nconv = xdupmbstowcs (&wide_pat, NULL, pattern);
      if (nconv != (size_t)-1 && nconv != (size_t)-2)
	{
	  nconv = xdupmbstowcs (&wide_str, NULL, string);
	  if (nconv != (size_t)-1 && nconv != (size_t)-2)
	    {
	      matched = internal_wstrmatch (wide_pat, wide_str, flags);

	      free (wide_pat);
	      free (wide_str);

	      return matched;
	    }

	  /* Only the pattern was converted; release it before falling
	     back to the single-byte matcher. */
	  free (wide_pat);
	}
    }
#endif /* HANDLE_MULTIBYTE */

  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
}