]> git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/smatch.c
Imported from ../bash-3.2.tar.gz.
[thirdparty/bash.git] / lib / glob / smatch.c
/* smatch.c -- ksh-like extended pattern matching for the shell and filename
   globbing. */
3
4 /* Copyright (C) 1991-2005 Free Software Foundation, Inc.
5
6 This file is part of GNU Bash, the Bourne Again SHell.
7
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
21
22 #include <config.h>
23
24 #include <stdio.h> /* for debugging */
25
26 #include "strmatch.h"
27 #include <chartypes.h>
28
29 #include "bashansi.h"
30 #include "shmbutil.h"
31 #include "xmalloc.h"
32
33 /* First, compile `sm_loop.c' for single-byte characters. */
/* Type and literal parameters used when `sm_loop.c' is compiled for
   single-byte strings.  L() leaves its argument unchanged, and INVALID
   is the value returned when a collating symbol cannot be resolved. */
#define CHAR    unsigned char
#define U_CHAR  unsigned char
#define XCHAR   char
#define INT     int
#define L(CS)   CS
#define INVALID (-1)

/* String equality helpers.  Both compare the first characters inline and
   only call strcmp/strncmp when those agree. */
#undef STREQ
#undef STREQN
#define STREQ(s1, s2)     ((s1)[0] == (s2)[0] && strcmp ((s1), (s2)) == 0)
#define STREQN(s1, s2, n) ((s1)[0] == (s2)[0] && strncmp ((s1), (s2), (n)) == 0)
45
46 /* We use strcoll(3) for range comparisons in bracket expressions,
47 even though it can have unwanted side effects in locales
48 other than POSIX or US. For instance, in the de locale, [A-Z] matches
49 all characters. */
50
51 #if defined (HAVE_STRCOLL)
/* Compare the single-byte characters C1 and C2 for use in bracket
   expression ranges.  Only the low eight bits of each argument are
   considered.  Returns 0 when the two characters are identical;
   otherwise returns the strcoll(3) ordering of the two one-character
   strings, or the difference of the character values when strcoll
   reports the strings as equal. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  char lhs[2], rhs[2];
  int order;

  /* Eight bits only. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return 0;

  lhs[0] = c1;
  lhs[1] = '\0';
  rhs[0] = c2;
  rhs[1] = '\0';

  order = strcoll (lhs, rhs);
  if (order == 0)
    order = c1 - c2;

  return order;
}
74 #else /* !HAVE_STRCOLL */
75 # define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
76 #endif /* !HAVE_STRCOLL */
77
78 #if defined (HAVE_STRCOLL)
/* Return 1 if rangecmp reports C1 and C2 as equal, and 0 otherwise. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  if (rangecmp (c1, c2) != 0)
    return 0;

  return 1;
}
85 #else
86 # define collequiv(c1, c2) ((c1) == (c2))
87 #endif
88
89 #define _COLLSYM _collsym
90 #define __COLLSYM __collsym
91 #define POSIXCOLL posix_collsyms
92 #include "collsyms.h"
93
94 static int
95 collsym (s, len)
96 CHAR *s;
97 int len;
98 {
99 register struct _collsym *csp;
100 char *x;
101
102 x = (char *)s;
103 for (csp = posix_collsyms; csp->name; csp++)
104 {
105 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
106 return (csp->code);
107 }
108 if (len == 1)
109 return s[0];
110 return INVALID;
111 }
112
/* Single-byte character classification. */

/* Fallback for systems that provide neither an isascii macro nor
   HAVE_ISASCII: true for values no greater than octal 177. */
#if !defined (isascii) && !defined (HAVE_ISASCII)
#  define isascii(c) ((unsigned int)(c) <= 0177)
#endif

/* One identifier per supported character class name.  The values index
   cclass_name[], so the two lists must stay in the same order. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, indexed by enum char_class.  Slot 0 (CC_NO_CLASS) is an
   empty placeholder. */
static char const *const cclass_name[] =
  {
    "",
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name[], including the placeholder. */
#define N_CHAR_CLASS (sizeof (cclass_name) / sizeof (cclass_name[0]))
133
134 static int
135 is_cclass (c, name)
136 int c;
137 const char *name;
138 {
139 enum char_class char_class = CC_NO_CLASS;
140 int i, result;
141
142 for (i = 1; i < N_CHAR_CLASS; i++)
143 {
144 if (STREQ (name, cclass_name[i]))
145 {
146 char_class = (enum char_class)i;
147 break;
148 }
149 }
150
151 if (char_class == 0)
152 return -1;
153
154 switch (char_class)
155 {
156 case CC_ASCII:
157 result = isascii (c);
158 break;
159 case CC_ALNUM:
160 result = ISALNUM (c);
161 break;
162 case CC_ALPHA:
163 result = ISALPHA (c);
164 break;
165 case CC_BLANK:
166 result = ISBLANK (c);
167 break;
168 case CC_CNTRL:
169 result = ISCNTRL (c);
170 break;
171 case CC_DIGIT:
172 result = ISDIGIT (c);
173 break;
174 case CC_GRAPH:
175 result = ISGRAPH (c);
176 break;
177 case CC_LOWER:
178 result = ISLOWER (c);
179 break;
180 case CC_PRINT:
181 result = ISPRINT (c);
182 break;
183 case CC_PUNCT:
184 result = ISPUNCT (c);
185 break;
186 case CC_SPACE:
187 result = ISSPACE (c);
188 break;
189 case CC_UPPER:
190 result = ISUPPER (c);
191 break;
192 case CC_WORD:
193 result = (ISALNUM (c) || c == '_');
194 break;
195 case CC_XDIGIT:
196 result = ISXDIGIT (c);
197 break;
198 default:
199 result = -1;
200 break;
201 }
202
203 return result;
204 }
205
206 /* Now include `sm_loop.c' for single-byte characters. */
207 /* The result of FOLD is an `unsigned char' */
208 # define FOLD(c) ((flags & FNM_CASEFOLD) \
209 ? TOLOWER ((unsigned char)c) \
210 : ((unsigned char)c))
211
212 #define FCT internal_strmatch
213 #define GMATCH gmatch
214 #define COLLSYM collsym
215 #define PARSE_COLLSYM parse_collsym
216 #define BRACKMATCH brackmatch
217 #define PATSCAN patscan
218 #define STRCOMPARE strcompare
219 #define EXTMATCH extmatch
220 #define STRCHR(S, C) strchr((S), (C))
221 #define STRCOLL(S1, S2) strcoll((S1), (S2))
222 #define STRLEN(S) strlen(S)
223 #define STRCMP(S1, S2) strcmp((S1), (S2))
224 #define RANGECMP(C1, C2) rangecmp((C1), (C2))
225 #define COLLEQUIV(C1, C2) collequiv((C1), (C2))
226 #define CTYPE_T enum char_class
227 #define IS_CCLASS(C, S) is_cclass((C), (S))
228 #include "sm_loop.c"
229
230 #if HANDLE_MULTIBYTE
231
/* Type and literal parameters used when `sm_loop.c' is compiled for
   wide-character strings.  L() turns a literal into its wide form, and
   INVALID is WEOF. */
# define CHAR     wchar_t
# define U_CHAR   wint_t
# define XCHAR    wchar_t
# define INT      wint_t
# define L(CS)    L##CS
# define INVALID  WEOF

/* Wide-string equality helpers. */
# undef STREQ
# undef STREQN
# define STREQ(s1, s2)     (wcscmp ((s1), (s2)) == 0)
# define STREQN(s1, s2, n) ((s1)[0] == (s2)[0] && wcsncmp ((s1), (s2), (n)) == 0)
243
/* Compare the wide characters C1 and C2 for use in bracket expression
   ranges.  Returns 0 when the two values are identical; otherwise returns
   the wcscoll(3) ordering of the two one-character wide strings. */
static int
rangecmp_wc (c1, c2)
     wint_t c1, c2;
{
  wchar_t lhs[2], rhs[2];

  if (c1 == c2)
    return 0;

  lhs[0] = c1;
  lhs[1] = L'\0';
  rhs[0] = c2;
  rhs[1] = L'\0';

  return wcscoll (lhs, rhs);
}
259
/* Return 1 if the wide characters C and EQUIV have the same value, and 0
   otherwise. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return (c == equiv) ? 1 : 0;
}
266
267 /* Helper function for collating symbol. */
268 # define _COLLSYM _collwcsym
269 # define __COLLSYM __collwcsym
270 # define POSIXCOLL posix_collwcsyms
271 # include "collsyms.h"
272
273 static wint_t
274 collwcsym (s, len)
275 wchar_t *s;
276 int len;
277 {
278 register struct _collwcsym *csp;
279
280 for (csp = posix_collwcsyms; csp->name; csp++)
281 {
282 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
283 return (csp->code);
284 }
285 if (len == 1)
286 return s[0];
287 return INVALID;
288 }
289
/* Test whether the wide character WC belongs to the character class
   called NAME.

   Returns -1 if NAME cannot be converted to a multibyte string, if
   memory for that conversion cannot be allocated, or if wctype(3) does
   not recognize the class.  Otherwise returns non-zero when WC is a
   member of the class and zero when it is not.

   Two class names get special treatment:
     - `ascii', when wctype does not know it: WC is a member if wctob(3)
       maps it to a single byte no greater than 0x7F.
     - `word': tested as `alnum', with the underscore also accepted. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  const wchar_t *src;
  mbstate_t state;
  size_t mbsize, mbslength;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      if ((c = wctob (wc)) == EOF)
        return 0;
      else
        return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* Worst case: every wide character needs MB_CUR_MAX bytes, plus the
     terminating null byte. */
  mbsize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbsize);
  if (mbs == 0)
    return -1;

  /* wcsrtombs updates the source pointer it is given, so hand it a
     scratch copy rather than NAME itself. */
  memset (&state, '\0', sizeof (mbstate_t));
  src = name;
  mbslength = wcsrtombs (mbs, &src, mbsize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
335
336 /* Now include `sm_loop.c' for multibyte characters. */
337 #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
338 #define FCT internal_wstrmatch
339 #define GMATCH gmatch_wc
340 #define COLLSYM collwcsym
341 #define PARSE_COLLSYM parse_collwcsym
342 #define BRACKMATCH brackmatch_wc
343 #define PATSCAN patscan_wc
344 #define STRCOMPARE wscompare
345 #define EXTMATCH extmatch_wc
346 #define STRCHR(S, C) wcschr((S), (C))
347 #define STRCOLL(S1, S2) wcscoll((S1), (S2))
348 #define STRLEN(S) wcslen(S)
349 #define STRCMP(S1, S2) wcscmp((S1), (S2))
350 #define RANGECMP(C1, C2) rangecmp_wc((C1), (C2))
351 #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
352 #define CTYPE_T enum char_class
353 #define IS_CCLASS(C, S) is_wcclass((C), (S))
354 #include "sm_loop.c"
355
#endif /* HANDLE_MULTIBYTE */
357
/* Match STRING against PATTERN, passing FLAGS through to the matcher, and
   return the matcher's result.

   When HANDLE_MULTIBYTE is enabled and the locale's MB_CUR_MAX is not 1,
   both arguments are converted to wide-character strings with
   xdupmbstowcs and matched with internal_wstrmatch.  If either
   conversion reports (size_t)-1 or (size_t)-2, or in every other
   configuration, the byte-oriented internal_strmatch is used on the
   original strings instead. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  wchar_t *wide_pat, *wide_str;
  size_t nconv;
  int matched;

  if (MB_CUR_MAX != 1)
    {
      nconv = xdupmbstowcs (&wide_pat, NULL, pattern);
      if (nconv != (size_t)-1 && nconv != (size_t)-2)
        {
          nconv = xdupmbstowcs (&wide_str, NULL, string);
          if (nconv != (size_t)-1 && nconv != (size_t)-2)
            {
              matched = internal_wstrmatch (wide_pat, wide_str, flags);

              free (wide_pat);
              free (wide_str);

              return matched;
            }

          /* Only the pattern was converted; release it before falling
             back to the byte-oriented matcher. */
          free (wide_pat);
        }
    }
#endif /* HANDLE_MULTIBYTE */

  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
}