]> git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/smatch.c
Imported from ../bash-2.05b.tar.gz.
[thirdparty/bash.git] / lib / glob / smatch.c
/* smatch.c -- ksh-like extended pattern matching for the shell and filename
   globbing. */
3
4 /* Copyright (C) 1991-2002 Free Software Foundation, Inc.
5
6 This file is part of GNU Bash, the Bourne Again SHell.
7
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
21
22 #include <config.h>
23
24 #include <stdio.h> /* for debugging */
25
26 #include "strmatch.h"
27 #include <chartypes.h>
28
29 #include "bashansi.h"
30 #include "shmbutil.h"
31 #include "xmalloc.h"
32
33 /* First, compile `sm_loop.c' for single-byte characters. */
/* Character types and helpers for the single-byte compilation. */
#define CHAR	unsigned char
#define U_CHAR	unsigned char
#define XCHAR	char
#define INT	int

/* Spell a character or string constant in the current character type;
   nothing needs to be added for single-byte characters. */
#define L(CS) CS

/* Value returned by collsym for a name it does not recognize. */
#define INVALID -1

/* String equality tests.  The first characters are compared inline before
   strcmp/strncmp is called. */
#undef STREQ
#undef STREQN
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
45
46 /* We use strcoll(3) for range comparisons in bracket expressions,
47 even though it can have unwanted side effects in locales
48 other than POSIX or US. For instance, in the de locale, [A-Z] matches
49 all characters. */
50
51 #if defined (HAVE_STRCOLL)
52 /* Helper function for collating symbol equivalence. */
/* Compare the single-byte characters C1 and C2.  Only the low eight bits
   of each argument are used.  Returns 0 when the two are identical;
   otherwise returns the strcoll(3) ordering of the corresponding
   one-character strings, or, if strcoll considers them equal, the
   difference of the two character values. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  /* One-character scratch strings; only element 0 is ever rewritten. */
  static char lhs[2] = { ' ', '\0' };
  static char rhs[2] = { ' ', '\0' };
  int a, b, order;

  a = c1 & 0xFF;
  b = c2 & 0xFF;
  if (a == b)
    return (0);

  lhs[0] = a;
  rhs[0] = b;
  order = strcoll (lhs, rhs);

  return ((order != 0) ? order : (a - b));
}
74 #else /* !HAVE_STRCOLL */
75 # define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
76 #endif /* !HAVE_STRCOLL */
77
78 #if defined (HAVE_STRCOLL)
/* Return 1 if rangecmp reports C1 and C2 as equal, 0 otherwise. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  int order;

  order = rangecmp (c1, c2);
  return (order == 0);
}
85 #else
86 # define collequiv(c1, c2) ((c1) == (c2))
87 #endif
88
89 #define _COLLSYM _collsym
90 #define __COLLSYM __collsym
91 #define POSIXCOLL posix_collsyms
92 #include "collsyms.h"
93
94 static int
95 collsym (s, len)
96 char *s;
97 int len;
98 {
99 register struct _collsym *csp;
100
101 for (csp = posix_collsyms; csp->name; csp++)
102 {
103 if (STREQN(csp->name, s, len) && csp->name[len] == '\0')
104 return (csp->code);
105 }
106 if (len == 1)
107 return s[0];
108 return INVALID;
109 }
110
111 /* unibyte character classification */
112 #if !defined (isascii) && !defined (HAVE_ISASCII)
113 # define isascii(c) ((unsigned int)(c) <= 0177)
114 #endif
115
/* Character classes that can be referred to by name.  The value of each
   enumerator is the index of the matching name in cclass_name[], so the
   two lists must stay in the same order. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, indexed by enum char_class. */
static char const *const cclass_name[] =
  {
    "",			/* CC_NO_CLASS */
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name[], including the empty entry 0. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
131
132 static int
133 is_cclass (c, name)
134 int c;
135 const char *name;
136 {
137 enum char_class char_class = CC_NO_CLASS;
138 int i, result;
139
140 for (i = 1; i < N_CHAR_CLASS; i++)
141 {
142 if (STREQ (name, cclass_name[i]))
143 {
144 char_class = (enum char_class)i;
145 break;
146 }
147 }
148
149 if (char_class == 0)
150 return -1;
151
152 switch (char_class)
153 {
154 case CC_ASCII:
155 result = isascii (c);
156 break;
157 case CC_ALNUM:
158 result = ISALNUM (c);
159 break;
160 case CC_ALPHA:
161 result = ISALPHA (c);
162 break;
163 case CC_BLANK:
164 result = ISBLANK (c);
165 break;
166 case CC_CNTRL:
167 result = ISCNTRL (c);
168 break;
169 case CC_DIGIT:
170 result = ISDIGIT (c);
171 break;
172 case CC_GRAPH:
173 result = ISGRAPH (c);
174 break;
175 case CC_LOWER:
176 result = ISLOWER (c);
177 break;
178 case CC_PRINT:
179 result = ISPRINT (c);
180 break;
181 case CC_PUNCT:
182 result = ISPUNCT (c);
183 break;
184 case CC_SPACE:
185 result = ISSPACE (c);
186 break;
187 case CC_UPPER:
188 result = ISUPPER (c);
189 break;
190 case CC_WORD:
191 result = (ISALNUM (c) || c == '_');
192 break;
193 case CC_XDIGIT:
194 result = ISXDIGIT (c);
195 break;
196 default:
197 result = -1;
198 break;
199 }
200
201 return result;
202 }
203
204 /* Now include `sm_loop.c' for single-byte characters. */
205 /* The result of FOLD is an `unsigned char' */
/* Case-fold C when FNM_CASEFOLD is set in the `flags' variable that is in
   scope at the point of use.  The argument is parenthesized inside the
   cast so that a compound expression is converted as a whole instead of
   having the cast bind only to its first operand. */
#define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))

/* Names of the functions to be generated for single-byte characters. */
#define FCT			internal_strmatch
#define GMATCH			gmatch
#define COLLSYM			collsym
#define PARSE_COLLSYM		parse_collsym
#define BRACKMATCH		brackmatch
#define PATSCAN			patscan
#define STRCOMPARE		strcompare
#define EXTMATCH		extmatch

/* Single-byte string primitives and the comparison/classification
   helpers defined in this file. */
#define STRCHR(S, C)		strchr((S), (C))
#define STRCOLL(S1, S2)		strcoll((S1), (S2))
#define STRLEN(S)		strlen(S)
#define STRCMP(S1, S2)		strcmp((S1), (S2))
#define RANGECMP(C1, C2)	rangecmp((C1), (C2))
#define COLLEQUIV(C1, C2)	collequiv((C1), (C2))
#define CTYPE_T			enum char_class
#define IS_CCLASS(C, S)		is_cclass((C), (S))
226 #include "sm_loop.c"
227
228 #if HANDLE_MULTIBYTE
229
/* Character types and helpers for the wide-character compilation. */
# define CHAR		wchar_t
# define U_CHAR		wint_t
# define XCHAR		wchar_t
# define INT		wint_t

/* Spell a character or string constant as a wide constant. */
# define L(CS)		L##CS

/* Value returned by collwcsym for a name it does not recognize. */
# define INVALID	WEOF

/* Wide-string equality tests. */
# undef STREQ
# undef STREQN
# define STREQ(a, b) (wcscmp (a, b) == 0)
# define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
241
/* Compare the wide characters C1 and C2.  Returns 0 when the two are
   identical; otherwise returns the wcscoll(3) ordering of the
   corresponding one-character wide strings. */
static int
rangecmp_wc (c1, c2)
     wint_t c1, c2;
{
  /* One-character scratch strings; only element 0 is ever rewritten. */
  static wchar_t s1[2] = { L' ', L'\0' };
  static wchar_t s2[2] = { L' ', L'\0' };

  if (c1 == c2)
    return 0;

  s1[0] = c1;
  s2[0] = c2;

  return (wcscoll (s1, s2));
}
258
/* Return 1 if the wide characters C and EQUIV have the same value, 0
   otherwise. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return (c == equiv);
}
265
266 /* Helper function for collating symbol. */
267 # define _COLLSYM _collwcsym
268 # define __COLLSYM __collwcsym
269 # define POSIXCOLL posix_collwcsyms
270 # include "collsyms.h"
271
272 static wint_t
273 collwcsym (s, len)
274 wchar_t *s;
275 int len;
276 {
277 register struct _collwcsym *csp;
278
279 for (csp = posix_collwcsyms; csp->name; csp++)
280 {
281 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
282 return (csp->code);
283 }
284 if (len == 1)
285 return s[0];
286 return INVALID;
287 }
288
/* Test whether the wide character WC belongs to the character class
   called NAME.  Returns -1 if the class name cannot be converted to a
   multibyte string, if wctype(3) does not recognize it, or if memory for
   the conversion cannot be allocated; otherwise returns zero or non-zero
   according to whether WC is in the class.

   Two names are handled here rather than by wctype alone: `ascii', when
   wctype does not know it, is decided by converting WC with wctob; and
   `word' is treated as `alnum' plus the underscore. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  mbstate_t state;
  size_t mbslength, mbssize;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      if ((c = wctob (wc)) == EOF)
	return 0;
      else
	return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* wctype takes a multibyte name, so convert NAME.  The size is computed
     once, before wcsrtombs gets the chance to modify NAME. */
  memset (&state, '\0', sizeof (mbstate_t));
  mbssize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbssize);
  if (mbs == NULL)
    return -1;
  mbslength = wcsrtombs (mbs, (const wchar_t **)&name, mbssize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
334
335 /* Now include `sm_loop.c' for multibyte characters. */
/* Case-fold the wide character C when FNM_CASEFOLD is set in the `flags'
   variable that is in scope at the point of use; only upper-case
   characters are changed. */
# define FOLD(c) \
	(((flags & FNM_CASEFOLD) && iswupper (c)) ? towlower (c) : (c))

/* Names of the functions to be generated for wide characters. */
# define FCT			internal_wstrmatch
# define GMATCH			gmatch_wc
# define COLLSYM		collwcsym
# define PARSE_COLLSYM		parse_collwcsym
# define BRACKMATCH		brackmatch_wc
# define PATSCAN		patscan_wc
# define STRCOMPARE		wscompare
# define EXTMATCH		extmatch_wc

/* Wide-string primitives and the wide comparison/classification helpers
   defined in this file. */
# define STRCHR(S, C)		wcschr((S), (C))
# define STRCOLL(S1, S2)	wcscoll((S1), (S2))
# define STRLEN(S)		wcslen(S)
# define STRCMP(S1, S2)		wcscmp((S1), (S2))
# define RANGECMP(C1, C2)	rangecmp_wc((C1), (C2))
# define COLLEQUIV(C1, C2)	collequiv_wc((C1), (C2))
# define CTYPE_T		enum char_class
# define IS_CCLASS(C, S)	is_wcclass((C), (S))
353 #include "sm_loop.c"
354
#endif /* HANDLE_MULTIBYTE */
356
#if HANDLE_MULTIBYTE
/* Convert the multibyte string S into a newly allocated wide-character
   string using xmbsrtowcs.  Returns NULL if xmbsrtowcs reports
   (size_t)-1 or (size_t)-2 while measuring S.  The result comes from
   xmalloc and is released by the caller with free.  The conversion
   advances a local cursor, so S itself is left untouched. */
static wchar_t *
xstrmatch_widen (s)
     char *s;
{
  mbstate_t ps;
  size_t n;
  const char *src;
  wchar_t *ws;

  memset (&ps, '\0', sizeof (mbstate_t));

  /* First pass: a null destination only counts the wide characters. */
  src = s;
  n = xmbsrtowcs (NULL, &src, 0, &ps);
  if (n == (size_t)-1 || n == (size_t)-2)
    return ((wchar_t *)NULL);

  /* Second pass: convert into a buffer with room for the terminator. */
  ws = (wchar_t *)xmalloc ((n + 1) * sizeof (wchar_t));
  src = s;
  (void) xmbsrtowcs (ws, &src, n + 1, &ps);

  return ws;
}
#endif /* HANDLE_MULTIBYTE */

/* Match STRING against PATTERN under FLAGS and return the result of the
   underlying matcher.

   Without HANDLE_MULTIBYTE, or when MB_CUR_MAX is 1, the single-byte
   matcher internal_strmatch is used directly.  Otherwise both arguments
   are converted to wide-character strings and passed to
   internal_wstrmatch; if either one cannot be converted, the single-byte
   matcher is used on the original strings instead. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  wchar_t *wpattern, *wstring;

  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  wpattern = xstrmatch_widen (pattern);
  if (wpattern == NULL)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  wstring = xstrmatch_widen (string);
  if (wstring == NULL)
    {
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}