]> git.ipfire.org Git - thirdparty/bash.git/blame - lib/glob/smatch.c
Imported from ../bash-3.1.tar.gz.
[thirdparty/bash.git] / lib / glob / smatch.c
CommitLineData
7117c2d2
JA
1/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
2 globbing. */
3
95732b49 4/* Copyright (C) 1991-2005 Free Software Foundation, Inc.
7117c2d2
JA
5
6 This file is part of GNU Bash, the Bourne Again SHell.
7
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
21
22#include <config.h>
23
24#include <stdio.h> /* for debugging */
25
26#include "strmatch.h"
27#include <chartypes.h>
28
29#include "bashansi.h"
30#include "shmbutil.h"
31#include "xmalloc.h"
32
/* sm_loop.c is compiled first for single-byte characters.  These macros
   supply the character types, the literal wrapper and the "no such
   character" value used for that pass. */
#define CHAR	unsigned char
#define U_CHAR	unsigned char
#define XCHAR	char
#define INT	int
#define L(CS)	CS
#define INVALID	-1

/* Replace any earlier STREQ/STREQN with versions that compare the first
   characters before falling back to strcmp/strncmp. */
#undef STREQN
#undef STREQ
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
45
46/* We use strcoll(3) for range comparisons in bracket expressions,
47 even though it can have unwanted side effects in locales
48 other than POSIX or US. For instance, in the de locale, [A-Z] matches
49 all characters. */
50
51#if defined (HAVE_STRCOLL)
52/* Helper function for collating symbol equivalence. */
/* Order two characters for range comparisons.  Only the low eight bits of
   each argument are considered.  Returns 0 when they are the same
   character; otherwise the strcoll(3) ordering of the two one-character
   strings, or, when strcoll reports them equal, their numeric difference. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  static char lhs[2] = { ' ', '\0' };
  static char rhs[2] = { ' ', '\0' };
  int order;

  /* Eight bits only. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 != c2)
    {
      lhs[0] = c1;
      rhs[0] = c2;

      order = strcoll (lhs, rhs);
      /* Distinct characters that collate equal are ordered by value. */
      return (order != 0) ? order : (c1 - c2);
    }

  return (0);
}
74#else /* !HAVE_STRCOLL */
75# define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
76#endif /* !HAVE_STRCOLL */
77
78#if defined (HAVE_STRCOLL)
/* Return 1 if rangecmp() reports C1 and C2 as equal, 0 otherwise. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  if (rangecmp (c1, c2) != 0)
    return 0;
  return 1;
}
85#else
86# define collequiv(c1, c2) ((c1) == (c2))
87#endif
88
89#define _COLLSYM _collsym
90#define __COLLSYM __collsym
91#define POSIXCOLL posix_collsyms
92#include "collsyms.h"
93
94static int
95collsym (s, len)
95732b49 96 CHAR *s;
7117c2d2
JA
97 int len;
98{
99 register struct _collsym *csp;
95732b49 100 char *x;
7117c2d2 101
95732b49 102 x = (char *)s;
7117c2d2
JA
103 for (csp = posix_collsyms; csp->name; csp++)
104 {
95732b49 105 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
7117c2d2
JA
106 return (csp->code);
107 }
108 if (len == 1)
109 return s[0];
110 return INVALID;
111}
112
113/* unibyte character classification */
114#if !defined (isascii) && !defined (HAVE_ISASCII)
115# define isascii(c) ((unsigned int)(c) <= 0177)
116#endif
117
/* Character classes recognized by is_cclass().  CC_NO_CLASS is 0 and the
   remaining enumerators are numbered consecutively, so each one is also
   the index of its name in cclass_name[]. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, in the same order as enum char_class. */
static const char *const cclass_name[] =
  {
    "",		/* CC_NO_CLASS */
    "ascii",	/* CC_ASCII */
    "alnum",	/* CC_ALNUM */
    "alpha",	/* CC_ALPHA */
    "blank",	/* CC_BLANK */
    "cntrl",	/* CC_CNTRL */
    "digit",	/* CC_DIGIT */
    "graph",	/* CC_GRAPH */
    "lower",	/* CC_LOWER */
    "print",	/* CC_PRINT */
    "punct",	/* CC_PUNCT */
    "space",	/* CC_SPACE */
    "upper",	/* CC_UPPER */
    "word",	/* CC_WORD */
    "xdigit"	/* CC_XDIGIT */
  };

/* Number of entries in cclass_name[], including the empty slot 0. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
133
134static int
135is_cclass (c, name)
136 int c;
137 const char *name;
138{
139 enum char_class char_class = CC_NO_CLASS;
140 int i, result;
141
142 for (i = 1; i < N_CHAR_CLASS; i++)
143 {
144 if (STREQ (name, cclass_name[i]))
145 {
146 char_class = (enum char_class)i;
147 break;
148 }
149 }
150
151 if (char_class == 0)
152 return -1;
153
154 switch (char_class)
155 {
156 case CC_ASCII:
157 result = isascii (c);
158 break;
159 case CC_ALNUM:
160 result = ISALNUM (c);
161 break;
162 case CC_ALPHA:
163 result = ISALPHA (c);
164 break;
165 case CC_BLANK:
166 result = ISBLANK (c);
167 break;
168 case CC_CNTRL:
169 result = ISCNTRL (c);
170 break;
171 case CC_DIGIT:
172 result = ISDIGIT (c);
173 break;
174 case CC_GRAPH:
175 result = ISGRAPH (c);
176 break;
177 case CC_LOWER:
178 result = ISLOWER (c);
179 break;
180 case CC_PRINT:
181 result = ISPRINT (c);
182 break;
183 case CC_PUNCT:
184 result = ISPUNCT (c);
185 break;
186 case CC_SPACE:
187 result = ISSPACE (c);
188 break;
189 case CC_UPPER:
190 result = ISUPPER (c);
191 break;
192 case CC_WORD:
193 result = (ISALNUM (c) || c == '_');
194 break;
195 case CC_XDIGIT:
196 result = ISXDIGIT (c);
197 break;
198 default:
199 result = -1;
200 break;
201 }
202
203 return result;
204}
205
206/* Now include `sm_loop.c' for single-byte characters. */
/* FOLD(c) converts C to `unsigned char' and, when FNM_CASEFOLD is set in
   the `flags' variable in scope at the point of use, lowercases it with
   TOLOWER.  The argument is parenthesized so that the cast applies to the
   whole expression passed in rather than only to its first operand. */
#define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))
211
212#define FCT internal_strmatch
213#define GMATCH gmatch
214#define COLLSYM collsym
215#define PARSE_COLLSYM parse_collsym
216#define BRACKMATCH brackmatch
217#define PATSCAN patscan
218#define STRCOMPARE strcompare
219#define EXTMATCH extmatch
220#define STRCHR(S, C) strchr((S), (C))
221#define STRCOLL(S1, S2) strcoll((S1), (S2))
222#define STRLEN(S) strlen(S)
223#define STRCMP(S1, S2) strcmp((S1), (S2))
224#define RANGECMP(C1, C2) rangecmp((C1), (C2))
225#define COLLEQUIV(C1, C2) collequiv((C1), (C2))
226#define CTYPE_T enum char_class
227#define IS_CCLASS(C, S) is_cclass((C), (S))
228#include "sm_loop.c"
229
230#if HANDLE_MULTIBYTE
231
/* Character types, literal wrapper and "no such character" value for the
   wide-character pass. */
# define CHAR	wchar_t
# define U_CHAR	wint_t
# define XCHAR	wchar_t
# define INT	wint_t
# define L(CS)	L##CS
# define INVALID	WEOF

/* Wide-string replacements for the equality tests.  STREQN compares the
   first characters before calling wcsncmp; STREQ goes straight to wcscmp. */
# undef STREQN
# undef STREQ
# define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
# define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
243
/* Order two wide characters for range comparisons.  Returns 0 when they
   are the same character; otherwise the wcscoll(3) ordering of the two
   one-character wide strings. */
static int
rangecmp_wc (c1, c2)
     wint_t c1, c2;
{
  static wchar_t lhs[2] = { L' ', L'\0' };
  static wchar_t rhs[2] = { L' ', L'\0' };

  if (c1 != c2)
    {
      lhs[0] = c1;
      rhs[0] = c2;
      return (wcscoll (lhs, rhs));
    }

  return 0;
}
260
/* Return 1 if the wide characters C and EQUIV have the same value,
   0 otherwise. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return ((c - equiv) == 0);
}
267
268/* Helper function for collating symbol. */
269# define _COLLSYM _collwcsym
270# define __COLLSYM __collwcsym
271# define POSIXCOLL posix_collwcsyms
272# include "collsyms.h"
273
274static wint_t
275collwcsym (s, len)
276 wchar_t *s;
277 int len;
278{
279 register struct _collwcsym *csp;
280
281 for (csp = posix_collwcsyms; csp->name; csp++)
282 {
283 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
284 return (csp->code);
285 }
286 if (len == 1)
287 return s[0];
288 return INVALID;
289}
290
/* Test whether the wide character WC belongs to the character class NAME.

   "ascii" is handled here when wctype(3) does not know it: WC is in the
   class if wctob(3) maps it to a single byte no greater than 0x7F.
   "word" is treated as "alnum" plus the underscore.  Every other name is
   converted to a multibyte string and looked up with wctype(3).

   Returns -1 if NAME cannot be converted, is not a class known to
   wctype(3), or memory for the conversion cannot be allocated; otherwise
   returns non-zero if WC is in the class and 0 if it is not. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  mbstate_t state;
  size_t mbslength, mbsize;
  const wchar_t *src;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      if ((c = wctob (wc)) == EOF)
	return 0;
      else
	return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* Room for every character at its maximum multibyte length plus the
     terminating null byte. */
  mbsize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbsize);
  if (mbs == 0)
    return -1;

  /* wcsrtombs updates the pointer it is given, so convert through a
     scratch copy and leave NAME itself untouched. */
  memset (&state, '\0', sizeof (mbstate_t));
  src = name;
  mbslength = wcsrtombs (mbs, &src, mbsize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
336
337/* Now include `sm_loop.c' for multibyte characters. */
338#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
339#define FCT internal_wstrmatch
340#define GMATCH gmatch_wc
341#define COLLSYM collwcsym
342#define PARSE_COLLSYM parse_collwcsym
343#define BRACKMATCH brackmatch_wc
344#define PATSCAN patscan_wc
345#define STRCOMPARE wscompare
346#define EXTMATCH extmatch_wc
347#define STRCHR(S, C) wcschr((S), (C))
348#define STRCOLL(S1, S2) wcscoll((S1), (S2))
349#define STRLEN(S) wcslen(S)
350#define STRCMP(S1, S2) wcscmp((S1), (S2))
351#define RANGECMP(C1, C2) rangecmp_wc((C1), (C2))
352#define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
353#define CTYPE_T enum char_class
354#define IS_CCLASS(C, S) is_wcclass((C), (S))
355#include "sm_loop.c"
356
#endif /* HANDLE_MULTIBYTE */
358
/* Match STRING against PATTERN under FLAGS and return the matcher's
   result.

   When HANDLE_MULTIBYTE is enabled and MB_CUR_MAX is not 1, both
   arguments are converted with xdupmbstowcs and handed to
   internal_wstrmatch; the converted buffers are released with free()
   afterwards.  If either conversion reports (size_t)-1 or (size_t)-2,
   and in every other case, the match is done on the original bytes by
   internal_strmatch. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  wchar_t *wide_pat, *wide_str;
  size_t converted;
  int matched;

  if (MB_CUR_MAX != 1)
    {
      converted = xdupmbstowcs (&wide_pat, NULL, pattern);
      if (converted != (size_t)-1 && converted != (size_t)-2)
	{
	  converted = xdupmbstowcs (&wide_str, NULL, string);
	  if (converted != (size_t)-1 && converted != (size_t)-2)
	    {
	      matched = internal_wstrmatch (wide_pat, wide_str, flags);

	      free (wide_pat);
	      free (wide_str);

	      return matched;
	    }

	  /* STRING could not be converted: drop the converted pattern
	     before falling back to the single-byte matcher. */
	  free (wide_pat);
	}
    }
#endif /* HANDLE_MULTIBYTE */

  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
}