]> git.ipfire.org Git - thirdparty/bash.git/blame - lib/glob/smatch.c
Bash-4.2 distribution sources and documentation
[thirdparty/bash.git] / lib / glob / smatch.c
CommitLineData
7117c2d2
JA
1/* smatch.c -- ksh-like extended pattern matching for the shell and filename
2 globbing. */
3
495aee44 4/* Copyright (C) 1991-2011 Free Software Foundation, Inc.
7117c2d2
JA
5
6 This file is part of GNU Bash, the Bourne Again SHell.
7
3185942a
JA
8 Bash is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 Bash is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with Bash. If not, see <http://www.gnu.org/licenses/>.
20*/
7117c2d2
JA
21
22#include <config.h>
23
24#include <stdio.h> /* for debugging */
25
26#include "strmatch.h"
27#include <chartypes.h>
28
29#include "bashansi.h"
30#include "shmbutil.h"
31#include "xmalloc.h"
32
33/* First, compile `sm_loop.c' for single-byte characters. */
/* Character-type parameters for the single-byte instance of the matcher. */
#define CHAR    unsigned char
#define U_CHAR  unsigned char
#define XCHAR   char
#define INT     int
#define L(CS)   CS              /* literals are used exactly as written */
#define INVALID -1              /* collsym() result for an unknown symbol */

/* Drop any earlier STREQ/STREQN and define versions that test the first
   character inline before calling the library comparison function. */
#undef STREQ
#undef STREQN
#define STREQ(a, b)     ((a)[0] == (b)[0] && strcmp(a, b) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
45
46/* We use strcoll(3) for range comparisons in bracket expressions,
47 even though it can have unwanted side effects in locales
48 other than POSIX or US. For instance, in the de locale, [A-Z] matches
49 all characters. */
50
51#if defined (HAVE_STRCOLL)
52/* Helper function for collating symbol equivalence. */
/* Order the single-byte characters C1 and C2.  Only the low eight bits of
   each argument are considered.  Returns 0 when both are the same byte;
   otherwise the result of strcoll(3) on the two one-character strings, or,
   if strcoll reports them equal, the difference of the byte values. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  static char lhs[2] = { ' ', '\0' };
  static char rhs[2] = { ' ', '\0' };
  int order;

  /* Eight bits only. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return (0);

  lhs[0] = c1;
  rhs[0] = c2;

  /* Fall back to the raw byte difference when the locale ties them. */
  order = strcoll (lhs, rhs);
  return (order != 0) ? order : (c1 - c2);
}
74#else /* !HAVE_STRCOLL */
75# define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
76#endif /* !HAVE_STRCOLL */
77
78#if defined (HAVE_STRCOLL)
/* Return 1 if rangecmp() reports C1 and C2 as equal, 0 otherwise. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  int order;

  order = rangecmp (c1, c2);
  return (order == 0);
}
85#else
86# define collequiv(c1, c2) ((c1) == (c2))
87#endif
88
89#define _COLLSYM _collsym
90#define __COLLSYM __collsym
91#define POSIXCOLL posix_collsyms
92#include "collsyms.h"
93
94static int
95collsym (s, len)
95732b49 96 CHAR *s;
7117c2d2
JA
97 int len;
98{
99 register struct _collsym *csp;
95732b49 100 char *x;
7117c2d2 101
95732b49 102 x = (char *)s;
7117c2d2
JA
103 for (csp = posix_collsyms; csp->name; csp++)
104 {
95732b49 105 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
7117c2d2
JA
106 return (csp->code);
107 }
108 if (len == 1)
109 return s[0];
110 return INVALID;
111}
112
113/* unibyte character classification */
114#if !defined (isascii) && !defined (HAVE_ISASCII)
115# define isascii(c) ((unsigned int)(c) <= 0177)
116#endif
117
/* Named character classes handled by is_cclass().  Each enumerator's value
   is also the index of its name in cclass_name[], so the two lists must be
   kept in the same order. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, indexed by enum char_class. */
static char const *const cclass_name[] =
  {
    "",                 /* CC_NO_CLASS */
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name[], including the empty slot 0. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
133
134static int
135is_cclass (c, name)
136 int c;
137 const char *name;
138{
139 enum char_class char_class = CC_NO_CLASS;
140 int i, result;
141
142 for (i = 1; i < N_CHAR_CLASS; i++)
143 {
144 if (STREQ (name, cclass_name[i]))
145 {
146 char_class = (enum char_class)i;
147 break;
148 }
149 }
150
151 if (char_class == 0)
152 return -1;
153
154 switch (char_class)
155 {
156 case CC_ASCII:
157 result = isascii (c);
158 break;
159 case CC_ALNUM:
160 result = ISALNUM (c);
161 break;
162 case CC_ALPHA:
163 result = ISALPHA (c);
164 break;
165 case CC_BLANK:
166 result = ISBLANK (c);
167 break;
168 case CC_CNTRL:
169 result = ISCNTRL (c);
170 break;
171 case CC_DIGIT:
172 result = ISDIGIT (c);
173 break;
174 case CC_GRAPH:
175 result = ISGRAPH (c);
176 break;
177 case CC_LOWER:
178 result = ISLOWER (c);
179 break;
180 case CC_PRINT:
181 result = ISPRINT (c);
182 break;
183 case CC_PUNCT:
184 result = ISPUNCT (c);
185 break;
186 case CC_SPACE:
187 result = ISSPACE (c);
188 break;
189 case CC_UPPER:
190 result = ISUPPER (c);
191 break;
192 case CC_WORD:
193 result = (ISALNUM (c) || c == '_');
194 break;
195 case CC_XDIGIT:
196 result = ISXDIGIT (c);
197 break;
198 default:
199 result = -1;
200 break;
201 }
202
203 return result;
204}
205
206/* Now include `sm_loop.c' for single-byte characters. */
207/* The result of FOLD is an `unsigned char' */
208# define FOLD(c) ((flags & FNM_CASEFOLD) \
209 ? TOLOWER ((unsigned char)c) \
210 : ((unsigned char)c))
211
212#define FCT internal_strmatch
213#define GMATCH gmatch
214#define COLLSYM collsym
215#define PARSE_COLLSYM parse_collsym
216#define BRACKMATCH brackmatch
217#define PATSCAN patscan
218#define STRCOMPARE strcompare
219#define EXTMATCH extmatch
220#define STRCHR(S, C) strchr((S), (C))
221#define STRCOLL(S1, S2) strcoll((S1), (S2))
222#define STRLEN(S) strlen(S)
223#define STRCMP(S1, S2) strcmp((S1), (S2))
224#define RANGECMP(C1, C2) rangecmp((C1), (C2))
225#define COLLEQUIV(C1, C2) collequiv((C1), (C2))
226#define CTYPE_T enum char_class
227#define IS_CCLASS(C, S) is_cclass((C), (S))
228#include "sm_loop.c"
229
230#if HANDLE_MULTIBYTE
231
/* Character-type parameters for the wide-character instance of the
   matcher. */
# define CHAR           wchar_t
# define U_CHAR         wint_t
# define XCHAR          wchar_t
# define INT            wint_t
# define L(CS)          L##CS   /* turn a literal into its wide form */
# define INVALID        WEOF    /* collwcsym() result for an unknown symbol */

/* Wide-character replacements for the string-equality helpers. */
# undef STREQ
# undef STREQN
# define STREQ(s1, s2)          ((wcscmp (s1, s2) == 0))
# define STREQN(a, b, n)        ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
243
495aee44
CR
244extern char *mbsmbchar __P((const char *));
245
7117c2d2
JA
/* Order the wide characters C1 and C2.  Returns 0 when they are identical;
   otherwise returns the result of wcscoll(3) on the two one-character wide
   strings. */
static int
rangecmp_wc (c1, c2)
     wint_t c1, c2;
{
  static wchar_t lhs[2] = { L' ', L'\0' };
  static wchar_t rhs[2] = { L' ', L'\0' };

  if (c1 != c2)
    {
      lhs[0] = c1;
      rhs[0] = c2;
      return (wcscoll (lhs, rhs));
    }

  return 0;
}
261
/* Return 1 if the wide characters C and EQUIV have the same value, 0
   otherwise.  No locale collation is consulted here. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return (c == equiv);
}
268
269/* Helper function for collating symbol. */
270# define _COLLSYM _collwcsym
271# define __COLLSYM __collwcsym
272# define POSIXCOLL posix_collwcsyms
273# include "collsyms.h"
274
275static wint_t
276collwcsym (s, len)
277 wchar_t *s;
278 int len;
279{
280 register struct _collwcsym *csp;
281
282 for (csp = posix_collwcsyms; csp->name; csp++)
283 {
284 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
285 return (csp->code);
286 }
287 if (len == 1)
288 return s[0];
289 return INVALID;
290}
291
/* Test whether the wide character WC belongs to the character class called
   NAME.

   Returns -1 if NAME cannot be converted to a multibyte string, if the
   conversion buffer cannot be allocated, or if wctype(3) does not know the
   class.  Otherwise returns non-zero when WC is in the class and zero when
   it is not.

   Two names get special handling: "ascii", when wctype() does not provide
   it, is tested by converting WC with wctob() and checking for a value of at
   most 0x7F; "word" is treated as "alnum" plus the underscore. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  const wchar_t *src;
  char *mbs;
  mbstate_t state;
  size_t mbssize, mbslength;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      if ((c = wctob (wc)) == EOF)
        return 0;
      else
        return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* Room for every wide character at its largest multibyte size, plus the
     terminating null byte. */
  mbssize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbssize);
  if (mbs == 0)
    return -1;          /* without a buffer there is nothing to pass to wctype */

  /* wcsrtombs() updates the source pointer it is given, so hand it a
     scratch copy rather than the parameter itself. */
  memset (&state, '\0', sizeof (mbstate_t));
  src = name;
  mbslength = wcsrtombs (mbs, &src, mbssize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
337
338/* Now include `sm_loop.c' for multibyte characters. */
339#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
340#define FCT internal_wstrmatch
341#define GMATCH gmatch_wc
342#define COLLSYM collwcsym
343#define PARSE_COLLSYM parse_collwcsym
344#define BRACKMATCH brackmatch_wc
345#define PATSCAN patscan_wc
346#define STRCOMPARE wscompare
347#define EXTMATCH extmatch_wc
348#define STRCHR(S, C) wcschr((S), (C))
349#define STRCOLL(S1, S2) wcscoll((S1), (S2))
350#define STRLEN(S) wcslen(S)
351#define STRCMP(S1, S2) wcscmp((S1), (S2))
352#define RANGECMP(C1, C2) rangecmp_wc((C1), (C2))
353#define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
354#define CTYPE_T enum char_class
355#define IS_CCLASS(C, S) is_wcclass((C), (S))
356#include "sm_loop.c"
357
358#endif /* HANDLE_MULTIBYTE */
359
/* Match STRING against PATTERN under FLAGS and return the result of the
   underlying matcher.

   Without HANDLE_MULTIBYTE this is a direct call to the single-byte matcher
   internal_strmatch().  With it, the single-byte matcher is still used when
   MB_CUR_MAX is 1, when mbsmbchar() returns 0 for both strings (taken here
   to mean that neither contains a multibyte character), or when either
   string cannot be converted to wide characters.  Otherwise both strings
   are converted with xdupmbstowcs(), matched with internal_wstrmatch(), and
   the converted copies are freed before returning. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  /* Cheapest test first: nothing below is needed when every character is
     a single byte. */
  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  /* A conversion failure falls back to byte-wise matching of the original
     strings. */
  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}