]>
git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/smatch.c
8c54702b0d4b5addd5dad7b62d0bc4a2c7fc70aa
1 /* strmatch.c -- ksh-like extended pattern matching for the shell and filename
4 /* Copyright (C) 1991-2002 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
13 Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License along
19 with Bash; see the file COPYING. If not, write to the Free Software
20 Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
24 #include <stdio.h> /* for debugging */
27 #include <chartypes.h>
33 /* First, compile `sm_loop.c' for single-byte characters. */
/* Character types for the single-byte pass: both the element type and
   its unsigned counterpart are `unsigned char'. */
34 #define CHAR unsigned char
35 #define U_CHAR unsigned char
/* String equality helpers.  Each compares the first characters before
   calling strcmp/strncmp, so strings that differ in their first character
   are rejected without a library call.  The arguments are expanded more
   than once, so they must be free of side effects. */
43 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
44 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
46 /* We use strcoll(3) for range comparisons in bracket expressions,
47 even though it can have unwanted side effects in locales
48 other than POSIX or US. For instance, in the de locale, [A-Z] matches
49 all characters. */
51 #if defined (HAVE_STRCOLL)
52 /* Helper function for collating symbol equivalence. */
53 static int rangecmp (c1
, c2
)
56 static char s1
[2] = { ' ', '\0' };
57 static char s2
[2] = { ' ', '\0' };
60 /* Eight bits only. Period. */
70 if ((ret
= strcoll (s1
, s2
)) != 0)
74 #else /* !HAVE_STRCOLL */
75 # define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
76 #endif /* !HAVE_STRCOLL */
78 #if defined (HAVE_STRCOLL)
83 return (rangecmp (c1
, c2
) == 0);
86 # define collequiv(c1, c2) ((c1) == (c2))
/* Names for the single-byte collating-symbol structure tag and table.
   The lookup loop that follows walks `posix_collsyms' through a
   `struct _collsym *'.  NOTE(review): presumably these names parameterize
   "collsyms.h", as in the wide-character section, but the include is not
   visible at this point -- confirm. */
89 #define _COLLSYM _collsym
90 #define __COLLSYM __collsym
91 #define POSIXCOLL posix_collsyms
99 register struct _collsym
*csp
;
101 for (csp
= posix_collsyms
; csp
->name
; csp
++)
103 if (STREQN(csp
->name
, s
, len
) && csp
->name
[len
] == '\0')
111 /* unibyte character classification */
/* Fallback for systems that provide isascii neither as a macro nor (per
   HAVE_ISASCII) as a function: a value is ASCII when, converted to
   unsigned int, it is at most 0177 (octal), i.e. fits in seven bits. */
112 #if !defined (isascii) && !defined (HAVE_ISASCII)
113 # define isascii(c) ((unsigned int)(c) <= 0177)
119 CC_ASCII
, CC_ALNUM
, CC_ALPHA
, CC_BLANK
, CC_CNTRL
, CC_DIGIT
, CC_GRAPH
,
120 CC_LOWER
, CC_PRINT
, CC_PUNCT
, CC_SPACE
, CC_UPPER
, CC_WORD
, CC_XDIGIT
123 static char const *const cclass_name
[] =
126 "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
127 "lower", "print", "punct", "space", "upper", "word", "xdigit"
/* Number of entries in the cclass_name table, computed from the array's
   own size so that it tracks the initializer. */
130 #define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
137 enum char_class char_class
= CC_NO_CLASS
;
140 for (i
= 1; i
< N_CHAR_CLASS
; i
++)
142 if (STREQ (name
, cclass_name
[i
]))
144 char_class
= (enum char_class
)i
;
155 result
= isascii (c
);
158 result
= ISALNUM (c
);
161 result
= ISALPHA (c
);
164 result
= ISBLANK (c
);
167 result
= ISCNTRL (c
);
170 result
= ISDIGIT (c
);
173 result
= ISGRAPH (c
);
176 result
= ISLOWER (c
);
179 result
= ISPRINT (c
);
182 result
= ISPUNCT (c
);
185 result
= ISSPACE (c
);
188 result
= ISUPPER (c
);
191 result
= (ISALNUM (c
) || c
== '_');
194 result
= ISXDIGIT (c
);
204 /* Now include `sm_loop.c' for single-byte characters. */
/* FOLD(c): prepare one single-byte character for comparison.
   The argument is converted to `unsigned char'.  When the `flags' variable
   in the scope of the expansion has FNM_CASEFOLD set, the converted value
   is passed through TOLOWER; otherwise it is used unchanged.  The result
   is meant to be an `unsigned char' value.
   The argument is parenthesized before the cast so that an expression
   argument (for example `x + 1') is converted as a whole instead of the
   cast binding only to its first operand. */
# define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))
/* Single-byte bindings for the generic names used by the shared matcher.
   The wide-character section later in this file rebinds the very same
   names to their *_wc / wcs* counterparts.  NOTE(review): the include of
   `sm_loop.c' that consumes these names is not visible in this excerpt. */

/* Names given to the functions generated for this pass. */
210 #define FCT internal_strmatch
211 #define GMATCH gmatch
212 #define COLLSYM collsym
213 #define PARSE_COLLSYM parse_collsym
214 #define BRACKMATCH brackmatch
215 #define PATSCAN patscan
216 #define STRCOMPARE strcompare
217 #define EXTMATCH extmatch
/* String primitives: the plain <string.h> routines for `char' strings. */
218 #define STRCHR(S, C) strchr((S), (C))
219 #define STRCOLL(S1, S2) strcoll((S1), (S2))
220 #define STRLEN(S) strlen(S)
221 #define STRCMP(S1, S2) strcmp((S1), (S2))
/* Collation helpers defined earlier in this file (function or macro,
   depending on HAVE_STRCOLL). */
222 #define RANGECMP(C1, C2) rangecmp((C1), (C2))
223 #define COLLEQUIV(C1, C2) collequiv((C1), (C2))
/* Character-class type and membership test for single-byte characters. */
224 #define CTYPE_T enum char_class
225 #define IS_CCLASS(C, S) is_cclass((C), (S))
/* Character types for the wide-character pass: elements are wchar_t and
   the unsigned/integer counterpart is wint_t. */
230 # define CHAR wchar_t
231 # define U_CHAR wint_t
232 # define XCHAR wchar_t
/* Marker for "no valid character" in the wide-character pass. */
235 # define INVALID WEOF
/* Wide-string equality helpers built on wcscmp/wcsncmp.  STREQN checks the
   first characters before calling wcsncmp; STREQ calls wcscmp directly.
   STREQN expands its arguments more than once, so they must be free of
   side effects. */
239 # define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
240 # define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
246 static wchar_t s1
[2] = { L
' ', L
'\0' };
247 static wchar_t s2
[2] = { L
' ', L
'\0' };
256 return (wcscoll (s1
, s2
));
260 collequiv_wc (c
, equiv
)
263 return (!(c
- equiv
));
266 /* Helper function for collating symbol. */
/* Names for the wide-character collating-symbol structure tag and table,
   set up immediately before including "collsyms.h".  The lookup loop that
   follows walks `posix_collwcsyms' through a `struct _collwcsym *'.
   NOTE(review): presumably collsyms.h declares the structure and table
   under these names -- confirm against that header. */
267 # define _COLLSYM _collwcsym
268 # define __COLLSYM __collwcsym
269 # define POSIXCOLL posix_collwcsyms
270 # include "collsyms.h"
277 register struct _collwcsym
*csp
;
279 for (csp
= posix_collwcsyms
; csp
->name
; csp
++)
281 if (STREQN(csp
->name
, s
, len
) && csp
->name
[len
] == L
'\0')
290 is_wcclass (wc
, name
)
300 if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name
, L
"ascii") == 0))
304 if ((c
= wctob (wc
)) == EOF
)
310 want_word
= (wcscmp (name
, L
"word") == 0);
314 memset (&state
, '\0', sizeof (mbstate_t));
315 mbs
= (char *) malloc (wcslen(name
) * MB_CUR_MAX
+ 1);
316 mbslength
= wcsrtombs(mbs
, (const wchar_t **)&name
, (wcslen(name
) * MB_CUR_MAX
+ 1), &state
);
318 if (mbslength
== (size_t)-1 || mbslength
== (size_t)-2)
326 if (desc
== (wctype_t)0)
330 return (iswctype (wc
, desc
) || wc
== L
'_');
332 return (iswctype (wc
, desc
));
335 /* Now include `sm_loop.c' for multibyte characters. */
/* Wide-character bindings for the same generic names that the single-byte
   section binds earlier in this file. */

/* Case folding: only when FNM_CASEFOLD is set in the `flags' variable of
   the expanding scope AND the character is upper case is it mapped with
   towlower; otherwise it is returned unchanged.  The argument is expanded
   more than once, so it must be free of side effects. */
336 #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
/* Names given to the functions generated for this pass. */
337 #define FCT internal_wstrmatch
338 #define GMATCH gmatch_wc
339 #define COLLSYM collwcsym
340 #define PARSE_COLLSYM parse_collwcsym
341 #define BRACKMATCH brackmatch_wc
342 #define PATSCAN patscan_wc
343 #define STRCOMPARE wscompare
344 #define EXTMATCH extmatch_wc
/* String primitives: the <wchar.h> routines for wchar_t strings. */
345 #define STRCHR(S, C) wcschr((S), (C))
346 #define STRCOLL(S1, S2) wcscoll((S1), (S2))
347 #define STRLEN(S) wcslen(S)
348 #define STRCMP(S1, S2) wcscmp((S1), (S2))
/* Wide-character collation helpers defined earlier in this file. */
349 #define RANGECMP(C1, C2) rangecmp_wc((C1), (C2))
350 #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
/* Character-class type and membership test for wide characters. */
351 #define CTYPE_T enum char_class
352 #define IS_CCLASS(C, S) is_wcclass((C), (S))
355 #endif /* HANDLE_MULTIBYTE */
358 xstrmatch (pattern
, string
, flags
)
368 wchar_t *wpattern
, *wstring
;
371 return (internal_strmatch (pattern
, string
, flags
));
373 pattern_bak
= (char *)xmalloc (strlen (pattern
) + 1);
374 strcpy (pattern_bak
, pattern
);
376 memset (&ps
, '\0', sizeof (mbstate_t));
377 n
= xmbsrtowcs (NULL
, (const char **)&pattern
, 0, &ps
);
378 if (n
== (size_t)-1 || n
== (size_t)-2)
381 return (internal_strmatch ((unsigned char *)pattern
, (unsigned char *)string
, flags
));
384 wpattern
= (wchar_t *)xmalloc ((n
+ 1) * sizeof (wchar_t));
385 (void) xmbsrtowcs (wpattern
, (const char **)&pattern
, n
+ 1, &ps
);
387 memset (&ps
, '\0', sizeof (mbstate_t));
388 n
= xmbsrtowcs (NULL
, (const char **)&string
, 0, &ps
);
389 if (n
== (size_t)-1 || n
== (size_t)-2)
392 ret
= internal_strmatch (pattern_bak
, string
, flags
);
397 wstring
= (wchar_t *)xmalloc ((n
+ 1) * sizeof (wchar_t));
398 (void) xmbsrtowcs (wstring
, (const char **)&string
, n
+ 1, &ps
);
400 ret
= internal_wstrmatch (wpattern
, wstring
, flags
);
408 return (internal_strmatch ((unsigned char *)pattern
, (unsigned char *)string
, flags
));
409 #endif /* !HANDLE_MULTIBYTE */