]>
git.ipfire.org Git - thirdparty/bash.git/blob - lib/glob/smatch.c
379c2d2e62d5e8452706a1b5a4ba5d5f364bf5fc
1 /* smatch.c -- ksh-like extended pattern matching for the shell and filename globbing. */
4 /* Copyright (C) 1991-2021 Free Software Foundation, Inc.
6 This file is part of GNU Bash, the Bourne Again SHell.
8 Bash is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 Bash is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with Bash. If not, see <http://www.gnu.org/licenses/>. */
24 #include <stdio.h> /* for debugging */
27 #include <chartypes.h>
39 #if FNMATCH_EQUIV_FALLBACK
40 /* We don't include <fnmatch.h> in order to avoid namespace collisions; the
41 internal strmatch still uses the FNM_ constants. */
42 extern int fnmatch (const char *, const char *, int);
45 /* First, compile `sm_loop.c' for single-byte characters. */
/* Character types for the single-byte build: both the plain and the
   unsigned character type are `unsigned char'. */
46 #define CHAR unsigned char
47 #define U_CHAR unsigned char
/* String equality tests.  The first characters are compared directly
   before strcmp/strncmp is called. */
55 #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
56 #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
/* Use 0 when the build does not define GLOBASCII_DEFAULT. */
58 #ifndef GLOBASCII_DEFAULT
59 # define GLOBASCII_DEFAULT 0
/* Global switch, initialized from GLOBASCII_DEFAULT.  charcmp and
   charcmp_wc test it together with their FORCECOLL argument. */
62 int glob_asciirange
= GLOBASCII_DEFAULT
;
64 #if FNMATCH_EQUIV_FALLBACK
65 /* Construct a string s1 = "s" and a pattern s2 = "[[=p=]]" and pass them
66 to fnmatch to see if characters S and P collate as members of the
67 same equivalence class. We can't really do this portably any other way. */
/* Ask fnmatch whether the one-character string built from S matches the
   pattern built around P.  fnmatch's return value is passed back
   unchanged. */
69 _fnmatch_fallback (s
, p
)
70 int s
, p
; /* string char, patchar */
72 char s1
[2]; /* string */
73 char s2
[8]; /* constructed pattern */
/* S, narrowed to unsigned char, is the first character of the string. */
75 s1
[0] = (unsigned char)s
;
78 /* reconstruct the pattern */
/* P, narrowed to unsigned char, sits at index 3 of the pattern buffer. */
81 s2
[3] = (unsigned char)p
;
/* fnmatch takes the pattern first, then the string; no flags are set. */
86 return (fnmatch ((const char *)s2
, (const char *)s1
, 0));
90 /* We use strcoll(3) for range comparisons in bracket expressions,
91 even though it can have unwanted side effects in locales
92 other than POSIX or US. For instance, in the de locale, [A-Z] matches
93 all characters. If GLOB_ASCIIRANGE is non-zero, and we're not forcing
94 the use of strcoll (e.g., for explicit collating symbols), we use
95 straight ordering as if in the C locale. */
97 #if defined (HAVE_STRCOLL)
98 /* Helper functions for collating symbol equivalence. */
100 /* Return 0 if C1 == C2 or collates equally if FORCECOLL is non-zero. */
/* Compare characters C1 and C2.  The last visible statement returns the
   result of strcoll on two one-character strings; before that, FORCECOLL
   and glob_asciirange are tested together. */
102 charcmp (c1
, c2
, forcecoll
)
/* Static two-element buffers, each initialized to a space followed by a
   NUL.  They are the strings handed to strcoll below. */
106 static char s1
[2] = { ' ', '\0' };
107 static char s2
[2] = { ' ', '\0' };
110 /* Eight bits only. Period. */
/* Taken only when collation is not forced and glob_asciirange is set. */
117 if (forcecoll
== 0 && glob_asciirange
)
/* Locale-aware comparison of the two one-character strings. */
123 return (strcoll (s1
, s2
));
/* Range comparison of C1 and C2.  charcmp's result is stored in R; the
   final visible return yields C1 - C2, so that characters charcmp
   reports as equal are still ordered. */
127 rangecmp (c1
, c2
, forcecoll
)
/* FORCECOLL is passed through to charcmp untouched. */
133 r
= charcmp (c1
, c2
, forcecoll
);
135 /* We impose a total ordering here by returning c1-c2 if charcmp returns 0 */
138 return (c1
- c2
); /* impose total ordering */
140 #else /* !HAVE_STRCOLL */
141 # define rangecmp(c1, c2, f) ((int)(c1) - (int)(c2))
142 #endif /* !HAVE_STRCOLL */
144 #if defined (HAVE_STRCOLL)
145 /* Returns 1 if chars C and EQUIV collate equally in the current locale. */
/* NOTE(review): the function header is not visible here; the name
   collequiv is taken from the macro form at the end of this span. */
/* charcmp is called with its third argument set to 1 (force collation);
   a result of 0 means C and EQUIV collate equally. */
150 if (charcmp (c
, equiv
, 1) == 0)
153 #if FNMATCH_EQUIV_FALLBACK
/* With FNMATCH_EQUIV_FALLBACK, _fnmatch_fallback is consulted as well;
   only a return of 0 from it counts as equivalent. */
154 return (_fnmatch_fallback (c
, equiv
) == 0);
/* Macro form of collequiv: plain equality of the two characters. */
161 # define collequiv(c, equiv) ((c) == (equiv))
/* Names defined before "collsyms.h" is included for the single-byte
   build; the loop below uses struct _collsym and posix_collsyms. */
164 #define _COLLSYM _collsym
165 #define __COLLSYM __collsym
166 #define POSIXCOLL posix_collsyms
167 #include "collsyms.h"
/* Cursor over the posix_collsyms table.
   NOTE(review): the enclosing function header is not visible; the
   COLLSYM macro elsewhere in this file suggests it is collsym. */
174 register struct _collsym
*csp
;
/* Walk the table until an entry with a null name is reached. */
178 for (csp
= posix_collsyms
; csp
->name
; csp
++)
/* An entry matches when its first LEN characters equal X and its name
   ends exactly at LEN. */
180 if (STREQN(csp
->name
, x
, len
) && csp
->name
[len
] == '\0')
188 /* unibyte character classification */
/* Supply isascii when there is neither a macro of that name nor
   HAVE_ISASCII: true for values no greater than octal 177. */
189 #if !defined (isascii) && !defined (HAVE_ISASCII)
190 # define isascii(c) ((unsigned int)(c) <= 0177)
/* Character class identifiers.  They appear in the same order as the
   strings "ascii" ... "xdigit" in cclass_name. */
196 CC_ASCII
, CC_ALNUM
, CC_ALPHA
, CC_BLANK
, CC_CNTRL
, CC_DIGIT
, CC_GRAPH
,
197 CC_LOWER
, CC_PRINT
, CC_PUNCT
, CC_SPACE
, CC_UPPER
, CC_WORD
, CC_XDIGIT
/* Names of the recognized character classes.  is_valid_cclass casts a
   matching index in this table directly to enum char_class. */
200 static char const *const cclass_name
[] =
203 "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
204 "lower", "print", "punct", "space", "upper", "word", "xdigit"
/* Number of entries in cclass_name. */
207 #define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
/* Look NAME up in cclass_name.  On a match the table index, cast to
   enum char_class, is stored in RET. */
209 static enum char_class
210 is_valid_cclass (name
)
/* The scan begins at index 1, not 0. */
218 for (i
= 1; i
< N_CHAR_CLASS
; i
++)
/* Exact string match against the table entry. */
220 if (STREQ (name
, cclass_name
[i
]))
222 ret
= (enum char_class
)i
;
/* Test character C for membership in CHAR_CLASS.  Each visible
   assignment stores the value of one classification predicate in RESULT;
   the selection among them happens on lines not shown here. */
231 cclass_test (c
, char_class
)
233 enum char_class char_class
;
/* The assignments follow the order of the class names:
   ascii, alnum, alpha, blank, cntrl, digit, graph, ... */
240 result
= isascii (c
);
243 result
= ISALNUM (c
);
246 result
= ISALPHA (c
);
249 result
= ISBLANK (c
);
252 result
= ISCNTRL (c
);
255 result
= ISDIGIT (c
);
258 result
= ISGRAPH (c
);
/* ... lower, print, punct, space, upper, ... */
261 result
= ISLOWER (c
);
264 result
= ISPRINT (c
);
267 result
= ISPUNCT (c
);
270 result
= ISSPACE (c
);
273 result
= ISUPPER (c
);
/* ... word: an alphanumeric character or an underscore ... */
276 result
= (ISALNUM (c
) || c
== '_');
/* ... and xdigit. */
279 result
= ISXDIGIT (c
);
/* NOTE(review): the function header is not visible here; the IS_CCLASS
   macro elsewhere in this file suggests this is is_cclass (c, name). */
294 enum char_class char_class
;
/* Translate the class name into its enum value. */
297 char_class
= is_valid_cclass (name
);
/* CC_NO_CLASS is checked for before the class is tested. */
298 if (char_class
== CC_NO_CLASS
)
/* Test C against the resolved class. */
301 result
= cclass_test (c
, char_class
);
305 /* Now include `sm_loop.c' for single-byte characters. */
306 /* The result of FOLD is an `unsigned char' */
/* Case folding for single-byte characters: TOLOWER of C when
   FNM_CASEFOLD is set in `flags', otherwise C itself, in both cases
   after a cast to unsigned char.  `flags' is not a macro parameter; it
   is taken from the expansion site. */
307 # define FOLD(c) ((flags & FNM_CASEFOLD) \
308 ? TOLOWER ((unsigned char)c) \
309 : ((unsigned char)c))
/* Directory separator test: '/' only when __CYGWIN__ is not defined;
   the second definition also accepts a backslash. */
311 #if !defined (__CYGWIN__)
312 # define ISDIRSEP(c) ((c) == '/')
314 # define ISDIRSEP(c) ((c) == '/' || (c) == '\\')
315 #endif /* __CYGWIN__ */
/* A path component ends at a directory separator or at the NUL. */
316 #define PATHSEP(c) (ISDIRSEP(c) || (c) == 0)
/* PDOT_OR_DOTDOT: S starts with "." or ".." followed by a PATHSEP.
   SDOT_OR_DOTDOT: S is exactly "." or "..". */
318 # define PDOT_OR_DOTDOT(s) (s[0] == '.' && (PATHSEP (s[1]) || (s[1] == '.' && PATHSEP (s[2]))))
319 # define SDOT_OR_DOTDOT(s) (s[0] == '.' && (s[1] == 0 || (s[1] == '.' && s[2] == 0)))
/* Names and helpers defined for the single-byte build, ahead of the
   inclusion of `sm_loop.c'.  The first group chooses identifiers. */
321 #define FCT internal_strmatch
322 #define GMATCH gmatch
323 #define COLLSYM collsym
324 #define PARSE_COLLSYM parse_collsym
325 #define BRACKMATCH brackmatch
326 #define PATSCAN glob_patscan
327 #define STRCOMPARE strcompare
328 #define EXTMATCH extmatch
329 #define DEQUOTE_PATHNAME udequote_pathname
330 #define STRUCT smat_struct
/* The second group maps generic string operations onto the narrow
   (char) C library functions. */
331 #define STRCHR(S, C) strchr((S), (C))
332 #define MEMCHR(S, C, N) memchr((S), (C), (N))
333 #define STRCOLL(S1, S2) strcoll((S1), (S2))
334 #define STRLEN(S) strlen(S)
335 #define STRCMP(S1, S2) strcmp((S1), (S2))
/* The third group maps collation and character-class hooks onto the
   single-byte helpers of this file. */
336 #define RANGECMP(C1, C2, F) rangecmp((C1), (C2), (F))
337 #define COLLEQUIV(C1, C2) collequiv((C1), (C2))
338 #define CTYPE_T enum char_class
339 #define IS_CCLASS(C, S) is_cclass((C), (S))
/* Character types for the wide-character build: wchar_t for characters,
   wint_t for the unsigned counterpart. */
344 # define CHAR wchar_t
345 # define U_CHAR wint_t
346 # define XCHAR wchar_t
/* WEOF serves as the INVALID marker. */
349 # define INVALID WEOF
/* Wide string equality tests.  Here only STREQN compares the first
   characters before calling the library function. */
353 # define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
354 # define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
356 extern char *mbsmbchar
PARAMS((const char *));
358 #if FNMATCH_EQUIV_FALLBACK
359 /* Construct a string w1 = "c1" and a pattern w2 = "[[=c2=]]" and pass them
360 to fnmatch to see if wide characters c1 and c2 collate as members of the
361 same equivalence class. We can't really do this portably any other way */
/* Wide-character fallback: C1 and C2 are converted to multibyte form,
   C2 is placed inside a pattern buffer, and fnmatch's result is passed
   back unchanged. */
363 _fnmatch_fallback_wc (c1
, c2
)
364 wchar_t c1
, c2
; /* string char, patchar */
/* W2 has seven more bytes than W1, leaving room for pattern
   characters around the converted C2. */
366 char w1
[MB_LEN_MAX
+1]; /* string */
367 char w2
[MB_LEN_MAX
+8]; /* constructed pattern */
/* L1 receives wctomb's return value for C1. */
370 l1
= wctomb (w1
, c1
);
375 /* reconstruct the pattern */
/* C2's multibyte form is written starting at offset 3 of W2. */
378 l2
= wctomb (w2
+3, c2
);
/* Two closing brackets are stored at offsets L2+4 and L2+5. */
382 w2
[l2
+4] = w2
[l2
+5] = ']';
/* fnmatch takes the pattern first, then the string; no flags are set. */
385 return (fnmatch ((const char *)w2
, (const char *)w1
, 0));
/* Wide-character comparison of C1 and C2.  Returns C1 - C2 directly on
   the glob_asciirange path, otherwise the result of wcscoll on two
   one-character wide strings. */
390 charcmp_wc (c1
, c2
, forcecoll
)
/* Static two-element wide buffers, each initialized to a space followed
   by a NUL.  They are the strings handed to wcscoll below. */
394 static wchar_t s1
[2] = { L
' ', L
'\0' };
395 static wchar_t s2
[2] = { L
' ', L
'\0' };
/* Plain subtraction is used only when collation is not forced,
   glob_asciirange is set, and both characters are at most UCHAR_MAX. */
401 if (forcecoll
== 0 && glob_asciirange
&& c1
<= UCHAR_MAX
&& c2
<= UCHAR_MAX
)
402 return ((int)(c1
- c2
));
/* Locale-aware comparison of the two one-character wide strings. */
407 return (wcscoll (s1
, s2
));
/* Wide-character range comparison of C1 and C2.  charcmp_wc's result is
   stored in R; the final visible return yields C1 - C2. */
411 rangecmp_wc (c1
, c2
, forcecoll
)
/* FORCECOLL is passed through to charcmp_wc untouched. */
417 r
= charcmp_wc (c1
, c2
, forcecoll
);
419 /* We impose a total ordering here by returning c1-c2 if charcmp returns 0,
420 as we do above in the single-byte case. */
/* This test is true when charcmp_wc found a difference or when
   collation was forced; the statement it guards is not shown here. */
421 if (r
!= 0 || forcecoll
)
423 return ((int)(c1
- c2
)); /* impose total ordering */
426 /* Returns 1 if wide chars C and EQUIV collate equally in the current locale. */
/* Wide-character equivalence test for C and EQUIV, using charcmp_wc
   with forced collation and, if enabled, the fnmatch fallback. */
428 collequiv_wc (c
, equiv
)
/* The third argument 1 forces collation; 0 means the two collate
   equally. */
433 if (charcmp_wc (c
, equiv
, 1) == 0)
436 #if FNMATCH_EQUIV_FALLBACK
437 /* We check explicitly for success (fnmatch returns 0) to avoid problems if
438 our local definition of FNM_NOMATCH (strmatch.h) doesn't match the
439 system's (fnmatch.h). We don't care about error return values here. */
/* NOTE(review): S and P are not declared on any visible line; presumably
   they are locals holding C and EQUIV -- confirm against the full file. */
443 return (_fnmatch_fallback_wc (s
, p
) == 0);
449 /* Helper function for collating symbol. */
/* Names defined before "collsyms.h" is included for the wide build; the
   loop below uses struct _collwcsym and posix_collwcsyms. */
450 # define _COLLSYM _collwcsym
451 # define __COLLSYM __collwcsym
452 # define POSIXCOLL posix_collwcsyms
453 # include "collsyms.h"
/* Cursor over the posix_collwcsyms table.
   NOTE(review): the enclosing function header is not visible; the
   COLLSYM macro elsewhere in this file suggests it is collwcsym. */
460 register struct _collwcsym
*csp
;
/* Walk the table until an entry with a null name is reached. */
462 for (csp
= posix_collwcsyms
; csp
->name
; csp
++)
/* An entry matches when its first LEN wide characters equal S and its
   name ends exactly at LEN. */
464 if (STREQN(csp
->name
, s
, len
) && csp
->name
[len
] == L
'\0')
/* Test wide character WC against the character class called NAME (a wide
   string).  NAME is converted to a multibyte string; the descriptor DESC
   is then used with iswctype. */
473 is_wcclass (wc
, name
)
/* Special case: wctype does not recognize "ascii" (it returns 0) and
   NAME is "ascii". */
483 if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name
, L
"ascii") == 0))
/* wctob returns EOF when WC has no single-byte representation. */
487 if ((c
= wctob (wc
)) == EOF
)
/* Record whether the class being asked for is "word". */
493 want_word
= (wcscmp (name
, L
"word") == 0);
/* Zero the conversion state, then allocate room for the multibyte form
   of NAME: MB_CUR_MAX bytes per wide character plus one more byte. */
497 memset (&state
, '\0', sizeof (mbstate_t));
498 mbs
= (char *) malloc (wcslen(name
) * MB_CUR_MAX
+ 1);
/* Convert NAME into MBS; the limit passed equals the allocation size. */
501 mbslength
= wcsrtombs (mbs
, (const wchar_t **)&name
, (wcslen(name
) * MB_CUR_MAX
+ 1), &state
);
/* Detect the error values (size_t)-1 and (size_t)-2. */
503 if (mbslength
== (size_t)-1 || mbslength
== (size_t)-2)
/* A zero descriptor is checked for before DESC is used. */
511 if (desc
== (wctype_t)0)
/* Two forms of the result: this one also accepts an underscore
   (presumably selected by want_word -- the test is not shown here)... */
515 return (iswctype (wc
, desc
) || wc
== L
'_');
/* ...and this one is the plain class test. */
517 return (iswctype (wc
, desc
));
520 /* Return 1 if there are no char class [:class:] expressions (degenerate case)
521 or only posix-specified (C locale supported) char class expressions in
522 PATTERN. These are the ones where it's safe to punt to the single-byte
523 code, since wide character support allows locale-defined char classes.
524 This only uses single-byte code, but is only needed to support multibyte locales. */
/* Scan PATTERN for character class expressions.  Returns 0 as soon as a
   class name is not accepted by is_valid_cclass, and 1 when the scan
   finishes without finding such a name. */
527 posix_cclass_only (pattern
)
531 char cc
[16]; /* sufficient for all valid posix char class names */
532 enum char_class valid
;
/* Advance P to each successive '[' until strchr finds none. */
535 while (p
= strchr (p
, '['))
542 p
+= 2; /* skip past "[:" */
543 /* Find end of char class expression */
/* P1 stops on the ':' of a closing ":]" or on the terminating NUL. */
544 for (p1
= p
; *p1
; p1
++)
545 if (*p1
== ':' && p1
[1] == ']')
547 if (*p1
== 0) /* no char class expression found */
549 /* Find char class name and validate it against posix char classes */
/* Tested before the copy below: is the name too long to fit in CC with
   a terminator? */
550 if ((p1
- p
) >= sizeof (cc
))
/* Copy the class name (the text between P and P1) into CC. */
552 bcopy (p
, cc
, p1
- p
);
554 valid
= is_valid_cclass (cc
);
555 if (valid
== CC_NO_CLASS
)
556 return 0; /* found unrecognized char class name */
/* Resume scanning just past the closing ":]". */
558 p
= p1
+ 2; /* found posix char class name */
561 return 1; /* no char class names or only posix */
564 /* Now include `sm_loop.c' for multibyte characters. */
/* Case folding for wide characters: towlower of C only when
   FNM_CASEFOLD is set in `flags' and C is upper case; otherwise C
   unchanged.  `flags' is taken from the expansion site. */
565 #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
/* Wide directory separator test: L'/' only when __CYGWIN__ is not
   defined; the second definition also accepts a backslash. */
567 # if !defined (__CYGWIN__)
568 # define ISDIRSEP(c) ((c) == L'/')
570 # define ISDIRSEP(c) ((c) == L'/' || (c) == L'\\')
571 # endif /* __CYGWIN__ */
/* A path component ends at a directory separator or at the wide NUL. */
572 # define PATHSEP(c) (ISDIRSEP(c) || (c) == L'\0')
/* PDOT_OR_DOTDOT: W starts with "." or ".." followed by a PATHSEP.
   SDOT_OR_DOTDOT: W is exactly "." or "..". */
574 # define PDOT_OR_DOTDOT(w) (w[0] == L'.' && (PATHSEP(w[1]) || (w[1] == L'.' && PATHSEP(w[2]))))
575 # define SDOT_OR_DOTDOT(w) (w[0] == L'.' && (w[1] == L'\0' || (w[1] == L'.' && w[2] == L'\0')))
/* Names and helpers defined for the multibyte build, ahead of the
   inclusion of `sm_loop.c'.  The first group chooses identifiers. */
577 #define FCT internal_wstrmatch
578 #define GMATCH gmatch_wc
579 #define COLLSYM collwcsym
580 #define PARSE_COLLSYM parse_collwcsym
581 #define BRACKMATCH brackmatch_wc
582 #define PATSCAN glob_patscan_wc
583 #define STRCOMPARE wscompare
584 #define EXTMATCH extmatch_wc
585 #define DEQUOTE_PATHNAME wcdequote_pathname
586 #define STRUCT wcsmat_struct
/* The second group maps generic string operations onto the
   wide-character C library functions. */
587 #define STRCHR(S, C) wcschr((S), (C))
588 #define MEMCHR(S, C, N) wmemchr((S), (C), (N))
589 #define STRCOLL(S1, S2) wcscoll((S1), (S2))
590 #define STRLEN(S) wcslen(S)
591 #define STRCMP(S1, S2) wcscmp((S1), (S2))
/* The third group maps collation and character-class hooks onto the
   wide-character helpers of this file. */
592 #define RANGECMP(C1, C2, F) rangecmp_wc((C1), (C2), (F))
593 #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
594 #define CTYPE_T enum char_class
595 #define IS_CCLASS(C, S) is_wcclass((C), (S))
598 #endif /* HANDLE_MULTIBYTE */
/* Match STRING against PATTERN under FLAGS.  The visible paths either
   call the single-byte matcher internal_strmatch directly, or convert
   both arguments to wide strings and call internal_wstrmatch.  A failed
   conversion falls back to internal_strmatch. */
601 xstrmatch (pattern
, string
, flags
)
/* Wide copies of the arguments, filled in by xdupmbstowcs below. */
609 wchar_t *wpattern
, *wstring
;
610 size_t plen
, slen
, mplen
, mslen
;
/* First single-byte return; the condition guarding it is on a line not
   shown here. */
613 return (internal_strmatch ((unsigned char *)pattern
, (unsigned char *)string
, flags
));
/* Single-byte matching is also used when mbsmbchar returns 0 for both
   STRING and PATTERN (presumably: no multibyte characters found) and
   posix_cclass_only accepts PATTERN. */
615 if (mbsmbchar (string
) == 0 && mbsmbchar (pattern
) == 0 && posix_cclass_only (pattern
))
616 return (internal_strmatch ((unsigned char *)pattern
, (unsigned char *)string
, flags
));
/* Convert PATTERN to a wide string; on the error values (size_t)-1 or
   (size_t)-2, match on the original bytes instead. */
618 n
= xdupmbstowcs (&wpattern
, NULL
, pattern
);
619 if (n
== (size_t)-1 || n
== (size_t)-2)
620 return (internal_strmatch ((unsigned char *)pattern
, (unsigned char *)string
, flags
));
/* Do the same for STRING. */
622 n
= xdupmbstowcs (&wstring
, NULL
, string
);
623 if (n
== (size_t)-1 || n
== (size_t)-2)
626 return (internal_strmatch ((unsigned char *)pattern
, (unsigned char *)string
, flags
));
/* Both conversions succeeded: run the wide-character matcher. */
629 ret
= internal_wstrmatch (wpattern
, wstring
, flags
);
/* Last return before the HANDLE_MULTIBYTE #endif: single-byte matcher. */
636 return (internal_strmatch ((unsigned char *)pattern
, (unsigned char *)string
, flags
));
637 #endif /* !HANDLE_MULTIBYTE */