]> git.ipfire.org Git - thirdparty/bash.git/blame - lib/glob/smatch.c
bash-5.1 beta release
[thirdparty/bash.git] / lib / glob / smatch.c
CommitLineData
7117c2d2
JA
1/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
2 globbing. */
3
712f80b0 4/* Copyright (C) 1991-2020 Free Software Foundation, Inc.
7117c2d2
JA
5
6 This file is part of GNU Bash, the Bourne Again SHell.
7
3185942a
JA
8 Bash is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 Bash is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with Bash. If not, see <http://www.gnu.org/licenses/>.
20*/
7117c2d2
JA
21
22#include <config.h>
23
24#include <stdio.h> /* for debugging */
25
26#include "strmatch.h"
27#include <chartypes.h>
28
29#include "bashansi.h"
30#include "shmbutil.h"
31#include "xmalloc.h"
32
d233b485
CR
33#include <errno.h>
34
35#if !defined (errno)
36extern int errno;
37#endif
38
3eb0018e
CR
39#if FNMATCH_EQUIV_FALLBACK
40/* We don't include <fnmatch.h> in order to avoid namespace collisions; the
41 internal strmatch still uses the FNM_ constants. */
42extern int fnmatch (const char *, const char *, int);
43#endif
44
7117c2d2
JA
45/* First, compile `sm_loop.c' for single-byte characters. */
46#define CHAR unsigned char
47#define U_CHAR unsigned char
48#define XCHAR char
49#define INT int
50#define L(CS) CS
51#define INVALID -1
52
53#undef STREQ
54#undef STREQN
55#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
56#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
57
ac50fbac
CR
58#ifndef GLOBASCII_DEFAULT
59# define GLOBASCII_DEFAULT 0
60#endif
61
62int glob_asciirange = GLOBASCII_DEFAULT;
63
3eb0018e
CR
64#if FNMATCH_EQUIV_FALLBACK
/* Ask the system fnmatch(3) whether the single-byte characters S and P are
   members of the same equivalence class: build the one-character string
   "S" and the bracket pattern "[[=P=]]" and match the string against the
   pattern.  We can't really do this portably any other way.  The return
   value is fnmatch's own, so 0 means the characters are equivalent. */
static int
_fnmatch_fallback (s, p)
     int s, p;                  /* string char, patchar */
{
  char str[2];                  /* one-character subject string */
  char pat[8];                  /* "[[=" + P + "=]]" + NUL */
  char *t;

  str[0] = (unsigned char)s;
  str[1] = '\0';

  /* Assemble the equivalence-class pattern one byte at a time. */
  t = pat;
  *t++ = '[';
  *t++ = '[';
  *t++ = '=';
  *t++ = (unsigned char)p;
  *t++ = '=';
  *t++ = ']';
  *t++ = ']';
  *t = '\0';

  return (fnmatch ((const char *)pat, (const char *)str, 0));
}
88#endif
89
7117c2d2
JA
/* We use strcoll(3) for range comparisons in bracket expressions,
   even though it can have unwanted side effects in locales
   other than POSIX or US.  For instance, in the de locale, [A-Z] matches
   all characters.  If glob_asciirange is non-zero, and we're not forcing
   the use of strcoll (e.g., for explicit collating symbols), we use
   straight ordering as if in the C locale. */
7117c2d2
JA
96
97#if defined (HAVE_STRCOLL)
3eb0018e
CR
98/* Helper functions for collating symbol equivalence. */
99
100/* Return 0 if C1 == C2 or collates equally if FORCECOLL is non-zero. */
ac50fbac 101static int
3eb0018e 102charcmp (c1, c2, forcecoll)
7117c2d2 103 int c1, c2;
ac50fbac 104 int forcecoll;
7117c2d2
JA
105{
106 static char s1[2] = { ' ', '\0' };
107 static char s2[2] = { ' ', '\0' };
108 int ret;
109
110 /* Eight bits only. Period. */
111 c1 &= 0xFF;
112 c2 &= 0xFF;
113
114 if (c1 == c2)
115 return (0);
116
ac50fbac
CR
117 if (forcecoll == 0 && glob_asciirange)
118 return (c1 - c2);
119
7117c2d2
JA
120 s1[0] = c1;
121 s2[0] = c2;
122
3eb0018e
CR
123 return (strcoll (s1, s2));
124}
125
/* Order C1 and C2 for use as bracket-expression range endpoints.  Uses
   charcmp; when charcmp reports a tie, the raw difference C1 - C2 is
   returned instead so that the result is a total ordering. */
static int
rangecmp (c1, c2, forcecoll)
     int c1, c2;
     int forcecoll;
{
  int order;

  order = charcmp (c1, c2, forcecoll);

  /* Break ties with the numeric values to impose a total ordering. */
  return (order == 0) ? (c1 - c2) : order;
}
140#else /* !HAVE_STRCOLL */
ac50fbac 141# define rangecmp(c1, c2, f) ((int)(c1) - (int)(c2))
7117c2d2
JA
142#endif /* !HAVE_STRCOLL */
143
144#if defined (HAVE_STRCOLL)
/* Returns 1 if chars C and EQUIV collate equally in the current locale,
   0 otherwise.  charcmp is tried first with collation forced; when
   FNMATCH_EQUIV_FALLBACK is enabled and charcmp reports a difference, the
   system fnmatch is asked through _fnmatch_fallback. */
static int
collequiv (c, equiv)
     int c, equiv;
{
#if FNMATCH_EQUIV_FALLBACK
  return (charcmp (c, equiv, 1) == 0 || _fnmatch_fallback (c, equiv) == 0);
#else
  return (charcmp (c, equiv, 1) == 0);
#endif
}
160#else
3eb0018e 161# define collequiv(c, equiv) ((c) == (equiv))
7117c2d2
JA
162#endif
163
164#define _COLLSYM _collsym
165#define __COLLSYM __collsym
166#define POSIXCOLL posix_collsyms
167#include "collsyms.h"
168
169static int
170collsym (s, len)
95732b49 171 CHAR *s;
7117c2d2
JA
172 int len;
173{
174 register struct _collsym *csp;
95732b49 175 char *x;
7117c2d2 176
95732b49 177 x = (char *)s;
7117c2d2
JA
178 for (csp = posix_collsyms; csp->name; csp++)
179 {
95732b49 180 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
7117c2d2
JA
181 return (csp->code);
182 }
183 if (len == 1)
184 return s[0];
185 return INVALID;
186}
187
188/* unibyte character classification */
189#if !defined (isascii) && !defined (HAVE_ISASCII)
190# define isascii(c) ((unsigned int)(c) <= 0177)
191#endif
192
/* Character classes usable in [:class:] bracket expressions.  The
   enumerators must stay in the same order as the strings in cclass_name
   below: is_valid_cclass converts a cclass_name index directly into an
   enum char_class value. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names indexed by enum char_class; slot 0 (CC_NO_CLASS) is an empty
   placeholder. */
static char const *const cclass_name[] =
  {
    "",
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name, including the placeholder. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
208
d233b485
CR
209static enum char_class
210is_valid_cclass (name)
7117c2d2
JA
211 const char *name;
212{
d233b485
CR
213 enum char_class ret;
214 int i;
215
216 ret = CC_NO_CLASS;
7117c2d2
JA
217
218 for (i = 1; i < N_CHAR_CLASS; i++)
219 {
220 if (STREQ (name, cclass_name[i]))
221 {
d233b485 222 ret = (enum char_class)i;
7117c2d2
JA
223 break;
224 }
225 }
226
d233b485
CR
227 return ret;
228}
229
230static int
231cclass_test (c, char_class)
232 int c;
233 enum char_class char_class;
234{
235 int result;
7117c2d2
JA
236
237 switch (char_class)
238 {
239 case CC_ASCII:
240 result = isascii (c);
241 break;
242 case CC_ALNUM:
243 result = ISALNUM (c);
244 break;
245 case CC_ALPHA:
246 result = ISALPHA (c);
247 break;
248 case CC_BLANK:
249 result = ISBLANK (c);
250 break;
251 case CC_CNTRL:
252 result = ISCNTRL (c);
253 break;
254 case CC_DIGIT:
255 result = ISDIGIT (c);
256 break;
257 case CC_GRAPH:
258 result = ISGRAPH (c);
259 break;
260 case CC_LOWER:
261 result = ISLOWER (c);
262 break;
263 case CC_PRINT:
264 result = ISPRINT (c);
265 break;
266 case CC_PUNCT:
267 result = ISPUNCT (c);
268 break;
269 case CC_SPACE:
270 result = ISSPACE (c);
271 break;
272 case CC_UPPER:
273 result = ISUPPER (c);
274 break;
275 case CC_WORD:
276 result = (ISALNUM (c) || c == '_');
277 break;
278 case CC_XDIGIT:
279 result = ISXDIGIT (c);
280 break;
281 default:
282 result = -1;
283 break;
284 }
285
286 return result;
287}
d233b485
CR
288
289static int
290is_cclass (c, name)
291 int c;
292 const char *name;
293{
294 enum char_class char_class;
295 int result;
296
297 char_class = is_valid_cclass (name);
298 if (char_class == CC_NO_CLASS)
299 return -1;
300
301 result = cclass_test (c, char_class);
302 return (result);
303}
7117c2d2
JA
304
305/* Now include `sm_loop.c' for single-byte characters. */
306/* The result of FOLD is an `unsigned char' */
307# define FOLD(c) ((flags & FNM_CASEFOLD) \
308 ? TOLOWER ((unsigned char)c) \
309 : ((unsigned char)c))
310
311#define FCT internal_strmatch
312#define GMATCH gmatch
313#define COLLSYM collsym
314#define PARSE_COLLSYM parse_collsym
315#define BRACKMATCH brackmatch
ac50fbac 316#define PATSCAN glob_patscan
7117c2d2
JA
317#define STRCOMPARE strcompare
318#define EXTMATCH extmatch
712f80b0 319#define DEQUOTE_PATHNAME udequote_pathname
d233b485 320#define STRUCT smat_struct
7117c2d2 321#define STRCHR(S, C) strchr((S), (C))
a0c0a00f 322#define MEMCHR(S, C, N) memchr((S), (C), (N))
7117c2d2
JA
323#define STRCOLL(S1, S2) strcoll((S1), (S2))
324#define STRLEN(S) strlen(S)
325#define STRCMP(S1, S2) strcmp((S1), (S2))
ac50fbac 326#define RANGECMP(C1, C2, F) rangecmp((C1), (C2), (F))
7117c2d2
JA
327#define COLLEQUIV(C1, C2) collequiv((C1), (C2))
328#define CTYPE_T enum char_class
329#define IS_CCLASS(C, S) is_cclass((C), (S))
330#include "sm_loop.c"
331
332#if HANDLE_MULTIBYTE
333
334# define CHAR wchar_t
335# define U_CHAR wint_t
336# define XCHAR wchar_t
337# define INT wint_t
338# define L(CS) L##CS
339# define INVALID WEOF
340
341# undef STREQ
342# undef STREQN
343# define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
344# define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
345
712f80b0 346extern char *mbsmbchar PARAMS((const char *));
495aee44 347
d233b485 348#if FNMATCH_EQUIV_FALLBACK
d233b485
CR
/* Convert wide character C1 to a multibyte string and wide character C2 to
   the multibyte pattern "[[=C2=]]", then pass them to fnmatch to see if the
   two characters collate as members of the same equivalence class.  We
   can't really do this portably any other way.  Returns 2 if either
   character cannot be converted by wctomb; otherwise returns fnmatch's
   result, so 0 means the characters are equivalent. */
static int
_fnmatch_fallback_wc (c1, c2)
     wchar_t c1, c2;            /* string char, patchar */
{
  char mbstr[MB_LEN_MAX+1];     /* multibyte form of C1 */
  char mbpat[MB_LEN_MAX+8];     /* "[[=" + multibyte C2 + "=]]" + NUL */
  int slen, plen;
  char *t;

  slen = wctomb (mbstr, c1);
  if (slen == -1)
    return (2);
  mbstr[slen] = '\0';

  /* Build the equivalence-class pattern around the converted C2. */
  mbpat[0] = '[';
  mbpat[1] = '[';
  mbpat[2] = '=';
  plen = wctomb (mbpat + 3, c2);
  if (plen == -1)
    return (2);
  t = mbpat + 3 + plen;
  *t++ = '=';
  *t++ = ']';
  *t++ = ']';
  *t = '\0';

  return (fnmatch ((const char *)mbpat, (const char *)mbstr, 0));
}
377#endif
378
7117c2d2 379static int
3eb0018e 380charcmp_wc (c1, c2, forcecoll)
7117c2d2 381 wint_t c1, c2;
ac50fbac 382 int forcecoll;
7117c2d2
JA
383{
384 static wchar_t s1[2] = { L' ', L'\0' };
385 static wchar_t s2[2] = { L' ', L'\0' };
3eb0018e 386 int r;
7117c2d2
JA
387
388 if (c1 == c2)
389 return 0;
390
ac50fbac
CR
391 if (forcecoll == 0 && glob_asciirange && c1 <= UCHAR_MAX && c2 <= UCHAR_MAX)
392 return ((int)(c1 - c2));
393
7117c2d2
JA
394 s1[0] = c1;
395 s2[0] = c2;
396
3eb0018e
CR
397 return (wcscoll (s1, s2));
398}
399
/* Order the wide characters C1 and C2 for use as range endpoints.  The
   result of charcmp_wc is returned as-is when it is non-zero or when
   FORCECOLL is non-zero.  Only an unforced tie is broken with the numeric
   difference C1 - C2, which imposes a total ordering in that case. */
static int
rangecmp_wc (c1, c2, forcecoll)
     wint_t c1, c2;
     int forcecoll;
{
  int order;

  order = charcmp_wc (c1, c2, forcecoll);

  if (order == 0 && forcecoll == 0)
    return ((int)(c1 - c2));    /* impose total ordering */
  return order;
}
415
/* Returns 1 if wide chars C and EQUIV collate equally in the current
   locale, 0 otherwise.  charcmp_wc is tried first with collation forced;
   when FNMATCH_EQUIV_FALLBACK is enabled and that reports a difference, the
   system fnmatch is asked through _fnmatch_fallback_wc. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  if (charcmp_wc (c, equiv, 1) == 0)
    return 1;

#if FNMATCH_EQUIV_FALLBACK
  /* We check explicitly for success (fnmatch returns 0) to avoid problems if
     our local definition of FNM_NOMATCH (strmatch.h) doesn't match the
     system's (fnmatch.h).  We don't care about error return values here. */
  {
    /* The helper has an old-style definition taking wchar_t, so convert
       the wint_t arguments explicitly before the call. */
    wchar_t s, p;

    s = c;
    p = equiv;
    return (_fnmatch_fallback_wc (s, p) == 0);
  }
#else
  return 0;
#endif
}
438
439/* Helper function for collating symbol. */
440# define _COLLSYM _collwcsym
441# define __COLLSYM __collwcsym
442# define POSIXCOLL posix_collwcsyms
443# include "collsyms.h"
444
445static wint_t
446collwcsym (s, len)
447 wchar_t *s;
448 int len;
449{
450 register struct _collwcsym *csp;
451
452 for (csp = posix_collwcsyms; csp->name; csp++)
453 {
454 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
455 return (csp->code);
456 }
457 if (len == 1)
458 return s[0];
459 return INVALID;
460}
461
/* Test whether wide character WC belongs to the character class called
   NAME (a wide string such as L"alpha").  Returns -1 if memory cannot be
   allocated, NAME cannot be converted to a multibyte string, or wctype(3)
   does not know the class; otherwise returns zero or non-zero according to
   the class test.  Two names get special treatment:
     "ascii" - if wctype does not provide it, WC qualifies when wctob(3)
               maps it to a byte no greater than 0x7F;
     "word"  - tested as "alnum", with underscore also accepted. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbname;
  mbstate_t ps;
  size_t mbsize, nconv;
  wctype_t desc;
  int is_word, byte;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      byte = wctob (wc);
      return (byte == EOF) ? 0 : (byte <= 0x7F);
    }

  is_word = (wcscmp (name, L"word") == 0);
  if (is_word)
    name = L"alnum";

  /* wctype wants a multibyte name, so convert NAME first. */
  memset (&ps, '\0', sizeof (mbstate_t));
  mbsize = wcslen (name) * MB_CUR_MAX + 1;
  mbname = (char *) malloc (mbsize);
  if (mbname == 0)
    return -1;

  nconv = wcsrtombs (mbname, (const wchar_t **)&name, mbsize, &ps);
  if (nconv == (size_t)-1 || nconv == (size_t)-2)
    {
      free (mbname);
      return -1;
    }

  desc = wctype (mbname);
  free (mbname);
  if (desc == (wctype_t)0)
    return -1;

  if (is_word)
    return (iswctype (wc, desc) || wc == L'_');
  return (iswctype (wc, desc));
}
509
d233b485
CR
510/* Return 1 if there are no char class [:class:] expressions (degenerate case)
511 or only posix-specified (C locale supported) char class expressions in
512 PATTERN. These are the ones where it's safe to punt to the single-byte
513 code, since wide character support allows locale-defined char classes.
514 This only uses single-byte code, but is only needed to support multibyte
515 locales. */
516static int
517posix_cclass_only (pattern)
518 char *pattern;
519{
520 char *p, *p1;
521 char cc[16]; /* sufficient for all valid posix char class names */
522 enum char_class valid;
523
524 p = pattern;
525 while (p = strchr (p, '['))
526 {
527 if (p[1] != ':')
528 {
529 p++;
530 continue;
531 }
532 p += 2; /* skip past "[:" */
533 /* Find end of char class expression */
534 for (p1 = p; *p1; p1++)
535 if (*p1 == ':' && p1[1] == ']')
536 break;
537 if (*p1 == 0) /* no char class expression found */
538 break;
539 /* Find char class name and validate it against posix char classes */
540 if ((p1 - p) >= sizeof (cc))
541 return 0;
542 bcopy (p, cc, p1 - p);
543 cc[p1 - p] = '\0';
544 valid = is_valid_cclass (cc);
545 if (valid == CC_NO_CLASS)
546 return 0; /* found unrecognized char class name */
547
548 p = p1 + 2; /* found posix char class name */
549 }
550
551 return 1; /* no char class names or only posix */
552}
553
7117c2d2
JA
554/* Now include `sm_loop.c' for multibyte characters. */
555#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
556#define FCT internal_wstrmatch
557#define GMATCH gmatch_wc
558#define COLLSYM collwcsym
559#define PARSE_COLLSYM parse_collwcsym
560#define BRACKMATCH brackmatch_wc
ac50fbac 561#define PATSCAN glob_patscan_wc
7117c2d2
JA
562#define STRCOMPARE wscompare
563#define EXTMATCH extmatch_wc
712f80b0 564#define DEQUOTE_PATHNAME wcdequote_pathname
d233b485 565#define STRUCT wcsmat_struct
7117c2d2 566#define STRCHR(S, C) wcschr((S), (C))
a0c0a00f 567#define MEMCHR(S, C, N) wmemchr((S), (C), (N))
7117c2d2
JA
568#define STRCOLL(S1, S2) wcscoll((S1), (S2))
569#define STRLEN(S) wcslen(S)
570#define STRCMP(S1, S2) wcscmp((S1), (S2))
ac50fbac 571#define RANGECMP(C1, C2, F) rangecmp_wc((C1), (C2), (F))
7117c2d2
JA
572#define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
573#define CTYPE_T enum char_class
574#define IS_CCLASS(C, S) is_wcclass((C), (S))
575#include "sm_loop.c"
576
#endif /* HANDLE_MULTIBYTE */
578
/* Match STRING against PATTERN under the FNM_* options in FLAGS and return
   the result of the internal matcher.

   Without HANDLE_MULTIBYTE this is a direct call to the single-byte
   matcher.  With it, the single-byte matcher is still used when:
     - MB_CUR_MAX is 1;
     - mbsmbchar returns 0 for both STRING and PATTERN and PATTERN passes
       posix_cclass_only; or
     - xdupmbstowcs reports a conversion failure for either argument.
   Otherwise both arguments are converted to wide-character strings,
   matched with internal_wstrmatch, and the converted copies are freed. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0 && posix_cclass_only (pattern))
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      /* The pattern was already converted; release it before falling back. */
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}