]> git.ipfire.org Git - thirdparty/bash.git/blame - lib/glob/smatch.c
updated translations; remove unneeded files
[thirdparty/bash.git] / lib / glob / smatch.c
CommitLineData
7117c2d2
JA
/* smatch.c -- ksh-like extended pattern matching for the shell and filename
   globbing. */
3
74091dd4 4/* Copyright (C) 1991-2021 Free Software Foundation, Inc.
7117c2d2
JA
5
6 This file is part of GNU Bash, the Bourne Again SHell.
7
3185942a
JA
8 Bash is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 Bash is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with Bash. If not, see <http://www.gnu.org/licenses/>.
20*/
7117c2d2
JA
21
22#include <config.h>
23
24#include <stdio.h> /* for debugging */
25
26#include "strmatch.h"
27#include <chartypes.h>
28
29#include "bashansi.h"
30#include "shmbutil.h"
31#include "xmalloc.h"
32
d233b485
CR
33#include <errno.h>
34
35#if !defined (errno)
36extern int errno;
37#endif
38
8868edaf
CR
39#if FNMATCH_EQUIV_FALLBACK
40/* We don't include <fnmatch.h> in order to avoid namespace collisions; the
41 internal strmatch still uses the FNM_ constants. */
42extern int fnmatch (const char *, const char *, int);
43#endif
44
7117c2d2
JA
/* First, compile `sm_loop.c' for single-byte characters.  These macros
   select the character types and the literal form used for this pass. */
#define CHAR	unsigned char
#define U_CHAR	unsigned char
#define XCHAR	char
#define INT	int
#define L(CS)	CS
#define INVALID	(-1)

/* String equality helpers: check the first character before calling the
   library comparison. */
#undef STREQ
#undef STREQN
#define STREQ(a, b)	((a)[0] == (b)[0] && strcmp ((a), (b)) == 0)
#define STREQN(a, b, n)	((a)[0] == (b)[0] && strncmp ((a), (b), (n)) == 0)
57
ac50fbac
CR
/* Initial value of glob_asciirange when the build does not supply one. */
#ifndef GLOBASCII_DEFAULT
#  define GLOBASCII_DEFAULT 0
#endif

/* If non-zero, character comparisons that are not forced to use the locale's
   collating order compare the character values directly. */
int glob_asciirange = GLOBASCII_DEFAULT;
63
8868edaf
CR
64#if FNMATCH_EQUIV_FALLBACK
/* Construct a one-character string from S and a pattern "[[=P=]]" from P and
   pass them to fnmatch to see if the single-byte characters S and P collate
   as members of the same equivalence class.  We can't really do this
   portably any other way.  Returns whatever fnmatch returns; 0 means the
   characters matched. */
static int
_fnmatch_fallback (int s, int p)
{
  char str[2];			/* string */
  char pat[8] = "[[=?=]]";	/* pattern template; '?' is replaced below */

  str[0] = (unsigned char)s;
  str[1] = '\0';

  /* drop the pattern character into the equivalence class expression */
  pat[3] = (unsigned char)p;

  return (fnmatch ((const char *)pat, (const char *)str, 0));
}
88#endif
89
7117c2d2
JA
90/* We use strcoll(3) for range comparisons in bracket expressions,
91 even though it can have unwanted side effects in locales
92 other than POSIX or US. For instance, in the de locale, [A-Z] matches
ac50fbac
CR
93 all characters. If GLOB_ASCIIRANGE is non-zero, and we're not forcing
94 the use of strcoll (e.g., for explicit collating symbols), we use
95 straight ordering as if in the C locale. */
7117c2d2
JA
96
97#if defined (HAVE_STRCOLL)
8868edaf
CR
98/* Helper functions for collating symbol equivalence. */
99
100/* Return 0 if C1 == C2 or collates equally if FORCECOLL is non-zero. */
ac50fbac 101static int
8868edaf 102charcmp (c1, c2, forcecoll)
7117c2d2 103 int c1, c2;
ac50fbac 104 int forcecoll;
7117c2d2
JA
105{
106 static char s1[2] = { ' ', '\0' };
107 static char s2[2] = { ' ', '\0' };
108 int ret;
109
110 /* Eight bits only. Period. */
111 c1 &= 0xFF;
112 c2 &= 0xFF;
113
114 if (c1 == c2)
115 return (0);
116
ac50fbac
CR
117 if (forcecoll == 0 && glob_asciirange)
118 return (c1 - c2);
119
7117c2d2
JA
120 s1[0] = c1;
121 s2[0] = c2;
122
8868edaf
CR
123 return (strcoll (s1, s2));
124}
125
/* Compare C1 and C2 for use in bracket-expression ranges.  Returns the
   result of charcmp if it is non-zero; otherwise returns C1 - C2, which
   imposes a total ordering on characters that charcmp reports as equal. */
static int
rangecmp (int c1, int c2, int forcecoll)
{
  int cmp;

  cmp = charcmp (c1, c2, forcecoll);

  /* Fall back to the raw difference only when charcmp cannot tell the
     characters apart. */
  return (cmp != 0) ? cmp : (c1 - c2);
}
140#else /* !HAVE_STRCOLL */
ac50fbac 141# define rangecmp(c1, c2, f) ((int)(c1) - (int)(c2))
7117c2d2
JA
142#endif /* !HAVE_STRCOLL */
143
144#if defined (HAVE_STRCOLL)
/* Returns 1 if chars C and EQUIV collate equally in the current locale,
   0 otherwise.  When FNMATCH_EQUIV_FALLBACK is enabled and the direct
   comparison does not report them equal, the answer comes from matching C
   against an equivalence class pattern built from EQUIV. */
static int
collequiv (int c, int equiv)
{
  /* Always force collation here: equivalence is a locale question. */
  if (charcmp (c, equiv, 1) == 0)
    return 1;

#if FNMATCH_EQUIV_FALLBACK
  return (_fnmatch_fallback (c, equiv) == 0);
#else
  return 0;
#endif
}
160#else
8868edaf 161# define collequiv(c, equiv) ((c) == (equiv))
7117c2d2
JA
162#endif
163
164#define _COLLSYM _collsym
165#define __COLLSYM __collsym
166#define POSIXCOLL posix_collsyms
167#include "collsyms.h"
168
169static int
170collsym (s, len)
95732b49 171 CHAR *s;
7117c2d2
JA
172 int len;
173{
174 register struct _collsym *csp;
95732b49 175 char *x;
7117c2d2 176
95732b49 177 x = (char *)s;
7117c2d2
JA
178 for (csp = posix_collsyms; csp->name; csp++)
179 {
95732b49 180 if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
7117c2d2
JA
181 return (csp->code);
182 }
183 if (len == 1)
184 return s[0];
185 return INVALID;
186}
187
188/* unibyte character classification */
189#if !defined (isascii) && !defined (HAVE_ISASCII)
190# define isascii(c) ((unsigned int)(c) <= 0177)
191#endif
192
/* Character classes understood by the single-byte matcher.  The order must
   stay in step with cclass_name[] below: an enumerator's value is the index
   of its name in that table. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
    CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT
  };

/* Class names, indexed by enum char_class.  Slot 0 (CC_NO_CLASS) is a
   placeholder and is never matched. */
static char const *const cclass_name[] =
  {
    "",
    "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
    "lower", "print", "punct", "space", "upper", "word", "xdigit"
  };

#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))

/* Return the enum char_class value whose name is NAME, or CC_NO_CLASS if
   NAME is not one of the names in cclass_name[]. */
static enum char_class
is_valid_cclass (const char *name)
{
  size_t i;

  /* Start at 1 to skip the CC_NO_CLASS placeholder. */
  for (i = 1; i < N_CHAR_CLASS; i++)
    {
      if (strcmp (name, cclass_name[i]) == 0)
	return (enum char_class)i;
    }

  return CC_NO_CLASS;
}
229
230static int
231cclass_test (c, char_class)
232 int c;
233 enum char_class char_class;
234{
235 int result;
7117c2d2
JA
236
237 switch (char_class)
238 {
239 case CC_ASCII:
240 result = isascii (c);
241 break;
242 case CC_ALNUM:
243 result = ISALNUM (c);
244 break;
245 case CC_ALPHA:
246 result = ISALPHA (c);
247 break;
248 case CC_BLANK:
249 result = ISBLANK (c);
250 break;
251 case CC_CNTRL:
252 result = ISCNTRL (c);
253 break;
254 case CC_DIGIT:
255 result = ISDIGIT (c);
256 break;
257 case CC_GRAPH:
258 result = ISGRAPH (c);
259 break;
260 case CC_LOWER:
261 result = ISLOWER (c);
262 break;
263 case CC_PRINT:
264 result = ISPRINT (c);
265 break;
266 case CC_PUNCT:
267 result = ISPUNCT (c);
268 break;
269 case CC_SPACE:
270 result = ISSPACE (c);
271 break;
272 case CC_UPPER:
273 result = ISUPPER (c);
274 break;
275 case CC_WORD:
276 result = (ISALNUM (c) || c == '_');
277 break;
278 case CC_XDIGIT:
279 result = ISXDIGIT (c);
280 break;
281 default:
282 result = -1;
283 break;
284 }
285
286 return result;
287}
d233b485
CR
288
289static int
290is_cclass (c, name)
291 int c;
292 const char *name;
293{
294 enum char_class char_class;
295 int result;
296
297 char_class = is_valid_cclass (name);
298 if (char_class == CC_NO_CLASS)
299 return -1;
300
301 result = cclass_test (c, char_class);
302 return (result);
303}
7117c2d2
JA
304
305/* Now include `sm_loop.c' for single-byte characters. */
/* The result of FOLD is an `unsigned char'.  FOLD expects a variable named
   `flags' to be in scope where it is used. */
#define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))

/* Directory separators: `/', and additionally `\' on Cygwin. */
#if !defined (__CYGWIN__)
#  define ISDIRSEP(c)	((c) == '/')
#else
#  define ISDIRSEP(c)	((c) == '/' || (c) == '\\')
#endif /* __CYGWIN__ */
/* A pathname component ends at a directory separator or at end of string. */
#define PATHSEP(c)	(ISDIRSEP(c) || (c) == 0)

/* PDOT_OR_DOTDOT: S begins with a `.' or `..' pathname component.
   SDOT_OR_DOTDOT: S is exactly the string "." or "..". */
#define PDOT_OR_DOTDOT(s) ((s)[0] == '.' && (PATHSEP ((s)[1]) || ((s)[1] == '.' && PATHSEP ((s)[2]))))
#define SDOT_OR_DOTDOT(s) ((s)[0] == '.' && ((s)[1] == 0 || ((s)[1] == '.' && (s)[2] == 0)))
320
7117c2d2
JA
/* Names of the functions and types that the single-byte inclusion of
   `sm_loop.c' refers to. */
#define FCT			internal_strmatch
#define GMATCH			gmatch
#define COLLSYM			collsym
#define PARSE_COLLSYM		parse_collsym
#define BRACKMATCH		brackmatch
#define PATSCAN			glob_patscan
#define STRCOMPARE		strcompare
#define EXTMATCH		extmatch
#define DEQUOTE_PATHNAME	udequote_pathname
#define STRUCT			smat_struct

/* String primitives, mapped to the narrow-character library routines. */
#define STRCHR(S, C)		strchr((S), (C))
#define MEMCHR(S, C, N)		memchr((S), (C), (N))
#define STRCOLL(S1, S2)		strcoll((S1), (S2))
#define STRLEN(S)		strlen(S)
#define STRCMP(S1, S2)		strcmp((S1), (S2))

/* Character comparison and classification helpers defined in this file. */
#define RANGECMP(C1, C2, F)	rangecmp((C1), (C2), (F))
#define COLLEQUIV(C1, C2)	collequiv((C1), (C2))
#define CTYPE_T			enum char_class
#define IS_CCLASS(C, S)		is_cclass((C), (S))
340#include "sm_loop.c"
341
342#if HANDLE_MULTIBYTE
343
/* Character types and literal form for the wide-character inclusion of
   `sm_loop.c'. */
# define CHAR		wchar_t
# define U_CHAR		wint_t
# define XCHAR		wchar_t
# define INT		wint_t
# define L(CS)		L##CS
# define INVALID	WEOF

/* Wide-character versions of the string equality helpers. */
# undef STREQ
# undef STREQN
# define STREQ(s1, s2)	((wcscmp ((s1), (s2)) == 0))
# define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp ((a), (b), (n)) == 0)
355
8868edaf 356extern char *mbsmbchar PARAMS((const char *));
495aee44 357
d233b485 358#if FNMATCH_EQUIV_FALLBACK
d233b485
CR
/* Construct a string w1 = "c1" and a pattern w2 = "[[=c2=]]" and pass them
   to fnmatch to see if wide characters c1 and c2 collate as members of the
   same equivalence class.  We can't really do this portably any other way.
   Returns 2 if either character cannot be converted to a multibyte
   sequence; otherwise returns whatever fnmatch returns (0 means a match). */
static int
_fnmatch_fallback_wc (wchar_t c1, wchar_t c2)
{
  char w1[MB_LEN_MAX+1];		/* string */
  char w2[MB_LEN_MAX+8];		/* constructed pattern */
  char *wp;
  int l1, l2;

  l1 = wctomb (w1, c1);
  if (l1 == -1)
    return (2);
  w1[l1] = '\0';

  /* reconstruct the pattern: "[[=" + multibyte form of c2 + "=]]" */
  wp = w2;
  *wp++ = '[';
  *wp++ = '[';
  *wp++ = '=';
  l2 = wctomb (wp, c2);
  if (l2 == -1)
    return (2);
  wp += l2;
  *wp++ = '=';
  *wp++ = ']';
  *wp++ = ']';
  *wp = '\0';

  return (fnmatch ((const char *)w2, (const char *)w1, 0));
}
387#endif
388
7117c2d2 389static int
8868edaf 390charcmp_wc (c1, c2, forcecoll)
7117c2d2 391 wint_t c1, c2;
ac50fbac 392 int forcecoll;
7117c2d2
JA
393{
394 static wchar_t s1[2] = { L' ', L'\0' };
395 static wchar_t s2[2] = { L' ', L'\0' };
8868edaf 396 int r;
7117c2d2
JA
397
398 if (c1 == c2)
399 return 0;
400
ac50fbac
CR
401 if (forcecoll == 0 && glob_asciirange && c1 <= UCHAR_MAX && c2 <= UCHAR_MAX)
402 return ((int)(c1 - c2));
403
7117c2d2
JA
404 s1[0] = c1;
405 s2[0] = c2;
406
8868edaf
CR
407 return (wcscoll (s1, s2));
408}
409
/* Compare wide characters C1 and C2 for use in bracket-expression ranges.
   Returns the result of charcmp_wc if it is non-zero or if FORCECOLL is
   set.  Otherwise returns C1 - C2, imposing a total ordering on characters
   that charcmp_wc reports as equal, as the single-byte rangecmp does. */
static int
rangecmp_wc (wint_t c1, wint_t c2, int forcecoll)
{
  int cmp;

  cmp = charcmp_wc (c1, c2, forcecoll);

  /* When collation is forced, a result of 0 is returned as-is. */
  if (cmp != 0 || forcecoll)
    return cmp;

  return ((int)(c1 - c2));		/* impose total ordering */
}
425
/* Returns 1 if wide chars C and EQUIV collate equally in the current locale,
   0 otherwise. */
static int
collequiv_wc (wint_t c, wint_t equiv)
{
  if (charcmp_wc (c, equiv, 1) == 0)
    return 1;

#if FNMATCH_EQUIV_FALLBACK
  /* We check explicitly for success (fnmatch returns 0) to avoid problems if
     our local definition of FNM_NOMATCH (strmatch.h) doesn't match the
     system's (fnmatch.h).  We don't care about error return values here. */
  return (_fnmatch_fallback_wc ((wchar_t)c, (wchar_t)equiv) == 0);
#else
  return 0;
#endif
}
448
449/* Helper function for collating symbol. */
450# define _COLLSYM _collwcsym
451# define __COLLSYM __collwcsym
452# define POSIXCOLL posix_collwcsyms
453# include "collsyms.h"
454
455static wint_t
456collwcsym (s, len)
457 wchar_t *s;
458 int len;
459{
460 register struct _collwcsym *csp;
461
462 for (csp = posix_collwcsyms; csp->name; csp++)
463 {
464 if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
465 return (csp->code);
466 }
467 if (len == 1)
468 return s[0];
469 return INVALID;
470}
471
/* Test wide character WC for membership in the character class NAME.
   Returns -1 if NAME is not a class known to wctype(3), if NAME cannot be
   converted to a multibyte string, or if memory cannot be allocated;
   otherwise returns non-zero if WC is a member of the class and 0 if not.
   Two names get special treatment: "ascii" is tested locally when
   wctype(3) does not provide it, and "word" means "alnum" plus `_'. */
static int
is_wcclass (wint_t wc, wchar_t *name)
{
  const wchar_t *src;
  char *mbs;
  mbstate_t state;
  size_t mbsize, mbslength;
  wctype_t desc;
  int want_word;

  if ((wcscmp (name, L"ascii") == 0) && (wctype ("ascii") == (wctype_t)0))
    {
      int c;

      /* ASCII means: representable as a single byte no greater than 0x7F. */
      c = wctob (wc);
      return (c != EOF && c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    /* The class actually looked up is the fixed name "alnum"; there is
       nothing to convert. */
    desc = wctype ("alnum");
  else
    {
      mbsize = wcslen (name) * MB_CUR_MAX + 1;
      mbs = malloc (mbsize);
      if (mbs == 0)
	return -1;

      /* Convert through a local pointer: wcsrtombs updates the source
	 pointer it is given.  It reports failure only as (size_t)-1. */
      memset (&state, '\0', sizeof (mbstate_t));
      src = name;
      mbslength = wcsrtombs (mbs, &src, mbsize, &state);
      if (mbslength == (size_t)-1)
	{
	  free (mbs);
	  return -1;
	}
      desc = wctype (mbs);
      free (mbs);
    }

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');

  return (iswctype (wc, desc));
}
519
d233b485
CR
520/* Return 1 if there are no char class [:class:] expressions (degenerate case)
521 or only posix-specified (C locale supported) char class expressions in
522 PATTERN. These are the ones where it's safe to punt to the single-byte
523 code, since wide character support allows locale-defined char classes.
524 This only uses single-byte code, but is only needed to support multibyte
525 locales. */
526static int
527posix_cclass_only (pattern)
528 char *pattern;
529{
530 char *p, *p1;
531 char cc[16]; /* sufficient for all valid posix char class names */
532 enum char_class valid;
533
534 p = pattern;
535 while (p = strchr (p, '['))
536 {
537 if (p[1] != ':')
538 {
539 p++;
540 continue;
541 }
542 p += 2; /* skip past "[:" */
543 /* Find end of char class expression */
544 for (p1 = p; *p1; p1++)
545 if (*p1 == ':' && p1[1] == ']')
546 break;
547 if (*p1 == 0) /* no char class expression found */
548 break;
549 /* Find char class name and validate it against posix char classes */
550 if ((p1 - p) >= sizeof (cc))
551 return 0;
552 bcopy (p, cc, p1 - p);
553 cc[p1 - p] = '\0';
554 valid = is_valid_cclass (cc);
555 if (valid == CC_NO_CLASS)
556 return 0; /* found unrecognized char class name */
557
558 p = p1 + 2; /* found posix char class name */
559 }
560
561 return 1; /* no char class names or only posix */
562}
563
7117c2d2
JA
564/* Now include `sm_loop.c' for multibyte characters. */
/* Fold C to lower case when case-insensitive matching is requested.  FOLD
   expects a variable named `flags' to be in scope and evaluates its
   argument more than once. */
#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))

/* Directory separators: `/', and additionally `\' on Cygwin. */
# if !defined (__CYGWIN__)
#  define ISDIRSEP(c)	((c) == L'/')
# else
#  define ISDIRSEP(c)	((c) == L'/' || (c) == L'\\')
# endif /* __CYGWIN__ */
/* A pathname component ends at a directory separator or at end of string. */
# define PATHSEP(c)	(ISDIRSEP(c) || (c) == L'\0')

/* PDOT_OR_DOTDOT: W begins with a `.' or `..' pathname component.
   SDOT_OR_DOTDOT: W is exactly the string "." or "..". */
# define PDOT_OR_DOTDOT(w) ((w)[0] == L'.' && (PATHSEP((w)[1]) || ((w)[1] == L'.' && PATHSEP((w)[2]))))
# define SDOT_OR_DOTDOT(w) ((w)[0] == L'.' && ((w)[1] == L'\0' || ((w)[1] == L'.' && (w)[2] == L'\0')))
576
7117c2d2
JA
/* Names of the functions and types that the wide-character inclusion of
   `sm_loop.c' refers to. */
#define FCT			internal_wstrmatch
#define GMATCH			gmatch_wc
#define COLLSYM			collwcsym
#define PARSE_COLLSYM		parse_collwcsym
#define BRACKMATCH		brackmatch_wc
#define PATSCAN			glob_patscan_wc
#define STRCOMPARE		wscompare
#define EXTMATCH		extmatch_wc
#define DEQUOTE_PATHNAME	wcdequote_pathname
#define STRUCT			wcsmat_struct

/* String primitives, mapped to the wide-character library routines. */
#define STRCHR(S, C)		wcschr((S), (C))
#define MEMCHR(S, C, N)		wmemchr((S), (C), (N))
#define STRCOLL(S1, S2)		wcscoll((S1), (S2))
#define STRLEN(S)		wcslen(S)
#define STRCMP(S1, S2)		wcscmp((S1), (S2))

/* Character comparison and classification helpers defined in this file. */
#define RANGECMP(C1, C2, F)	rangecmp_wc((C1), (C2), (F))
#define COLLEQUIV(C1, C2)	collequiv_wc((C1), (C2))
#define CTYPE_T			enum char_class
#define IS_CCLASS(C, S)		is_wcclass((C), (S))
596#include "sm_loop.c"
597
#endif /* HANDLE_MULTIBYTE */
599
/* Match STRING against PATTERN under FLAGS and return the matcher's result.
   Without HANDLE_MULTIBYTE this is just the single-byte matcher.  With it,
   the single-byte matcher is still used when MB_CUR_MAX is 1, when
   mbsmbchar returns 0 for both STRING and PATTERN and PATTERN uses only
   posix character class names, or when either argument cannot be converted
   to a wide-character string.  Otherwise both are converted and matched
   with the wide-character matcher. */
int
xstrmatch (char *pattern, char *string, int flags)
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0 && posix_cclass_only (pattern))
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  /* Conversion failures fall back to byte-wise matching instead of
     reporting an error. */
  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}