# include <locale/weight.h>
#endif
+/* The localeinfo-related code fixes glibc bug 20381.
+ Someday this fix should be merged into glibc. */
+#ifndef _LIBC
+# include "localeinfo.h"
+#endif
+
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
size_t length, reg_syntax_t syntax);
static void re_compile_fastmap_iter (regex_t *bufp,
weak_alias (__re_compile_fastmap, re_compile_fastmap)
static __always_inline void
-re_set_fastmap (char *fastmap, bool icase, int ch)
+re_set_fastmap (char *fastmap, unsigned char ch)
{
fastmap[ch] = 1;
- if (icase)
- fastmap[tolower (ch)] = 1;
+}
+
+/* Record in FASTMAP the initial byte of the representations of all
+ characters that match WC ignoring case, other than WC itself.
+ Use MBS as a scratch state. Counterparts that cannot be converted
+ to a multibyte sequence are silently skipped. */
+
+static void
+re_set_fastmap_icase (char *fastmap, wchar_t wc, mbstate_t *mbs)
+{
+#ifdef _LIBC
+ /* When built as part of libc, only the lowercase form of WC is
+ considered, and it counts only when it differs from WC. */
+ wchar_t folded[1] = {__towlower (wc)};
+ int nfolded = folded[0] != wc;
+#else
+ /* Otherwise ask case_folded_counterparts; its return value is used
+ below as the number of entries it stored in FOLDED (presumably
+ declared in localeinfo.h -- confirm there). */
+ wchar_t folded[CASE_FOLDED_BUFSIZE];
+ int nfolded = case_folded_counterparts (wc, folded);
+#endif
+ for (int i = 0; i < nfolded; i++)
+ {
+ char buf[MB_LEN_MAX];
+ /* Only the first byte of each counterpart's encoding is recorded. */
+ if (__wcrtomb (buf, folded[i], mbs) != (size_t) -1)
+ re_set_fastmap (fastmap, buf[0]);
+ }
}
/* Helper function for re_compile_fastmap.
{
re_dfa_t *dfa = bufp->buffer;
Idx node_cnt;
- bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
{
Idx node = init_state->nodes.elems[node_cnt];
if (type == CHARACTER)
{
- re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
- if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
+ re_set_fastmap (fastmap, dfa->nodes[node].opr.c);
+ if (bufp->syntax & RE_ICASE)
{
unsigned char buf[MB_LEN_MAX];
unsigned char *p;
*p++ = dfa->nodes[node].opr.c;
memset (&state, '\0', sizeof (state));
if (__mbrtowc (&wc, (const char *) buf, p - buf,
- &state) == p - buf
- && (__wcrtomb ((char *) buf, __towlower (wc), &state)
- != (size_t) -1))
- re_set_fastmap (fastmap, false, buf[0]);
+ &state) == p - buf)
+ re_set_fastmap_icase (fastmap, wc, &state);
}
}
else if (type == SIMPLE_BRACKET)
bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
if (w & ((bitset_word_t) 1 << j))
- re_set_fastmap (fastmap, icase, ch);
+ re_set_fastmap (fastmap, ch);
}
}
else if (type == COMPLEX_BRACKET)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
for (i = 0; i < SBC_MAX; ++i)
if (table[i] < 0)
- re_set_fastmap (fastmap, icase, i);
+ re_set_fastmap (fastmap, i);
}
#endif /* _LIBC */
mbstate_t mbs;
memset (&mbs, 0, sizeof (mbs));
if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
- re_set_fastmap (fastmap, false, (int) c);
+ re_set_fastmap (fastmap, c);
}
while (++c != 0);
}
/* ... Else catch all bytes which can start the mbchars. */
for (i = 0; i < cset->nmbchars; ++i)
{
- char buf[256];
+ char buf[MB_LEN_MAX];
mbstate_t state;
memset (&state, '\0', sizeof (state));
if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
- re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
- if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
- {
- if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state)
- != (size_t) -1)
- re_set_fastmap (fastmap, false, *(unsigned char *) buf);
- }
+ re_set_fastmap (fastmap, buf[0]);
+ if (bufp->syntax & RE_ICASE)
+ re_set_fastmap_icase (fastmap, cset->mbchars[i], &state);
}
}
}
#include "regex.h"
+#include <ctype.h>
#include <locale.h>
#include <limits.h>
#include <stdarg.h>
}
}
+ /* Test for glibc bug 20381
+ <https://sourceware.org/bugzilla/show_bug.cgi?id=20381>. */
+ if (setlocale (LC_ALL, "el_GR.iso88597")
+ || setlocale (LC_ALL, "el_GR.ISO8859-7")
+ || setlocale (LC_ALL, "el_GR.iso8859-7"))
+ {
+ /* Check this only in Greek locales that seem to be working.
+ In macOS 26, for example, setlocale (LC_ALL, "el_GR.ISO8859-7")
+ succeeds but acts like the C locale. */
+ if (toupper (0xf2) == 0xd3 && toupper (0xf3) == 0xd3)
+ for (int i = 0; i < 3; i++)
+ for (int j = 0; j < 3; j++)
+ {
+ static char const str[3][2] = { "\xd3", "\xf2", "\xf3" };
+ regex_t re;
+ int err = regcomp (&re, str[i], REG_ICASE | REG_NOSUB);
+ if (err)
+ {
+ char buf[500];
+ regerror (err, &re, buf, sizeof buf);
+ report_error ("regcomp \\x%02x failed: %s",
+ (unsigned char) str[i][0], buf);
+ continue;
+ }
+
+ int with = regexec (&re, str[j], 0, NULL, 0);
+ free (re.fastmap);
+ re.fastmap = NULL;
+ re.fastmap_accurate = 0;
+ int without = regexec (&re, str[j], 0, NULL, 0);
+ if (with != without)
+ report_error
+ ("fastmap mismatch: pattern = \\x%02x, string = \\x%02x,"
+ " with = %d, without = %d",
+ (unsigned char) str[i][0], (unsigned char) str[j][0],
+ with, without);
+
+ regfree (&re);
+ }
+
+ if (! setlocale (LC_ALL, "C"))
+ {
+ report_error ("setlocale \"C\" failed");
+ return exit_status;
+ }
+ }
+
if (setlocale (LC_ALL, "tr_TR.UTF-8"))
{
if (really_utf8 () && towupper (L'i') == 0x0130 /* U+0130; see below. */)