2026-05-05 Paul Eggert <eggert@cs.ucla.edu>
+ localeinfo: port to single-byte non-__STDC_ISO_10646__
+ * lib/localeinfo.c (CHAR32_T_IS_UNICODE): New macro.
+ (case_folded_counterparts): In single-byte locales where
+ char32_t is not known to be Unicode, check all 255 possibilities
+ instead of hoping that char32_t is Unicode.
+ * lib/localeinfo.h (CASE_FOLDED_BUFSIZE): Increase to 255.
+ * modules/localeinfo (Depends-on): Add btoc32.
+
localeinfo: don’t check U+03F2 in newer glibc
* lib/localeinfo.c (lonesome_lower): Omit U+03F2 in recent Unicode.
c32tolower, and 1 for each entry in LONESOME_LOWER. */
verify (1 + 1 + countof (lonesome_lower) <= CASE_FOLDED_BUFSIZE);
+/* Whether char32_t values are Unicode code points.
+ It is OK if only UTF-16 is supported,
+ since this file converts only single-byte encodings to char32_t
+ and in practice these encodings convert to characters in the BMP. */
+#ifdef GL_CHAR32_T_IS_UNICODE
+# define CHAR32_T_IS_UNICODE GL_CHAR32_T_IS_UNICODE /* uchar-h-c23 */
+#elif defined __STDC_ISO_10646__
+# define CHAR32_T_IS_UNICODE 1 /* glibc, musl libc, Cygwin */
+#else
+# define CHAR32_T_IS_UNICODE 0
+#endif
+
/* Find the characters equal to C after case-folding, other than C
itself, and store them into FOLDED. Return the number of characters
stored; this is zero if C is WEOF. */
{
int n = 0;
wint_t uc = c32toupper (c);
- wint_t lc = c32tolower (uc);
- if (uc != c)
- folded[n++] = uc;
- if (lc != uc && lc != c && c32toupper (lc) == uc)
- folded[n++] = lc;
- for (int i = 0; i < countof (lonesome_lower); i++)
+
+ if (CHAR32_T_IS_UNICODE || 1 < MB_CUR_MAX)
{
- wint_t li = lonesome_lower[i];
- if (li != lc && li != uc && li != c && c32toupper (li) == uc)
- folded[n++] = li;
+ /* char32_t is Unicode, or this is a multibyte locale where
+ it is impractical to look for all case-folded counterparts
+ and where guessing Unicode will not produce false positives
+ though it may miss some case-folded counterparts. */
+ wint_t lc = c32tolower (uc);
+ if (uc != c)
+ folded[n++] = uc;
+ if (lc != uc && lc != c && c32toupper (lc) == uc)
+ folded[n++] = lc;
+ for (int i = 0; i < countof (lonesome_lower); i++)
+ {
+ wint_t li = lonesome_lower[i];
+ if (li != lc && li != uc && li != c && c32toupper (li) == uc)
+ folded[n++] = li;
+ }
}
+ else if (c != WEOF)
+ {
+ /* A single-byte locale where it is not known that char32_t is Unicode,
+ and C is not WEOF. Check all 255 possibilities for counterparts. */
+ for (int i = 1; i <= UCHAR_MAX; i++)
+ {
+ wint_t li = btoc32 (i);
+ if (li != c && c32toupper (li) == uc)
+ folded[n++] = li;
+ }
+ }
+
return n;
}
extern void init_localeinfo (struct localeinfo *);
/* Maximum number of characters that can be the case-folded
- counterparts of a single character, not counting the character
- itself. This is a generous upper bound. */
-enum { CASE_FOLDED_BUFSIZE = 32 };
+ counterparts of a single character, not counting the character itself.
+ Subtract from 256 one for U+0000. This is a generous upper bound. */
+enum { CASE_FOLDED_BUFSIZE = (unsigned char) -1 };
extern int case_folded_counterparts (wint_t, char32_t[CASE_FOLDED_BUFSIZE]);