git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Avoid global LC_CTYPE dependency in pg_locale_icu.c.
author Jeff Davis <jdavis@postgresql.org>
Tue, 16 Dec 2025 23:32:57 +0000 (15:32 -0800)
committer Jeff Davis <jdavis@postgresql.org>
Tue, 16 Dec 2025 23:32:57 +0000 (15:32 -0800)
ICU still depends on libc for compatibility with certain historical
behavior for single-byte encodings. Make the dependency explicit by
holding a locale_t object when required.

We should consider a better solution in the future, such as decoding
the text to UTF-32 and using u_tolower(). That would be a behavior
change and require additional infrastructure though; so for now, just
avoid the global LC_CTYPE dependency.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com

src/backend/utils/adt/pg_locale_icu.c
src/include/utils/pg_locale.h

index 69f22b47a68c1fb6c5aeb87869c9cd4cf75ab390..43d44fe43bdc408c3d0da66f97384c2a76e78f86 100644 (file)
@@ -244,6 +244,29 @@ static const struct ctype_methods ctype_methods_icu = {
        .wc_toupper = toupper_icu,
        .wc_tolower = tolower_icu,
 };
+
+/*
+ * ICU still depends on libc for compatibility with certain historical
+ * behavior for single-byte encodings.  See downcase_ident_icu().
+ *
+ * XXX: consider fixing by decoding the single byte into a code point, and
+ * using u_tolower().
+ */
+static locale_t
+make_libc_ctype_locale(const char *ctype)
+{
+       locale_t        loc;
+
+#ifndef WIN32
+       loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
+#else
+       loc = _create_locale(LC_ALL, ctype);
+#endif
+       if (!loc)
+               report_newlocale_failure(ctype);
+
+       return loc;
+}
 #endif
 
 pg_locale_t
@@ -254,6 +277,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
        const char *iculocstr;
        const char *icurules = NULL;
        UCollator  *collator;
+       locale_t        loc = (locale_t) 0;
        pg_locale_t result;
 
        if (collid == DEFAULT_COLLATION_OID)
@@ -276,6 +300,18 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
                if (!isnull)
                        icurules = TextDatumGetCString(datum);
 
+               /* libc only needed for default locale and single-byte encoding */
+               if (pg_database_encoding_max_length() == 1)
+               {
+                       const char *ctype;
+
+                       datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
+                                                                                  Anum_pg_database_datctype);
+                       ctype = TextDatumGetCString(datum);
+
+                       loc = make_libc_ctype_locale(ctype);
+               }
+
                ReleaseSysCache(tp);
        }
        else
@@ -306,6 +342,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
        result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
        result->icu.locale = MemoryContextStrdup(context, iculocstr);
        result->icu.ucol = collator;
+       result->icu.lt = loc;
        result->deterministic = deterministic;
        result->collate_is_c = false;
        result->ctype_is_c = false;
@@ -578,17 +615,19 @@ downcase_ident_icu(char *dst, size_t dstsize, const char *src,
                                   ssize_t srclen, pg_locale_t locale)
 {
        int                     i;
-       bool            enc_is_single_byte;
+       bool            libc_lower;
+       locale_t        lt = locale->icu.lt;
+
+       libc_lower = lt && (pg_database_encoding_max_length() == 1);
 
-       enc_is_single_byte = pg_database_encoding_max_length() == 1;
        for (i = 0; i < srclen && i < dstsize; i++)
        {
                unsigned char ch = (unsigned char) src[i];
 
                if (ch >= 'A' && ch <= 'Z')
                        ch = pg_ascii_tolower(ch);
-               else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
-                       ch = tolower(ch);
+               else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
+                       ch = tolower_l(ch, lt);
                dst[i] = (char) ch;
        }
 
index 1e584819c5e9faad309266a247cf2333318b2dcc..86016b9344e4d981237f66d7c37cbdb2f0f988b4 100644 (file)
@@ -167,6 +167,7 @@ struct pg_locale_struct
                {
                        const char *locale;
                        UCollator  *ucol;
+                       locale_t        lt;
                }                       icu;
 #endif
        };