#ifdef USE_ICU
extern UCollator *pg_ucol_open(const char *loc_str);
+static UCaseMap *pg_ucasemap_open(const char *loc_str);
static size_t strlower_icu(char *dest, size_t destsize, const char *src,
ssize_t srclen, pg_locale_t locale);
ssize_t srclen, pg_locale_t locale);
static size_t strfold_icu(char *dest, size_t destsize, const char *src,
ssize_t srclen, pg_locale_t locale);
+static size_t strlower_icu_utf8(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strtitle_icu_utf8(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strupper_icu_utf8(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strfold_icu_utf8(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
static size_t downcase_ident_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
static int strncoll_icu(const char *arg1, ssize_t len1,
const UChar *buff_uchar, int32_t len_uchar);
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
UErrorCode *status);
-static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
- UChar **buff_dest, UChar *buff_source,
- int32_t len_source);
+static int32_t icu_convert_case(ICU_Convert_Func func, char *dest,
+ size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
+static int32_t foldcase_options(const char *locale);
/*
* XXX: many of the functions below rely on casts directly from pg_wchar to
.wc_tolower = tolower_icu,
};
+static const struct ctype_methods ctype_methods_icu_utf8 = {
+ .strlower = strlower_icu_utf8,
+ .strtitle = strtitle_icu_utf8,
+ .strupper = strupper_icu_utf8,
+ .strfold = strfold_icu_utf8,
+ /* uses plain ASCII semantics for historical reasons */
+ .downcase_ident = NULL,
+ .wc_isdigit = wc_isdigit_icu,
+ .wc_isalpha = wc_isalpha_icu,
+ .wc_isalnum = wc_isalnum_icu,
+ .wc_isupper = wc_isupper_icu,
+ .wc_islower = wc_islower_icu,
+ .wc_isgraph = wc_isgraph_icu,
+ .wc_isprint = wc_isprint_icu,
+ .wc_ispunct = wc_ispunct_icu,
+ .wc_isspace = wc_isspace_icu,
+ .wc_isxdigit = wc_isxdigit_icu,
+ .wc_iscased = wc_iscased_icu,
+ .wc_toupper = toupper_icu,
+ .wc_tolower = tolower_icu,
+};
+
/*
* ICU still depends on libc for compatibility with certain historical
* behavior for single-byte encodings. See downcase_ident_icu().
result->collate_is_c = false;
result->ctype_is_c = false;
if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ result->icu.ucasemap = pg_ucasemap_open(iculocstr);
result->collate = &collate_methods_icu_utf8;
+ result->ctype = &ctype_methods_icu_utf8;
+ }
else
+ {
result->collate = &collate_methods_icu;
- result->ctype = &ctype_methods_icu;
+ result->ctype = &ctype_methods_icu;
+ }
return result;
#else
#ifdef USE_ICU
/*
- * Wrapper around ucol_open() to handle API differences for older ICU
- * versions.
+ * Check locale string and fix it if necessary. Returns a new palloc'd string.
*
- * Ensure that no path leaks a UCollator.
+ * In ICU versions 54 and earlier, "und" is not a recognized spelling of the
+ * root locale. If the first component of the locale is "und", replace with
+ * "root" before opening.
*/
-UCollator *
-pg_ucol_open(const char *loc_str)
+static char *
+fix_icu_locale_str(const char *loc_str)
{
- UCollator *collator;
- UErrorCode status;
- const char *orig_str = loc_str;
- char *fixed_str = NULL;
-
/*
* Must never open default collator, because it depends on the environment
* and may change at any time. Should not happen, but check here to catch
if (loc_str == NULL)
elog(ERROR, "opening default collator is not supported");
- /*
- * In ICU versions 54 and earlier, "und" is not a recognized spelling of
- * the root locale. If the first component of the locale is "und", replace
- * with "root" before opening.
- */
if (U_ICU_VERSION_MAJOR_NUM < 55)
{
char lang[ULOC_LANG_CAPACITY];
+ UErrorCode status = U_ZERO_ERROR;
- status = U_ZERO_ERROR;
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
{
if (strcmp(lang, "und") == 0)
{
const char *remainder = loc_str + strlen("und");
+ char *fixed_str;
fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
strcpy(fixed_str, "root");
strcat(fixed_str, remainder);
- loc_str = fixed_str;
+ return fixed_str;
}
}
+ return pstrdup(loc_str);
+}
+
+/*
+ * Wrapper around ucol_open() to handle API differences for older ICU
+ * versions.
+ *
+ * Ensure that no path leaks a UCollator.
+ */
+UCollator *
+pg_ucol_open(const char *loc_str)
+{
+ UCollator *collator;
+ UErrorCode status;
+ char *fixed_str;
+
+ fixed_str = fix_icu_locale_str(loc_str);
+
status = U_ZERO_ERROR;
- collator = ucol_open(loc_str, &status);
+ collator = ucol_open(fixed_str, &status);
if (U_FAILURE(status))
ereport(ERROR,
/* use original string for error report */
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("could not open collator for locale \"%s\": %s",
- orig_str, u_errorName(status))));
+ loc_str, u_errorName(status))));
if (U_ICU_VERSION_MAJOR_NUM < 54)
{
status = U_ZERO_ERROR;
- icu_set_collation_attributes(collator, loc_str, &status);
+ icu_set_collation_attributes(collator, fixed_str, &status);
/*
* Pretend the error came from ucol_open(), for consistent error
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("could not open collator for locale \"%s\": %s",
- orig_str, u_errorName(status))));
+ loc_str, u_errorName(status))));
}
}
- if (fixed_str != NULL)
- pfree(fixed_str);
+ pfree(fixed_str);
return collator;
}
+/*
+ * Wrapper around ucasemap_open() to handle API differences for older ICU
+ * versions.
+ *
+ * Additionally makes sure we get the right options for case folding.
+ */
+static UCaseMap *
+pg_ucasemap_open(const char *loc_str)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ UCaseMap *casemap;
+ char *fixed_str;
+
+ fixed_str = fix_icu_locale_str(loc_str);
+
+ casemap = ucasemap_open(fixed_str, foldcase_options(fixed_str), &status);
+ if (U_FAILURE(status))
+ /* use original string for error report */
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not open casemap for locale \"%s\": %s",
+ loc_str, u_errorName(status)));
+
+ pfree(fixed_str);
+
+ return casemap;
+}
+
/*
* Create a UCollator with the given locale string and rules.
*
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- int32_t len_uchar;
- int32_t len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
- size_t result_len;
-
- len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
- len_conv = icu_convert_case(u_strToLower, locale,
- &buff_conv, buff_uchar, len_uchar);
- result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
-
- return result_len;
+ return icu_convert_case(u_strToLower, dest, destsize, src, srclen, locale);
}
static size_t
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- int32_t len_uchar;
- int32_t len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
- size_t result_len;
-
- len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
- len_conv = icu_convert_case(u_strToTitle_default_BI, locale,
- &buff_conv, buff_uchar, len_uchar);
- result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
-
- return result_len;
+ return icu_convert_case(u_strToTitle_default_BI, dest, destsize, src, srclen, locale);
}
static size_t
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- int32_t len_uchar;
- int32_t len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
- size_t result_len;
-
- len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
- len_conv = icu_convert_case(u_strToUpper, locale,
- &buff_conv, buff_uchar, len_uchar);
- result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
-
- return result_len;
+ return icu_convert_case(u_strToUpper, dest, destsize, src, srclen, locale);
}
static size_t
strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- int32_t len_uchar;
- int32_t len_conv;
- UChar *buff_uchar;
- UChar *buff_conv;
- size_t result_len;
+ return icu_convert_case(u_strFoldCase_default, dest, destsize, src, srclen, locale);
+}
- len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
- len_conv = icu_convert_case(u_strFoldCase_default, locale,
- &buff_conv, buff_uchar, len_uchar);
- result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
- pfree(buff_uchar);
- pfree(buff_conv);
+static size_t
+strlower_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
+ pg_locale_t locale)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t needed;
- return result_len;
+ needed = ucasemap_utf8ToLower(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
+ if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+ ereport(ERROR,
+ errmsg("case conversion failed: %s", u_errorName(status)));
+ return needed;
+}
+
+static size_t
+strtitle_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
+ pg_locale_t locale)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t needed;
+
+ needed = ucasemap_utf8ToTitle(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
+ if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+ ereport(ERROR,
+ errmsg("case conversion failed: %s", u_errorName(status)));
+ return needed;
+}
+
+static size_t
+strupper_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
+ pg_locale_t locale)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t needed;
+
+ needed = ucasemap_utf8ToUpper(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
+ if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+ ereport(ERROR,
+ errmsg("case conversion failed: %s", u_errorName(status)));
+ return needed;
+}
+
+static size_t
+strfold_icu_utf8(char *dest, size_t destsize, const char *src, ssize_t srclen,
+ pg_locale_t locale)
+{
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t needed;
+
+ needed = ucasemap_utf8FoldCase(locale->icu.ucasemap, dest, destsize, src, srclen, &status);
+ if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+ ereport(ERROR,
+ errmsg("case conversion failed: %s", u_errorName(status)));
+ return needed;
}
/*
}
static int32_t
-icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
- UChar **buff_dest, UChar *buff_source, int32_t len_source)
+convert_case_uchar(ICU_Convert_Func func, pg_locale_t mylocale,
+ UChar **buff_dest, UChar *buff_source, int32_t len_source)
{
UErrorCode status;
int32_t len_dest;
return len_dest;
}
+static int32_t
+icu_convert_case(ICU_Convert_Func func, char *dest, size_t destsize,
+ const char *src, ssize_t srclen, pg_locale_t locale)
+{
+ int32_t len_uchar;
+ int32_t len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+ size_t result_len;
+
+ len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
+ len_conv = convert_case_uchar(func, locale, &buff_conv,
+ buff_uchar, len_uchar);
+ result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
+ pfree(buff_uchar);
+ pfree(buff_conv);
+
+ return result_len;
+}
+
static int32_t
u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode)
+{
+ return u_strFoldCase(dest, destCapacity, src, srcLength,
+ foldcase_options(locale), pErrorCode);
+}
+
+/*
+ * Return the correct u_strFoldCase() options for the given locale.
+ *
+ * Unlike the ICU APIs for lowercasing, titlecasing, and uppercasing, case
+ * folding does not accept a locale. Instead it just supports a single option
+ * relevant to Turkic languages 'az' and 'tr'; check for those languages.
+ */
+static int32_t
+foldcase_options(const char *locale)
{
uint32 options = U_FOLD_CASE_DEFAULT;
char lang[3];
- UErrorCode status;
+ UErrorCode status = U_ZERO_ERROR;
- /*
- * Unlike the ICU APIs for lowercasing, titlecasing, and uppercasing, case
- * folding does not accept a locale. Instead it just supports a single
- * option relevant to Turkic languages 'az' and 'tr'; check for those
- * languages to enable the option.
- */
- status = U_ZERO_ERROR;
uloc_getLanguage(locale, lang, 3, &status);
if (U_SUCCESS(status))
{
options = U_FOLD_CASE_EXCLUDE_SPECIAL_I;
}
- return u_strFoldCase(dest, destCapacity, src, srcLength,
- options, pErrorCode);
+ return options;
}
/*