#include "common/unicode_category.h"
#include "utils/pg_locale.h"
-/*
- * For the libc provider, to provide as much functionality as possible on a
- * variety of platforms without going so far as to implement everything from
- * scratch, we use several implementation strategies depending on the
- * situation:
- *
- * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
- * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
- * collations don't give a fig about multibyte characters.
- *
- * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
- * This assumes that every platform uses Unicode codepoints directly
- * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
- * even for non-UTF8 encodings, which may be a problem.) On some platforms
- * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
- *
- * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
- * values up to 255, and punt for values above that. This is 100% correct
- * only in single-byte encodings such as LATINn. However, non-Unicode
- * multibyte encodings are mostly Far Eastern character sets for which the
- * properties being tested here aren't very relevant for higher code values
- * anyway. The difficulty with using the <wctype.h> functions with
- * non-Unicode multibyte encodings is that we can have no certainty that
- * the platform's wchar_t representation matches what we do in pg_wchar
- * conversions.
- *
- * As a special case, in the "default" collation, (2) and (3) force ASCII
- * letters to follow ASCII upcase/downcase rules, while in a non-default
- * collation we just let the library functions do what they will. The case
- * where this matters is treatment of I/i in Turkish, and the behavior is
- * meant to match the upper()/lower() SQL functions.
- *
- * We store the active collation setting in static variables. In principle
- * it could be passed down to here via the regex library's "struct vars" data
- * structure; but that would require somewhat invasive changes in the regex
- * library, and right now there's no real benefit to be gained from that.
- *
- * NB: the coding here assumes pg_wchar is an unsigned type.
- */
-
-typedef enum
-{
- PG_REGEX_STRATEGY_C, /* C locale (encoding independent) */
- PG_REGEX_STRATEGY_BUILTIN, /* built-in Unicode semantics */
- PG_REGEX_STRATEGY_LIBC_WIDE, /* Use locale_t <wctype.h> functions */
- PG_REGEX_STRATEGY_LIBC_1BYTE, /* Use locale_t <ctype.h> functions */
- PG_REGEX_STRATEGY_ICU, /* Use ICU uchar.h functions */
-} PG_Locale_Strategy;
-
-static PG_Locale_Strategy pg_regex_strategy;
static pg_locale_t pg_regex_locale;
+static struct pg_locale_struct dummy_c_locale = {
+ .collate_is_c = true,
+ .ctype_is_c = true,
+};
+
/*
* Hard-wired character properties for C locale
*/
pg_set_regex_collation(Oid collation)
{
pg_locale_t locale = 0;
- PG_Locale_Strategy strategy;
if (!OidIsValid(collation))
{
* catalog access is available, so we can't call
* pg_newlocale_from_collation().
*/
- strategy = PG_REGEX_STRATEGY_C;
- locale = 0;
+ locale = &dummy_c_locale;
}
else
{
* C/POSIX collations use this path regardless of database
* encoding
*/
- strategy = PG_REGEX_STRATEGY_C;
- locale = 0;
- }
- else if (locale->provider == COLLPROVIDER_BUILTIN)
- {
- Assert(GetDatabaseEncoding() == PG_UTF8);
- strategy = PG_REGEX_STRATEGY_BUILTIN;
- }
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- {
- strategy = PG_REGEX_STRATEGY_ICU;
- }
-#endif
- else
- {
- Assert(locale->provider == COLLPROVIDER_LIBC);
- if (GetDatabaseEncoding() == PG_UTF8)
- strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
- else
- strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
+ locale = &dummy_c_locale;
}
}
- pg_regex_strategy = strategy;
pg_regex_locale = locale;
}
static int
pg_wc_isdigit(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISDIGIT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isdigit(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISDIGIT));
+ else
+ return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale);
}
static int
pg_wc_isalpha(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISALPHA));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalpha(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isalpha(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISALPHA));
+ else
+ return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale);
}
static int
pg_wc_isalnum(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISALNUM));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isalnum(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISALNUM));
+ else
+ return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale);
}
static int
static int
pg_wc_isupper(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISUPPER));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isupper(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isupper_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isupper(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISUPPER));
+ else
+ return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale);
}
static int
pg_wc_islower(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISLOWER));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_islower(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- islower_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_islower(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISLOWER));
+ else
+ return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale);
}
static int
pg_wc_isgraph(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISGRAPH));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isgraph(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isgraph(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISGRAPH));
+ else
+ return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale);
}
static int
pg_wc_isprint(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISPRINT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isprint(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isprint_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isprint(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISPRINT));
+ else
+ return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale);
}
static int
pg_wc_ispunct(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISPUNCT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_ispunct(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISPUNCT));
+ else
+ return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale);
}
static int
pg_wc_isspace(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISSPACE));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isspace(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isspace_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isspace(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISSPACE));
+ else
+ return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale);
}
static pg_wchar
pg_wc_toupper(pg_wchar c)
{
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
- if (c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- return c;
- case PG_REGEX_STRATEGY_BUILTIN:
- return unicode_uppercase_simple(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towupper_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
- return c;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_toupper(c);
-#endif
- break;
+ if (c <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) c);
+ return c;
}
- return 0; /* can't get here, but keep compiler quiet */
+ else
+ return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
}
static pg_wchar
pg_wc_tolower(pg_wchar c)
{
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
- if (c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- return c;
- case PG_REGEX_STRATEGY_BUILTIN:
- return unicode_lowercase_simple(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towlower_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
- return c;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_tolower(c);
-#endif
- break;
+ if (c <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) c);
+ return c;
}
- return 0; /* can't get here, but keep compiler quiet */
+ else
+ return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
}
* would always be true for production values of MAX_SIMPLE_CHR, but it's
* useful to allow it to be small for testing purposes.)
*/
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
#if MAX_SIMPLE_CHR >= 127
- max_chr = (pg_wchar) 127;
- pcc->cv.cclasscode = -1;
+ max_chr = (pg_wchar) 127;
+ pcc->cv.cclasscode = -1;
#else
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+ max_chr = (pg_wchar) MAX_SIMPLE_CHR;
#endif
- break;
- case PG_REGEX_STRATEGY_BUILTIN:
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
-#if MAX_SIMPLE_CHR >= UCHAR_MAX
- max_chr = (pg_wchar) UCHAR_MAX;
+ }
+ else
+ {
+ if (pg_regex_locale->ctype->max_chr != 0 &&
+ pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
+ {
+ max_chr = pg_regex_locale->ctype->max_chr;
pcc->cv.cclasscode = -1;
-#else
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-#endif
- break;
- case PG_REGEX_STRATEGY_ICU:
+ }
+ else
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- default:
- Assert(false);
- max_chr = 0; /* can't get here, but keep compiler quiet */
- break;
}
/*
else if (locale->is_default)
return pg_tolower(c);
else
- return tolower_l(c, locale->info.lt);
+ return char_tolower(c, locale);
}
* way.
*/
- if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
+ if (locale->ctype_is_c ||
+ (char_tolower_enabled(locale) &&
+ pg_database_encoding_max_length() == 1))
+ {
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ return SB_IMatchText(s, slen, p, plen, locale);
+ }
+ else
{
pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
PointerGetDatum(pat)));
else
return MB_MatchText(s, slen, p, plen, 0);
}
- else
- {
- p = VARDATA_ANY(pat);
- plen = VARSIZE_ANY_EXHDR(pat);
- s = VARDATA_ANY(str);
- slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale);
- }
}
/*
{
if (locale->ctype_is_c)
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
- else if (is_multibyte && IS_HIGHBIT_SET(c))
- return true;
- else if (locale->provider != COLLPROVIDER_LIBC)
- return IS_HIGHBIT_SET(c) ||
- (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
else
- return isalpha_l((unsigned char) c, locale->info.lt);
+ return char_is_cased(c, locale);
}
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
extern char *get_collation_actual_version_libc(const char *collcollate);
-extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
/* GUC settings */
char *locale_messages;
char *locale_monetary;
Assert((result->collate_is_c && result->collate == NULL) ||
(!result->collate_is_c && result->collate != NULL));
+ Assert((result->ctype_is_c && result->ctype == NULL) ||
+ (!result->ctype_is_c && result->ctype != NULL));
+
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
&isnull);
if (!isnull)
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strlower_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strlower_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strlower_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
size_t
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strtitle_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strtitle_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strtitle_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
}
size_t
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strupper_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strupper_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strupper_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
}
size_t
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strfold_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strfold_icu(dst, dstsize, src, srclen, locale);
-#endif
- /* for libc, just use strlower */
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strlower_libc(dst, dstsize, src, srclen, locale);
+ if (locale->ctype->strfold)
+ return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
/*
return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
}
+/*
+ * char_is_cased()
+ *
+ * Fuzzy test of whether the given char is case-varying or not. The argument
+ * is a single byte, so in a multibyte encoding, just assume any non-ASCII
+ * char is case-varying.
+ */
+bool
+char_is_cased(char ch, pg_locale_t locale)
+{
+ return locale->ctype->char_is_cased(ch, locale);
+}
+
+/*
+ * char_tolower_enabled()
+ *
+ * Does the provider support char_tolower()?
+ */
+bool
+char_tolower_enabled(pg_locale_t locale)
+{
+ return (locale->ctype->char_tolower != NULL);
+}
+
+/*
+ * char_tolower()
+ *
+ * Convert char (single-byte encoding) to lowercase.
+ */
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
+{
+ return locale->ctype->char_tolower(ch, locale);
+}
+
/*
* Return required encoding ID for the given locale, or -1 if any encoding is
* valid for the locale.
extern pg_locale_t create_pg_locale_builtin(Oid collid,
MemoryContext context);
extern char *get_collation_actual_version_builtin(const char *collcollate);
-extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
struct WordBoundaryState
{
return wbstate->len;
}
-size_t
+static size_t
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
locale->info.builtin.casemap_full);
}
-size_t
+static size_t
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
initcap_wbnext, &wbstate);
}
-size_t
+static size_t
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
locale->info.builtin.casemap_full);
}
-size_t
+static size_t
strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
locale->info.builtin.casemap_full);
}
+static bool
+wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isupper(wc);
+}
+
+static bool
+wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_islower(wc);
+}
+
+static bool
+wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isgraph(wc);
+}
+
+static bool
+wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isprint(wc);
+}
+
+static bool
+wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isspace(wc);
+}
+
+static bool
+char_is_cased_builtin(char ch, pg_locale_t locale)
+{
+ return IS_HIGHBIT_SET(ch) ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return unicode_uppercase_simple(wc);
+}
+
+static pg_wchar
+wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return unicode_lowercase_simple(wc);
+}
+
+static const struct ctype_methods ctype_methods_builtin = {
+ .strlower = strlower_builtin,
+ .strtitle = strtitle_builtin,
+ .strupper = strupper_builtin,
+ .strfold = strfold_builtin,
+ .wc_isdigit = wc_isdigit_builtin,
+ .wc_isalpha = wc_isalpha_builtin,
+ .wc_isalnum = wc_isalnum_builtin,
+ .wc_isupper = wc_isupper_builtin,
+ .wc_islower = wc_islower_builtin,
+ .wc_isgraph = wc_isgraph_builtin,
+ .wc_isprint = wc_isprint_builtin,
+ .wc_ispunct = wc_ispunct_builtin,
+ .wc_isspace = wc_isspace_builtin,
+ .char_is_cased = char_is_cased_builtin,
+ .wc_tolower = wc_tolower_builtin,
+ .wc_toupper = wc_toupper_builtin,
+};
+
pg_locale_t
create_pg_locale_builtin(Oid collid, MemoryContext context)
{
result->deterministic = true;
result->collate_is_c = true;
result->ctype_is_c = (strcmp(locstr, "C") == 0);
+ if (!result->ctype_is_c)
+ result->ctype = &ctype_methods_builtin;
return result;
}
#define TEXTBUFLEN 1024
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
-extern size_t strlower_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
#ifdef USE_ICU
extern UCollator *pg_ucol_open(const char *loc_str);
+static size_t strlower_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strupper_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strfold_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static int strncoll_icu(const char *arg1, ssize_t len1,
+ const char *arg2, ssize_t len2,
+ pg_locale_t locale);
static size_t strnxfrm_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
const char *locale,
UErrorCode *pErrorCode);
+static bool
+char_is_cased_icu(char ch, pg_locale_t locale)
+{
+ return IS_HIGHBIT_SET(ch) ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+toupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_toupper(wc);
+}
+
+static pg_wchar
+tolower_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_tolower(wc);
+}
+
static const struct collate_methods collate_methods_icu = {
.strncoll = strncoll_icu,
.strnxfrm = strnxfrm_icu,
.strxfrm_is_safe = true,
};
+static bool
+wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isdigit(wc);
+}
+
+static bool
+wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isalnum(wc);
+}
+
+static bool
+wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isupper(wc);
+}
+
+static bool
+wc_islower_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_islower(wc);
+}
+
+static bool
+wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isgraph(wc);
+}
+
+static bool
+wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isprint(wc);
+}
+
+static bool
+wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_ispunct(wc);
+}
+
+static bool
+wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isspace(wc);
+}
+
+static const struct ctype_methods ctype_methods_icu = {
+ .strlower = strlower_icu,
+ .strtitle = strtitle_icu,
+ .strupper = strupper_icu,
+ .strfold = strfold_icu,
+ .wc_isdigit = wc_isdigit_icu,
+ .wc_isalpha = wc_isalpha_icu,
+ .wc_isalnum = wc_isalnum_icu,
+ .wc_isupper = wc_isupper_icu,
+ .wc_islower = wc_islower_icu,
+ .wc_isgraph = wc_isgraph_icu,
+ .wc_isprint = wc_isprint_icu,
+ .wc_ispunct = wc_ispunct_icu,
+ .wc_isspace = wc_isspace_icu,
+ .char_is_cased = char_is_cased_icu,
+ .wc_toupper = toupper_icu,
+ .wc_tolower = tolower_icu,
+};
#endif
pg_locale_t
result->collate = &collate_methods_icu_utf8;
else
result->collate = &collate_methods_icu;
+ result->ctype = &ctype_methods_icu;
return result;
#else
}
}
-size_t
+static size_t
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
return result_len;
}
-size_t
+static size_t
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
return result_len;
}
-size_t
+static size_t
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
return result_len;
}
-size_t
+static size_t
strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
#include <shlwapi.h>
#endif
+/*
+ * For the libc provider, to provide as much functionality as possible on a
+ * variety of platforms without going so far as to implement everything from
+ * scratch, we use several implementation strategies depending on the
+ * situation:
+ *
+ * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
+ * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
+ * collations don't give a fig about multibyte characters.
+ *
+ * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
+ * This assumes that every platform uses Unicode codepoints directly
+ * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
+ * even for non-UTF8 encodings, which may be a problem.) On some platforms
+ * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
+ *
+ * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
+ * values up to 255, and punt for values above that. This is 100% correct
+ * only in single-byte encodings such as LATINn. However, non-Unicode
+ * multibyte encodings are mostly Far Eastern character sets for which the
+ * properties being tested here aren't very relevant for higher code values
+ * anyway. The difficulty with using the <wctype.h> functions with
+ * non-Unicode multibyte encodings is that we can have no certainty that
+ * the platform's wchar_t representation matches what we do in pg_wchar
+ * conversions.
+ *
+ * As a special case, in the "default" collation, (2) and (3) force ASCII
+ * letters to follow ASCII upcase/downcase rules, while in a non-default
+ * collation we just let the library functions do what they will. The case
+ * where this matters is treatment of I/i in Turkish, and the behavior is
+ * meant to match the upper()/lower() SQL functions.
+ *
+ * We store the active collation setting in static variables. In principle
+ * it could be passed down to here via the regex library's "struct vars" data
+ * structure; but that would require somewhat invasive changes in the regex
+ * library, and right now there's no real benefit to be gained from that.
+ *
+ * NB: the coding here assumes pg_wchar is an unsigned type.
+ */
+
/*
* Size of stack buffer to use for string transformations, used to avoid heap
* allocations in typical cases. This should be large enough that most strings
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
static int strncoll_libc(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
const char *src, ssize_t srclen,
pg_locale_t locale);
+static bool
+wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isdigit_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isalpha_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isalnum_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isupper_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return islower_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isgraph_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isprint_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return ispunct_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isspace_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswdigit_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswalpha_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswalnum_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswupper_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswlower_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswgraph_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswprint_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswpunct_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswspace_l((wint_t) wc, locale->info.lt);
+}
+
+static char
+char_tolower_libc(unsigned char ch, pg_locale_t locale)
+{
+ Assert(pg_database_encoding_max_length() == 1);
+ return tolower_l(ch, locale->info.lt);
+}
+
+static bool
+char_is_cased_libc(char ch, pg_locale_t locale)
+{
+ bool is_multibyte = pg_database_encoding_max_length() > 1;
+
+ if (is_multibyte && IS_HIGHBIT_SET(ch))
+ return true;
+ else
+ return isalpha_l((unsigned char) ch, locale->info.lt);
+}
+
+static pg_wchar
+toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() != PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) wc);
+ if (wc <= (pg_wchar) UCHAR_MAX)
+ return toupper_l((unsigned char) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) wc);
+ if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+ return towupper_l((wint_t) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() != PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) wc);
+ if (wc <= (pg_wchar) UCHAR_MAX)
+ return tolower_l((unsigned char) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) wc);
+ if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+ return towlower_l((wint_t) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static const struct ctype_methods ctype_methods_libc_sb = {
+ .strlower = strlower_libc_sb,
+ .strtitle = strtitle_libc_sb,
+ .strupper = strupper_libc_sb,
+ .wc_isdigit = wc_isdigit_libc_sb,
+ .wc_isalpha = wc_isalpha_libc_sb,
+ .wc_isalnum = wc_isalnum_libc_sb,
+ .wc_isupper = wc_isupper_libc_sb,
+ .wc_islower = wc_islower_libc_sb,
+ .wc_isgraph = wc_isgraph_libc_sb,
+ .wc_isprint = wc_isprint_libc_sb,
+ .wc_ispunct = wc_ispunct_libc_sb,
+ .wc_isspace = wc_isspace_libc_sb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_sb,
+ .wc_tolower = tolower_libc_sb,
+ .max_chr = UCHAR_MAX,
+};
+
+/*
+ * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
+ * single-byte semantics for pattern matching.
+ */
+static const struct ctype_methods ctype_methods_libc_other_mb = {
+ .strlower = strlower_libc_mb,
+ .strtitle = strtitle_libc_mb,
+ .strupper = strupper_libc_mb,
+ .wc_isdigit = wc_isdigit_libc_sb,
+ .wc_isalpha = wc_isalpha_libc_sb,
+ .wc_isalnum = wc_isalnum_libc_sb,
+ .wc_isupper = wc_isupper_libc_sb,
+ .wc_islower = wc_islower_libc_sb,
+ .wc_isgraph = wc_isgraph_libc_sb,
+ .wc_isprint = wc_isprint_libc_sb,
+ .wc_ispunct = wc_ispunct_libc_sb,
+ .wc_isspace = wc_isspace_libc_sb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_sb,
+ .wc_tolower = tolower_libc_sb,
+ .max_chr = UCHAR_MAX,
+};
+
+static const struct ctype_methods ctype_methods_libc_utf8 = {
+ .strlower = strlower_libc_mb,
+ .strtitle = strtitle_libc_mb,
+ .strupper = strupper_libc_mb,
+ .wc_isdigit = wc_isdigit_libc_mb,
+ .wc_isalpha = wc_isalpha_libc_mb,
+ .wc_isalnum = wc_isalnum_libc_mb,
+ .wc_isupper = wc_isupper_libc_mb,
+ .wc_islower = wc_islower_libc_mb,
+ .wc_isgraph = wc_isgraph_libc_mb,
+ .wc_isprint = wc_isprint_libc_mb,
+ .wc_ispunct = wc_ispunct_libc_mb,
+ .wc_isspace = wc_isspace_libc_mb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_mb,
+ .wc_tolower = tolower_libc_mb,
+};
+
static const struct collate_methods collate_methods_libc = {
.strncoll = strncoll_libc,
.strnxfrm = strnxfrm_libc,
};
#endif
-size_t
-strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strlower_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strlower_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strupper_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strupper_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
#endif
result->collate = &collate_methods_libc;
}
+ if (!result->ctype_is_c)
+ {
+ if (GetDatabaseEncoding() == PG_UTF8)
+ result->ctype = &ctype_methods_libc_utf8;
+ else if (pg_database_encoding_max_length() > 1)
+ result->ctype = &ctype_methods_libc_other_mb;
+ else
+ result->ctype = &ctype_methods_libc_sb;
+ }
return result;
}
#ifndef _PG_LOCALE_
#define _PG_LOCALE_
+#include "mb/pg_wchar.h"
+
#ifdef USE_ICU
#include <unicode/ucol.h>
#endif
bool strxfrm_is_safe;
};
+struct ctype_methods
+{
+ /* case mapping: LOWER()/INITCAP()/UPPER() */
+ size_t (*strlower) (char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+ size_t (*strtitle) (char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+ size_t (*strupper) (char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+ size_t (*strfold) (char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+
+ /* required */
+ bool (*wc_isdigit) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isalpha) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isalnum) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isupper) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_islower) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isgraph) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isprint) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_ispunct) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isspace) (pg_wchar wc, pg_locale_t locale);
+ pg_wchar (*wc_toupper) (pg_wchar wc, pg_locale_t locale);
+ pg_wchar (*wc_tolower) (pg_wchar wc, pg_locale_t locale);
+
+ /* required */
+ bool (*char_is_cased) (char ch, pg_locale_t locale);
+
+ /*
+ * Optional. If defined, will only be called for single-byte encodings. If
+ * not defined, or if the encoding is multibyte, will fall back to
+ * pg_strlower().
+ */
+ char (*char_tolower) (unsigned char ch, pg_locale_t locale);
+
+ /*
+ * For regex and pattern matching efficiency, the maximum char value
+ * supported by the above methods. If zero, limit is set by regex code.
+ */
+ pg_wchar max_chr;
+};
+
/*
* We use a discriminated union to hold either a locale_t or an ICU collator.
* pg_locale_t is occasionally checked for truth, so make it a pointer.
bool is_default;
const struct collate_methods *collate; /* NULL if collate_is_c */
+ const struct ctype_methods *ctype; /* NULL if ctype_is_c */
union
{
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
+
+extern bool char_is_cased(char ch, pg_locale_t locale);
+extern bool char_tolower_enabled(pg_locale_t locale);
+extern char char_tolower(unsigned char ch, pg_locale_t locale);
extern size_t pg_strlower(char *dst, size_t dstsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
PGTernaryBool
PGTransactionStatusType
PGVerbosity
-PG_Locale_Strategy
PG_Lock_Status
PG_init_t
PGauthData