Use C11 char16_t and char32_t for Unicode code points.

author Jeff Davis <jdavis@postgresql.org>
Wed, 29 Oct 2025 21:17:13 +0000 (14:17 -0700)
committer Jeff Davis <jdavis@postgresql.org>
Wed, 29 Oct 2025 21:17:13 +0000 (14:17 -0700)
diff --git a/configure b/configure

index 7ce52173dd855abb2ca5397dc522d58c671ff052..f7c24c8f5768ed5153174574cc07585552ad9636 100755 (executable)
--- a/configure
+++ b/configure
@@ -13627,7 +13627,7 @@ fi
  ## Header files
  ##
  
-for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h mbarrier.h sys/epoll.h sys/event.h sys/personality.h sys/prctl.h sys/procctl.h sys/signalfd.h sys/ucred.h termios.h ucred.h xlocale.h
+for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h mbarrier.h sys/epoll.h sys/event.h sys/personality.h sys/prctl.h sys/procctl.h sys/signalfd.h sys/ucred.h termios.h uchar.h ucred.h xlocale.h
  do :
    as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
  ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
diff --git a/configure.ac b/configure.ac

index 0842fd06259caf53f73217c7627a15a7cc93c627..6c802deaacb12ede983f0b5ed1f6594b461e1c29 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -1513,6 +1513,7 @@ AC_CHECK_HEADERS(m4_normalize([
         sys/signalfd.h
         sys/ucred.h
         termios.h
+       uchar.h
         ucred.h
         xlocale.h
  ]))
diff --git a/meson.build b/meson.build

index 1a123ce151a473018fee8102be5c2661a11ac02f..0f61ff6a7006ed1815269f7af7187d4369f471b6 100644 (file)
--- a/meson.build
+++ b/meson.build
@@ -2613,6 +2613,7 @@ header_checks = [
    'sys/signalfd.h',
    'sys/ucred.h',
    'termios.h',
+  'uchar.h',
    'ucred.h',
    'xlocale.h',
  ]
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c

index 33a040506b47f039f0011634642c04045cdb0439..a3679f8e86ce5d2d02606997303cd5bac4c81b53 100644 (file)
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -339,7 +339,7 @@ hexval(unsigned char c)
  
  /* is Unicode code point acceptable? */
  static void
-check_unicode_value(pg_wchar c)
+check_unicode_value(char32_t c)
  {
         if (!is_valid_unicode_codepoint(c))
                 ereport(ERROR,
@@ -376,7 +376,7 @@ str_udeescape(const char *str, char escape,
         char       *new,
                            *out;
         size_t          new_len;
-       pg_wchar        pair_first = 0;
+       char16_t        pair_first = 0;
         ScannerCallbackState scbstate;
  
         /*
@@ -420,7 +420,7 @@ str_udeescape(const char *str, char escape,
                                          isxdigit((unsigned char) in[3]) &&
                                          isxdigit((unsigned char) in[4]))
                         {
-                               pg_wchar        unicode;
+                               char32_t        unicode;
  
                                 unicode = (hexval(in[1]) << 12) +
                                         (hexval(in[2]) << 8) +
@@ -457,7 +457,7 @@ str_udeescape(const char *str, char escape,
                                          isxdigit((unsigned char) in[6]) &&
                                          isxdigit((unsigned char) in[7]))
                         {
-                               pg_wchar        unicode;
+                               char32_t        unicode;
  
                                 unicode = (hexval(in[2]) << 20) +
                                         (hexval(in[3]) << 16) +
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l

index 08990831fe81ad463bceb8a8fcd52e63d230d81c..a67815339b7ca074d85e633d2ef9fa990e4b9388 100644 (file)
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -121,7 +121,7 @@ static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
  static char *litbufdup(core_yyscan_t yyscanner);
  static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
  static int     process_integer_literal(const char *token, YYSTYPE *lval, int base);
-static void addunicode(pg_wchar c, yyscan_t yyscanner);
+static void addunicode(char32_t c, yyscan_t yyscanner);
  
  #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
  
@@ -640,7 +640,7 @@ other                       .
                                         addlit(yytext, yyleng, yyscanner);
                                 }
  <xe>{xeunicode} {
-                                       pg_wchar        c = strtoul(yytext + 2, NULL, 16);
+                                       char32_t        c = strtoul(yytext + 2, NULL, 16);
  
                                         /*
                                          * For consistency with other productions, issue any
@@ -668,7 +668,7 @@ other                       .
                                         POP_YYLLOC();
                                 }
  <xeu>{xeunicode} {
-                                       pg_wchar        c = strtoul(yytext + 2, NULL, 16);
+                                       char32_t        c = strtoul(yytext + 2, NULL, 16);
  
                                         /* Remember start of overall string token ... */
                                         PUSH_YYLLOC();
@@ -1376,7 +1376,7 @@ process_integer_literal(const char *token, YYSTYPE *lval, int base)
  }
  
  static void
-addunicode(pg_wchar c, core_yyscan_t yyscanner)
+addunicode(char32_t c, core_yyscan_t yyscanner)
  {
         ScannerCallbackState scbstate;
         char            buf[MAX_UNICODE_EQUIVALENT_STRING + 1];
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l

index c7aab83eeb4f674ef3c1a1cca3c5734e0c886ede..8c3a0a9c6424104e256e756e818c715cbd756755 100644 (file)
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -574,7 +574,7 @@ hexval(char c, int *result, struct Node *escontext, yyscan_t yyscanner)
  
  /* Add given unicode character to scanstring */
  static bool
-addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner)
+addUnicodeChar(char32_t ch, struct Node *escontext, yyscan_t yyscanner)
  {
         if (ch == 0)
         {
@@ -607,7 +607,7 @@ addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner)
  
  /* Add unicode character, processing any surrogate pairs */
  static bool
-addUnicode(int ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner)
+addUnicode(char32_t ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner)
  {
         if (is_utf16_surrogate_first(ch))
         {
@@ -655,7 +655,7 @@ parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner)
  
         for (i = 2; i < l; i += 2)      /* skip '\u' */
         {
-               int                     ch = 0;
+               char32_t                ch = 0;
                 int                     j,
                                         si;
  
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c

index 3dc611b50e15bb79275e8cddb517635f9e959aa3..1021e0d129b351d63ff28a2e19e65d260ad9fbd6 100644 (file)
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -15,7 +15,6 @@
  #include "catalog/pg_collation.h"
  #include "common/unicode_case.h"
  #include "common/unicode_category.h"
-#include "mb/pg_wchar.h"
  #include "miscadmin.h"
  #include "utils/builtins.h"
  #include "utils/pg_locale.h"
@@ -35,6 +34,23 @@ struct WordBoundaryState
         bool            prev_alnum;
  };
  
+/*
+ * In UTF-8, pg_wchar is guaranteed to be the code point value.
+ */
+static inline char32_t
+to_char32(pg_wchar wc)
+{
+       Assert(GetDatabaseEncoding() == PG_UTF8);
+       return (char32_t) wc;
+}
+
+static inline pg_wchar
+to_pg_wchar(char32_t c32)
+{
+       Assert(GetDatabaseEncoding() == PG_UTF8);
+       return (pg_wchar) c32;
+}
+
  /*
   * Simple word boundary iterator that draws boundaries each time the result of
   * pg_u_isalnum() changes.
@@ -47,7 +63,7 @@ initcap_wbnext(void *state)
         while (wbstate->offset < wbstate->len &&
                    wbstate->str[wbstate->offset] != '\0')
         {
-               pg_wchar        u = utf8_to_unicode((unsigned char *) wbstate->str +
+               char32_t        u = utf8_to_unicode((unsigned char *) wbstate->str +
                                                                                 wbstate->offset);
                 bool            curr_alnum = pg_u_isalnum(u, wbstate->posix);
  
@@ -112,61 +128,61 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
  static bool
  wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_isdigit(wc, !locale->builtin.casemap_full);
+       return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
  }
  
  static bool
  wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_isalpha(wc);
+       return pg_u_isalpha(to_char32(wc));
  }
  
  static bool
  wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_isalnum(wc, !locale->builtin.casemap_full);
+       return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
  }
  
  static bool
  wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_isupper(wc);
+       return pg_u_isupper(to_char32(wc));
  }
  
  static bool
  wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_islower(wc);
+       return pg_u_islower(to_char32(wc));
  }
  
  static bool
  wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_isgraph(wc);
+       return pg_u_isgraph(to_char32(wc));
  }
  
  static bool
  wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_isprint(wc);
+       return pg_u_isprint(to_char32(wc));
  }
  
  static bool
  wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_ispunct(wc, !locale->builtin.casemap_full);
+       return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
  }
  
  static bool
  wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_isspace(wc);
+       return pg_u_isspace(to_char32(wc));
  }
  
  static bool
  wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return pg_u_isxdigit(wc, !locale->builtin.casemap_full);
+       return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
  }
  
  static bool
@@ -179,13 +195,13 @@ char_is_cased_builtin(char ch, pg_locale_t locale)
  static pg_wchar
  wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return unicode_uppercase_simple(wc);
+       return to_pg_wchar(unicode_uppercase_simple(to_char32(wc)));
  }
  
  static pg_wchar
  wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
  {
-       return unicode_lowercase_simple(wc);
+       return to_pg_wchar(unicode_lowercase_simple(to_char32(wc)));
  }
  
  static const struct ctype_methods ctype_methods_builtin = {
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c

index 2c398cd9e5cb1a42bb18a0b307c1b1720ec36794..8d735786e51bc8e06ee516243373bb958fabdd18 100644 (file)
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -5419,12 +5419,12 @@ unicode_assigned(PG_FUNCTION_ARGS)
                 ereport(ERROR,
                                 (errmsg("Unicode categorization can only be performed if server encoding is UTF8")));
  
-       /* convert to pg_wchar */
+       /* convert to char32_t */
         size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
         p = (unsigned char *) VARDATA_ANY(input);
         for (int i = 0; i < size; i++)
         {
-               pg_wchar        uchar = utf8_to_unicode(p);
+               char32_t        uchar = utf8_to_unicode(p);
                 int                     category = unicode_category(uchar);
  
                 if (category == PG_U_UNASSIGNED)
@@ -5443,24 +5443,24 @@ unicode_normalize_func(PG_FUNCTION_ARGS)
         char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
         UnicodeNormalizationForm form;
         int                     size;
-       pg_wchar   *input_chars;
-       pg_wchar   *output_chars;
+       char32_t   *input_chars;
+       char32_t   *output_chars;
         unsigned char *p;
         text       *result;
         int                     i;
  
         form = unicode_norm_form_from_string(formstr);
  
-       /* convert to pg_wchar */
+       /* convert to char32_t */
         size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
-       input_chars = palloc((size + 1) * sizeof(pg_wchar));
+       input_chars = palloc((size + 1) * sizeof(char32_t));
         p = (unsigned char *) VARDATA_ANY(input);
         for (i = 0; i < size; i++)
         {
                 input_chars[i] = utf8_to_unicode(p);
                 p += pg_utf_mblen(p);
         }
-       input_chars[i] = (pg_wchar) '\0';
+       input_chars[i] = (char32_t) '\0';
         Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
  
         /* action */
@@ -5468,7 +5468,7 @@ unicode_normalize_func(PG_FUNCTION_ARGS)
  
         /* convert back to UTF-8 string */
         size = 0;
-       for (pg_wchar *wp = output_chars; *wp; wp++)
+       for (char32_t *wp = output_chars; *wp; wp++)
         {
                 unsigned char buf[4];
  
@@ -5480,7 +5480,7 @@ unicode_normalize_func(PG_FUNCTION_ARGS)
         SET_VARSIZE(result, size + VARHDRSZ);
  
         p = (unsigned char *) VARDATA_ANY(result);
-       for (pg_wchar *wp = output_chars; *wp; wp++)
+       for (char32_t *wp = output_chars; *wp; wp++)
         {
                 unicode_to_utf8(*wp, p);
                 p += pg_utf_mblen(p);
@@ -5509,8 +5509,8 @@ unicode_is_normalized(PG_FUNCTION_ARGS)
         char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
         UnicodeNormalizationForm form;
         int                     size;
-       pg_wchar   *input_chars;
-       pg_wchar   *output_chars;
+       char32_t   *input_chars;
+       char32_t   *output_chars;
         unsigned char *p;
         int                     i;
         UnicodeNormalizationQC quickcheck;
@@ -5519,16 +5519,16 @@ unicode_is_normalized(PG_FUNCTION_ARGS)
  
         form = unicode_norm_form_from_string(formstr);
  
-       /* convert to pg_wchar */
+       /* convert to char32_t */
         size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
-       input_chars = palloc((size + 1) * sizeof(pg_wchar));
+       input_chars = palloc((size + 1) * sizeof(char32_t));
         p = (unsigned char *) VARDATA_ANY(input);
         for (i = 0; i < size; i++)
         {
                 input_chars[i] = utf8_to_unicode(p);
                 p += pg_utf_mblen(p);
         }
-       input_chars[i] = (pg_wchar) '\0';
+       input_chars[i] = (char32_t) '\0';
         Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
  
         /* quick check (see UAX #15) */
@@ -5542,11 +5542,11 @@ unicode_is_normalized(PG_FUNCTION_ARGS)
         output_chars = unicode_normalize(form, input_chars);
  
         output_size = 0;
-       for (pg_wchar *wp = output_chars; *wp; wp++)
+       for (char32_t *wp = output_chars; *wp; wp++)
                 output_size++;
  
         result = (size == output_size) &&
-               (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
+               (memcmp(input_chars, output_chars, size * sizeof(char32_t)) == 0);
  
         PG_RETURN_BOOL(result);
  }
@@ -5602,7 +5602,7 @@ unistr(PG_FUNCTION_ARGS)
         int                     len;
         StringInfoData str;
         text       *result;
-       pg_wchar        pair_first = 0;
+       char16_t        pair_first = 0;
         char            cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
  
         instr = VARDATA_ANY(input_text);
@@ -5626,7 +5626,7 @@ unistr(PG_FUNCTION_ARGS)
                         else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
                                          (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
                         {
-                               pg_wchar        unicode;
+                               char32_t        unicode;
                                 int                     offset = instr[1] == 'u' ? 2 : 1;
  
                                 unicode = hexval_n(instr + offset, 4);
@@ -5662,7 +5662,7 @@ unistr(PG_FUNCTION_ARGS)
                         }
                         else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
                         {
-                               pg_wchar        unicode;
+                               char32_t        unicode;
  
                                 unicode = hexval_n(instr + 2, 6);
  
@@ -5697,7 +5697,7 @@ unistr(PG_FUNCTION_ARGS)
                         }
                         else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
                         {
-                               pg_wchar        unicode;
+                               char32_t        unicode;
  
                                 unicode = hexval_n(instr + 2, 8);
  
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c

index 886ecbad87183df36d970f790e1f74e1a48b428e..fb629ed5c8fa6d0f127db648988e8ab9fb0feb0d 100644 (file)
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -862,7 +862,7 @@ perform_default_encoding_conversion(const char *src, int len,
   * may call this outside any transaction, or in an aborted transaction.
   */
  void
-pg_unicode_to_server(pg_wchar c, unsigned char *s)
+pg_unicode_to_server(char32_t c, unsigned char *s)
  {
         unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
         int                     c_as_utf8_len;
@@ -924,7 +924,7 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
   * but simply return false on conversion failure.
   */
  bool
-pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
+pg_unicode_to_server_noerror(char32_t c, unsigned char *s)
  {
         unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
         int                     c_as_utf8_len;
diff --git a/src/common/saslprep.c b/src/common/saslprep.c

index 97beb47940bfe5c9742ed2d558ef750a759c9ff3..101e8d65a4d4320d218b8c34182a442fbc48fab3 100644 (file)
--- a/src/common/saslprep.c
+++ b/src/common/saslprep.c
@@ -47,7 +47,7 @@
  
  /* Prototypes for local functions */
  static int     codepoint_range_cmp(const void *a, const void *b);
-static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
+static bool is_code_in_table(char32_t code, const char32_t *map, int mapsize);
  static int     pg_utf8_string_len(const char *source);
  
  /*
@@ -64,7 +64,7 @@ static int    pg_utf8_string_len(const char *source);
   *
   * These are all mapped to the ASCII space character (U+0020).
   */
-static const pg_wchar non_ascii_space_ranges[] =
+static const char32_t non_ascii_space_ranges[] =
  {
         0x00A0, 0x00A0,
         0x1680, 0x1680,
@@ -79,7 +79,7 @@ static const pg_wchar non_ascii_space_ranges[] =
   *
   * If any of these appear in the input, they are removed.
   */
-static const pg_wchar commonly_mapped_to_nothing_ranges[] =
+static const char32_t commonly_mapped_to_nothing_ranges[] =
  {
         0x00AD, 0x00AD,
         0x034F, 0x034F,
@@ -114,7 +114,7 @@ static const pg_wchar commonly_mapped_to_nothing_ranges[] =
   * tables, so one code might originate from multiple source tables.
   * Adjacent ranges have also been merged together, to save space.
   */
-static const pg_wchar prohibited_output_ranges[] =
+static const char32_t prohibited_output_ranges[] =
  {
         0x0000, 0x001F,                         /* C.2.1 */
         0x007F, 0x00A0,                         /* C.1.2, C.2.1, C.2.2 */
@@ -155,7 +155,7 @@ static const pg_wchar prohibited_output_ranges[] =
  };
  
  /* A.1 Unassigned code points in Unicode 3.2 */
-static const pg_wchar unassigned_codepoint_ranges[] =
+static const char32_t unassigned_codepoint_ranges[] =
  {
         0x0221, 0x0221,
         0x0234, 0x024F,
@@ -556,7 +556,7 @@ static const pg_wchar unassigned_codepoint_ranges[] =
  };
  
  /* D.1 Characters with bidirectional property "R" or "AL" */
-static const pg_wchar RandALCat_codepoint_ranges[] =
+static const char32_t RandALCat_codepoint_ranges[] =
  {
         0x05BE, 0x05BE,
         0x05C0, 0x05C0,
@@ -595,7 +595,7 @@ static const pg_wchar RandALCat_codepoint_ranges[] =
  };
  
  /* D.2 Characters with bidirectional property "L" */
-static const pg_wchar LCat_codepoint_ranges[] =
+static const char32_t LCat_codepoint_ranges[] =
  {
         0x0041, 0x005A,
         0x0061, 0x007A,
@@ -968,8 +968,8 @@ static const pg_wchar LCat_codepoint_ranges[] =
  static int
  codepoint_range_cmp(const void *a, const void *b)
  {
-       const pg_wchar *key = (const pg_wchar *) a;
-       const pg_wchar *range = (const pg_wchar *) b;
+       const char32_t *key = (const char32_t *) a;
+       const char32_t *range = (const char32_t *) b;
  
         if (*key < range[0])
                 return -1;                              /* less than lower bound */
@@ -980,14 +980,14 @@ codepoint_range_cmp(const void *a, const void *b)
  }
  
  static bool
-is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
+is_code_in_table(char32_t code, const char32_t *map, int mapsize)
  {
         Assert(mapsize % 2 == 0);
  
         if (code < map[0] || code > map[mapsize - 1])
                 return false;
  
-       if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
+       if (bsearch(&code, map, mapsize / 2, sizeof(char32_t) * 2,
                                 codepoint_range_cmp))
                 return true;
         else
@@ -1046,8 +1046,8 @@ pg_utf8_string_len(const char *source)
  pg_saslprep_rc
  pg_saslprep(const char *input, char **output)
  {
-       pg_wchar   *input_chars = NULL;
-       pg_wchar   *output_chars = NULL;
+       char32_t   *input_chars = NULL;
+       char32_t   *output_chars = NULL;
         int                     input_size;
         char       *result;
         int                     result_size;
@@ -1055,7 +1055,7 @@ pg_saslprep(const char *input, char **output)
         int                     i;
         bool            contains_RandALCat;
         unsigned char *p;
-       pg_wchar   *wp;
+       char32_t   *wp;
  
         /* Ensure we return *output as NULL on failure */
         *output = NULL;
@@ -1080,10 +1080,10 @@ pg_saslprep(const char *input, char **output)
         input_size = pg_utf8_string_len(input);
         if (input_size < 0)
                 return SASLPREP_INVALID_UTF8;
-       if (input_size >= MaxAllocSize / sizeof(pg_wchar))
+       if (input_size >= MaxAllocSize / sizeof(char32_t))
                 goto oom;
  
-       input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
+       input_chars = ALLOC((input_size + 1) * sizeof(char32_t));
         if (!input_chars)
                 goto oom;
  
@@ -1093,7 +1093,7 @@ pg_saslprep(const char *input, char **output)
                 input_chars[i] = utf8_to_unicode(p);
                 p += pg_utf_mblen(p);
         }
-       input_chars[i] = (pg_wchar) '\0';
+       input_chars[i] = (char32_t) '\0';
  
         /*
          * The steps below correspond to the steps listed in [RFC3454], Section
@@ -1107,7 +1107,7 @@ pg_saslprep(const char *input, char **output)
         count = 0;
         for (i = 0; i < input_size; i++)
         {
-               pg_wchar        code = input_chars[i];
+               char32_t        code = input_chars[i];
  
                 if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
                         input_chars[count++] = 0x0020;
@@ -1118,7 +1118,7 @@ pg_saslprep(const char *input, char **output)
                 else
                         input_chars[count++] = code;
         }
-       input_chars[count] = (pg_wchar) '\0';
+       input_chars[count] = (char32_t) '\0';
         input_size = count;
  
         if (input_size == 0)
@@ -1138,7 +1138,7 @@ pg_saslprep(const char *input, char **output)
          */
         for (i = 0; i < input_size; i++)
         {
-               pg_wchar        code = input_chars[i];
+               char32_t        code = input_chars[i];
  
                 if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
                         goto prohibited;
@@ -1170,7 +1170,7 @@ pg_saslprep(const char *input, char **output)
         contains_RandALCat = false;
         for (i = 0; i < input_size; i++)
         {
-               pg_wchar        code = input_chars[i];
+               char32_t        code = input_chars[i];
  
                 if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
                 {
@@ -1181,12 +1181,12 @@ pg_saslprep(const char *input, char **output)
  
         if (contains_RandALCat)
         {
-               pg_wchar        first = input_chars[0];
-               pg_wchar        last = input_chars[input_size - 1];
+               char32_t        first = input_chars[0];
+               char32_t        last = input_chars[input_size - 1];
  
                 for (i = 0; i < input_size; i++)
                 {
-                       pg_wchar        code = input_chars[i];
+                       char32_t        code = input_chars[i];
  
                         if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
                                 goto prohibited;
diff --git a/src/common/unicode/case_test.c b/src/common/unicode/case_test.c

index fdfb62e855286fb1d0d2c74e8cc26b1b3ad4e4bd..00d4f85e5a5ef4c4340c8316ad237ecc430ccce6 100644 (file)
--- a/src/common/unicode/case_test.c
+++ b/src/common/unicode/case_test.c
@@ -24,6 +24,7 @@
  #include "common/unicode_case.h"
  #include "common/unicode_category.h"
  #include "common/unicode_version.h"
+#include "mb/pg_wchar.h"
  
  /* enough to hold largest source or result string, including NUL */
  #define BUFSZ 256
@@ -54,7 +55,7 @@ initcap_wbnext(void *state)
         while (wbstate->offset < wbstate->len &&
                    wbstate->str[wbstate->offset] != '\0')
         {
-               pg_wchar        u = utf8_to_unicode((unsigned char *) wbstate->str +
+               char32_t        u = utf8_to_unicode((unsigned char *) wbstate->str +
                                                                                 wbstate->offset);
                 bool            curr_alnum = pg_u_isalnum(u, wbstate->posix);
  
@@ -77,16 +78,16 @@ initcap_wbnext(void *state)
  #ifdef USE_ICU
  
  static void
-icu_test_simple(pg_wchar code)
+icu_test_simple(char32_t code)
  {
-       pg_wchar        lower = unicode_lowercase_simple(code);
-       pg_wchar        title = unicode_titlecase_simple(code);
-       pg_wchar        upper = unicode_uppercase_simple(code);
-       pg_wchar        fold = unicode_casefold_simple(code);
-       pg_wchar        iculower = u_tolower(code);
-       pg_wchar        icutitle = u_totitle(code);
-       pg_wchar        icuupper = u_toupper(code);
-       pg_wchar        icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
+       char32_t        lower = unicode_lowercase_simple(code);
+       char32_t        title = unicode_titlecase_simple(code);
+       char32_t        upper = unicode_uppercase_simple(code);
+       char32_t        fold = unicode_casefold_simple(code);
+       char32_t        iculower = u_tolower(code);
+       char32_t        icutitle = u_totitle(code);
+       char32_t        icuupper = u_toupper(code);
+       char32_t        icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
  
         if (lower != iculower || title != icutitle || upper != icuupper ||
                 fold != icufold)
@@ -172,7 +173,7 @@ test_icu(void)
         int                     successful = 0;
         int                     skipped_mismatch = 0;
  
-       for (pg_wchar code = 0; code <= 0x10ffff; code++)
+       for (char32_t code = 0; code <= 0x10ffff; code++)
         {
                 pg_unicode_category category = unicode_category(code);
  
diff --git a/src/common/unicode/category_test.c b/src/common/unicode/category_test.c

index 5d37ba391968e19f248d5f05e5db3418474793de..1e8c1f7905ff337dc49e06d6da395a548ee7a9f1 100644 (file)
--- a/src/common/unicode/category_test.c
+++ b/src/common/unicode/category_test.c
@@ -22,6 +22,7 @@
  
  #include "common/unicode_category.h"
  #include "common/unicode_version.h"
+#include "mb/pg_wchar.h"
  
  static int     pg_unicode_version = 0;
  #ifdef USE_ICU
@@ -59,7 +60,7 @@ icu_test()
         int                     pg_skipped_codepoints = 0;
         int                     icu_skipped_codepoints = 0;
  
-       for (pg_wchar code = 0; code <= 0x10ffff; code++)
+       for (char32_t code = 0; code <= 0x10ffff; code++)
         {
                 uint8_t         pg_category = unicode_category(code);
                 uint8_t         icu_category = u_charType(code);
diff --git a/src/common/unicode/generate-norm_test_table.pl b/src/common/unicode/generate-norm_test_table.pl

index 1b401be9409318bbb6d4d2f4355ec76989b3a34e..1a8b908ff33f2dde7e285c59caa6f12d56fccd47 100644 (file)
--- a/src/common/unicode/generate-norm_test_table.pl
+++ b/src/common/unicode/generate-norm_test_table.pl
@@ -47,8 +47,8 @@ print $OUTPUT <<HEADER;
  typedef struct
  {
         int                     linenum;
-       pg_wchar        input[50];
-       pg_wchar        output[4][50];
+       char32_t        input[50];
+       char32_t        output[4][50];
  } pg_unicode_test;
  
  /* test table */
diff --git a/src/common/unicode/generate-unicode_case_table.pl b/src/common/unicode/generate-unicode_case_table.pl

index 5d9ddd628038c2c49607a0b26ea2e2485923df13..f71eb25c94e0d19998e27bbc0cc02ea486acafad 100644 (file)
--- a/src/common/unicode/generate-unicode_case_table.pl
+++ b/src/common/unicode/generate-unicode_case_table.pl
@@ -270,7 +270,6 @@ print $OT <<"EOS";
   */
  
  #include "common/unicode_case.h"
-#include "mb/pg_wchar.h"
  
  /*
   * The maximum number of codepoints that can result from case mapping
@@ -297,7 +296,7 @@ typedef enum
  typedef struct
  {
         int16           conditions;
-       pg_wchar        map[NCaseKind][MAX_CASE_EXPANSION];
+       char32_t        map[NCaseKind][MAX_CASE_EXPANSION];
  } pg_special_case;
  
  /*
@@ -430,7 +429,7 @@ foreach my $kind ('lower', 'title', 'upper', 'fold')
   * The entry case_map_${kind}[case_index(codepoint)] is the mapping for the
   * given codepoint.
   */
-static const pg_wchar case_map_$kind\[$index\] =
+static const char32_t case_map_$kind\[$index\] =
  {
  EOS
  
@@ -502,7 +501,7 @@ print $OT <<"EOS";
   * the offset into the mapping tables.
   */
  static inline uint16
-case_index(pg_wchar cp)
+case_index(char32_t cp)
  {
         /* Fast path for codepoints < $fastpath_limit */
         if (cp < $fastpath_limit)
diff --git a/src/common/unicode/generate-unicode_category_table.pl b/src/common/unicode/generate-unicode_category_table.pl

index abab5cd96968c69f4d7a33e88d995dc0e01f7183..7e094b13720d46d172379c7b9a3e128824276c35 100644 (file)
--- a/src/common/unicode/generate-unicode_category_table.pl
+++ b/src/common/unicode/generate-unicode_category_table.pl
@@ -366,15 +366,15 @@ print $OT <<"EOS";
   */
  typedef struct
  {
-       uint32          first;                  /* Unicode codepoint */
-       uint32          last;                   /* Unicode codepoint */
+       char32_t        first;                  /* Unicode codepoint */
+       char32_t        last;                   /* Unicode codepoint */
         uint8           category;               /* General Category */
  } pg_category_range;
  
  typedef struct
  {
-       uint32          first;                  /* Unicode codepoint */
-       uint32          last;                   /* Unicode codepoint */
+       char32_t        first;                  /* Unicode codepoint */
+       char32_t        last;                   /* Unicode codepoint */
  } pg_unicode_range;
  
  typedef struct
diff --git a/src/common/unicode/norm_test.c b/src/common/unicode/norm_test.c

index 25bc59463f24db8d80fc349946211d8c68113764..058817f1719b8456ef241d53c7faf6a105b7d3e0 100644 (file)
--- a/src/common/unicode/norm_test.c
+++ b/src/common/unicode/norm_test.c
@@ -20,7 +20,7 @@
  #include "norm_test_table.h"
  
  static char *
-print_wchar_str(const pg_wchar *s)
+print_wchar_str(const char32_t *s)
  {
  #define BUF_DIGITS 50
         static char buf[BUF_DIGITS * 11 + 1];
@@ -41,7 +41,7 @@ print_wchar_str(const pg_wchar *s)
  }
  
  static int
-pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2)
+pg_wcscmp(const char32_t *s1, const char32_t *s2)
  {
         for (;;)
         {
@@ -65,7 +65,7 @@ main(int argc, char **argv)
         {
                 for (int form = 0; form < 4; form++)
                 {
-                       pg_wchar   *result;
+                       char32_t   *result;
  
                         result = unicode_normalize(form, test->input);
  
diff --git a/src/common/unicode_case.c b/src/common/unicode_case.c

index 073faf6a0d58b9c5c36e7ab1cfe471f4f42e15f2..e5e494db43c6dca42b48f4b03c23e9c5db9ac867 100644 (file)
--- a/src/common/unicode_case.c
+++ b/src/common/unicode_case.c
@@ -30,7 +30,7 @@ enum CaseMapResult
  /*
   * Map for each case kind.
   */
-static const pg_wchar *const casekind_map[NCaseKind] =
+static const char32_t *const casekind_map[NCaseKind] =
  {
         [CaseLower] = case_map_lower,
         [CaseTitle] = case_map_title,
@@ -38,42 +38,42 @@ static const pg_wchar *const casekind_map[NCaseKind] =
         [CaseFold] = case_map_fold,
  };
  
-static pg_wchar find_case_map(pg_wchar ucs, const pg_wchar *map);
+static char32_t find_case_map(char32_t ucs, const char32_t *map);
  static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
                                                    CaseKind str_casekind, bool full, WordBoundaryNext wbnext,
                                                    void *wbstate);
-static enum CaseMapResult casemap(pg_wchar u1, CaseKind casekind, bool full,
+static enum CaseMapResult casemap(char32_t u1, CaseKind casekind, bool full,
                                                                   const char *src, size_t srclen, size_t srcoff,
-                                                                 pg_wchar *simple, const pg_wchar **special);
+                                                                 char32_t *simple, const char32_t **special);
  
-pg_wchar
-unicode_lowercase_simple(pg_wchar code)
+char32_t
+unicode_lowercase_simple(char32_t code)
  {
-       pg_wchar        cp = find_case_map(code, case_map_lower);
+       char32_t        cp = find_case_map(code, case_map_lower);
  
         return cp != 0 ? cp : code;
  }
  
-pg_wchar
-unicode_titlecase_simple(pg_wchar code)
+char32_t
+unicode_titlecase_simple(char32_t code)
  {
-       pg_wchar        cp = find_case_map(code, case_map_title);
+       char32_t        cp = find_case_map(code, case_map_title);
  
         return cp != 0 ? cp : code;
  }
  
-pg_wchar
-unicode_uppercase_simple(pg_wchar code)
+char32_t
+unicode_uppercase_simple(char32_t code)
  {
-       pg_wchar        cp = find_case_map(code, case_map_upper);
+       char32_t        cp = find_case_map(code, case_map_upper);
  
         return cp != 0 ? cp : code;
  }
  
-pg_wchar
-unicode_casefold_simple(pg_wchar code)
+char32_t
+unicode_casefold_simple(char32_t code)
  {
-       pg_wchar        cp = find_case_map(code, case_map_fold);
+       char32_t        cp = find_case_map(code, case_map_fold);
  
         return cp != 0 ? cp : code;
  }
@@ -231,10 +231,10 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
  
         while ((srclen < 0 || srcoff < srclen) && src[srcoff] != '\0')
         {
-               pg_wchar        u1 = utf8_to_unicode((unsigned char *) src + srcoff);
+               char32_t        u1 = utf8_to_unicode((unsigned char *) src + srcoff);
                 int                     u1len = unicode_utf8len(u1);
-               pg_wchar        simple = 0;
-               const pg_wchar *special = NULL;
+               char32_t        simple = 0;
+               const char32_t *special = NULL;
                 enum CaseMapResult casemap_result;
  
                 if (str_casekind == CaseTitle)
@@ -265,8 +265,8 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
                         case CASEMAP_SIMPLE:
                                 {
                                         /* replace with single character */
-                                       pg_wchar        u2 = simple;
-                                       pg_wchar        u2len = unicode_utf8len(u2);
+                                       char32_t        u2 = simple;
+                                       size_t          u2len = unicode_utf8len(u2);
  
                                         Assert(special == NULL);
                                         if (result_len + u2len <= dstsize)
@@ -280,7 +280,7 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
                                 Assert(simple == 0);
                                 for (int i = 0; i < MAX_CASE_EXPANSION && special[i]; i++)
                                 {
-                                       pg_wchar        u2 = special[i];
+                                       char32_t        u2 = special[i];
                                         size_t          u2len = unicode_utf8len(u2);
  
                                         if (result_len + u2len <= dstsize)
@@ -320,7 +320,7 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)
         {
                 if ((str[i] & 0x80) == 0 || (str[i] & 0xC0) == 0xC0)
                 {
-                       pg_wchar        curr = utf8_to_unicode(str + i);
+                       char32_t        curr = utf8_to_unicode(str + i);
  
                         if (pg_u_prop_case_ignorable(curr))
                                 continue;
@@ -344,7 +344,7 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)
         {
                 if ((str[i] & 0x80) == 0 || (str[i] & 0xC0) == 0xC0)
                 {
-                       pg_wchar        curr = utf8_to_unicode(str + i);
+                       char32_t        curr = utf8_to_unicode(str + i);
  
                         if (pg_u_prop_case_ignorable(curr))
                                 continue;
@@ -394,9 +394,9 @@ check_special_conditions(int conditions, const char *str, size_t len,
   * character without modification.
   */
  static enum CaseMapResult
-casemap(pg_wchar u1, CaseKind casekind, bool full,
+casemap(char32_t u1, CaseKind casekind, bool full,
                 const char *src, size_t srclen, size_t srcoff,
-               pg_wchar *simple, const pg_wchar **special)
+               char32_t *simple, const char32_t **special)
  {
         uint16          idx;
  
@@ -434,8 +434,8 @@ casemap(pg_wchar u1, CaseKind casekind, bool full,
   * Find entry in simple case map.
   * If the entry does not exist, 0 will be returned.
   */
-static pg_wchar
-find_case_map(pg_wchar ucs, const pg_wchar *map)
+static char32_t
+find_case_map(char32_t ucs, const char32_t *map)
  {
         /* Fast path for codepoints < 0x80 */
         if (ucs < 0x80)
diff --git a/src/common/unicode_category.c b/src/common/unicode_category.c

index 4136c4d4f926f03ccb7a271877d68127d970b692..aab667a7bb47f64938302bb2f520aa1a14f165b5 100644 (file)
--- a/src/common/unicode_category.c
+++ b/src/common/unicode_category.c
@@ -1,7 +1,7 @@
  /*-------------------------------------------------------------------------
   * unicode_category.c
   *             Determine general category and character properties of Unicode
- *             characters. Encoding must be UTF8, where we assume that the pg_wchar
+ *             characters. Encoding must be UTF8, where we assume that the char32_t
   *             representation is a code point.
   *
   * Portions Copyright (c) 2017-2025, PostgreSQL Global Development Group
@@ -76,13 +76,13 @@
  #define PG_U_CHARACTER_TAB     0x09
  
  static bool range_search(const pg_unicode_range *tbl, size_t size,
-                                                pg_wchar code);
+                                                char32_t code);
  
  /*
   * Unicode general category for the given codepoint.
   */
  pg_unicode_category
-unicode_category(pg_wchar code)
+unicode_category(char32_t code)
  {
         int                     min = 0;
         int                     mid;
@@ -108,7 +108,7 @@ unicode_category(pg_wchar code)
  }
  
  bool
-pg_u_prop_alphabetic(pg_wchar code)
+pg_u_prop_alphabetic(char32_t code)
  {
         if (code < 0x80)
                 return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC;
@@ -119,7 +119,7 @@ pg_u_prop_alphabetic(pg_wchar code)
  }
  
  bool
-pg_u_prop_lowercase(pg_wchar code)
+pg_u_prop_lowercase(char32_t code)
  {
         if (code < 0x80)
                 return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE;
@@ -130,7 +130,7 @@ pg_u_prop_lowercase(pg_wchar code)
  }
  
  bool
-pg_u_prop_uppercase(pg_wchar code)
+pg_u_prop_uppercase(char32_t code)
  {
         if (code < 0x80)
                 return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE;
@@ -141,7 +141,7 @@ pg_u_prop_uppercase(pg_wchar code)
  }
  
  bool
-pg_u_prop_cased(pg_wchar code)
+pg_u_prop_cased(char32_t code)
  {
         uint32          category_mask;
  
@@ -156,7 +156,7 @@ pg_u_prop_cased(pg_wchar code)
  }
  
  bool
-pg_u_prop_case_ignorable(pg_wchar code)
+pg_u_prop_case_ignorable(char32_t code)
  {
         if (code < 0x80)
                 return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE;
@@ -167,7 +167,7 @@ pg_u_prop_case_ignorable(pg_wchar code)
  }
  
  bool
-pg_u_prop_white_space(pg_wchar code)
+pg_u_prop_white_space(char32_t code)
  {
         if (code < 0x80)
                 return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE;
@@ -178,7 +178,7 @@ pg_u_prop_white_space(pg_wchar code)
  }
  
  bool
-pg_u_prop_hex_digit(pg_wchar code)
+pg_u_prop_hex_digit(char32_t code)
  {
         if (code < 0x80)
                 return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT;
@@ -189,7 +189,7 @@ pg_u_prop_hex_digit(pg_wchar code)
  }
  
  bool
-pg_u_prop_join_control(pg_wchar code)
+pg_u_prop_join_control(char32_t code)
  {
         if (code < 0x80)
                 return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL;
@@ -208,7 +208,7 @@ pg_u_prop_join_control(pg_wchar code)
   */
  
  bool
-pg_u_isdigit(pg_wchar code, bool posix)
+pg_u_isdigit(char32_t code, bool posix)
  {
         if (posix)
                 return ('0' <= code && code <= '9');
@@ -217,19 +217,19 @@ pg_u_isdigit(pg_wchar code, bool posix)
  }
  
  bool
-pg_u_isalpha(pg_wchar code)
+pg_u_isalpha(char32_t code)
  {
         return pg_u_prop_alphabetic(code);
  }
  
  bool
-pg_u_isalnum(pg_wchar code, bool posix)
+pg_u_isalnum(char32_t code, bool posix)
  {
         return pg_u_isalpha(code) || pg_u_isdigit(code, posix);
  }
  
  bool
-pg_u_isword(pg_wchar code)
+pg_u_isword(char32_t code)
  {
         uint32          category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
  
@@ -240,32 +240,32 @@ pg_u_isword(pg_wchar code)
  }
  
  bool
-pg_u_isupper(pg_wchar code)
+pg_u_isupper(char32_t code)
  {
         return pg_u_prop_uppercase(code);
  }
  
  bool
-pg_u_islower(pg_wchar code)
+pg_u_islower(char32_t code)
  {
         return pg_u_prop_lowercase(code);
  }
  
  bool
-pg_u_isblank(pg_wchar code)
+pg_u_isblank(char32_t code)
  {
         return code == PG_U_CHARACTER_TAB ||
                 unicode_category(code) == PG_U_SPACE_SEPARATOR;
  }
  
  bool
-pg_u_iscntrl(pg_wchar code)
+pg_u_iscntrl(char32_t code)
  {
         return unicode_category(code) == PG_U_CONTROL;
  }
  
  bool
-pg_u_isgraph(pg_wchar code)
+pg_u_isgraph(char32_t code)
  {
         uint32          category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
  
@@ -276,7 +276,7 @@ pg_u_isgraph(pg_wchar code)
  }
  
  bool
-pg_u_isprint(pg_wchar code)
+pg_u_isprint(char32_t code)
  {
         pg_unicode_category category = unicode_category(code);
  
@@ -287,7 +287,7 @@ pg_u_isprint(pg_wchar code)
  }
  
  bool
-pg_u_ispunct(pg_wchar code, bool posix)
+pg_u_ispunct(char32_t code, bool posix)
  {
         uint32          category_mask;
  
@@ -308,13 +308,13 @@ pg_u_ispunct(pg_wchar code, bool posix)
  }
  
  bool
-pg_u_isspace(pg_wchar code)
+pg_u_isspace(char32_t code)
  {
         return pg_u_prop_white_space(code);
  }
  
  bool
-pg_u_isxdigit(pg_wchar code, bool posix)
+pg_u_isxdigit(char32_t code, bool posix)
  {
         if (posix)
                 return (('0' <= code && code <= '9') ||
@@ -478,7 +478,7 @@ unicode_category_abbrev(pg_unicode_category category)
   * given table.
   */
  static bool
-range_search(const pg_unicode_range *tbl, size_t size, pg_wchar code)
+range_search(const pg_unicode_range *tbl, size_t size, char32_t code)
  {
         int                     min = 0;
         int                     mid;
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c

index 6654b4cbc49cfda482267b77d98caa7c12a01f6c..489d99cd5abb0d6123883abf8368dea8e7053872 100644 (file)
--- a/src/common/unicode_norm.c
+++ b/src/common/unicode_norm.c
@@ -69,7 +69,7 @@ conv_compare(const void *p1, const void *p2)
   * lookup, while the frontend version uses a binary search.
   */
  static const pg_unicode_decomposition *
-get_code_entry(pg_wchar code)
+get_code_entry(char32_t code)
  {
  #ifndef FRONTEND
         int                     h;
@@ -109,7 +109,7 @@ get_code_entry(pg_wchar code)
   * Get the combining class of the given codepoint.
   */
  static uint8
-get_canonical_class(pg_wchar code)
+get_canonical_class(char32_t code)
  {
         const pg_unicode_decomposition *entry = get_code_entry(code);
  
@@ -130,15 +130,15 @@ get_canonical_class(pg_wchar code)
   * Note: the returned pointer can point to statically allocated buffer, and
   * is only valid until next call to this function!
   */
-static const pg_wchar *
+static const char32_t *
  get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
  {
-       static pg_wchar x;
+       static char32_t x;
  
         if (DECOMPOSITION_IS_INLINE(entry))
         {
                 Assert(DECOMPOSITION_SIZE(entry) == 1);
-               x = (pg_wchar) entry->dec_index;
+               x = (char32_t) entry->dec_index;
                 *dec_size = 1;
                 return &x;
         }
@@ -156,7 +156,7 @@ get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
   * are, in turn, decomposable.
   */
  static int
-get_decomposed_size(pg_wchar code, bool compat)
+get_decomposed_size(char32_t code, bool compat)
  {
         const pg_unicode_decomposition *entry;
         int                     size = 0;
@@ -318,7 +318,7 @@ recompose_code(uint32 start, uint32 code, uint32 *result)
   * in the array result.
   */
  static void
-decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
+decompose_code(char32_t code, bool compat, char32_t **result, int *current)
  {
         const pg_unicode_decomposition *entry;
         int                     i;
@@ -337,7 +337,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
                                         v,
                                         tindex,
                                         sindex;
-               pg_wchar   *res = *result;
+               char32_t   *res = *result;
  
                 sindex = code - SBASE;
                 l = LBASE + sindex / (VCOUNT * TCOUNT);
@@ -369,7 +369,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
         if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
                 (!compat && DECOMPOSITION_IS_COMPAT(entry)))
         {
-               pg_wchar   *res = *result;
+               char32_t   *res = *result;
  
                 res[*current] = code;
                 (*current)++;
@@ -382,7 +382,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
         decomp = get_code_decomposition(entry, &dec_size);
         for (i = 0; i < dec_size; i++)
         {
-               pg_wchar        lcode = (pg_wchar) decomp[i];
+               char32_t        lcode = (char32_t) decomp[i];
  
                 /* Leave if no more decompositions */
                 decompose_code(lcode, compat, result, current);
@@ -398,17 +398,17 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
   * malloc. Or NULL if we run out of memory. In backend, the returned
   * string is palloc'd instead, and OOM is reported with ereport().
   */
-pg_wchar *
-unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
+char32_t *
+unicode_normalize(UnicodeNormalizationForm form, const char32_t *input)
  {
         bool            compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);
         bool            recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);
-       pg_wchar   *decomp_chars;
-       pg_wchar   *recomp_chars;
+       char32_t   *decomp_chars;
+       char32_t   *recomp_chars;
         int                     decomp_size,
                                 current_size;
         int                     count;
-       const pg_wchar *p;
+       const char32_t *p;
  
         /* variables for recomposition */
         int                     last_class;
@@ -425,7 +425,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
         for (p = input; *p; p++)
                 decomp_size += get_decomposed_size(*p, compat);
  
-       decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
+       decomp_chars = (char32_t *) ALLOC((decomp_size + 1) * sizeof(char32_t));
         if (decomp_chars == NULL)
                 return NULL;
  
@@ -448,9 +448,9 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
          */
         for (count = 1; count < decomp_size; count++)
         {
-               pg_wchar        prev = decomp_chars[count - 1];
-               pg_wchar        next = decomp_chars[count];
-               pg_wchar        tmp;
+               char32_t        prev = decomp_chars[count - 1];
+               char32_t        next = decomp_chars[count];
+               char32_t        tmp;
                 const uint8 prevClass = get_canonical_class(prev);
                 const uint8 nextClass = get_canonical_class(next);
  
@@ -487,7 +487,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
          * longer than the decomposed one, so make the allocation of the output
          * string based on that assumption.
          */
-       recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
+       recomp_chars = (char32_t *) ALLOC((decomp_size + 1) * sizeof(char32_t));
         if (!recomp_chars)
         {
                 FREE(decomp_chars);
@@ -501,9 +501,9 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
  
         for (count = 1; count < decomp_size; count++)
         {
-               pg_wchar        ch = decomp_chars[count];
+               char32_t        ch = decomp_chars[count];
                 int                     ch_class = get_canonical_class(ch);
-               pg_wchar        composite;
+               char32_t        composite;
  
                 if (last_class < ch_class &&
                         recompose_code(starter_ch, ch, &composite))
@@ -524,7 +524,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
                         recomp_chars[target_pos++] = ch;
                 }
         }
-       recomp_chars[target_pos] = (pg_wchar) '\0';
+       recomp_chars[target_pos] = (char32_t) '\0';
  
         FREE(decomp_chars);
  
@@ -540,7 +540,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
  #ifndef FRONTEND
  
  static const pg_unicode_normprops *
-qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
+qc_hash_lookup(char32_t ch, const pg_unicode_norminfo *norminfo)
  {
         int                     h;
         uint32          hashkey;
@@ -571,7 +571,7 @@ qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
   * Look up the normalization quick check character property
   */
  static UnicodeNormalizationQC
-qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
+qc_is_allowed(UnicodeNormalizationForm form, char32_t ch)
  {
         const pg_unicode_normprops *found = NULL;
  
@@ -595,7 +595,7 @@ qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
  }
  
  UnicodeNormalizationQC
-unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)
+unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const char32_t *input)
  {
         uint8           lastCanonicalClass = 0;
         UnicodeNormalizationQC result = UNICODE_NORM_QC_YES;
@@ -610,9 +610,9 @@ unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *
         if (form == UNICODE_NFD || form == UNICODE_NFKD)
                 return UNICODE_NORM_QC_MAYBE;
  
-       for (const pg_wchar *p = input; *p; p++)
+       for (const char32_t *p = input; *p; p++)
         {
-               pg_wchar        ch = *p;
+               char32_t        ch = *p;
                 uint8           canonicalClass;
                 UnicodeNormalizationQC check;
  
diff --git a/src/fe_utils/mbprint.c b/src/fe_utils/mbprint.c

index eb3eeee9925cb9cbb1cd185eb84ab30f69da2447..abffdbe18a25b7f0c9eaa152a212ccc1b89120d4 100644 (file)
--- a/src/fe_utils/mbprint.c
+++ b/src/fe_utils/mbprint.c
@@ -49,20 +49,20 @@ pg_get_utf8_id(void)
   *
   * No error checks here, c must point to a long-enough string.
   */
-static pg_wchar
+static char32_t
  utf8_to_unicode(const unsigned char *c)
  {
         if ((*c & 0x80) == 0)
-               return (pg_wchar) c[0];
+               return (char32_t) c[0];
         else if ((*c & 0xe0) == 0xc0)
-               return (pg_wchar) (((c[0] & 0x1f) << 6) |
+               return (char32_t) (((c[0] & 0x1f) << 6) |
                                                    (c[1] & 0x3f));
         else if ((*c & 0xf0) == 0xe0)
-               return (pg_wchar) (((c[0] & 0x0f) << 12) |
+               return (char32_t) (((c[0] & 0x0f) << 12) |
                                                    ((c[1] & 0x3f) << 6) |
                                                    (c[2] & 0x3f));
         else if ((*c & 0xf8) == 0xf0)
-               return (pg_wchar) (((c[0] & 0x07) << 18) |
+               return (char32_t) (((c[0] & 0x07) << 18) |
                                                    ((c[1] & 0x3f) << 12) |
                                                    ((c[2] & 0x3f) << 6) |
                                                    (c[3] & 0x3f));
diff --git a/src/include/c.h b/src/include/c.h

index f4ec33e9b07df37a1a7473873fc1fdaa94d7b255..757dfff47825d2a043b72147960ab2cd34e18aad 100644 (file)
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -1376,6 +1376,29 @@ typedef intptr_t sigjmp_buf[5];
  /* /port compatibility functions */
  #include "port.h"
  
+/*
+ * char16_t and char32_t
+ *      Unicode code points.
+ *
+ * uchar.h should always be available in C11, but it's not available on
+ * Mac, so we fall back to our own typedefs when it is missing. In C++11
+ * these types are keywords, so when using C++ we must not redefine them.
+ *
+ * XXX: when uchar.h is available everywhere, we can remove this check and
+ * just include uchar.h unconditionally.
+ *
+ * XXX: this section is out of place because uchar.h needs to be included
+ * after port.h, due to an interaction with win32_port.h in some cases.
+ */
+#ifdef HAVE_UCHAR_H
+#include <uchar.h>
+#else
+#ifndef __cplusplus
+typedef uint16_t char16_t;
+typedef uint32_t char32_t;
+#endif
+#endif
+
  /* IWYU pragma: end_exports */
  
  #endif                                                 /* C_H */
diff --git a/src/include/common/unicode_case.h b/src/include/common/unicode_case.h

index 41e2c1f4b33f5a72f711365c097089948f69dd2e..6bcffd349c289b9a9d91cce94d27af25245fe399 100644 (file)
--- a/src/include/common/unicode_case.h
+++ b/src/include/common/unicode_case.h
@@ -14,14 +14,12 @@
  #ifndef UNICODE_CASE_H
  #define UNICODE_CASE_H
  
-#include "mb/pg_wchar.h"
-
  typedef size_t (*WordBoundaryNext) (void *wbstate);
  
-pg_wchar       unicode_lowercase_simple(pg_wchar code);
-pg_wchar       unicode_titlecase_simple(pg_wchar code);
-pg_wchar       unicode_uppercase_simple(pg_wchar code);
-pg_wchar       unicode_casefold_simple(pg_wchar code);
+char32_t       unicode_lowercase_simple(char32_t code);
+char32_t       unicode_titlecase_simple(char32_t code);
+char32_t       unicode_uppercase_simple(char32_t code);
+char32_t       unicode_casefold_simple(char32_t code);
  size_t         unicode_strlower(char *dst, size_t dstsize, const char *src,
                                                          ssize_t srclen, bool full);
  size_t         unicode_strtitle(char *dst, size_t dstsize, const char *src,
diff --git a/src/include/common/unicode_case_table.h b/src/include/common/unicode_case_table.h

index d53117865820bb484a23064aace6cd58cf6e6cfc..0a14fb2d97bbc1963a8d1ee62c93724500cd09c7 100644 (file)
--- a/src/include/common/unicode_case_table.h
+++ b/src/include/common/unicode_case_table.h
@@ -18,7 +18,6 @@
   */
  
  #include "common/unicode_case.h"
-#include "mb/pg_wchar.h"
  
  /*
   * The maximum number of codepoints that can result from case mapping
@@ -45,7 +44,7 @@ typedef enum
  typedef struct
  {
         int16           conditions;
-       pg_wchar        map[NCaseKind][MAX_CASE_EXPANSION];
+       char32_t        map[NCaseKind][MAX_CASE_EXPANSION];
  } pg_special_case;
  
  /*
@@ -166,7 +165,7 @@ static const pg_special_case special_case[106] =
   * The entry case_map_lower[case_index(codepoint)] is the mapping for the
   * given codepoint.
   */
-static const pg_wchar case_map_lower[1704] =
+static const char32_t case_map_lower[1704] =
  {
         0x000000,                                       /* reserved */
         0x000000,                                       /* U+000000 */
@@ -1879,7 +1878,7 @@ static const pg_wchar case_map_lower[1704] =
   * The entry case_map_title[case_index(codepoint)] is the mapping for the
   * given codepoint.
   */
-static const pg_wchar case_map_title[1704] =
+static const char32_t case_map_title[1704] =
  {
         0x000000,                                       /* reserved */
         0x000000,                                       /* U+000000 */
@@ -3592,7 +3591,7 @@ static const pg_wchar case_map_title[1704] =
   * The entry case_map_upper[case_index(codepoint)] is the mapping for the
   * given codepoint.
   */
-static const pg_wchar case_map_upper[1704] =
+static const char32_t case_map_upper[1704] =
  {
         0x000000,                                       /* reserved */
         0x000000,                                       /* U+000000 */
@@ -5305,7 +5304,7 @@ static const pg_wchar case_map_upper[1704] =
   * The entry case_map_fold[case_index(codepoint)] is the mapping for the
   * given codepoint.
   */
-static const pg_wchar case_map_fold[1704] =
+static const char32_t case_map_fold[1704] =
  {
         0x000000,                                       /* reserved */
         0x000000,                                       /* U+000000 */
@@ -13522,7 +13521,7 @@ static const uint16 case_map[4778] =
   * the offset into the mapping tables.
   */
  static inline uint16
-case_index(pg_wchar cp)
+case_index(char32_t cp)
  {
         /* Fast path for codepoints < 0x0588 */
         if (cp < 0x0588)
diff --git a/src/include/common/unicode_category.h b/src/include/common/unicode_category.h

index 8fd8b67a416e6a9c4a49de4673dc26137f7d0871..684143d3c8a9f409f880daed112b2f929f400a5a 100644 (file)
--- a/src/include/common/unicode_category.h
+++ b/src/include/common/unicode_category.h
@@ -14,8 +14,6 @@
  #ifndef UNICODE_CATEGORY_H
  #define UNICODE_CATEGORY_H
  
-#include "mb/pg_wchar.h"
-
  /*
   * Unicode General Category Values
   *
@@ -61,31 +59,31 @@ typedef enum pg_unicode_category
         PG_U_FINAL_PUNCTUATION = 29 /* Pf */
  } pg_unicode_category;
  
-extern pg_unicode_category unicode_category(pg_wchar code);
+extern pg_unicode_category unicode_category(char32_t code);
  extern const char *unicode_category_string(pg_unicode_category category);
  extern const char *unicode_category_abbrev(pg_unicode_category category);
  
-extern bool pg_u_prop_alphabetic(pg_wchar code);
-extern bool pg_u_prop_lowercase(pg_wchar code);
-extern bool pg_u_prop_uppercase(pg_wchar code);
-extern bool pg_u_prop_cased(pg_wchar code);
-extern bool pg_u_prop_case_ignorable(pg_wchar code);
-extern bool pg_u_prop_white_space(pg_wchar code);
-extern bool pg_u_prop_hex_digit(pg_wchar code);
-extern bool pg_u_prop_join_control(pg_wchar code);
+extern bool pg_u_prop_alphabetic(char32_t code);
+extern bool pg_u_prop_lowercase(char32_t code);
+extern bool pg_u_prop_uppercase(char32_t code);
+extern bool pg_u_prop_cased(char32_t code);
+extern bool pg_u_prop_case_ignorable(char32_t code);
+extern bool pg_u_prop_white_space(char32_t code);
+extern bool pg_u_prop_hex_digit(char32_t code);
+extern bool pg_u_prop_join_control(char32_t code);
  
-extern bool pg_u_isdigit(pg_wchar code, bool posix);
-extern bool pg_u_isalpha(pg_wchar code);
-extern bool pg_u_isalnum(pg_wchar code, bool posix);
-extern bool pg_u_isword(pg_wchar code);
-extern bool pg_u_isupper(pg_wchar code);
-extern bool pg_u_islower(pg_wchar code);
-extern bool pg_u_isblank(pg_wchar code);
-extern bool pg_u_iscntrl(pg_wchar code);
-extern bool pg_u_isgraph(pg_wchar code);
-extern bool pg_u_isprint(pg_wchar code);
-extern bool pg_u_ispunct(pg_wchar code, bool posix);
-extern bool pg_u_isspace(pg_wchar code);
-extern bool pg_u_isxdigit(pg_wchar code, bool posix);
+extern bool pg_u_isdigit(char32_t code, bool posix);
+extern bool pg_u_isalpha(char32_t code);
+extern bool pg_u_isalnum(char32_t code, bool posix);
+extern bool pg_u_isword(char32_t code);
+extern bool pg_u_isupper(char32_t code);
+extern bool pg_u_islower(char32_t code);
+extern bool pg_u_isblank(char32_t code);
+extern bool pg_u_iscntrl(char32_t code);
+extern bool pg_u_isgraph(char32_t code);
+extern bool pg_u_isprint(char32_t code);
+extern bool pg_u_ispunct(char32_t code, bool posix);
+extern bool pg_u_isspace(char32_t code);
+extern bool pg_u_isxdigit(char32_t code, bool posix);
  
  #endif                                                 /* UNICODE_CATEGORY_H */
diff --git a/src/include/common/unicode_category_table.h b/src/include/common/unicode_category_table.h

index 95a1c65da7e6f91a11d4cd689f4c6e34a262254c..466a41b72b02bf414e9e447aca38e4646452d972 100644 (file)
--- a/src/include/common/unicode_category_table.h
+++ b/src/include/common/unicode_category_table.h
@@ -20,15 +20,15 @@
   */
  typedef struct
  {
-       uint32          first;                  /* Unicode codepoint */
-       uint32          last;                   /* Unicode codepoint */
+       char32_t        first;                  /* Unicode codepoint */
+       char32_t        last;                   /* Unicode codepoint */
         uint8           category;               /* General Category */
  } pg_category_range;
  
  typedef struct
  {
-       uint32          first;                  /* Unicode codepoint */
-       uint32          last;                   /* Unicode codepoint */
+       char32_t        first;                  /* Unicode codepoint */
+       char32_t        last;                   /* Unicode codepoint */
  } pg_unicode_range;
  
  typedef struct
diff --git a/src/include/common/unicode_norm.h b/src/include/common/unicode_norm.h

index 5bc3b79e78e08bad27ad96b658065e84199b2b3e..516c192cc4c08c913680d38f3d2ed77d201e4546 100644 (file)
--- a/src/include/common/unicode_norm.h
+++ b/src/include/common/unicode_norm.h
@@ -14,8 +14,6 @@
  #ifndef UNICODE_NORM_H
  #define UNICODE_NORM_H
  
-#include "mb/pg_wchar.h"
-
  typedef enum
  {
         UNICODE_NFC = 0,
@@ -32,8 +30,8 @@ typedef enum
         UNICODE_NORM_QC_MAYBE = -1,
  } UnicodeNormalizationQC;
  
-extern pg_wchar *unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input);
+extern char32_t *unicode_normalize(UnicodeNormalizationForm form, const char32_t *input);
  
-extern UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input);
+extern UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const char32_t *input);
  
  #endif                                                 /* UNICODE_NORM_H */
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h

index 4b4a9974b75b367d89dde3ea32088e89ca72a1e2..4d84bdc81e49ad308dbae25cac629c0bb1fa7ced 100644 (file)
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -532,25 +532,25 @@ typedef uint32 (*utf_local_conversion_func) (uint32 code);
   * Some handy functions for Unicode-specific tests.
   */
  static inline bool
-is_valid_unicode_codepoint(pg_wchar c)
+is_valid_unicode_codepoint(char32_t c)
  {
         return (c > 0 && c <= 0x10FFFF);
  }
  
  static inline bool
-is_utf16_surrogate_first(pg_wchar c)
+is_utf16_surrogate_first(char32_t c)
  {
         return (c >= 0xD800 && c <= 0xDBFF);
  }
  
  static inline bool
-is_utf16_surrogate_second(pg_wchar c)
+is_utf16_surrogate_second(char32_t c)
  {
         return (c >= 0xDC00 && c <= 0xDFFF);
  }
  
-static inline pg_wchar
-surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
+static inline char32_t
+surrogate_pair_to_codepoint(char16_t first, char16_t second)
  {
         return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
  }
@@ -561,20 +561,20 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
   *
   * No error checks here, c must point to a long-enough string.
   */
-static inline pg_wchar
+static inline char32_t
  utf8_to_unicode(const unsigned char *c)
  {
         if ((*c & 0x80) == 0)
-               return (pg_wchar) c[0];
+               return (char32_t) c[0];
         else if ((*c & 0xe0) == 0xc0)
-               return (pg_wchar) (((c[0] & 0x1f) << 6) |
+               return (char32_t) (((c[0] & 0x1f) << 6) |
                                                    (c[1] & 0x3f));
         else if ((*c & 0xf0) == 0xe0)
-               return (pg_wchar) (((c[0] & 0x0f) << 12) |
+               return (char32_t) (((c[0] & 0x0f) << 12) |
                                                    ((c[1] & 0x3f) << 6) |
                                                    (c[2] & 0x3f));
         else if ((*c & 0xf8) == 0xf0)
-               return (pg_wchar) (((c[0] & 0x07) << 18) |
+               return (char32_t) (((c[0] & 0x07) << 18) |
                                                    ((c[1] & 0x3f) << 12) |
                                                    ((c[2] & 0x3f) << 6) |
                                                    (c[3] & 0x3f));
@@ -588,7 +588,7 @@ utf8_to_unicode(const unsigned char *c)
   * unicode_utf8len(c) bytes available.
   */
  static inline unsigned char *
-unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
+unicode_to_utf8(char32_t c, unsigned char *utf8string)
  {
         if (c <= 0x7F)
         {
@@ -620,7 +620,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
   * Number of bytes needed to represent the given char in UTF8.
   */
  static inline int
-unicode_utf8len(pg_wchar c)
+unicode_utf8len(char32_t c)
  {
         if (c <= 0x7F)
                 return 1;
@@ -676,8 +676,8 @@ extern int  pg_valid_server_encoding(const char *name);
  extern bool is_encoding_supported_by_icu(int encoding);
  extern const char *get_encoding_name_for_icu(int encoding);
  
-extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
-extern pg_wchar utf8_to_unicode(const unsigned char *c);
+extern unsigned char *unicode_to_utf8(char32_t c, unsigned char *utf8string);
+extern char32_t utf8_to_unicode(const unsigned char *c);
  extern bool pg_utf8_islegal(const unsigned char *source, int length);
  extern int     pg_utf_mblen(const unsigned char *s);
  extern int     pg_mule_mblen(const unsigned char *s);
@@ -739,8 +739,8 @@ extern char *pg_server_to_client(const char *s, int len);
  extern char *pg_any_to_server(const char *s, int len, int encoding);
  extern char *pg_server_to_any(const char *s, int len, int encoding);
  
-extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
-extern bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s);
+extern void pg_unicode_to_server(char32_t c, unsigned char *s);
+extern bool pg_unicode_to_server_noerror(char32_t c, unsigned char *s);
  
  extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
  extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in

index 08d7bfbee10634bc2249e6707b10539a22c4cd65..f52f14cc5664e95cde6654faacd95690aab8f815 100644 (file)
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -463,6 +463,9 @@
  /* Define to 1 if you have the <termios.h> header file. */
  #undef HAVE_TERMIOS_H
  
+/* Define to 1 if you have the <uchar.h> header file. */
+#undef HAVE_UCHAR_H
+
  /* Define to 1 if curl_global_init() is guaranteed to be thread-safe. */
  #undef HAVE_THREADSAFE_CURL_GLOBAL_INIT
  
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index ac2da4c98cfc4341faac9fb5d4745564c86cdd35..df88c78fe3a42a54cd797a0352cd55e46b47f130 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3505,6 +3505,8 @@ cb_cleanup_dir
  cb_options
  cb_tablespace
  cb_tablespace_mapping
+char16_t
+char32_t
  check_agg_arguments_context
  check_function_callback
  check_network_data
author	Jeff Davis <jdavis@postgresql.org>
	Wed, 29 Oct 2025 21:17:13 +0000 (14:17 -0700)
committer	Jeff Davis <jdavis@postgresql.org>
	Wed, 29 Oct 2025 21:17:13 +0000 (14:17 -0700)
configure		patch | blob | blame | history
configure.ac		patch | blob | blame | history
meson.build		patch | blob | blame | history
src/backend/parser/parser.c		patch | blob | blame | history
src/backend/parser/scan.l		patch | blob | blame | history
src/backend/utils/adt/jsonpath_scan.l		patch | blob | blame | history
src/backend/utils/adt/pg_locale_builtin.c		patch | blob | blame | history
src/backend/utils/adt/varlena.c		patch | blob | blame | history
src/backend/utils/mb/mbutils.c		patch | blob | blame | history
src/common/saslprep.c		patch | blob | blame | history
src/common/unicode/case_test.c		patch | blob | blame | history
src/common/unicode/category_test.c		patch | blob | blame | history
src/common/unicode/generate-norm_test_table.pl		patch | blob | blame | history
src/common/unicode/generate-unicode_case_table.pl		patch | blob | blame | history
src/common/unicode/generate-unicode_category_table.pl		patch | blob | blame | history
src/common/unicode/norm_test.c		patch | blob | blame | history
src/common/unicode_case.c		patch | blob | blame | history
src/common/unicode_category.c		patch | blob | blame | history
src/common/unicode_norm.c		patch | blob | blame | history
src/fe_utils/mbprint.c		patch | blob | blame | history
src/include/c.h		patch | blob | blame | history
src/include/common/unicode_case.h		patch | blob | blame | history
src/include/common/unicode_case_table.h		patch | blob | blame | history
src/include/common/unicode_category.h		patch | blob | blame | history
src/include/common/unicode_category_table.h		patch | blob | blame | history
src/include/common/unicode_norm.h		patch | blob | blame | history
src/include/mb/pg_wchar.h		patch | blob | blame | history
src/include/pg_config.h.in		patch | blob | blame | history
src/tools/pgindent/typedefs.list		patch | blob | blame | history