Fix encoding length for EUC_CN.

author Thomas Munro <tmunro@postgresql.org>

Wed, 4 Feb 2026 12:04:24 +0000 (01:04 +1300)

committer Thomas Munro <tmunro@postgresql.org>

Sun, 8 Feb 2026 23:08:58 +0000 (12:08 +1300)
author Thomas Munro <tmunro@postgresql.org>
Wed, 4 Feb 2026 12:04:24 +0000 (01:04 +1300)
committer Thomas Munro <tmunro@postgresql.org>
Sun, 8 Feb 2026 23:08:58 +0000 (12:08 +1300)
diff --git a/src/common/wchar.c b/src/common/wchar.c

index 5631e2c9363909c05edaccc91bf976dd990a1de2..191f3552ed350cc19e4777f1719bb9a7a13f50c0 100644 (file)
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -267,12 +267,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
         return cnt;
  }
  
+/*
+ * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for
+ * EUC_CN), but mb2wchar_with_len does.  Report 3 bytes for those lead bytes
+ * so mblen agrees with mb2wchar_with_len, which some code relies on.  Invalid
+ * text datums (e.g. from shared catalogs) can reach this function.
+ */
  static int
  pg_euccn_mblen(const unsigned char *s)
  {
         int                     len;
  
-       if (IS_HIGHBIT_SET(*s))
+       if (*s == SS2)
+               len = 3;
+       else if (*s == SS3)
+               len = 3;
+       else if (IS_HIGHBIT_SET(*s))
                 len = 2;
         else
                 len = 1;
@@ -2064,7 +2074,7 @@ pg_encoding_set_invalid(int encoding, char *dst)
  const pg_wchar_tbl pg_wchar_table[] = {
         [PG_SQL_ASCII] = {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1},
         [PG_EUC_JP] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},
-       [PG_EUC_CN] = {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2},
+       [PG_EUC_CN] = {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 3},
         [PG_EUC_KR] = {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3},
         [PG_EUC_TW] = {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4},
         [PG_EUC_JIS_2004] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},
author	Thomas Munro <tmunro@postgresql.org>
	Wed, 4 Feb 2026 12:04:24 +0000 (01:04 +1300)
committer	Thomas Munro <tmunro@postgresql.org>
	Sun, 8 Feb 2026 23:08:58 +0000 (12:08 +1300)