]> git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix encoding length for EUC_CN.
authorThomas Munro <tmunro@postgresql.org>
Wed, 4 Feb 2026 12:04:24 +0000 (01:04 +1300)
committerThomas Munro <tmunro@postgresql.org>
Sun, 8 Feb 2026 23:08:58 +0000 (12:08 +1300)
While EUC_CN supports only 1- and 2-byte sequences (CS0, CS1), the
mb<->wchar conversion functions allow 3-byte sequences beginning SS2,
SS3.

Change pg_encoding_max_length() to return 3, not 2, to close a
hypothesized buffer overrun if a corrupted string is converted to wchar
and back again in a newly allocated buffer.  We might reconsider that in
master (ie harmonizing in a different direction), but this change seems
better for the back-branches.

Also change pg_euccn_mblen() to report SS2 and SS3 characters as having
length 3 (following the example of EUC_KR).  Even though such characters
would not pass verification, it's remotely possible that invalid bytes
could be used to compute a buffer size for use in wchar conversion.

Security: CVE-2026-2006
Backpatch-through: 14
Author: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
src/common/wchar.c

index 5631e2c9363909c05edaccc91bf976dd990a1de2..191f3552ed350cc19e4777f1719bb9a7a13f50c0 100644 (file)
@@ -267,12 +267,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
        return cnt;
 }
 
+/*
+ * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for
+ * EUC_CN), but mb2wchar_with_len does.  Tell a coherent story for code that
+ * relies on agreement between mb2wchar_with_len and mblen.  Invalid text
+ * datums (e.g. from shared catalogs) reach this.
+ */
 static int
 pg_euccn_mblen(const unsigned char *s)
 {
        int                     len;
 
-       if (IS_HIGHBIT_SET(*s))
+       if (*s == SS2)
+               len = 3;
+       else if (*s == SS3)
+               len = 3;
+       else if (IS_HIGHBIT_SET(*s))
                len = 2;
        else
                len = 1;
@@ -2064,7 +2074,7 @@ pg_encoding_set_invalid(int encoding, char *dst)
 const pg_wchar_tbl pg_wchar_table[] = {
        [PG_SQL_ASCII] = {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1},
        [PG_EUC_JP] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},
-       [PG_EUC_CN] = {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2},
+       [PG_EUC_CN] = {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 3},
        [PG_EUC_KR] = {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3},
        [PG_EUC_TW] = {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4},
        [PG_EUC_JIS_2004] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},