From af79c30dc3e5369cd6d2bfdccd2c2c0ffbd60ef3 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 5 Feb 2026 01:04:24 +1300 Subject: [PATCH] Fix encoding length for EUC_CN. While EUC_CN supports only 1- and 2-byte sequences (CS0, CS1), the mb<->wchar conversion functions allow 3-byte sequences beginning SS2, SS3. Change pg_encoding_max_length() to return 3, not 2, to close a hypothesized buffer overrun if a corrupted string is converted to wchar and back again in a newly allocated buffer. We might reconsider that in master (ie harmonizing in a different direction), but this change seems better for the back-branches. Also change pg_euccn_mblen() to report SS2 and SS3 characters as having length 3 (following the example of EUC_KR). Even though such characters would not pass verification, it's remotely possible that invalid bytes could be used to compute a buffer size for use in wchar conversion. Security: CVE-2026-2006 Backpatch-through: 14 Author: Thomas Munro Reviewed-by: Noah Misch Reviewed-by: Heikki Linnakangas --- src/common/wchar.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/common/wchar.c b/src/common/wchar.c index 5631e2c9363..191f3552ed3 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -267,12 +267,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) return cnt; } +/* + * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for + * EUC_CN), but mb2wchar_with_len does. Tell a coherent story for code that + * relies on agreement between mb2wchar_with_len and mblen. Invalid text + * datums (e.g. from shared catalogs) reach this. + */ static int pg_euccn_mblen(const unsigned char *s) { int len; - if (IS_HIGHBIT_SET(*s)) + if (*s == SS2) + len = 3; + else if (*s == SS3) + len = 3; + else if (IS_HIGHBIT_SET(*s)) len = 2; else len = 1; @@ -2064,7 +2074,7 @@ pg_encoding_set_invalid(int encoding, char *dst) const pg_wchar_tbl pg_wchar_table[] = { [PG_SQL_ASCII] = {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, [PG_EUC_JP] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, - [PG_EUC_CN] = {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, + [PG_EUC_CN] = {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 3}, [PG_EUC_KR] = {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, [PG_EUC_TW] = {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, [PG_EUC_JIS_2004] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, -- 2.47.3