From: Yasuhiro Matsumoto Date: Wed, 25 Mar 2026 21:20:21 +0000 (+0000) Subject: patch 9.2.0248: json_decode() is not strict enough X-Git-Tag: v9.2.0248^0 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=af3ccc28452c777fc47395fcb65bc5fcaee90fc5;p=thirdparty%2Fvim.git patch 9.2.0248: json_decode() is not strict enough Problem: json_decode() accepted keywords case-insensitively, violating RFC 7159. Both json_decode() and js_decode() silently accepted lone surrogates, which are invalid Unicode. Solution: Only allow lowercase keyword in json_decode(), reject lone surrogates, improve encoding performance in write_string() and blob byte serialization. 1. Fix surrogate pair range check (0xDFFF -> 0xDBFF) so only high surrogates trigger pair decoding. Reject lone surrogates that do not form a valid pair instead of producing invalid UTF-8. 2. Use case-sensitive matching for JSON keywords (true, false, null, NaN, Infinity) in json_decode() per RFC 7159. js_decode() retains case-insensitive behavior. 3. Replace double ga_append() calls for escape sequences with single GA_CONCAT_LITERAL() calls, halving function call and buffer growth check overhead. 4. Replace vim_snprintf_safelen() for blob byte encoding (0-255) with direct digit conversion. closes: #19807 Signed-off-by: Yasuhiro Matsumoto Signed-off-by: Christian Brabandt --- diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index ffc2b04f5c..250c07b627 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -1,4 +1,4 @@ -*builtin.txt* For Vim version 9.2. Last change: 2026 Mar 17 +*builtin.txt* For Vim version 9.2. Last change: 2026 Mar 25 VIM REFERENCE MANUAL by Bram Moolenaar @@ -6432,6 +6432,8 @@ js_decode({string}) *js_decode()* - Strings can be in single quotes. - Empty items in an array (between two commas) are allowed and result in v:none items. + - Capitalization is ignored in keywords: true, false, null, + NaN, Infinity and -Infinity. 
Can also be used as a |method|: > ReadObject()->js_decode() @@ -6470,12 +6472,9 @@ json_decode({string}) *json_decode()* *E491* same as {"1":2}. - More floating point numbers are recognized, e.g. "1." for "1.0", or "001.2" for "1.2". Special floating point values - "Infinity", "-Infinity" and "NaN" (capitalization ignored) - are accepted. + "Infinity", "-Infinity" and "NaN" are accepted. - Leading zeroes in integer numbers are ignored, e.g. "012" for "12" or "-012" for "-12". - - Capitalization is ignored in literal names null, true or - false, e.g. "NULL" for "null", "True" for "true". - Control characters U+0000 through U+001F which are not escaped in strings are accepted, e.g. " " (tab character in string) for "\t". @@ -6483,10 +6482,10 @@ json_decode({string}) *json_decode()* *E491* and results in v:none. - Backslash in an invalid 2-character sequence escape is ignored, e.g. "\a" is decoded as "a". - - A correct surrogate pair in JSON strings should normally be - a 12 character sequence such as "\uD834\uDD1E", but - json_decode() silently accepts truncated surrogate pairs - such as "\uD834" or "\uD834\u" + - A surrogate pair in JSON strings is a 12 character sequence + such as "\uD834\uDD1E". A lone surrogate or an invalid + surrogate pair (e.g. "\uD800" or "\uD800\uD800") results + in an error. *E938* A duplicate key in an object, valid in rfc7159, is not accepted by json_decode() as the result must be a valid Vim diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt index 3168994851..ed58dae145 100644 --- a/runtime/doc/version9.txt +++ b/runtime/doc/version9.txt @@ -1,4 +1,4 @@ -*version9.txt* For Vim version 9.2. Last change: 2026 Mar 22 +*version9.txt* For Vim version 9.2. Last change: 2026 Mar 25 VIM REFERENCE MANUAL by Bram Moolenaar @@ -52620,6 +52620,9 @@ Add "-t" option to append a terminating NUL byte to C include output (-i). 
Changed~ ------- - Support for NeXTStep was dropped with patch v9.2.0122 +- |json_decode()| is stricter: keywords must be lowercase, lone surrogates are + now invalid +- |js_decode()| rejects lone surrogates *added-9.3* Added ~ diff --git a/src/json.c b/src/json.c index 7c4874e5ae..a21ed6e771 100644 --- a/src/json.c +++ b/src/json.c @@ -163,6 +163,8 @@ write_string(garray_T *gap, char_u *str) } #endif ga_append(gap, '"'); + // Pre-grow for the common case: input length + quotes + some escapes. + ga_grow(gap, (int)STRLEN(res) + 2); // `from` is the beginning of a sequence of bytes we can directly copy from // the input string, avoiding the overhead associated to decoding/encoding // them. @@ -185,20 +187,19 @@ write_string(garray_T *gap, char_u *str) switch (c) { case 0x08: - ga_append(gap, '\\'); ga_append(gap, 'b'); break; + GA_CONCAT_LITERAL(gap, "\\b"); break; case 0x09: - ga_append(gap, '\\'); ga_append(gap, 't'); break; + GA_CONCAT_LITERAL(gap, "\\t"); break; case 0x0a: - ga_append(gap, '\\'); ga_append(gap, 'n'); break; + GA_CONCAT_LITERAL(gap, "\\n"); break; case 0x0c: - ga_append(gap, '\\'); ga_append(gap, 'f'); break; + GA_CONCAT_LITERAL(gap, "\\f"); break; case 0x0d: - ga_append(gap, '\\'); ga_append(gap, 'r'); break; + GA_CONCAT_LITERAL(gap, "\\r"); break; case 0x22: // " + GA_CONCAT_LITERAL(gap, "\\\""); break; case 0x5c: // backslash - ga_append(gap, '\\'); - ga_append(gap, c); - break; + GA_CONCAT_LITERAL(gap, "\\\\"); break; default: { size_t numbuflen; @@ -341,13 +342,24 @@ json_encode_item(garray_T *gap, typval_T *val, int copyID, int options, int dept ga_append(gap, '['); for (i = 0; i < b->bv_ga.ga_len; i++) { - size_t numbuflen; + int byte = blob_get(b, i); if (i > 0) - GA_CONCAT_LITERAL(gap, ","); - numbuflen = vim_snprintf_safelen((char *)numbuf, sizeof(numbuf), - "%d", blob_get(b, i)); - ga_concat_len(gap, numbuf, numbuflen); + ga_append(gap, ','); + // blob bytes are 0-255, use simple conversion + if (byte >= 100) + { + ga_append(gap, '0' + 
byte / 100); + ga_append(gap, '0' + (byte / 10) % 10); + ga_append(gap, '0' + byte % 10); + } + else if (byte >= 10) + { + ga_append(gap, '0' + byte / 10); + ga_append(gap, '0' + byte % 10); + } + else + ga_append(gap, '0' + byte); } ga_append(gap, ']'); } @@ -610,7 +622,7 @@ json_decode_string(js_read_T *reader, typval_T *res, int quote) return FAIL; } p += len + 2; - if (0xd800 <= nr && nr <= 0xdfff + if (0xd800 <= nr && nr <= 0xdbff && (int)(reader->js_end - p) >= 6 && *p == '\\' && *(p+1) == 'u') { @@ -633,6 +645,13 @@ json_decode_string(js_read_T *reader, typval_T *res, int quote) ((nr2 - 0xdc00) & 0x3ff)) + 0x10000; } } + // Lone surrogate is invalid. + if (0xd800 <= nr && nr <= 0xdfff) + { + if (res != NULL) + ga_clear(&ga); + return FAIL; + } if (res != NULL) { char_u buf[NUMBUFLEN]; @@ -975,7 +994,13 @@ json_decode_item(js_read_T *reader, typval_T *res, int options) retval = OK; break; } - if (STRNICMP((char *)p, "false", 5) == 0) + // In strict JSON mode, keywords are case-sensitive. + // In JS mode, keywords are case-insensitive. +#define MATCH_KW(p, kw, len) \ + ((options & JSON_JS) \ + ? 
STRNICMP((char *)(p), (kw), (len)) == 0 \ + : STRNCMP((char *)(p), (kw), (len)) == 0) + if (MATCH_KW(p, "false", 5)) { reader->js_used += 5; if (cur_item != NULL) @@ -986,7 +1011,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options) retval = OK; break; } - if (STRNICMP((char *)p, "true", 4) == 0) + if (MATCH_KW(p, "true", 4)) { reader->js_used += 4; if (cur_item != NULL) @@ -997,7 +1022,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options) retval = OK; break; } - if (STRNICMP((char *)p, "null", 4) == 0) + if (MATCH_KW(p, "null", 4)) { reader->js_used += 4; if (cur_item != NULL) @@ -1008,7 +1033,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options) retval = OK; break; } - if (STRNICMP((char *)p, "NaN", 3) == 0) + if (MATCH_KW(p, "NaN", 3)) { reader->js_used += 3; if (cur_item != NULL) @@ -1019,7 +1044,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options) retval = OK; break; } - if (STRNICMP((char *)p, "-Infinity", 9) == 0) + if (MATCH_KW(p, "-Infinity", 9)) { reader->js_used += 9; if (cur_item != NULL) @@ -1030,7 +1055,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options) retval = OK; break; } - if (STRNICMP((char *)p, "Infinity", 8) == 0) + if (MATCH_KW(p, "Infinity", 8)) { reader->js_used += 8; if (cur_item != NULL) @@ -1041,6 +1066,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options) retval = OK; break; } +#undef MATCH_KW // check for truncated name len = (int)(reader->js_end - (reader->js_buf + reader->js_used)); diff --git a/src/testdir/test_json.vim b/src/testdir/test_json.vim index 515ce9b38c..c65d85c8f0 100644 --- a/src/testdir/test_json.vim +++ b/src/testdir/test_json.vim @@ -14,8 +14,8 @@ let s:var5 = "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" " surrogate pair let s:jsonsp1 = '"\ud83c\udf63"' let s:varsp1 = "\xf0\x9f\x8d\xa3" +" high surrogate followed by non-surrogate is invalid (lone surrogate) let s:jsonsp2 = '"\ud83c\u00a0"' -let s:varsp2 = "\ud83c\u00a0" let s:jsonmb = 
'"s¢cĴgё"' let s:varmb = "s¢cĴgё" @@ -126,7 +126,7 @@ func Test_json_decode() call assert_equal(s:varmb, json_decode(s:jsonmb)) call assert_equal(s:varsp1, json_decode(s:jsonsp1)) - call assert_equal(s:varsp2, json_decode(s:jsonsp2)) + call assert_fails('call json_decode(s:jsonsp2)', 'E491:') call assert_equal(s:varnr, json_decode(s:jsonnr)) call assert_equal(s:varfl, json_decode(s:jsonfl)) @@ -151,6 +151,18 @@ func Test_json_decode() call assert_equal(type(v:none), type(json_decode(''))) call assert_equal("", json_decode('""')) + " json_decode() requires lowercase keywords (RFC 7159) + call assert_fails('call json_decode("True")', 'E491:') + call assert_fails('call json_decode("FALSE")', 'E491:') + call assert_fails('call json_decode("Null")', 'E491:') + call assert_fails('call json_decode("NULL")', 'E491:') + call assert_fails('call json_decode("nan")', 'E491:') + call assert_fails('call json_decode("NAN")', 'E491:') + call assert_fails('call json_decode("infinity")', 'E491:') + call assert_fails('call json_decode("INFINITY")', 'E491:') + call assert_fails('call json_decode("-infinity")', 'E491:') + call assert_fails('call json_decode("-INFINITY")', 'E491:') + " Character in string after \ is ignored if not special. 
call assert_equal("x", json_decode('"\x"')) @@ -165,6 +177,12 @@ func Test_json_decode() " but not twice call assert_fails("call json_decode('{\"\": \"ok\", \"\": \"bad\"}')", 'E938:') + " lone surrogate is invalid + call assert_fails('call json_decode("\"\\uD800\"")', 'E491:') + call assert_fails('call json_decode("\"\\uDC00\"")', 'E491:') + call assert_fails('call json_decode("\"\\uD800\\uD800\"")', 'E491:') + call assert_fails('call json_decode("\"\\uDC00\\uDC00\"")', 'E491:') + call assert_equal({'n': 1}, json_decode('{"n":1,}')) call assert_fails("call json_decode(\"{'n':'1',}\")", 'E491:') call assert_fails("call json_decode(\"'n'\")", 'E491:') @@ -257,7 +275,7 @@ func Test_js_decode() call assert_equal(s:varmb, js_decode(s:jsonmb)) call assert_equal(s:varsp1, js_decode(s:jsonsp1)) - call assert_equal(s:varsp2, js_decode(s:jsonsp2)) + call assert_fails('call js_decode(s:jsonsp2)', 'E491:') call assert_equal(s:varnr, js_decode(s:jsonnr)) call assert_equal(s:varfl, js_decode(s:jsonfl)) @@ -293,6 +311,20 @@ func Test_js_decode() call assert_equal("", js_decode("''")) call assert_equal('n', js_decode("'n'")) + + " js_decode() accepts keywords case-insensitively + call assert_equal(v:true, js_decode('True')) + call assert_equal(v:true, js_decode('TRUE')) + call assert_equal(v:false, js_decode('False')) + call assert_equal(v:false, js_decode('FALSE')) + call assert_equal(v:null, js_decode('Null')) + call assert_equal(v:null, js_decode('NULL')) + call assert_true(isnan(js_decode('nan'))) + call assert_equal(s:varposinf, js_decode('infinity')) + call assert_equal(s:varneginf, js_decode('-infinity')) + call assert_equal(s:varposinf, js_decode('INFINITY')) + call assert_equal(s:varneginf, js_decode('-INFINITY')) + call assert_equal({'n': 1}, js_decode('{"n":1,}')) call assert_equal({'n': '1'}, js_decode("{'n':'1',}")) diff --git a/src/version.c b/src/version.c index c86fb7c329..c24a1dd178 100644 --- a/src/version.c +++ b/src/version.c @@ -734,6 +734,8 @@ static char 
*(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 248, /**/ 247, /**/