patch 9.2.0248: json_decode() is not strict enough

author Yasuhiro Matsumoto <mattn.jp@gmail.com>

Wed, 25 Mar 2026 21:20:21 +0000 (21:20 +0000)

committer Christian Brabandt <cb@256bit.org>

Wed, 25 Mar 2026 21:20:21 +0000 (21:20 +0000)
author Yasuhiro Matsumoto <mattn.jp@gmail.com>
Wed, 25 Mar 2026 21:20:21 +0000 (21:20 +0000)
committer Christian Brabandt <cb@256bit.org>
Wed, 25 Mar 2026 21:20:21 +0000 (21:20 +0000)
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt

index ffc2b04f5c990ed51ffb47db65ce7572d42f1622..250c07b6273405204e91bfdd0ab76b6a8f5aae91 100644 (file)
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -1,4 +1,4 @@
-*builtin.txt*  For Vim version 9.2.  Last change: 2026 Mar 17
+*builtin.txt*  For Vim version 9.2.  Last change: 2026 Mar 25
  
  
                   VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -6432,6 +6432,8 @@ js_decode({string})                                       *js_decode()*
                 - Strings can be in single quotes.
                 - Empty items in an array (between two commas) are allowed and
                   result in v:none items.
+               - Capitalization is ignored in keywords: true, false, null,
+                 NaN, Infinity and -Infinity.
  
                 Can also be used as a |method|: >
                         ReadObject()->js_decode()
@@ -6470,12 +6472,9 @@ json_decode({string})                                    *json_decode()* *E491*
                   same as {"1":2}.
                 - More floating point numbers are recognized, e.g. "1." for
                   "1.0", or "001.2" for "1.2".  Special floating point values
-                 "Infinity", "-Infinity" and "NaN" (capitalization ignored)
-                 are accepted.
+                 "Infinity", "-Infinity" and "NaN" are accepted.
                 - Leading zeroes in integer numbers are ignored, e.g. "012"
                   for "12" or "-012" for "-12".
-               - Capitalization is ignored in literal names null, true or
-                 false, e.g. "NULL" for "null", "True" for "true".
                 - Control characters U+0000 through U+001F which are not
                   escaped in strings are accepted, e.g. "       " (tab
                   character in string) for "\t".
@@ -6483,10 +6482,10 @@ json_decode({string})                                   *json_decode()* *E491*
                   and results in v:none.
                 - Backslash in an invalid 2-character sequence escape is
                   ignored, e.g. "\a" is decoded as "a".
-               - A correct surrogate pair in JSON strings should normally be
-                 a 12 character sequence such as "\uD834\uDD1E", but
-                 json_decode() silently accepts truncated surrogate pairs
-                 such as "\uD834" or "\uD834\u"
+               - A surrogate pair in JSON strings is a 12 character sequence
+                 such as "\uD834\uDD1E".  A lone surrogate or an invalid
+                 surrogate pair (e.g. "\uD800" or "\uD800\uD800") results
+                 in an error.
                                                         *E938*
                 A duplicate key in an object, valid in rfc7159, is not
                 accepted by json_decode() as the result must be a valid Vim
diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt

index 3168994851cf5c9a6d2d19e0384b73f6688eb5d3..ed58dae1452e1bd3674413a66328c2603f22ecbd 100644 (file)
--- a/runtime/doc/version9.txt
+++ b/runtime/doc/version9.txt
@@ -1,4 +1,4 @@
-*version9.txt* For Vim version 9.2.  Last change: 2026 Mar 22
+*version9.txt* For Vim version 9.2.  Last change: 2026 Mar 25
  
  
                   VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -52620,6 +52620,9 @@ Add "-t" option to append a terminating NUL byte to C include output (-i).
  Changed~
  -------
  - Support for NeXTStep was dropped with patch v9.2.0122
+- |json_decode()| is stricter: keywords are case-sensitive, lone surrogates
+  are now invalid
+- |js_decode()| rejects lone surrogates
  
                                                         *added-9.3*
  Added ~
diff --git a/src/json.c b/src/json.c

index 7c4874e5ae294e766467556ba515ece37ea4a763..a21ed6e771313bf9eca2143e32b82aa112ea26a7 100644 (file)
--- a/src/json.c
+++ b/src/json.c
@@ -163,6 +163,8 @@ write_string(garray_T *gap, char_u *str)
      }
  #endif
      ga_append(gap, '"');
+    // Pre-grow for the common case (input length + 2); escaped characters
+    // may still trigger further growth.
+    ga_grow(gap, (int)STRLEN(res) + 2);
      // `from` is the beginning of a sequence of bytes we can directly copy from
      // the input string, avoiding the overhead associated to decoding/encoding
      // them.
@@ -185,20 +187,19 @@ write_string(garray_T *gap, char_u *str)
             switch (c)
             {
                 case 0x08:
-                   ga_append(gap, '\\'); ga_append(gap, 'b'); break;
+                   GA_CONCAT_LITERAL(gap, "\\b"); break;
                 case 0x09:
-                   ga_append(gap, '\\'); ga_append(gap, 't'); break;
+                   GA_CONCAT_LITERAL(gap, "\\t"); break;
                 case 0x0a:
-                   ga_append(gap, '\\'); ga_append(gap, 'n'); break;
+                   GA_CONCAT_LITERAL(gap, "\\n"); break;
                 case 0x0c:
-                   ga_append(gap, '\\'); ga_append(gap, 'f'); break;
+                   GA_CONCAT_LITERAL(gap, "\\f"); break;
                 case 0x0d:
-                   ga_append(gap, '\\'); ga_append(gap, 'r'); break;
+                   GA_CONCAT_LITERAL(gap, "\\r"); break;
                 case 0x22: // "
+                   GA_CONCAT_LITERAL(gap, "\\\""); break;
                 case 0x5c: // backslash
-                   ga_append(gap, '\\');
-                   ga_append(gap, c);
-                   break;
+                   GA_CONCAT_LITERAL(gap, "\\\\"); break;
                 default:
                 {
                     size_t  numbuflen;
@@ -341,13 +342,24 @@ json_encode_item(garray_T *gap, typval_T *val, int copyID, int options, int dept
                 ga_append(gap, '[');
                 for (i = 0; i < b->bv_ga.ga_len; i++)
                 {
-                   size_t  numbuflen;
+                   int     byte = blob_get(b, i);
  
                     if (i > 0)
-                       GA_CONCAT_LITERAL(gap, ",");
-                   numbuflen = vim_snprintf_safelen((char *)numbuf, sizeof(numbuf),
-                       "%d", blob_get(b, i));
-                   ga_concat_len(gap, numbuf, numbuflen);
+                       ga_append(gap, ',');
+                   // blob bytes are 0-255, use simple conversion
+                   if (byte >= 100)
+                   {
+                       ga_append(gap, '0' + byte / 100);
+                       ga_append(gap, '0' + (byte / 10) % 10);
+                       ga_append(gap, '0' + byte % 10);
+                   }
+                   else if (byte >= 10)
+                   {
+                       ga_append(gap, '0' + byte / 10);
+                       ga_append(gap, '0' + byte % 10);
+                   }
+                   else
+                       ga_append(gap, '0' + byte);
                 }
                 ga_append(gap, ']');
             }
@@ -610,7 +622,7 @@ json_decode_string(js_read_T *reader, typval_T *res, int quote)
                         return FAIL;
                     }
                     p += len + 2;
-                   if (0xd800 <= nr && nr <= 0xdfff
+                   if (0xd800 <= nr && nr <= 0xdbff
                             && (int)(reader->js_end - p) >= 6
                             && *p == '\\' && *(p+1) == 'u')
                     {
@@ -633,6 +645,13 @@ json_decode_string(js_read_T *reader, typval_T *res, int quote)
                                 ((nr2 - 0xdc00) & 0x3ff)) + 0x10000;
                         }
                     }
+                   // Lone surrogate is invalid.
+                   if (0xd800 <= nr && nr <= 0xdfff)
+                   {
+                       if (res != NULL)
+                           ga_clear(&ga);
+                       return FAIL;
+                   }
                     if (res != NULL)
                     {
                         char_u  buf[NUMBUFLEN];
@@ -975,7 +994,13 @@ json_decode_item(js_read_T *reader, typval_T *res, int options)
                         retval = OK;
                         break;
                     }
-                   if (STRNICMP((char *)p, "false", 5) == 0)
+                   // In strict JSON mode, keywords are matched case-sensitively.
+                   // In JS mode, keywords are case-insensitive.
+#define MATCH_KW(p, kw, len) \
+    ((options & JSON_JS) \
+     ? STRNICMP((char *)(p), (kw), (len)) == 0 \
+     : STRNCMP((char *)(p), (kw), (len)) == 0)
+                   if (MATCH_KW(p, "false", 5))
                     {
                         reader->js_used += 5;
                         if (cur_item != NULL)
@@ -986,7 +1011,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options)
                         retval = OK;
                         break;
                     }
-                   if (STRNICMP((char *)p, "true", 4) == 0)
+                   if (MATCH_KW(p, "true", 4))
                     {
                         reader->js_used += 4;
                         if (cur_item != NULL)
@@ -997,7 +1022,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options)
                         retval = OK;
                         break;
                     }
-                   if (STRNICMP((char *)p, "null", 4) == 0)
+                   if (MATCH_KW(p, "null", 4))
                     {
                         reader->js_used += 4;
                         if (cur_item != NULL)
@@ -1008,7 +1033,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options)
                         retval = OK;
                         break;
                     }
-                   if (STRNICMP((char *)p, "NaN", 3) == 0)
+                   if (MATCH_KW(p, "NaN", 3))
                     {
                         reader->js_used += 3;
                         if (cur_item != NULL)
@@ -1019,7 +1044,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options)
                         retval = OK;
                         break;
                     }
-                   if (STRNICMP((char *)p, "-Infinity", 9) == 0)
+                   if (MATCH_KW(p, "-Infinity", 9))
                     {
                         reader->js_used += 9;
                         if (cur_item != NULL)
@@ -1030,7 +1055,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options)
                         retval = OK;
                         break;
                     }
-                   if (STRNICMP((char *)p, "Infinity", 8) == 0)
+                   if (MATCH_KW(p, "Infinity", 8))
                     {
                         reader->js_used += 8;
                         if (cur_item != NULL)
@@ -1041,6 +1066,7 @@ json_decode_item(js_read_T *reader, typval_T *res, int options)
                         retval = OK;
                         break;
                     }
+#undef MATCH_KW
                     // check for truncated name
                     len = (int)(reader->js_end
                                          - (reader->js_buf + reader->js_used));
diff --git a/src/testdir/test_json.vim b/src/testdir/test_json.vim

index 515ce9b38c13487a16b1942584129c8c5c6280c0..c65d85c8f0e14e4b53f36eee18b3b1f8fa94c9ab 100644 (file)
--- a/src/testdir/test_json.vim
+++ b/src/testdir/test_json.vim
@@ -14,8 +14,8 @@ let s:var5 = "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
  " surrogate pair
  let s:jsonsp1 = '"\ud83c\udf63"'
  let s:varsp1 = "\xf0\x9f\x8d\xa3"
+" high surrogate followed by non-surrogate is invalid (lone surrogate)
  let s:jsonsp2 = '"\ud83c\u00a0"'
-let s:varsp2 = "\ud83c\u00a0"
  
  let s:jsonmb = '"s¢cĴgё"'
  let s:varmb = "s¢cĴgё"
@@ -126,7 +126,7 @@ func Test_json_decode()
  
    call assert_equal(s:varmb, json_decode(s:jsonmb))
    call assert_equal(s:varsp1, json_decode(s:jsonsp1))
-  call assert_equal(s:varsp2, json_decode(s:jsonsp2))
+  call assert_fails('call json_decode(s:jsonsp2)', 'E491:')
  
    call assert_equal(s:varnr, json_decode(s:jsonnr))
    call assert_equal(s:varfl, json_decode(s:jsonfl))
@@ -151,6 +151,18 @@ func Test_json_decode()
    call assert_equal(type(v:none), type(json_decode('')))
    call assert_equal("", json_decode('""'))
  
+  " json_decode() requires exact keyword case (true, false, null, NaN, Infinity)
+  call assert_fails('call json_decode("True")', 'E491:')
+  call assert_fails('call json_decode("FALSE")', 'E491:')
+  call assert_fails('call json_decode("Null")', 'E491:')
+  call assert_fails('call json_decode("NULL")', 'E491:')
+  call assert_fails('call json_decode("nan")', 'E491:')
+  call assert_fails('call json_decode("NAN")', 'E491:')
+  call assert_fails('call json_decode("infinity")', 'E491:')
+  call assert_fails('call json_decode("INFINITY")', 'E491:')
+  call assert_fails('call json_decode("-infinity")', 'E491:')
+  call assert_fails('call json_decode("-INFINITY")', 'E491:')
+
    " Character in string after \ is ignored if not special.
    call assert_equal("x", json_decode('"\x"'))
  
@@ -165,6 +177,12 @@ func Test_json_decode()
    " but not twice
    call assert_fails("call json_decode('{\"\": \"ok\", \"\": \"bad\"}')", 'E938:')
  
+  " lone surrogate is invalid
+  call assert_fails('call json_decode("\"\\uD800\"")', 'E491:')
+  call assert_fails('call json_decode("\"\\uDC00\"")', 'E491:')
+  call assert_fails('call json_decode("\"\\uD800\\uD800\"")', 'E491:')
+  call assert_fails('call json_decode("\"\\uDC00\\uDC00\"")', 'E491:')
+
    call assert_equal({'n': 1}, json_decode('{"n":1,}'))
    call assert_fails("call json_decode(\"{'n':'1',}\")", 'E491:')
    call assert_fails("call json_decode(\"'n'\")", 'E491:')
@@ -257,7 +275,7 @@ func Test_js_decode()
  
    call assert_equal(s:varmb, js_decode(s:jsonmb))
    call assert_equal(s:varsp1, js_decode(s:jsonsp1))
-  call assert_equal(s:varsp2, js_decode(s:jsonsp2))
+  call assert_fails('call js_decode(s:jsonsp2)', 'E491:')
  
    call assert_equal(s:varnr, js_decode(s:jsonnr))
    call assert_equal(s:varfl, js_decode(s:jsonfl))
@@ -293,6 +311,20 @@ func Test_js_decode()
    call assert_equal("", js_decode("''"))
  
    call assert_equal('n', js_decode("'n'"))
+
+  " js_decode() accepts keywords case-insensitively
+  call assert_equal(v:true, js_decode('True'))
+  call assert_equal(v:true, js_decode('TRUE'))
+  call assert_equal(v:false, js_decode('False'))
+  call assert_equal(v:false, js_decode('FALSE'))
+  call assert_equal(v:null, js_decode('Null'))
+  call assert_equal(v:null, js_decode('NULL'))
+  call assert_true(isnan(js_decode('nan')))
+  call assert_equal(s:varposinf, js_decode('infinity'))
+  call assert_equal(s:varneginf, js_decode('-infinity'))
+  call assert_equal(s:varposinf, js_decode('INFINITY'))
+  call assert_equal(s:varneginf, js_decode('-INFINITY'))
+
    call assert_equal({'n': 1}, js_decode('{"n":1,}'))
    call assert_equal({'n': '1'}, js_decode("{'n':'1',}"))
  
diff --git a/src/version.c b/src/version.c

index c86fb7c3290e28d767a177eb3c91f2e6ff99dcb5..c24a1dd1786aef8f64a2560c7f25f9d16a2d1bd0 100644 (file)
--- a/src/version.c
+++ b/src/version.c
@@ -734,6 +734,8 @@ static char *(features[]) =
  
  static int included_patches[] =
  {   /* Add new patch number below this line */
+/**/
+    248,
  /**/
      247,
  /**/
author	Yasuhiro Matsumoto <mattn.jp@gmail.com>
	Wed, 25 Mar 2026 21:20:21 +0000 (21:20 +0000)
committer	Christian Brabandt <cb@256bit.org>
	Wed, 25 Mar 2026 21:20:21 +0000 (21:20 +0000)
runtime/doc/builtin.txt		patch \| blob \| blame \| history
runtime/doc/version9.txt		patch \| blob \| blame \| history
src/json.c		patch \| blob \| blame \| history
src/testdir/test_json.vim		patch \| blob \| blame \| history
src/version.c		patch \| blob \| blame \| history