git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Improved handling of malformed unicode within JSON strings.
author drh <>
Tue, 26 Dec 2023 13:20:57 +0000 (13:20 +0000)
committer drh <>
Tue, 26 Dec 2023 13:20:57 +0000 (13:20 +0000)
FossilOrigin-Name: e252bdf5f5de26ba8e2bcc6b0ad94121ed6fc4d86c02fe4a2a058ada93747beb

manifest
manifest.uuid
src/json.c

index c02d741663500e16a0bb32ba035f75d90b5c65e2..99482bbc2b46aab64a54ac8cc8253fc35adbdfaf 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Remove\san\sALWAYS()\sadded\sin\s[c50e6c2ace49d092]\sbecause\sit\sis\ssometimes\sfalse.\ndbsqlfuzz\sc393a4f783d42efd9552772110aff7e5d937f15e.
-D 2023-12-24T12:02:36.449
+C Improved\shandling\sof\smalformed\sunicode\swithin\sJSON\sstrings.
+D 2023-12-26T13:20:57.593
 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
 F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -697,7 +697,7 @@ F src/hash.h 3340ab6e1d13e725571d7cee6d3e3135f0779a7d8e76a9ce0a85971fa3953c51
 F src/hwtime.h f9c2dfb84dce7acf95ce6d289e46f5f9d3d1afd328e53da8f8e9008e3b3caae6
 F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71
 F src/insert.c 3f0a94082d978bbdd33c38fefea15346c6c6bffb70bc645a71dc0f1f87dd3276
-F src/json.c 498cbe3346f216e3119e6d01585c3bc09994612c16459f2fa682a1cec61b1cf7
+F src/json.c bc90605da937ca0cd72ff0492216fbb38fd8f9025e6344499f9db235be98e36f
 F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa
 F src/loadext.c 7432c944ff197046d67a1207790a1b13eec4548c85a9457eb0896bb3641dfb36
 F src/main.c ce714ee501122c76eb2e69b292bebe443aba611fc3b88e6786eb910285515fe4
@@ -2156,8 +2156,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
 F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P e55d1c2333f35fc20615aa83a7843d08cae7945710a2156d44eee0cc37d90ade
-R 3402eea35cb157b90bc34e52efc1a052
+P b9daf37e57cde12c4de271a2b1995e8e91b6411f8c2e8882e536241929609b3a
+R 8e5515c216d097520f23a07a88eb7f34
 U drh
-Z 3f09f0e552099f2a662787477373f861
+Z acb4ee2fdca29b1d699d6a53a9b5165e
 # Remove this line to create a well-formed Fossil manifest.
index 6f2035ba05c8cfcc9bc91017c353fd8d954f5074..aa146ba31bcd2ab12f11e7f33fca05b67e8e2192 100644 (file)
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-b9daf37e57cde12c4de271a2b1995e8e91b6411f8c2e8882e536241929609b3a
\ No newline at end of file
+e252bdf5f5de26ba8e2bcc6b0ad94121ed6fc4d86c02fe4a2a058ada93747beb
\ No newline at end of file
index e4f658ca7f9b52a0c1bde90028078ff9712fdd73..42d6155fb6ca757347739456e11e40d5c9d03f96 100644 (file)
--- a/src/json.c
+++ b/src/json.c
@@ -214,6 +214,12 @@ typedef struct JsonParse JsonParse;
 #define JSON_CACHE_ID    (-429938)  /* Cache entry */
 #define JSON_CACHE_SIZE  4          /* Max number of cache entries */
 
+/*
+** jsonUnescapeOneChar() returns this invalid code point if it encounters
+** a syntax error.
+*/
+#define JSON_INVALID_CHAR 0x99999
+
 /* A cache mapping JSON text into JSONB blobs.
 **
 ** Each cache entry is a JsonParse object with the following restrictions:
@@ -1380,7 +1386,7 @@ static u32 jsonbValidityCheck(
           }else{
             u32 c = 0;
             u32 szC = jsonUnescapeOneChar((const char*)&z[j], k-j, &c);
-            if( c==0xfffd ) return j+1;
+            if( c==JSON_INVALID_CHAR ) return j+1;
             j += szC - 1;
           }
         }
@@ -2390,19 +2396,23 @@ static u32 jsonBytesToBypass(const char *z, u32 n){
 ** Input z[0..n] defines JSON escape sequence including the leading '\\'.
 ** Decode that escape sequence into a single character.  Write that
 ** character into *piOut.  Return the number of bytes in the escape sequence.
+**
+** If there is a syntax error of some kind (for example too few characters
+** after the '\\' to complete the encoding) then *piOut is set to
+** JSON_INVALID_CHAR.
 */
 static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){
   assert( n>0 );
   assert( z[0]=='\\' );
   if( n<2 ){
-    *piOut = 0xFFFD;
+    *piOut = JSON_INVALID_CHAR;
     return n;
   }
   switch( (u8)z[1] ){
     case 'u': {
       u32 v, vlo;
       if( n<6 ){
-        *piOut = 0xFFFD;
+        *piOut = JSON_INVALID_CHAR;
         return n;
       }
       v = jsonHexToInt4(&z[2]);
@@ -2432,7 +2442,7 @@ static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){
     case '\\':{   *piOut = z[1];  return 2; }
     case 'x': {
       if( n<4 ){
-        *piOut = 0xFFFD;
+        *piOut = JSON_INVALID_CHAR;
         return n;
       }
       *piOut = (jsonHexToInt(z[2])<<4) | jsonHexToInt(z[3]);
@@ -2443,7 +2453,7 @@ static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){
     case '\n': {
       u32 nSkip = jsonBytesToBypass(z, n);
       if( nSkip==0 ){
-        *piOut = 0xFFFD;
+        *piOut = JSON_INVALID_CHAR;
         return n;
       }else if( nSkip==n ){
         *piOut = 0;
@@ -2456,7 +2466,7 @@ static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){
       }
     }
     default: {
-      *piOut = 0xFFFD;
+      *piOut = JSON_INVALID_CHAR;
       return 2;
     }
   }
@@ -2930,8 +2940,6 @@ static void jsonReturnFromBlob(
           u32 szEscape = jsonUnescapeOneChar(&z[iIn], sz-iIn, &v);
           if( v<=0x7f ){
             zOut[iOut++] = (char)v;
-          }else if( v==0xfffd ){
-            /* Silently ignore illegal unicode */
           }else if( v<=0x7ff ){
             assert( szEscape>=2 );
             zOut[iOut++] = (char)(0xc0 | (v>>6));
@@ -2941,6 +2949,8 @@ static void jsonReturnFromBlob(
             zOut[iOut++] = 0xe0 | (v>>12);
             zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
             zOut[iOut++] = 0x80 | (v&0x3f);
+          }else if( v==JSON_INVALID_CHAR ){
+            /* Silently ignore illegal unicode */
           }else{
             assert( szEscape>=4 );
             zOut[iOut++] = 0xf0 | (v>>18);