]> git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Increased rigor in comparisons between object labels in JSON.
authordrh <>
Wed, 6 Dec 2023 14:50:48 +0000 (14:50 +0000)
committerdrh <>
Wed, 6 Dec 2023 14:50:48 +0000 (14:50 +0000)
FossilOrigin-Name: 2bc86d145fccc07107b7753cb1a69122676d4096fe59c454497bd81a6142d45e

manifest
manifest.uuid
src/json.c

index c0dab2ba231a2447db4dc9e4632aaf409786f9e7..918632105ec57b4da8dc3005d5fdd87d41a2c276 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Rework\sthe\sJSON\sfunctions\sso\sthat\sthey\suse\sthe\sJSONB\sformat\sinternally.\nThe\soriginal\sJsonNode\sparse\stree\sdesign\sis\sremoved.\s\sAll\sJSON\sfunctions\nthat\saccept\stext\sJSON\salso\saccept\sJSONB.\s\sNew\sfunctions\sgenerate\sJSONB.
-D 2023-12-05T19:45:09.048
+C Increased\srigor\sin\scomparisons\sbetween\sobject\slabels\sin\sJSON.
+D 2023-12-06T14:50:48.135
 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
 F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
@@ -690,7 +690,7 @@ F src/hash.h 3340ab6e1d13e725571d7cee6d3e3135f0779a7d8e76a9ce0a85971fa3953c51
 F src/hwtime.h f9c2dfb84dce7acf95ce6d289e46f5f9d3d1afd328e53da8f8e9008e3b3caae6
 F src/in-operator.md 10cd8f4bcd225a32518407c2fb2484089112fd71
 F src/insert.c 3f0a94082d978bbdd33c38fefea15346c6c6bffb70bc645a71dc0f1f87dd3276
-F src/json.c 0a6095d10a8c8251e1838a1f12abc89125b0b05f82497ad12896a7f714e397ce
+F src/json.c 15efd213cc95bde5b714ec068fdb1f6817b1ed9a253644833fed5f196957445a
 F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa
 F src/loadext.c 7432c944ff197046d67a1207790a1b13eec4548c85a9457eb0896bb3641dfb36
 F src/main.c 1b89f3de98d1b59fec5bac1d66d6ece21f703821b8eaa0d53d9604c35309f6f9
@@ -2147,9 +2147,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
 F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 8abc2ccaf8106f20243568cd7fa74174386eb85d7ea381201e97e2fd527033e0 174c2b2eef5fecd96a5fc89b81032fe81f7801f12097cea10e7e7f0a02114813
-R 2fc453038906bf8ee420a799d6f3c8a1
-T +closed 174c2b2eef5fecd96a5fc89b81032fe81f7801f12097cea10e7e7f0a02114813
+P 7f0c79b94e8f55e5013e52ba64ba8b32dad1dc4e2224d2099733cbc561de1810
+R 2684d5ad233d4af8d84cd782c9755832
+T *branch * json-label-compare
+T *sym-json-label-compare *
+T -sym-trunk *
 U drh
-Z fc61504cc821223c214ac8778fb2caf0
+Z 93dccc3939eaec1e85cde17c2d8ca8af
 # Remove this line to create a well-formed Fossil manifest.
index 634d6e05e0538a212cd0f8e8e845dcefa7300341..6aa957e936445f7b5420150e14d958c02fb37e30 100644 (file)
@@ -1 +1 @@
-7f0c79b94e8f55e5013e52ba64ba8b32dad1dc4e2224d2099733cbc561de1810
\ No newline at end of file
+2bc86d145fccc07107b7753cb1a69122676d4096fe59c454497bd81a6142d45e
\ No newline at end of file
index 8165eb4f319f591a014c463fce9d3d5a21702616..37a0ebfe1a32332ab89b9c7a1fef3d8a3999fc4b 100644 (file)
@@ -2119,6 +2119,188 @@ static void jsonBlobEdit(
   if( nIns && aIns ) memcpy(&pParse->aBlob[iDel], aIns, nIns);
 }
 
+/*
+** Return the number of leading bytes of z[] that are escaped newlines to be
+** ignored.  An escaped newline is one of the following byte sequences:
+**
+**    0x5c 0x0a
+**    0x5c 0x0d
+**    0x5c 0x0d 0x0a
+**    0x5c 0xe2 0x80 0xa8
+**    0x5c 0xe2 0x80 0xa9
+*/
+static u32 jsonBytesToBypass(const char *z, u32 n){
+  u32 i = 0;
+  while( i+1<n ){
+    if( z[i]!='\\' ) return i;
+    if( z[i+1]=='\n' ){
+      i += 2;
+      continue;
+    }
+    if( z[i+1]=='\r' ){
+      if( i+2<n && z[i+2]=='\n' ){
+        i += 3;
+      }else{
+        i += 2;
+      }
+      continue;
+    }
+    if( 0xe2==(u8)z[i+1]
+     && i+3<n
+     && 0x80==(u8)z[i+2]
+     && (0xa8==(u8)z[i+3] || 0xa9==(u8)z[i+3])
+    ){
+      i += 4;
+      continue;
+    }
+    break;
+  }
+  return i;
+}
+
+/*
+** Input z[0..n-1] begins with a JSON escape sequence including the leading '\\'.
+** Decode that escape sequence into a single character.  Write that
+** character into *piOut.  Return the number of bytes in the escape sequence.
+*/
+static u32 jsonUnescapeOneChar(const char *z, u32 n, u32 *piOut){
+  assert( n>0 );
+  assert( z[0]=='\\' );
+  if( n<2 ){
+    *piOut = 0xFFFD;
+    return n;
+  }
+  switch( (u8)z[1] ){
+    case 'u': {
+      u32 v, vlo;
+      if( n<6 ){
+        *piOut = 0xFFFD;
+        return n;
+      }
+      v = jsonHexToInt4(&z[2]);
+      if( (v & 0xfc00)==0xd800
+       && n>=12
+       && z[6]=='\\'
+       && z[7]=='u'
+       && ((vlo = jsonHexToInt4(&z[8]))&0xfc00)==0xdc00
+      ){
+        *piOut = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000;
+        return 12;
+      }else{
+        *piOut = v;
+        return 6;
+      }
+    }
+    case 'b': {   *piOut = '\b';  return 2; }
+    case 'f': {   *piOut = '\f';  return 2; }
+    case 'n': {   *piOut = '\n';  return 2; }
+    case 'r': {   *piOut = '\r';  return 2; }
+    case 't': {   *piOut = '\t';  return 2; }
+    case 'v': {   *piOut = '\v';  return 2; }
+    case '0': {   *piOut = 0;     return 2; }
+    case '\'':
+    case '"':
+    case '/':
+    case '\\':{   *piOut = z[1];  return 2; }
+    case 'x': {
+      if( n<4 ){
+        *piOut = 0xFFFD;
+        return n;
+      }
+      *piOut = (jsonHexToInt(z[2])<<4) | jsonHexToInt(z[3]);
+      return 4;
+    }
+    case 0xe2:
+    case '\r':
+    case '\n': {
+      u32 nSkip = jsonBytesToBypass(z, n);
+      if( nSkip==0 ){
+        *piOut = 0xFFFD;
+        return n;
+      }else if( nSkip==n ){
+        *piOut = 0;
+        return n;
+      }else if( z[nSkip]=='\\' ){
+        return nSkip + jsonUnescapeOneChar(&z[nSkip], n-nSkip, piOut);
+      }else{
+        *piOut = z[nSkip];
+        return nSkip+1;
+      }
+    }
+    default: {
+      *piOut = 0xFFFD;
+      return 2;
+    }
+  }
+}
+
+
+/*
+** Compare two object labels.  Return 1 if they are equal and
+** 0 if they differ.
+**
+** In this version, we know that one or the other or both of the
+** two comparands contains an escape sequence.
+*/
+static SQLITE_NOINLINE int jsonLabelCompareEscaped(
+  const char *zLeft,          /* The left label */
+  u32 nLeft,                  /* Size of the left label in bytes */
+  int rawLeft,                /* True if zLeft contains no escapes */
+  const char *zRight,         /* The right label */
+  u32 nRight,                 /* Size of the right label in bytes */
+  int rawRight                /* True if zRight is escape-free */
+){
+  u32 cLeft, cRight;
+  assert( rawLeft==0 || rawRight==0 );
+  while( nLeft>0 && nRight>0 ){
+    if( rawLeft || zLeft[0]!='\\' ){
+      cLeft = ((u8*)zLeft)[0];
+      zLeft++;
+      nLeft--;
+    }else{
+      u32 n = jsonUnescapeOneChar(zLeft, nLeft, &cLeft);
+      zLeft += n;
+      assert( n<=nLeft );
+      nLeft -= n;
+    }
+    if( rawRight || zRight[0]!='\\' ){
+      cRight = ((u8*)zRight)[0];
+      zRight++;
+      nRight--;
+    }else{
+      u32 n = jsonUnescapeOneChar(zRight, nRight, &cRight);
+      zRight += n;
+      assert( n<=nRight );
+      nRight -= n;
+    }
+    if( cLeft!=cRight ) return 0;
+  }
+  return nLeft==0 && nRight==0;
+}
+
+/*
+** Compare two object labels.  Return 1 if they are equal and
+** 0 if they differ.  Those are the only two values returned.
+*/
+static int jsonLabelCompare(
+  const char *zLeft,          /* The left label */
+  u32 nLeft,                  /* Size of the left label in bytes */
+  int rawLeft,                /* True if zLeft contains no escapes */
+  const char *zRight,         /* The right label */
+  u32 nRight,                 /* Size of the right label in bytes */
+  int rawRight                /* True if zRight is escape-free */
+){
+  if( rawLeft && rawRight ){
+    /* Simplest case:  Neither label contains escapes.  A simple
+    ** memcmp() is sufficient. */
+    if( nLeft!=nRight ) return 0;
+    return memcmp(zLeft, zRight, nLeft)==0;
+  }else{
+    return jsonLabelCompareEscaped(zLeft, nLeft, rawLeft,
+                                   zRight, nRight, rawRight);
+  }
+}
+
 /*
 ** Error returns from jsonLookupStep()
 */
@@ -2224,6 +2406,7 @@ static u32 jsonLookupStep(
     return iRoot;
   }
   if( zPath[0]=='.' ){
+    int rawKey = 1;
     x = pParse->aBlob[iRoot];
     zPath++;
     if( zPath[0]=='"' ){
@@ -2236,6 +2419,7 @@ static u32 jsonLookupStep(
         return JSON_LOOKUP_PATHERROR;
       }
       testcase( nKey==0 );
+      rawKey = memchr(zKey, '\\', nKey)==0;
     }else{
       zKey = zPath;
       for(i=0; zPath[i] && zPath[i]!='.' && zPath[i]!='['; i++){}
@@ -2249,13 +2433,17 @@ static u32 jsonLookupStep(
     j = iRoot + n;  /* j is the index of a label */
     iEnd = j+sz;
     while( j<iEnd ){
+      int rawLabel;
+      const char *zLabel;
       x = pParse->aBlob[j] & 0x0f;
       if( x<JSONB_TEXT || x>JSONB_TEXTRAW ) return JSON_LOOKUP_ERROR;
       n = jsonbPayloadSize(pParse, j, &sz);
       if( n==0 ) return JSON_LOOKUP_ERROR;
       k = j+n;  /* k is the index of the label text */
       if( k+sz>=iEnd ) return JSON_LOOKUP_ERROR;
-      if( sz==nKey && memcmp(&pParse->aBlob[k], zKey, nKey)==0 ){
+      zLabel = (const char*)&pParse->aBlob[k];
+      rawLabel = x==JSONB_TEXT || x==JSONB_TEXTRAW;
+      if( jsonLabelCompare(zKey, nKey, rawKey, zLabel, sz, rawLabel) ){
         u32 v = k+sz;  /* v is the index of the value */
         if( ((pParse->aBlob[v])&0x0f)>JSONB_OBJECT ) return JSON_LOOKUP_ERROR;
         n = jsonbPayloadSize(pParse, v, &sz);
@@ -2279,7 +2467,7 @@ static u32 jsonLookupStep(
       testcase( pParse->eEdit==JEDIT_INS );
       testcase( pParse->eEdit==JEDIT_SET );
       memset(&ix, 0, sizeof(ix));
-      jsonBlobAppendNode(&ix,JSONB_TEXTRAW, nKey, 0);
+      jsonBlobAppendNode(&ix, rawKey?JSONB_TEXTRAW:JSONB_TEXT5, nKey, 0);
       pParse->oom |= ix.oom;
       rc = jsonCreateEditSubstructure(pParse, &v, &zPath[i]);
       if( !JSON_LOOKUP_ISERROR(rc)
@@ -2483,72 +2671,27 @@ static void jsonReturnFromBlob(
       for(iIn=iOut=0; iIn<sz; iIn++){
         char c = z[iIn];
         if( c=='\\' ){
-          c = z[++iIn];
-          if( c=='u' ){
-            u32 v = jsonHexToInt4(z+iIn+1);
-            iIn += 4;
-            if( v==0 ) break;
-            if( v<=0x7f ){
-              zOut[iOut++] = (char)v;
-            }else if( v<=0x7ff ){
-              zOut[iOut++] = (char)(0xc0 | (v>>6));
-              zOut[iOut++] = 0x80 | (v&0x3f);
-            }else{
-              u32 vlo;
-              if( (v&0xfc00)==0xd800
-                && iIn<sz-6
-                && z[iIn+1]=='\\'
-                && z[iIn+2]=='u'
-                && ((vlo = jsonHexToInt4(z+iIn+3))&0xfc00)==0xdc00
-              ){
-                /* We have a surrogate pair */
-                v = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000;
-                iIn += 6;
-                zOut[iOut++] = 0xf0 | (v>>18);
-                zOut[iOut++] = 0x80 | ((v>>12)&0x3f);
-                zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
-                zOut[iOut++] = 0x80 | (v&0x3f);
-              }else{
-                zOut[iOut++] = 0xe0 | (v>>12);
-                zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
-                zOut[iOut++] = 0x80 | (v&0x3f);
-              }
-            }
-            continue;
-          }else if( c=='b' ){
-            c = '\b';
-          }else if( c=='f' ){
-            c = '\f';
-          }else if( c=='n' ){
-            c = '\n';
-          }else if( c=='r' ){
-            c = '\r';
-          }else if( c=='t' ){
-            c = '\t';
-          }else if( c=='v' ){
-            c = '\v';
-          }else if( c=='\'' || c=='"' || c=='/' || c=='\\' ){
-            /* pass through unchanged */
-          }else if( c=='0' ){
-            c = 0;
-          }else if( c=='x' ){
-            c = (jsonHexToInt(z[iIn+1])<<4) | jsonHexToInt(z[iIn+2]);
-            iIn += 2;
-          }else if( c=='\r' && z[i+1]=='\n' ){
-            iIn++;
-            continue;
-          }else if( 0xe2==(u8)c
-                 && iIn<sz-2
-                 && 0x80==(u8)z[iIn+1]
-                 && (0xa8==(u8)z[iIn+2] || 0xa9==(u8)z[iIn+2])
-          ){
-            iIn += 2;
-            continue;
+          u32 v;
+          u32 szEscape = jsonUnescapeOneChar(&z[iIn], sz-iIn, &v);
+          if( v<=0x7f ){
+            zOut[iOut++] = (char)v;
+          }else if( v<=0x7ff ){
+            zOut[iOut++] = (char)(0xc0 | (v>>6));
+            zOut[iOut++] = 0x80 | (v&0x3f);
+          }else if( v<0x10000 ){
+            zOut[iOut++] = 0xe0 | (v>>12);
+            zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
+            zOut[iOut++] = 0x80 | (v&0x3f);
           }else{
-            continue;
+            zOut[iOut++] = 0xf0 | (v>>18);
+            zOut[iOut++] = 0x80 | ((v>>12)&0x3f);
+            zOut[iOut++] = 0x80 | ((v>>6)&0x3f);
+            zOut[iOut++] = 0x80 | (v&0x3f);
           }
-        } /* end if( c=='\\' ) */
-        zOut[iOut++] = c;
+          iIn += szEscape - 1;
+        }else{
+          zOut[iOut++] = c;
+        }
       } /* end for() */
       zOut[iOut] = 0;
       sqlite3_result_text(pCtx, zOut, iOut, sqlite3_free);
@@ -3384,6 +3527,7 @@ static int jsonMergePatch(
     iTCursor = iTStart;
     iTEnd = iTEndBE + pTarget->delta;
     while( iTCursor<iTEnd ){
+      int isEqual;   /* true if the patch and target labels match */
       iTLabel = iTCursor;
       eTLabel = pTarget->aBlob[iTCursor] & 0x0f;
       if( eTLabel<JSONB_TEXT || eTLabel>JSONB_TEXTRAW ){
@@ -3396,33 +3540,14 @@ static int jsonMergePatch(
       nTValue = jsonbPayloadSize(pTarget, iTValue, &szTValue);
       if( nTValue==0 ) return JSON_MERGE_BADTARGET;
       if( iTValue + nTValue + szTValue > iTEnd ) return JSON_MERGE_BADTARGET;
-      if( eTLabel==ePLabel ){
-        /* Common case */
-        if( szTLabel==szPLabel
-         && memcmp(&pTarget->aBlob[iTLabel+nTLabel],
-                   &pPatch->aBlob[iPLabel+nPLabel], szTLabel)==0
-        ){
-          break;  /* Labels match. */
-        }
-      }else{
-        /* Should rarely happen */
-        JsonString s1, s2;
-        int isEqual, isOom;
-        jsonStringInit(&s1, 0);
-        jsonXlateBlobToText(pTarget, iTLabel, &s1);
-        jsonStringInit(&s2, 0);
-        jsonXlateBlobToText(pPatch, iPLabel, &s2);
-        isOom = s1.eErr || s2.eErr;
-        if( s1.nUsed==s2.nUsed && memcmp(s1.zBuf, s2.zBuf, s1.nUsed)==0 ){
-          isEqual = 1;
-        }else{
-          isEqual = 0;
-        }
-        jsonStringReset(&s1);
-        jsonStringReset(&s2);
-        if( isOom ) return JSON_MERGE_OOM;
-        if( isEqual ) break;
-      }
+      isEqual = jsonLabelCompare(
+                   (const char*)&pPatch->aBlob[iPLabel+nPLabel],
+                   szPLabel,
+                   (ePLabel==JSONB_TEXT || ePLabel==JSONB_TEXTRAW),
+                   (const char*)&pTarget->aBlob[iTLabel+nTLabel],
+                   szTLabel,
+                   (eTLabel==JSONB_TEXT || eTLabel==JSONB_TEXTRAW));
+      if( isEqual ) break;
       iTCursor = iTValue + nTValue + szTValue;
     }
     x = pPatch->aBlob[iPValue] & 0x0f;