]> git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
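When scanning the full-text index as part of the fts5 integrity-check, also run a point query for every term and verify that these results are consistent with those found by the linear scan.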
author dan <dan@noemail.net>
Sat, 21 Mar 2015 15:37:19 +0000 (15:37 +0000)
committer dan <dan@noemail.net>
Sat, 21 Mar 2015 15:37:19 +0000 (15:37 +0000)
FossilOrigin-Name: ce972f6aab90f6929d018696f1ab3c2649eca802

ext/fts5/fts5Int.h
ext/fts5/fts5_index.c
manifest
manifest.uuid

index 3e2045d4c1280643f6d9302275a1306d4be532f8..2065d9d3cf1ffec465b5ea0ddeb8575c11097a6e 100644 (file)
@@ -242,7 +242,7 @@ int sqlite3Fts5IndexClose(Fts5Index *p, int bDestroy);
 **   0==sqlite3Fts5IterEof(pIter);
 **   sqlite3Fts5IterNext(pIter)
 ** ){
-**   i64 iDocid = sqlite3Fts5IndexDocid(pIter);
+**   i64 iRowid = sqlite3Fts5IterRowid(pIter);
 ** }
 */
 
index 9fc9d2b7993c742f2904f33d9b7de9af071ef1d9..7ce2e2fbc4b6b7ee384c0ed69334f395d58e156c 100644 (file)
@@ -4312,6 +4312,8 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
   Fts5Config *pConfig = p->pConfig;
   int iIdx;                       /* Used to iterate through indexes */
   u64 cksum2 = 0;                 /* Checksum based on contents of indexes */
+  u64 cksum3 = 0;                 /* Checksum based on per-term index queries */
+  Fts5Buffer term = {0,0,0};      /* Buffer used to hold most recent term */
 
   /* Check that the internal nodes of each segment match the leaves */
   for(iIdx=0; p->rc==SQLITE_OK && iIdx<=pConfig->nPrefix; iIdx++){
@@ -4328,7 +4330,19 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
     fts5StructureRelease(pStruct);
   }
 
-  /* Check that the checksum of the index matches the argument checksum */
+  /* The cksum argument passed to this function is a checksum calculated
+  ** based on all expected entries in the FTS index (including prefix index
+  ** entries). This block checks that a checksum calculated based on the
+  ** actual contents of FTS index is identical.
+  **
+  ** Two versions of the same checksum are calculated. The first (stack
+  ** variable cksum2) is based on entries extracted from the full-text index
+  ** while doing a linear scan of each individual index in turn.
+  **
+  ** As each term is visited by the linear scans, a separate query for the
+  ** same term is performed. cksum3 is calculated based on the entries
+  ** extracted by these queries.
+  */
   for(iIdx=0; iIdx<=pConfig->nPrefix; iIdx++){
     Fts5MultiSegIter *pIter;
     Fts5Structure *pStruct = fts5StructureRead(p, iIdx);
@@ -4341,25 +4355,50 @@ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){
       i64 iRowid = fts5MultiIterRowid(pIter);
       char *z = (char*)fts5MultiIterTerm(pIter, &n);
 
+      /* Update cksum2 with the entries associated with the current term
+      ** and rowid.  */
       for(fts5PosIterInit(p, pIter, &sPos);
           fts5PosIterEof(p, &sPos)==0;
           fts5PosIterNext(p, &sPos)
       ){
         cksum2 ^= fts5IndexEntryCksum(iRowid, sPos.iCol, sPos.iPos, z, n);
-#if 0
-        fprintf(stdout, "rowid=%d ", (int)iRowid);
-        fprintf(stdout, "term=%.*s ", n, z);
-        fprintf(stdout, "col=%d ", sPos.iCol);
-        fprintf(stdout, "off=%d\n", sPos.iPos);
-        fflush(stdout);
-#endif
+      }
+
+      /* If this is a new term, query for it. Update cksum3 with the results. */
+      if( p->rc==SQLITE_OK && (term.n!=n || memcmp(term.p, z, n)) ){
+        Fts5IndexIter *pIdxIter = 0;
+        int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX);
+        int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter);
+        while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){
+          const u8 *pPos;
+          int nPos;
+          i64 rowid = sqlite3Fts5IterRowid(pIdxIter);
+          rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos);
+          if( rc==SQLITE_OK ){
+            Fts5PoslistReader sReader;
+            for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader);
+                sReader.bEof==0;
+                sqlite3Fts5PoslistReaderNext(&sReader)
+            ){
+              int iCol = FTS5_POS2COLUMN(sReader.iPos);
+              int iOff = FTS5_POS2OFFSET(sReader.iPos);
+              cksum3 ^= fts5IndexEntryCksum(rowid, iCol, iOff, z, n);
+            }
+            rc = sqlite3Fts5IterNext(pIdxIter);
+          }
+        }
+        sqlite3Fts5IterClose(pIdxIter);
+        fts5BufferSet(&rc, &term, n, (const u8*)z);
+        p->rc = rc;
       }
     }
     fts5MultiIterFree(p, pIter);
     fts5StructureRelease(pStruct);
   }
   if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT;
+  if( p->rc==SQLITE_OK && cksum!=cksum3 ) p->rc = FTS5_CORRUPT;
 
+  fts5BufferFree(&term);
   return fts5IndexReturn(p);
 }
 
index aa18705b891169a15be716f7b6e8b8cf4a4a7704..d7abe658959f08311ebb5fa6e1b9bcbf0c5c9ca4 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\san\soptimization\sto\sthe\sfts5\sunicode\stokenizer\scode.
-D 2015-03-11T14:51:39.375
+C When\sscanning\sthe\sfull-text\sindex\sas\spart\sof\sthe\sfts5\sintegrity-check,\salso\srun\sa\spoint\squery\sfor\severy\sterm\sand\sverify\sthat\sthese\sresults\sare\sconsistent\swith\sthose\sfound\sby\sthe\slinear\sscan.
+D 2015-03-21T15:37:19.761
 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
 F Makefile.in 5407a688f4d77a05c18a8142be8ae5a2829dd610
 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -106,13 +106,13 @@ F ext/fts3/unicode/mkunicode.tcl 159c1194da0bc72f51b3c2eb71022568006dc5ad
 F ext/fts5/extract_api_docs.tcl 55a6d648d516f35d9a1e580ac00de27154e1904a
 F ext/fts5/fts5.c 1eb8ca073be5222c43e4eee5408764c2cbb4200b
 F ext/fts5/fts5.h 24a2cc35b5e76eec57b37ba48c12d9d2cb522b3a
-F ext/fts5/fts5Int.h 1dcb02943f3a55d275d5473911a7e991d638c73c
+F ext/fts5/fts5Int.h 8d09f7894e83b00a18a7e2149354a153904002df
 F ext/fts5/fts5_aux.c fcea18b1a2a3f95a498b52aba2983557d7678a22
 F ext/fts5/fts5_buffer.c 29f79841bf6eef5220eef41b122419b1bcb07b06
 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894
 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98
 F ext/fts5/fts5_hash.c 9959b5408f649487d4b0ee081416f37dc3cd8cdd
-F ext/fts5/fts5_index.c b00f7147f9660e66d9d1a8149d4faea3a06cd48e
+F ext/fts5/fts5_index.c 4adc5e18ab6d0648faeb18f18a89d6aec57d77be
 F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d
 F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541
 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b
@@ -1284,7 +1284,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P 5c46820d9b4aae791a8704b69145bd81f1e6780d
-R 3ab21807b5c69b091448271467250abb
+P f5db489250029678fce845dfb2b1109fde46bea5
+R 258f390b03b29f1c61f33d36002f03e9
 U dan
-Z 588a6ddf00bd9c069b244e50951b58e0
+Z 6cf104c99eec6cd34913e25fa048b8ab
index 9a4fa3320df658feaedc60af92824cafc8347e35..f5fccf09fafcfec1a5ed9e473ebcac31cc1ae43e 100644 (file)
@@ -1 +1 @@
-f5db489250029678fce845dfb2b1109fde46bea5
\ No newline at end of file
+ce972f6aab90f6929d018696f1ab3c2649eca802
\ No newline at end of file