git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Have the fts5 integrity-check verify that doclist indexes match the contents of the leaf pages that they index.
author dan <dan@noemail.net>
Fri, 1 Aug 2014 19:27:07 +0000 (19:27 +0000)
committer dan <dan@noemail.net>
Fri, 1 Aug 2014 19:27:07 +0000 (19:27 +0000)
FossilOrigin-Name: 37a7d3035eb4bbad7e32fe550321ac9fae611a57

ext/fts5/fts5_index.c
manifest
manifest.uuid
test/fts5ah.test
test/permutations.test

index 144375104693fd765415baa026c60462b9c4e0ad..685492d0b743e7ec4a773c81cc3f1bd76dcd0f38 100644 (file)
@@ -255,6 +255,12 @@ static int fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
 # define FTS5_CORRUPT SQLITE_CORRUPT_VTAB
 #endif
 
+/*
+** fts5MissingData() is invoked by fts5DataReadOrBuffer() when the result
+** code obtained while positioning the blob handle on a record is non-zero.
+** In SQLITE_DEBUG builds it is a real function that does nothing except
+** return 0 - presumably so that a debugger breakpoint can be set on it
+** (NOTE(review): confirm intent). In all other builds the macro expands
+** to nothing, leaving an empty statement at the call site.
+*/
+#ifdef SQLITE_DEBUG
+static int fts5MissingData() { return 0; }
+#else
+# define fts5MissingData() 
+#endif
+
 
 typedef struct Fts5BtreeIter Fts5BtreeIter;
 typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
@@ -530,6 +536,7 @@ struct Fts5NodeIter {
   Fts5Buffer term;
   int nEmpty;
   int iChild;
+  int bDlidx;
 };
 
 /*
@@ -566,6 +573,7 @@ struct Fts5BtreeIter {
   int iLeaf;                      /* Leaf containing terms >= current term */
   int nEmpty;                     /* Number of "empty" leaves following iLeaf */
   int bEof;                       /* Set to true at EOF */
+  int bDlidx;                     /* True if there exists a dlidx */
 };
 
 static void fts5PutU16(u8 *aOut, u16 iVal){
@@ -670,6 +678,8 @@ static Fts5Data *fts5DataReadOrBuffer(
       rc = sqlite3_blob_reopen(p->pReader, iRowid);
     }
 
+    if( rc ) fts5MissingData();
+
     if( rc==SQLITE_OK ){
       int nByte = sqlite3_blob_bytes(p->pReader);
       if( pBuf ){
@@ -980,10 +990,12 @@ static void fts5StructureWrite(Fts5Index *p, int iIdx, Fts5Structure *pStruct){
 */
 static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){
+  /* A byte with value 0x00 or 0x01 at the current offset (all bits other
+  ** than the least significant are clear) introduces an "nEmpty" record.
+  ** The least significant bit of that byte is copied into bDlidx and the
+  ** varint that follows it is read into nEmpty. */
   if( pIter->iOff<pIter->nData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){
+    pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01;
     pIter->iOff++;
     pIter->iOff += getVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty);
   }else{
+    /* No such record at this offset (or the end of the data has been
+    ** reached): clear both nEmpty and the bDlidx flag. */
     pIter->nEmpty = 0;
+    pIter->bDlidx = 0;
   }
 }
 
@@ -2082,13 +2094,15 @@ static int fts5PrefixCompress(
 */
 static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
   if( pWriter->nEmpty ){
-    Fts5PageWriter *pPg = &pWriter->aWriter[1];
     int bFlag = 0;
+    Fts5PageWriter *pPg;
+    pPg = &pWriter->aWriter[1];
     if( pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
       i64 iKey = FTS5_DOCLIST_IDX_ROWID(
           pWriter->iIdx, pWriter->iSegid, 
           pWriter->aWriter[0].pgno - 1 - pWriter->nEmpty
       );
+      assert( pWriter->dlidx.n>0 );
       fts5DataWrite(p, iKey, pWriter->dlidx.p, pWriter->dlidx.n);
       bFlag = 1;
     }
@@ -2103,6 +2117,22 @@ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
   pWriter->bDlidxPrevValid = 0;
 }
 
+/*
+** Extend the pWriter->aWriter[] array by one entry, adding a new page-writer
+** for the next level of the b-tree.
+**
+** The new entry is zeroed, its page number is set to 1 and a single varint
+** with value 1 is appended to its (empty) buffer. On success
+** pWriter->nWriter is incremented and pWriter->aWriter points at the
+** (possibly relocated) array.
+**
+** If the array cannot be enlarged, p->rc is set to SQLITE_NOMEM and pWriter
+** is left unmodified. Callers detect failure by testing p->rc, so the error
+** code must be set here - otherwise a caller would go on to index
+** aWriter[] past the end of the old allocation.
+*/
+static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
+  Fts5PageWriter *aNew;
+  Fts5PageWriter *pNew;
+  int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);
+
+  /* Keep the old pointer until the realloc is known to have succeeded. */
+  aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew);
+  if( aNew==0 ){
+    p->rc = SQLITE_NOMEM;
+    return;
+  }
+
+  pNew = &aNew[pWriter->nWriter];
+  memset(pNew, 0, sizeof(Fts5PageWriter));
+  pNew->pgno = 1;
+  fts5BufferAppendVarint(&p->rc, &pNew->buf, 1);
+
+  /* The old array may have been freed by the realloc, so the new pointer
+  ** is stored even if the buffer append above reported an error. */
+  pWriter->nWriter++;
+  pWriter->aWriter = aNew;
+}
 
 /*
 ** This is called once for each leaf page except the first that contains
@@ -2123,19 +2153,8 @@ static void fts5WriteBtreeTerm(
     Fts5PageWriter *pPage;
 
     if( iHeight>=pWriter->nWriter ){
-      Fts5PageWriter *aNew;
-      Fts5PageWriter *pNew;
-      int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);
-      aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew);
-      if( aNew==0 ) return;
-
-      pNew = &aNew[pWriter->nWriter];
-      memset(pNew, 0, sizeof(Fts5PageWriter));
-      pNew->pgno = 1;
-      fts5BufferAppendVarint(&p->rc, &pNew->buf, 1);
-
-      pWriter->nWriter++;
-      pWriter->aWriter = aNew;
+      fts5WriteBtreeGrow(p, pWriter);
+      if( p->rc ) return;
     }
     pPage = &pWriter->aWriter[iHeight];
 
@@ -2202,6 +2221,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
 
   if( pPage->term.n==0 ){
     /* No term was written to this page. */
+    assert( 0==fts5GetU16(&pPage->buf.p[2]) );
     fts5WriteBtreeNoTerm(p, pWriter);
   }
 
@@ -2379,11 +2399,15 @@ static void fts5WriteFinish(
 ){
   int i;
   *pnLeaf = pWriter->aWriter[0].pgno;
-  *pnHeight = pWriter->nWriter;
   fts5WriteFlushLeaf(p, pWriter);
+  if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
+    fts5WriteBtreeGrow(p, pWriter);
+  }
   if( pWriter->nWriter>1 ){
     fts5WriteBtreeNEmpty(p, pWriter);
   }
+  *pnHeight = pWriter->nWriter;
+
   for(i=1; i<pWriter->nWriter; i++){
     Fts5PageWriter *pPg = &pWriter->aWriter[i];
     i64 iRow = FTS5_SEGMENT_ROWID(pWriter->iIdx, pWriter->iSegid, i, pPg->pgno);
@@ -2905,6 +2929,7 @@ static void fts5BtreeIterInit(
   }else{
     pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
     pIter->iLeaf = pIter->aLvl[0].s.iChild;
+    pIter->bDlidx = pIter->aLvl[0].s.bDlidx;
   }
 }
 
@@ -2940,6 +2965,7 @@ static void fts5BtreeIterNext(Fts5BtreeIter *pIter){
   }
 
   pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
+  pIter->bDlidx = pIter->aLvl[0].s.bDlidx;
   pIter->iLeaf = pIter->aLvl[0].s.iChild;
   assert( p->rc==SQLITE_OK || pIter->bEof );
 }
@@ -2958,6 +2984,37 @@ static void fts5BtreeIterFree(Fts5BtreeIter *pIter){
   fts5BufferFree(&pIter->term);
 }
 
+/*
+** State used by the integrity-check to step through the entries of one
+** doclist index record. The structure is zeroed with memset() before use
+** and advanced with fts5IndexDoclistIterNext(); after each successful step
+** bZero and iRowid describe the entry just read.
+*/
+typedef struct DoclistIdxIter DoclistIdxIter;
+struct DoclistIdxIter {
+  Fts5Data *pDlidx;             /* Data for doclist index, if any */
+  int iOff;                     /* Current offset into pDlidx */
+  int bRowidValid;              /* iRowid is valid */
+
+  int bZero;                    /* True if current leaf has no rowid */
+  i64 iRowid;                   /* If bZero==0, first rowid on leaf */
+};
+
+/*
+** Advance pIter to the next entry of its doclist index record.
+**
+** Each entry is a single varint. A value of zero sets bZero, leaving
+** iRowid untouched. The first non-zero value read is stored as iRowid
+** and marks it valid; every later non-zero value is subtracted from
+** the current iRowid.
+**
+** Return 1 if the iterator was already at the end of the record (in which
+** case nothing is modified), or 0 if an entry was read.
+*/
+static int fts5IndexDoclistIterNext(DoclistIdxIter *pIter){
+  Fts5Data *pData = pIter->pDlidx;
+  i64 iDelta;
+
+  if( pIter->iOff>=pData->n ) return 1;
+  pIter->iOff += getVarint(&pData->p[pIter->iOff], (u64*)&iDelta);
+
+  pIter->bZero = (iDelta==0);
+  if( iDelta!=0 ){
+    if( pIter->bRowidValid==0 ){
+      /* First rowid in the index: stored as an absolute value */
+      pIter->bRowidValid = 1;
+      pIter->iRowid = iDelta;
+    }else{
+      /* Subsequent rowids: stored as a difference from the previous one */
+      pIter->iRowid -= iDelta;
+    }
+  }
+  return 0;
+}
+
 static void fts5IndexIntegrityCheckSegment(
   Fts5Index *p,                   /* FTS5 backend object */
   int iIdx,                       /* Index that pSeg is a part of */
@@ -2974,6 +3031,7 @@ static void fts5IndexIntegrityCheckSegment(
     Fts5Data *pLeaf;              /* Data for this leaf */
     int iOff;                     /* Offset of first term on leaf */
     int i;                        /* Used to iterate through empty leaves */
+    DoclistIdxIter dliter;        /* For iterating through any doclist index */
 
     /* If the leaf in question has already been trimmed from the segment, 
     ** ignore this b-tree entry. Otherwise, load it into memory. */
@@ -3000,6 +3058,12 @@ static void fts5IndexIntegrityCheckSegment(
     fts5DataRelease(pLeaf);
     if( p->rc ) break;
 
+    memset(&dliter, 0, sizeof(DoclistIdxIter));
+    if( iter.bDlidx ){
+      i64 iDlidxRowid = FTS5_DOCLIST_IDX_ROWID(iIdx, pSeg->iSegid, iter.iLeaf);
+      dliter.pDlidx = fts5DataRead(p, iDlidxRowid);
+    }
+
     /* Now check that the iter.nEmpty leaves following the current leaf
     ** (a) exist and (b) contain no terms. */
     for(i=1; i<=iter.nEmpty; i++){
@@ -3007,8 +3071,23 @@ static void fts5IndexIntegrityCheckSegment(
       if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
         p->rc = FTS5_CORRUPT;
       }
+      if( pLeaf && dliter.pDlidx ){
+        if( fts5IndexDoclistIterNext(&dliter) ){
+          p->rc = FTS5_CORRUPT;
+        }else{
+          int iRowidOff = fts5GetU16(&pLeaf->p[0]);
+          if( dliter.bZero ){
+            if( iRowidOff!=0 ) p->rc = FTS5_CORRUPT;
+          }else{
+            i64 iRowid;
+            getVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid);
+            if( iRowid!=dliter.iRowid ) p->rc = FTS5_CORRUPT;
+          }
+        }
+      }
       fts5DataRelease(pLeaf);
     }
+    fts5DataRelease(dliter.pDlidx);
   }
 
   if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
@@ -3218,7 +3297,7 @@ static void fts5DecodeFunction(
     int i = 0;
     i64 iPrev;
     sqlite3Fts5BufferAppendPrintf(&rc, &s, "(dlidx idx=%d segid=%d pgno=%d)",
-        iIdx, iSegid, iHeight, iPgno
+        iIdx, iSegid, iPgno
     );
     if( n>0 ){
       i = getVarint(&a[i], (u64*)&iPrev);
@@ -3305,7 +3384,9 @@ static void fts5DecodeFunction(
           );
         }
         if( ss.nEmpty ){
-          sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d", ss.nEmpty);
+          sqlite3Fts5BufferAppendPrintf(&rc, &s, " empty=%d%s", ss.nEmpty,
+              ss.bDlidx ? "*" : ""
+          );
         }
       }
       fts5NodeIterFree(&ss);
index f006bde39eb0c33d4d261824d023df86c7e4993f..ec7f134bbc257652b5080e2caffeecaa5a82d50e 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\s"doclist\sindex"\srecords\sto\sthe\sdatabase.\sThese\sare\sto\smake\snavigating\swithin\svery\slarge\sdoclists\sfaster.\sThey\sare\snot\syet\sused\sby\squeries.
-D 2014-08-01T11:16:25.207
+C Have\sthe\sfts5\sintegrity-check\sverify\sthat\sdoclist\sindexes\smatch\sthe\scontents\sof\sthe\sleaf\spages\sthat\sthey\sindex.
+D 2014-08-01T19:27:07.492
 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
 F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -110,7 +110,7 @@ F ext/fts5/fts5_aux.c 366057c7186bc3615deb5ecc0ff61de50b6d2dbc
 F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
 F ext/fts5/fts5_config.c f4ebf143e141b8c77355e3b15aba81b7be51d710
 F ext/fts5/fts5_expr.c e764d75c58a3accda795f1da1b45960ac87dc77a
-F ext/fts5/fts5_index.c 618d54ecf41887b6db59491b71e654ae3315f8c9
+F ext/fts5/fts5_index.c 3e33e3b86f026fc5b2cb3c573ba05375c8e4de0b
 F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7
 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
@@ -602,7 +602,7 @@ F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07
 F test/fts5ae.test cb37b3135a00d3afd5492ec534ecf654be5ff69e
 F test/fts5af.test 9ebe23aa3875896076952c7bc6e8308813a63c74
 F test/fts5ag.test 0747bf3bade16d5165810cf891f875933b28b420
-F test/fts5ah.test bfa6ebd7ee87f73c4146b9e316a105fd0e43d01a
+F test/fts5ah.test dfb54897c470e2dcf88912fc4f5b1ca4ac8307f7
 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4
 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
 F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef
@@ -770,7 +770,7 @@ F test/pagesize.test 1dd51367e752e742f58e861e65ed7390603827a0
 F test/pcache.test b09104b03160aca0d968d99e8cd2c5b1921a993d
 F test/pcache2.test a83efe2dec0d392f814bfc998def1d1833942025
 F test/percentile.test b98fc868d71eb5619d42a1702e9ab91718cbed54
-F test/permutations.test 5f1f942bae4139b33626b82627aa262c0f72d936
+F test/permutations.test 542edb965245565d06b9284e708f17bb93d70691
 F test/pragma.test adb21a90875bc54a880fa939c4d7c46598905aa0
 F test/pragma2.test aea7b3d82c76034a2df2b38a13745172ddc0bc13
 F test/printf.test ec9870c4dce8686a37818e0bf1aba6e6a1863552
@@ -1199,7 +1199,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P e6af3b7a3cf331210f4c87848e2af007dbd5ef30
-R a017a4de54c141d4f4f840978af83e33
+P 89377421ff69f2450364987afe781b6d8bcbf087
+R 49a5d37abb265ab7fa662e06ee8ea874
 U dan
-Z 90f2786a7e9f28e43c6798f77c65d6dc
+Z 90844fe42071f9a2a3f80f69e16c73d8
index 5a6a2d5b548ea71b0a28cfb1b47ce7298bf1a021..e25707dfd10d9b20ea6bbadf221707dbf6492f1a 100644 (file)
@@ -1 +1 @@
-89377421ff69f2450364987afe781b6d8bcbf087
\ No newline at end of file
+37a7d3035eb4bbad7e32fe550321ac9fae611a57
\ No newline at end of file
index 88fd524eb972eafa0eecd628677468bff2117144..f5d1eee1b9a445ff1da91cefca60709a09046235 100644 (file)
@@ -49,9 +49,6 @@ do_execsql_test 1.3 {
   INSERT INTO t1(t1) VALUES('integrity-check');
 }
 
-do_execsql_test 1.4 {
-  SELECT count(*) FROM t1_data
-}
 
 
 finish_test
index 9587d3bef0d5da941f831b84893e4ae6c4a3f6b1..41659ef8981b1ecdfa03b8fc5d9c1721841d27d0 100644 (file)
@@ -226,7 +226,7 @@ test_suite "fts5" -prefix "" -description {
   All FTS5 tests.
 } -files {
   fts5aa.test fts5ab.test fts5ac.test fts5ad.test fts5ae.test fts5ea.test
-  fts5af.test fts5ag.test
+  fts5af.test fts5ag.test fts5ah.test
 }
 
 test_suite "nofaultsim" -prefix "" -description {