From: dan <dan@noemail.net>
Date: Fri, 29 Oct 2010 18:45:10 +0000 (+0000)
Subject: Add extra test cases and changes to fts3 to avoid crashing on a corrupt database.
X-Git-Tag: version-3.7.4~82
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=797f3ee87cae877119eeda0847f74bf8064a669f;p=thirdparty%2Fsqlite.git

Add extra test cases and changes to fts3 to avoid crashing on a corrupt database.

FossilOrigin-Name: 252f0e457d3e33404df87d3e6c44ede61b78319c
---

diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c
index 50dafe06ee..4982c87042 100644
--- a/ext/fts3/fts3_write.c
+++ b/ext/fts3/fts3_write.c
@@ -24,6 +24,18 @@
 #include <assert.h>
 #include <stdlib.h>
 
+/*
+** When full-text index nodes are loaded from disk, the buffer that they
+** are loaded into has the following number of bytes of padding at the end 
+** of it. For example, if a full-text index node is 900 bytes in size, then a
+** buffer of 920 bytes is allocated for it.
+**
+** This means that if we have a pointer into a buffer containing node data,
+** it is always safe to read up to two varints from it without risking an
+** overread, even if the node data is corrupted.
+*/
+#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2)
+
 typedef struct PendingList PendingList;
 typedef struct SegmentNode SegmentNode;
 typedef struct SegmentWriter SegmentWriter;
@@ -819,7 +831,7 @@ int sqlite3Fts3ReadBlock(
   if( rc==SQLITE_OK ){
     int nByte = sqlite3_blob_bytes(p->pSegments);
     if( paBlob ){
-      char *aByte = sqlite3_malloc(nByte);
+      char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING);
       if( !aByte ){
         rc = SQLITE_NOMEM;
       }else{
@@ -900,8 +912,16 @@ static int fts3SegReaderNext(Fts3Table *p, Fts3SegReader *pReader){
     pNext = pReader->aNode;
   }
   
+  /* Because of the FTS3_NODE_PADDING bytes of padding, the following is 
+  ** safe (no risk of overread) even if the node data is corrupted.  
+  */
   pNext += sqlite3Fts3GetVarint32(pNext, &nPrefix);
   pNext += sqlite3Fts3GetVarint32(pNext, &nSuffix);
+  if( nPrefix<0 || nSuffix<=0 
+   || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] 
+  ){
+    return SQLITE_CORRUPT;
+  }
 
   if( nPrefix+nSuffix>pReader->nTermAlloc ){
     int nNew = (nPrefix+nSuffix)*2;
@@ -920,12 +940,11 @@ static int fts3SegReaderNext(Fts3Table *p, Fts3SegReader *pReader){
   pReader->pOffsetList = 0;
 
   /* Check that the doclist does not appear to extend past the end of the
-  ** b-tree node. And that the final byte of the doclist is either an 0x00 
-  ** or 0x01. If either of these statements is untrue, then the data structure 
-  ** is corrupt.
+  ** b-tree node. And that the final byte of the doclist is 0x00. If either 
+  ** of these statements is untrue, then the data structure is corrupt.
   */
   if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] 
-   || (pReader->aDoclist[pReader->nDoclist-1]&0xFE)!=0
+   || pReader->aDoclist[pReader->nDoclist-1]
   ){
     return SQLITE_CORRUPT;
   }
@@ -1106,7 +1125,7 @@ int sqlite3Fts3SegReaderNew(
 
   assert( iStartLeaf<=iEndLeaf );
   if( iStartLeaf==0 ){
-    nExtra = nRoot;
+    nExtra = nRoot + FTS3_NODE_PADDING;
   }
 
   pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra);
@@ -1127,7 +1146,6 @@ int sqlite3Fts3SegReaderNew(
   }else{
     pReader->iCurrentBlock = iStartLeaf-1;
   }
-  rc = fts3SegReaderNext(p, pReader);
 
   if( rc==SQLITE_OK ){
     *ppReader = pReader;
@@ -1223,7 +1241,6 @@ int sqlite3Fts3SegReaderPending(
       pReader->iIdx = 0x7FFFFFFF;
       pReader->ppNextElem = (Fts3HashElem **)&pReader[1];
       memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *));
-      fts3SegReaderNext(p, pReader);
     }
   }
 
@@ -2095,15 +2112,14 @@ int sqlite3Fts3SegReaderIterate(
   ** unnecessary merge/sort operations for the case where single segment
   ** b-tree leaf nodes contain more than one term.
   */
-  if( pFilter->zTerm ){
+  for(i=0; i<nSegment; i++){
     int nTerm = pFilter->nTerm;
     const char *zTerm = pFilter->zTerm;
-    for(i=0; i<nSegment; i++){
-      Fts3SegReader *pSeg = apSegment[i];
-      while( fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 ){
-        rc = fts3SegReaderNext(p, pSeg);
-        if( rc!=SQLITE_OK ) goto finished; }
-    }
+    Fts3SegReader *pSeg = apSegment[i];
+    do {
+      rc = fts3SegReaderNext(p, pSeg);
+      if( rc!=SQLITE_OK ) goto finished;
+    }while( zTerm && fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 );
   }
 
   fts3SegReaderSort(apSegment, nSegment, nSegment, fts3SegReaderCmp);
diff --git a/manifest b/manifest
index e90c58bba8..f0906b1e70 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\snew\s"dynamic_triggers"\stest\scase\sto\sthreadtest3.c.
-D 2010-10-28T15:52:05
+C Add\sextra\stest\scases\sand\schanges\sto\sfts3\sto\savoid\scrashing\son\sa\scorrupt\sdatabase.
+D 2010-10-29T18:45:11
 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
 F Makefile.in 2c8cefd962eca0147132c7cf9eaa4bb24c656f3f
 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -73,7 +73,7 @@ F ext/fts3/fts3_snippet.c 300c12b7f0a2a6ae0491bb2d00e2d5ff9c28f685
 F ext/fts3/fts3_tokenizer.c b4f2d01c24573852755bc92864816785dae39318
 F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3
 F ext/fts3/fts3_tokenizer1.c 6e5cbaa588924ac578263a598e4fb9f5c9bb179d
-F ext/fts3/fts3_write.c a9189fa8719158b695f33e1490f56256308e2525
+F ext/fts3/fts3_write.c f2f5caed5ebaff2f6f00724063b415a27b02269d
 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9
 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100
 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9
@@ -431,7 +431,7 @@ F test/fts3ao.test b83f99f70e9eec85f27d75801a974b3f820e01f9
 F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa
 F test/fts3b.test e93bbb653e52afde110ad53bbd793f14fe7a8984
 F test/fts3c.test fc723a9cf10b397fdfc2b32e73c53c8b1ec02958
-F test/fts3corrupt.test 8d2ef629be9eff997db32ec3f0f7b53b0c61d086
+F test/fts3corrupt.test 33136f735b072c30cb81506a778c553e1dbfee24
 F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7
 F test/fts3d.test 95fb3c862cbc4297c93fceb9a635543744e9ef52
 F test/fts3defer.test eab4f24c8402fb4e1e6aad44bcdfbe5bf42160b2
@@ -880,7 +880,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
 F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P 2625eee0cb7f22dba61459b689e33a03ecebb6da
-R 72ae68a6bead35074543f0fbb3f99813
+P a4691563dd2b3e5e8474467b0c5c46fb26351b77
+R bff2ac7c319afc7951a784339e29e0c8
 U dan
-Z d97641c607fcd38b195631c16e5de58c
+Z 67ba15939e37c99dcbd00f2c2cfc47a0
diff --git a/manifest.uuid b/manifest.uuid
index 15d6501bea..b5b162b129 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-a4691563dd2b3e5e8474467b0c5c46fb26351b77
\ No newline at end of file
+252f0e457d3e33404df87d3e6c44ede61b78319c
\ No newline at end of file
diff --git a/test/fts3corrupt.test b/test/fts3corrupt.test
index 217ef940c4..3c97d64f5f 100644
--- a/test/fts3corrupt.test
+++ b/test/fts3corrupt.test
@@ -18,26 +18,78 @@ ifcapable !fts3 { finish_test ; return }
 
 set ::testprefix fts3corrupt
 
+
+# Test that a doclist with a length field that indicates that the doclist
+# extends past the end of the node on which it resides is correctly identified
+# as database corruption.
+#
 do_execsql_test 1.0 {
   CREATE VIRTUAL TABLE t1 USING fts3;
   INSERT INTO t1 VALUES('hello');
 } {}
-
 do_test fts3corrupt-1.1 {
   set blob [db one {SELECT root from t1_segdir}]
   set blob [binary format a7ca* $blob 24 [string range $blob 8 end]]
   execsql { UPDATE t1_segdir SET root = $blob }
 } {}
-
 do_test fts3corrupt-1.2 {
   foreach w {a b c d e f g h i j k l m n o} {
     execsql { INSERT INTO t1 VALUES($w) }
   }
 } {}
-
 do_catchsql_test 1.3 {
   INSERT INTO t1 VALUES('world');
 } {1 {database disk image is malformed}}
+do_execsql_test 1.4 { 
+  DROP TABLE t1;
+} 
+
+# This block of tests checks that corruption is correctly detected if the
+# length field of a term on a leaf node indicates that the term extends past
+# the end of the node on which it resides. There are two cases:
+#
+#   1. The first term on the node.
+#   2. The second or subsequent term on the node (prefix compressed term).
+#
+do_execsql_test 2.0 {
+  CREATE VIRTUAL TABLE t1 USING fts3;
+  BEGIN;
+    INSERT INTO t1 VALUES('hello');
+    INSERT INTO t1 VALUES('hello');
+    INSERT INTO t1 VALUES('hello');
+    INSERT INTO t1 VALUES('hello');
+    INSERT INTO t1 VALUES('hello');
+  COMMIT;
+} {}
+do_test fts3corrupt-2.1 {
+  set blob [db one {SELECT root from t1_segdir}]
+  set blob [binary format a*a* "\x00\x7F" [string range $blob 2 end]]
+  execsql { UPDATE t1_segdir SET root = $blob }
+} {}
+do_catchsql_test 2.2 {
+  SELECT rowid FROM t1 WHERE t1 MATCH 'hello'
+} {1 {database disk image is malformed}}
+
+do_execsql_test 3.0 {
+  DROP TABLE t1;
+  CREATE VIRTUAL TABLE t1 USING fts3;
+  BEGIN;
+    INSERT INTO t1 VALUES('hello');
+    INSERT INTO t1 VALUES('world');
+  COMMIT;
+} {}
+do_test fts3corrupt-3.1 {
+  set blob [db one {SELECT quote(root) from t1_segdir}]
+  set blob [binary format a11a*a* $blob "\x7F" [string range $blob 12 end]]
+  execsql { UPDATE t1_segdir SET root = $blob }
+} {}
+do_catchsql_test 3.2 {
+  SELECT rowid FROM t1 WHERE t1 MATCH 'world'
+} {1 {database disk image is malformed}}
+
+
+
+
 
 finish_test