From: dan Date: Tue, 13 May 2014 20:11:37 +0000 (+0000) Subject: Fix a problem preventing delete markers from ever being removed from the FTS index. X-Git-Tag: version-3.8.5~39^2~7 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c0caea21dee91b1f80745bf9b81a07b237ec314e;p=thirdparty%2Fsqlite.git Fix a problem preventing delete markers from ever being removed from the FTS index. FossilOrigin-Name: 7f47ae5c5ddb1227484ddae7c6960183932a052a --- diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index b54d61390a..e2093376e6 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -382,8 +382,9 @@ static int fts3SqlStmt( "ORDER BY level DESC, idx ASC", /* Update statements used while promoting segments */ -/* 38 */ "UPDATE %Q.'%q_segdir' SET level=-1,idx=? WHERE level=? AND idx=?", -/* 39 */ "UPDATE %Q.'%q_segdir' SET level=? WHERE level=-1" +/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " + "WHERE level=? AND idx=?", +/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" }; int rc = SQLITE_OK; @@ -3091,6 +3092,7 @@ static int fts3SegmentMerge( Fts3SegFilter filter; /* Segment term filter condition */ Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ int bIgnoreEmpty = 0; /* True to ignore empty segments */ + i64 iMaxLevel = 0; /* Max level number for this index/langid */ assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel==FTS3_SEGCURSOR_PENDING @@ -3102,6 +3104,11 @@ static int fts3SegmentMerge( rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); + if( rc!=SQLITE_OK ) goto finished; + } + if( iLevel==FTS3_SEGCURSOR_ALL ){ /* This call is to merge all segments in the database to a single ** segment. The level of the new segment is equal to the numerically @@ -3111,21 +3118,21 @@ static int fts3SegmentMerge( rc = SQLITE_DONE; goto finished; } - rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel); + iNewLevel = iMaxLevel; bIgnoreEmpty = 1; - }else if( iLevel==FTS3_SEGCURSOR_PENDING ){ - iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0); - rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx); }else{ /* This call is to merge all segments at level iLevel. find the next ** available segment index at level iLevel+1. The call to ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to ** a single iLevel+2 segment if necessary. */ - rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); + assert( FTS3_SEGCURSOR_PENDING==-1 ); iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); + rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); + bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); } if( rc!=SQLITE_OK ) goto finished; + assert( csr.nSegment>0 ); assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); assert( iNewLevelnLeafData); + if( pWriter ){ + rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); + if( rc==SQLITE_OK ){ + rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData); + } } finished: @@ -4725,6 +4734,7 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ int bUseHint = 0; /* True if attempting to append */ + int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ /* Search the %_segdir table for the absolute level with the smallest ** relative level number that contains at least nMin segments, if any. @@ -4778,6 +4788,12 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ ** to start work on some other level. */ memset(pWriter, 0, nAlloc); pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; + + if( rc==SQLITE_OK ){ + rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); + assert( bUseHint==1 || bUseHint==0 ); + if( (iIdx-bUseHint)==0 ) pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; + } if( rc==SQLITE_OK ){ rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); } @@ -4785,16 +4801,12 @@ int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) ){ - int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ - rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); - if( rc==SQLITE_OK ){ - if( bUseHint && iIdx>0 ){ - const char *zKey = pCsr->zTerm; - int nKey = pCsr->nTerm; - rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); - }else{ - rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); - } + if( bUseHint && iIdx>0 ){ + const char *zKey = pCsr->zTerm; + int nKey = pCsr->nTerm; + rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); + }else{ + rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); } if( rc==SQLITE_OK && pWriter->nLeafEst ){ diff --git a/manifest b/manifest index 1ff6b237e6..357a99fd07 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Experimental\scode\sto\sprevent\sFTS\sindexes\sfrom\sgrowing\sindefinitely\sas\sthe\stable\sis\supdated. -D 2014-05-12T20:04:48.571 +C Fix\sa\sproblem\spreventing\sdelete\smarkers\sfrom\sever\sbeing\sremoved\sfrom\sthe\sFTS\sindex. +D 2014-05-13T20:11:37.423 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in dd2b1aba364ff9b05de41086f74407f285c57670 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -96,7 +96,7 @@ F ext/fts3/fts3_tokenizer.h 64c6ef6c5272c51ebe60fc607a896e84288fcbc3 F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 F ext/fts3/fts3_unicode.c 92391b4b4fb043564c6539ea9b8661e3bcba47b9 F ext/fts3/fts3_unicode2.c 0113d3acf13429e6dc38e0647d1bc71211c31a4d -F ext/fts3/fts3_write.c 5fd2aa9d1812387c6254304e20d9ac2b29e16700 +F ext/fts3/fts3_write.c 283b24477729129a210d91b48f7c53181583a848 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 @@ -539,7 +539,7 @@ F test/fts3conf.test ee8500c86dd58ec075e8831a1e216a79989436de F test/fts3corrupt.test 2710b77983cc7789295ddbffea52c1d3b7506dbb F test/fts3corrupt2.test 6d96efae2f8a6af3eeaf283aba437e6d0e5447ba F test/fts3cov.test e0fb00d8b715ddae4a94c305992dfc3ef70353d7 -F test/fts3d.test 597b0b76e41f0d672e2731c4d7b631d628efd13f +F test/fts3d.test 95c17d1b67b33a5eac0bf5a0d11116a0c0ac7a3a F test/fts3defer.test 0be4440b73a2e651fc1e472066686d6ada4b9963 F test/fts3defer2.test e880e3b65bdf999f4746cdaefa65f14a98b9b724 F test/fts3defer3.test dd53fc13223c6d8264a98244e9b19abd35ed71cd @@ -570,7 +570,7 @@ F test/fts4aa.test 0c3152322c7f0b548cc942ad763eaba0da87ccca F test/fts4check.test 66fa274cab2b615f2fb338b257713aba8fad88a8 F test/fts4content.test 2e7252557d6d24afa101d9ba1de710d6140e6d06 F test/fts4docid.test e33c383cfbdff0284685604d256f347a18fdbf01 -F test/fts4growth.test f7eac9fadfe67765c4a0d6202c85f7272766fb9e +F test/fts4growth.test 3b1f8c98b603b38dc9fe4a266f4f5ddb0c73f092 F test/fts4incr.test 361960ed3550e781f3f313e17e2182ef9cefc0e9 F test/fts4langid.test 24a6e41063b416bbdf371ff6b4476fa41c194aa7 F test/fts4merge.test c424309743fdd203f8e56a1f1cd7872cd66cc0ee @@ -1171,10 +1171,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 0a4f59676bd0ab33b2c86c9a35a2ebbdbaf09ee7 -R de45a14af3a90f6b390273a9a2d8cfa9 -T *branch * fts4-experimental -T *sym-fts4-experimental * -T -sym-trunk * +P b3b505a4dd0c679437a4272109f1188175088cd1 +R d1a058fc899c4ecbd09fd7fceab935e7 U dan -Z a03d85f9bfa278d813164b1e97a88ff7 +Z f678f48fb654227badac5ce98e23d62c diff --git a/manifest.uuid b/manifest.uuid index a9facd1d8e..a6856e6014 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b3b505a4dd0c679437a4272109f1188175088cd1 \ No newline at end of file +7f47ae5c5ddb1227484ddae7c6960183932a052a \ No newline at end of file diff --git a/test/fts3d.test b/test/fts3d.test index 2914818d4e..5c04ead0a0 100644 --- a/test/fts3d.test +++ b/test/fts3d.test @@ -213,16 +213,17 @@ do_test fts3d-4.matches { {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] -check_terms_all fts3d-4.1 {a four is one test that this three two was} +puts [db eval {SELECT c FROM t1 } ] +check_terms_all fts3d-4.1 {a four is test that this was} check_doclist_all fts3d-4.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]} check_doclist_all fts3d-4.1.2 four {} check_doclist_all fts3d-4.1.3 is {[1 0[1]] [3 0[1]]} -check_doclist_all fts3d-4.1.4 one {} +#check_doclist_all fts3d-4.1.4 one {} check_doclist_all fts3d-4.1.5 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist_all fts3d-4.1.6 that {[2 0[0]]} check_doclist_all fts3d-4.1.7 this {[1 0[0]] [3 0[0]]} -check_doclist_all fts3d-4.1.8 three {} -check_doclist_all fts3d-4.1.9 two {} +#check_doclist_all fts3d-4.1.8 three {} +#check_doclist_all fts3d-4.1.9 two {} check_doclist_all fts3d-4.1.10 was {[2 0[1]]} check_terms fts3d-4.2 0 0 {a four test that was} @@ -239,16 +240,16 @@ check_doclist fts3d-4.3.3 0 1 is {[3 0[1]]} check_doclist fts3d-4.3.4 0 1 test {[3 0[3]]} check_doclist fts3d-4.3.5 0 1 this {[3 0[0]]} -check_terms fts3d-4.4 1 0 {a four is one test that this three two was} +check_terms fts3d-4.4 1 0 {a four is test that this was} check_doclist fts3d-4.4.1 1 0 a {[1 0[2]] [2 0[2]] [3 0[2]]} -check_doclist fts3d-4.4.2 1 0 four {[1] [2 0[4]] [3 0[4]]} +check_doclist fts3d-4.4.2 1 0 four {[2 0[4]] [3 0[4]]} check_doclist fts3d-4.4.3 1 0 is {[1 0[1]] [3 0[1]]} -check_doclist fts3d-4.4.4 1 0 one {[1] [2] [3]} +#check_doclist fts3d-4.4.4 1 0 one {[1] [2] [3]} check_doclist fts3d-4.4.5 1 0 test {[1 0[3]] [2 0[3]] [3 0[3]]} check_doclist fts3d-4.4.6 1 0 that {[2 0[0]]} check_doclist fts3d-4.4.7 1 0 this {[1 0[0]] [3 0[0]]} -check_doclist fts3d-4.4.8 1 0 three {[1] [2] [3]} -check_doclist fts3d-4.4.9 1 0 two {[1] [2] [3]} +#check_doclist fts3d-4.4.8 1 0 three {[1] [2] [3]} +#check_doclist fts3d-4.4.9 1 0 two {[1] [2] [3]} check_doclist fts3d-4.4.10 1 0 was {[2 0[1]]} # Optimize should leave the result in the level of the highest-level diff --git a/test/fts4growth.test b/test/fts4growth.test index ab2ad62142..e5b419a389 100644 --- a/test/fts4growth.test +++ b/test/fts4growth.test @@ -151,7 +151,118 @@ do_execsql_test 2.8 { } {86120} #-------------------------------------------------------------------------- +# Test that delete markers are removed from FTS segments when possible. +# It is only possible to remove delete markers when the output of the +# merge operation will become the oldest segment in the index. +# +# 3.1 - when the oldest segment is created by an 'optimize'. +# 3.2 - when the oldest segment is created by an incremental merge. +# 3.3 - by a crisis merge. +# + +proc insert_doc {args} { + foreach iDoc $args { + set L [lindex { + {In your eagerness to engage the Trojans,} + {don’t any of you charge ahead of others,} + {trusting in your strength and horsemanship.} + {And don’t lag behind. That will hurt our charge.} + {Any man whose chariot confronts an enemy’s} + {should thrust with his spear at him from there.} + {That’s the most effective tactic, the way} + {men wiped out city strongholds long ago —} + {their chests full of that style and spirit.} + } [expr $iDoc%9]] + execsql { REPLACE INTO x3(docid, content) VALUES($iDoc, $L) } + } +} + +proc delete_doc {args} { + foreach iDoc $args { + execsql { DELETE FROM x3 WHERE docid = $iDoc } + } +} + +proc second {x} { lindex $x 1 } +db func second second + +do_execsql_test 3.0 { CREATE VIRTUAL TABLE x3 USING fts4 } + +do_test 3.1.1 { + db transaction { insert_doc 1 2 3 4 5 6 } + execsql { SELECT level, idx, second(end_block) FROM x3_segdir } +} {0 0 412} +do_test 3.1.2 { + delete_doc 1 2 3 4 5 6 + execsql { SELECT count(*) FROM x3_segdir } +} {0} +do_test 3.1.3 { + db transaction { + insert_doc 1 2 3 4 5 6 7 8 9 + delete_doc 9 8 7 + } + execsql { SELECT level, idx, second(end_block) FROM x3_segdir } +} {0 0 591 0 1 65 0 2 72 0 3 76} +do_test 3.1.4 { + execsql { INSERT INTO x3(x3) VALUES('optimize') } + execsql { SELECT level, idx, second(end_block) FROM x3_segdir } +} {0 0 412} + +do_test 3.2.1 { + execsql { DELETE FROM x3 } + insert_doc 8 7 6 5 4 3 2 1 + delete_doc 7 8 + execsql { SELECT count(*) FROM x3_segdir } +} {10} +do_test 3.2.2 { + execsql { INSERT INTO x3(x3) VALUES('merge=500,10') } + execsql { SELECT level, idx, second(end_block) FROM x3_segdir } +} {1 0 412} + +# This assumes the crisis merge happens when there are already 16 +# segments and one more is added. +# +do_test 3.3.1 { + execsql { DELETE FROM x3 } + insert_doc 1 2 3 4 5 6 7 8 9 10 11 + delete_doc 11 10 9 8 7 + execsql { SELECT count(*) FROM x3_segdir } +} {16} +do_test 3.3.2 { + insert_doc 12 + execsql { SELECT level, idx, second(end_block) FROM x3_segdir WHERE level=1 } +} {1 0 412} + +#-------------------------------------------------------------------------- +do_execsql_test 4.1 { + DROP TABLE IF EXISTS x4; + DROP TABLE IF EXISTS t1; + CREATE TABLE t1(docid, words); + CREATE VIRTUAL TABLE x4 USING fts4(words); +} +do_test 4.2 { + fts_kjv_genesis + execsql { INSERT INTO x4 SELECT words FROM t1 } + execsql { INSERT INTO x4 SELECT words FROM t1 } +} {} + +do_execsql_test 4.3 { + SELECT level, idx, second(end_block) FROM x4_segdir +} {0 0 117483 0 1 118006} + +do_execsql_test 4.4 { + INSERT INTO x4(x4) VALUES('merge=10,2'); + SELECT count(*) FROM x4_segdir; +} {3} + +breakpoint +do_execsql_test 4.5 { + INSERT INTO x4(x4) VALUES('merge=10,2'); + SELECT count(*) FROM x4_segdir; +} {3} + +if 0 { do_execsql_test 3.1 { DROP TABLE IF EXISTS x2; DROP TABLE IF EXISTS t1; @@ -174,16 +285,17 @@ proc t1_to_x2 {} { #execsql {SELECT level, count(*) FROM x2_segdir GROUP BY level} #} {0 13 1 15 2 5} -#proc second {x} { lindex $x 1 } -#db func second second -#for {set i 0} {$i <1000} {incr i} { -# t1_to_x2 -# db eval { -# SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level; -# } { -# puts "$i.$level: $c" -# } -#} +proc second {x} { lindex $x 1 } +db func second second +for {set i 0} {$i <1000} {incr i} { + t1_to_x2 + db eval { + SELECT level, group_concat( second(end_block), ' ' ) AS c FROM x2_segdir GROUP BY level; + } { + puts "$i.$level: $c" + } +} +} finish_test