From: drh Date: Sat, 24 Mar 2012 17:29:05 +0000 (+0000) Subject: Enable fts3 tables to use incremental merge by automatically creating the X-Git-Tag: mountain-lion~3^2~9^2~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6c2e7e19fa7f03707ac80961d5c29d3ba5095ea7;p=thirdparty%2Fsqlite.git Enable fts3 tables to use incremental merge by automatically creating the %_stat table when it is needed. FossilOrigin-Name: cc051fc0b2d89603b27b94cf2afdbda417ee9d94 --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index cf9fe64ed9..63661393a5 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -570,6 +570,18 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){ } } +/* +** Create the %_stat table if it does not already exist. +*/ +void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){ + fts3DbExec(pRc, p->db, + "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'" + "(id INTEGER PRIMARY KEY, value BLOB);", + p->zDb, p->zName + ); + if( (*pRc)==SQLITE_OK ) p->bHasStat = 1; +} + /* ** Create the backing store tables (%_content, %_segments and %_segdir) ** required by the FTS3 table passed as the only argument. This is done @@ -631,10 +643,7 @@ static int fts3CreateTables(Fts3Table *p){ ); } if( p->bHasStat ){ - fts3DbExec(&rc, db, - "CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);", - p->zDb, p->zName - ); + sqlite3Fts3CreateStatTable(&rc, p); } return rc; } @@ -1329,6 +1338,16 @@ static int fts3InitVtab( rc = fts3CreateTables(p); } + /* Check to see if a legacy fts3 table has been "upgraded" by the + ** addition of a %_stat table so that it can use incremental merge. + */ + if( !isFts4 && !isCreate ){ + int rc2 = SQLITE_OK; + fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2", + p->zDb, p->zName); + if( rc2==SQLITE_OK ) p->bHasStat = 1; + } + /* Figure out the page-size for the database. This is required in order to ** estimate the cost of loading large doclists from the database. 
*/ fts3DatabasePageSize(&rc, p); diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index af310c5738..0ce67a2d10 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -493,6 +493,7 @@ void sqlite3Fts3Dequote(char *); void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*); int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); +void sqlite3Fts3CreateStatTable(int*, Fts3Table*); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); diff --git a/ext/fts3/fts3_write.c b/ext/fts3/fts3_write.c index ad4e5e4887..2d3a91d87e 100644 --- a/ext/fts3/fts3_write.c +++ b/ext/fts3/fts3_write.c @@ -4668,7 +4668,11 @@ static int fts3DoIncrmerge( if( z[0]!='\0' || nMin<2 ){ rc = SQLITE_ERROR; }else{ - rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); + rc = SQLITE_OK; + if( !p->bHasStat ) sqlite3Fts3CreateStatTable(&rc, p); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); + } sqlite3Fts3SegmentsClose(p); } return rc; @@ -4686,9 +4690,13 @@ static int fts3DoAutoincrmerge( Fts3Table *p, /* FTS3 table handle */ const char *zParam /* Nul-terminated string containing boolean */ ){ - int rc; + int rc = SQLITE_OK; sqlite3_stmt *pStmt = 0; p->bAutoincrmerge = fts3Getint(&zParam)!=0; + if( !p->bHasStat ){ + sqlite3Fts3CreateStatTable(&rc, p); + if( rc ) return rc; + } rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); if( rc ) return rc;; sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); diff --git a/manifest b/manifest index 252baaa8e3..3c55a976b3 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sfailing\sassert()\sin\sthe\sFTS3_LOG_MERGES\srelated\scode. -D 2012-03-24T17:09:11.161 +C Enable\sfts3\stables\sto\suse\sincremental\smerge\sby\sautomatically\screating\sthe\n%_stat\stable\swhen\sit\sis\sneeded. 
+D 2012-03-24T17:29:05.827 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 2f37e468503dbe79d35c9f6dffcf3fae1ae9ec20 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -63,9 +63,9 @@ F ext/fts3/README.content fdc666a70d5257a64fee209f97cf89e0e6e32b51 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 10988e19ba78d846e5979a5118843e22822a0147 +F ext/fts3/fts3.c 95409b49801ee7736755d7e307e606571b754a58 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h 6d4ffaca18df57533a7d6240dbdd835c4f3f096a +F ext/fts3/fts3Int.h eb749124db7c94b6f89d793cdd4d993a52c46646 F ext/fts3/fts3_aux.c 5205182bd8f372782597888156404766edf5781e F ext/fts3/fts3_expr.c dbc7ba4c3a6061adde0f38ed8e9b349568299551 F ext/fts3/fts3_hash.c 8dd2d06b66c72c628c2732555a32bc0943114914 @@ -78,7 +78,7 @@ F ext/fts3/fts3_test.c 6b7cc68aef4efb084e1449f7d20c4b20d3bdf6b4 F ext/fts3/fts3_tokenizer.c 3da7254a9881f7e270ab28e2004e0d22b3212bce F ext/fts3/fts3_tokenizer.h 66dec98e365854b6cd2d54f1a96bb6d428fc5a68 F ext/fts3/fts3_tokenizer1.c 5c98225a53705e5ee34824087478cf477bdb7004 -F ext/fts3/fts3_write.c ee3aeaa51aa633ada26e6d14073b1f893f094973 +F ext/fts3/fts3_write.c 6a092ee27198716969bfbaa2194aa67eabeb2ff6 F ext/fts3/fts3speed.tcl b54caf6a18d38174f1a6e84219950d85e98bb1e9 F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 F ext/icu/README.txt bf8461d8cdc6b8f514c080e4e10dc3b2bbdfefa9 @@ -487,6 +487,7 @@ F test/fts3fault2.test b62a2bc843c20414405f80e5eeb78e39bc68fe53 F test/fts3first.test dbdedd20914c8d539aa3206c9b34a23775644641 F test/fts3malloc.test b86ea33db9e8c58c0c2f8027a9fcadaf6a1568be F test/fts3matchinfo.test 6507fe1c342e542300d65ea637d4110eccf894e6 +F test/fts3merge.test acb0be43658029565e7b448f8968149de80549d7 F test/fts3near.test 
2e318ee434d32babd27c167142e2b94ddbab4844 F test/fts3prefix.test b36d4f00b128a51e7b386cc013a874246d9d7dc1 F test/fts3prefix2.test 477ca96e67f60745b7ac931cfa6e9b080c562da5 @@ -997,7 +998,7 @@ F tool/tostr.awk e75472c2f98dd76e06b8c9c1367f4ab07e122d06 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh 9f406d66e750e8ac031c63a9ef3248aaa347ef2a F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 -P af55ca5fc6778cb6d1a79a17dfa2d4e567ea1ccc -R c679b33a6e75fcdd2f180faacc012668 -U dan -Z 4d0762459d11aa2e9f32a4956c0ceb5b +P 4220d52cb3426f1680b72d57ecc9f4ade029357d +R d86044022236a45ab8294cd3002625f6 +U drh +Z 5e5822ed626477e734b96980ea604331 diff --git a/manifest.uuid b/manifest.uuid index 5d8bce9828..1019a1397f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4220d52cb3426f1680b72d57ecc9f4ade029357d \ No newline at end of file +cc051fc0b2d89603b27b94cf2afdbda417ee9d94 \ No newline at end of file diff --git a/test/fts3merge.test b/test/fts3merge.test new file mode 100644 index 0000000000..a0854fa1a4 --- /dev/null +++ b/test/fts3merge.test @@ -0,0 +1,330 @@ +# 2012 March 06 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the incremental merge function. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/fts3_common.tcl +set ::testprefix fts3merge + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file.
+ifcapable !fts3 { + finish_test + return +} + +#------------------------------------------------------------------------- +# Test cases 1.* +# +do_test 1.0 { fts3_build_db_1 1004 } {} +do_test 1.1 { fts3_integrity_check t1 } {ok} +do_execsql_test 1.1 { + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level +} { + 0 {0 1 2 3 4 5 6 7 8 9 10 11} + 1 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} + 2 {0 1 2} +} + +for {set i 0} {$i<20} {incr i} { + do_execsql_test 1.2.$i.1 { INSERT INTO t1(t1) VALUES('merge=1') } + do_test 1.2.$i.2 { fts3_integrity_check t1 } ok + do_execsql_test 1.2.$i.3 { + SELECT docid FROM t1 WHERE t1 MATCH 'zero one two three' + } {123 132 213 231 312 321} +} + +do_execsql_test 1.3 { + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level +} { + 0 {0 1 2 3} + 1 {0 1 2 3 4 5 6} + 2 {0 1 2 3} +} + +for {set i 0} {$i<100} {incr i} { + do_execsql_test 1.4.$i { INSERT INTO t1(t1) VALUES('merge=1,4') } + do_test 1.4.$i.2 { fts3_integrity_check t1 } ok + do_execsql_test 1.4.$i.3 { + SELECT docid FROM t1 WHERE t1 MATCH 'zero one two three' + } {123 132 213 231 312 321} +} + +do_execsql_test 1.5 { + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level +} { + 2 {0 1} + 3 0 +} + +#------------------------------------------------------------------------- +# Test cases 2.* test that errors in the xxx part of the 'merge=xxx' are +# handled correctly. 
+# +do_execsql_test 2.0 { CREATE VIRTUAL TABLE t2 USING fts3 } + +foreach {tn arg} { + 1 {merge=abc} + 2 {merge=%%%} + 3 {merge=,} + 4 {merge=5,} + 5 {merge=6,%} + 6 {merge=6,six} + 7 {merge=6,1} + 8 {merge=6,0} +} { + do_catchsql_test 2.$tn { + INSERT INTO t2(t2) VALUES($arg); + } {1 {SQL logic error or missing database}} +} + +#------------------------------------------------------------------------- +# Test cases 3.* +# +do_test 3.0 { + reset_db + execsql { PRAGMA page_size = 512 } + fts3_build_db_2 30040 +} {} +do_test 3.1 { fts3_integrity_check t2 } {ok} + +do_execsql_test 3.2 { + SELECT level, group_concat(idx, ' ') FROM t2_segdir GROUP BY level +} { + 0 {0 1 2 3 4 5 6} + 1 {0 1 2 3 4} + 2 {0 1 2 3 4} + 3 {0 1 2 3 4 5 6} +} + +do_execsql_test 3.3 { + INSERT INTO t2(t2) VALUES('merge=1000000,2'); + SELECT level, group_concat(idx, ' ') FROM t2_segdir GROUP BY level +} { + 0 0 + 2 0 + 3 0 + 4 0 + 6 0 +} + +#------------------------------------------------------------------------- +# Test cases 4.* +# +reset_db +do_execsql_test 4.1 { + PRAGMA page_size = 512; + CREATE VIRTUAL TABLE t4 USING fts3; + PRAGMA main.page_size; +} {512} + +do_test 4.2 { + foreach x {a c b d e f g h i j k l m n o p} { + execsql "INSERT INTO t4 VALUES('[string repeat $x 600]')" + } + execsql {SELECT level, group_concat(idx, ' ') FROM t4_segdir GROUP BY level} +} {0 {0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15}} + +foreach {tn expect} { + 1 "0 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} 1 0" + 2 "0 {0 1 2 3 4 5 6 7 8 9 10 11 12} 1 0" + 3 "0 {0 1 2 3 4 5 6 7 8 9 10 11} 1 0" + 4 "0 {0 1 2 3 4 5 6 7 8 9 10} 1 0" + 5 "0 {0 1 2 3 4 5 6 7 8 9} 1 0" + 6 "0 {0 1 2 3 4 5 6 7 8} 1 0" + 7 "0 {0 1 2 3 4 5 6 7} 1 0" + 8 "0 {0 1 2 3 4 5 6} 1 0" + 9 "0 {0 1 2 3 4 5} 1 0" +} { + do_execsql_test 4.3.$tn { + INSERT INTO t4(t4) VALUES('merge=1,16'); + SELECT level, group_concat(idx, ' ') FROM t4_segdir GROUP BY level; + } $expect +} + +do_execsql_test 4.4.1 { + SELECT quote(value) FROM t4_stat WHERE rowid=1 +} {X'0006'} + 
+do_execsql_test 4.4.2 { + DELETE FROM t4_stat WHERE rowid=1; + INSERT INTO t4(t4) VALUES('merge=1,12'); + SELECT level, group_concat(idx, ' ') FROM t4_segdir GROUP BY level; +} "0 {0 1 2 3 4 5} 1 0" + + +#------------------------------------------------------------------------- +# Test cases 5.* +# +# Test that if a crisis-merge occurs that disrupts an ongoing incremental +# merge, the next call to "merge=A,B" identifies this and starts a new +# incremental merge. There are two scenarios: +# +# * There are fewer segments on the input level than the disrupted +# incremental merge operated on, or +# +# * Sufficient segments exist on the input level but the segments +# contain keys smaller than the largest key in the potential output +# segment. +# +do_test 5.1 { + reset_db + fts3_build_db_1 1000 +} {} + +do_execsql_test 5.2 { + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level; +} { + 0 {0 1 2 3 4 5 6 7} + 1 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} + 2 {0 1 2} +} + +do_execsql_test 5.3 { + INSERT INTO t1(t1) VALUES('merge=1,4'); + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level; +} { + 0 {0 1 2 3 4 5 6 7} + 1 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} + 2 {0 1 2 3} +} + +do_execsql_test 5.4 {SELECT quote(value) from t1_stat WHERE rowid=1} {X'0104'} +do_test 5.5 { + foreach docid [execsql {SELECT docid FROM t1}] { + execsql {INSERT INTO t1 SELECT * FROM t1 WHERE docid=$docid} + } +} {} + +do_execsql_test 5.6 {SELECT quote(value) from t1_stat WHERE rowid=1} {X'0104'} + +do_execsql_test 5.7 { + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level; + SELECT quote(value) from t1_stat WHERE rowid=1; +} { + 0 {0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15} + 1 {0 1 2 3 4 5 6 7 8 9 10 11} + 2 {0 1 2 3 4 5 6 7} + X'0104' +} + +do_execsql_test 5.8 { + INSERT INTO t1(t1) VALUES('merge=1,4'); + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level; + SELECT quote(value) from t1_stat WHERE rowid=1; +} { + 0 {0 1 2 3 4 5 6 7 8 9 10 11 12 13
14 15} + 1 {0 1 2 3 4 5 6 7 8 9 10 11} + 2 {0 1 2 3 4 5 6 7} + 3 {0} + X'0204' +} + +do_test 5.9 { + set L [expr 16*16*8 + 16*4 + 1] + foreach docid [execsql { + SELECT docid FROM t1 UNION ALL SELECT docid FROM t1 LIMIT $L + }] { + execsql {INSERT INTO t1 SELECT * FROM t1 WHERE docid=$docid} + } +} {} + +do_execsql_test 5.10 { + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level; + SELECT quote(value) from t1_stat WHERE rowid=1; +} { + 0 0 1 0 2 0 3 {0 1} + X'0204' +} + +do_execsql_test 5.11 { + INSERT INTO t1(t1) VALUES('merge=10,4'); + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level; + SELECT quote(value) from t1_stat WHERE rowid=1; +} { + 0 0 1 0 2 0 3 {0 1} + X'0000' +} + +#------------------------------------------------------------------------- +# Test cases 6.* +# +# At one point the following test caused an assert() to fail (because the +# second 'merge=1,2' operation below actually "merges" a single input +# segment, which was unexpected). +# +do_test 6.1 { + reset_db + set a [string repeat a 900] + set b [string repeat b 900] + set c [string repeat c 900] + set d [string repeat d 900] + execsql { + CREATE VIRTUAL TABLE t1 USING fts3; + BEGIN; + INSERT INTO t1 VALUES($a); + INSERT INTO t1 VALUES($b); + COMMIT; + BEGIN; + INSERT INTO t1 VALUES($c); + INSERT INTO t1 VALUES($d); + COMMIT; + } + + execsql { + INSERT INTO t1(t1) VALUES('merge=1,2'); + INSERT INTO t1(t1) VALUES('merge=1,2'); + } +} {} + +#------------------------------------------------------------------------- +# Test cases 7.* +# +# Test that the value returned by sqlite3_total_changes() increases by +# 1 following a no-op "merge=A,B", or by more than 1 if actual work is +# performed. 
+# +do_test 7.0 { + reset_db + fts3_build_db_1 1000 +} {} + +do_execsql_test 7.1 { + SELECT level, group_concat(idx, ' ') FROM t1_segdir GROUP BY level +} { + 0 {0 1 2 3 4 5 6 7} + 1 {0 1 2 3 4 5 6 7 8 9 10 11 12 13} + 2 {0 1 2} +} +do_test 7.2 { + set x [db total_changes] + execsql { INSERT INTO t1(t1) VALUES('merge=2,10') } + expr { ([db total_changes] - $x)>1 } +} {1} +do_test 7.3 { + set x [db total_changes] + execsql { INSERT INTO t1(t1) VALUES('merge=200,10') } + expr { ([db total_changes] - $x)>1 } +} {1} +do_test 7.4 { + set x [db total_changes] + execsql { INSERT INTO t1(t1) VALUES('merge=200,10') } + expr { ([db total_changes] - $x)>1 } +} {0} +do_test 7.5 { + set x [db total_changes] + execsql { INSERT INTO t1(t1) VALUES('merge=200,10') } + expr { ([db total_changes] - $x)>1 } +} {0} + +finish_test