From: dan Date: Mon, 6 Jul 2015 20:27:19 +0000 (+0000) Subject: Speed up seek operations on fts5 b-tree structures. X-Git-Tag: version-3.8.11~54 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=656773b6751d29e7d240cdc4f3e2345c4f101102;p=thirdparty%2Fsqlite.git Speed up seek operations on fts5 b-tree structures. FossilOrigin-Name: 7b7da1eb435d321fc4283f6aa2161fa1e16f2cf3 --- diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index ae3ff35dbb..3ba3046e36 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1580,6 +1580,23 @@ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ } } +static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ + u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ + int iOff = pIter->iLeafOffset; + + if( iOff>=pIter->pLeaf->n ){ + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf==0 ){ + if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; + return; + } + iOff = 4; + a = pIter->pLeaf->p; + } + iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); + pIter->iLeafOffset = iOff; +} + /* ** Fts5SegIter.iLeafOffset currently points to the first byte of the ** "nSuffix" field of a term. Function parameter nKeep contains the value @@ -1606,17 +1623,9 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ iOff += nNew; pIter->iTermLeafOffset = iOff; pIter->iTermLeafPgno = pIter->iLeafPgno; - if( iOff>=pIter->pLeaf->n ){ - fts5SegIterNextPage(p, pIter); - if( pIter->pLeaf==0 ){ - if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; - return; - } - iOff = 4; - a = pIter->pLeaf->p; - } - iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; + + fts5SegIterLoadRowid(p, pIter); } /* @@ -2124,6 +2133,129 @@ static int fts5NodeSeek( return iPg; } +/* +** The iterator object passed as the second argument currently contains +** no valid values except for the Fts5SegIter.pLeaf member variable. This +** function searches the leaf page for a term matching (pTerm/nTerm). +** +*/ +static void fts5LeafSeek( + Fts5Index *p, /* Leave any error code here */ + int bGe, /* True for a >= search */ + Fts5SegIter *pIter, /* Iterator to seek */ + const u8 *pTerm, int nTerm /* Term to search for */ +){ + int iOff; + const u8 *a = pIter->pLeaf->p; + int n = pIter->pLeaf->n; + + int nMatch = 0; + int nKeep = 0; + int nNew = 0; + + assert( p->rc==SQLITE_OK ); + assert( pIter->pLeaf ); + + iOff = fts5GetU16(&a[2]); + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + return; + } + + while( 1 ){ + int i; + int nCmp; + i64 rowid; + + /* Figure out how many new bytes are in this term */ + + nNew = a[iOff++]; + if( nNew & 0x80 ){ + iOff--; + iOff += fts5GetVarint32(&a[iOff], nNew); + } + + if( nKeep=nMatch ); + if( nKeep==nMatch ){ + nCmp = MIN(nNew, nTerm-nMatch); + for(i=0; ipTerm[nMatch] ){ + goto search_failed; + } + } + iOff += nNew; + + /* Skip past the doclist. If the end of the page is reached, bail out. */ + iOff += fts5GetVarint(&a[iOff], &rowid); + while( iOff=n ) goto search_failed; + + /* Read the nKeep field of the next term. */ + nKeep = a[iOff++]; + if( nKeep & 0x80 ){ + iOff--; + iOff += fts5GetVarint32(&a[iOff], nKeep); + } + } + + search_failed: + if( bGe==0 ){ + fts5DataRelease(pIter->pLeaf); + pIter->pLeaf = 0; + return; + }else if( iOff>=n ){ + do { + fts5SegIterNextPage(p, pIter); + if( pIter->pLeaf==0 ) return; + a = pIter->pLeaf->p; + iOff = fts5GetU16(&a[2]); + if( iOff ){ + if( iOff<4 || iOff>=n ){ + p->rc = FTS5_CORRUPT; + }else{ + nKeep = 0; + iOff += fts5GetVarint32(&a[iOff], nNew); + break; + } + } + }while( 1 ); + } + + search_success: + pIter->iLeafOffset = iOff + nNew; + pIter->iTermLeafOffset = pIter->iLeafOffset; + pIter->iTermLeafPgno = pIter->iLeafPgno; + + fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); + fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); + + fts5SegIterLoadRowid(p, pIter); + fts5SegIterLoadNPos(p, pIter); +} + /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. @@ -2168,26 +2300,8 @@ static void fts5SegIterSeekInit( pIter->iLeafPgno = iPg - 1; fts5SegIterNextPage(p, pIter); - if( (pLeaf = pIter->pLeaf) ){ - int res; - pIter->iLeafOffset = fts5GetU16(&pLeaf->p[2]); - if( pIter->iLeafOffset<4 || pIter->iLeafOffset>=pLeaf->n ){ - p->rc = FTS5_CORRUPT; - }else{ - fts5SegIterLoadTerm(p, pIter, 0); - fts5SegIterLoadNPos(p, pIter); - do { - res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); - if( res>=0 ) break; - fts5SegIterNext(p, pIter, 0); - }while( pIter->pLeaf && p->rc==SQLITE_OK ); - - if( bGe==0 && res ){ - /* Set iterator to point to EOF */ - fts5DataRelease(pIter->pLeaf); - pIter->pLeaf = 0; - } - } + if( pIter->pLeaf ){ + fts5LeafSeek(p, bGe, pIter, pTerm, nTerm); } if( p->rc==SQLITE_OK && bGe==0 ){ diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 77ef19dd67..a18005cfdb 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -50,7 +50,17 @@ do_execsql_test 2.1 { do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } } {/{\(structure\) {lvl=0 nMerge=0 {id=[0123456789]* h=1 leaves=1..1}}}/} -do_execsql_test 2.3 { + +foreach w {a b c d e f} { + do_execsql_test 2.3.$w.asc { + SELECT rowid FROM t1 WHERE t1 MATCH $w; + } {1} + do_execsql_test 2.3.$w.desc { + SELECT rowid FROM t1 WHERE t1 MATCH $w ORDER BY rowid DESC; + } {1} +} + +do_execsql_test 2.4 { INSERT INTO t1(t1) VALUES('integrity-check'); } @@ -190,8 +200,6 @@ for {set i 1} {$i <= 10} {incr i} { execsql { INSERT INTO t1(t1) VALUES('integrity-check'); } } {} } -#db eval {SELECT rowid, fts5_decode(rowid, block) aS r FROM t1_data} {puts $r} -#exit #------------------------------------------------------------------------- # diff --git a/ext/fts5/test/fts5ac.test b/ext/fts5/test/fts5ac.test index 8bc8accb9b..0de4848145 100644 --- a/ext/fts5/test/fts5ac.test +++ b/ext/fts5/test/fts5ac.test @@ -205,6 +205,7 @@ foreach {tn2 sql} { execsql { INSERT INTO xx(xx) VALUES('integrity-check') } } {} + #------------------------------------------------------------------------- # Test phrase queries. # diff --git a/ext/fts5/test/fts5ad.test b/ext/fts5/test/fts5ad.test index 66ca1f1640..b998db05ab 100644 --- a/ext/fts5/test/fts5ad.test +++ b/ext/fts5/test/fts5ad.test @@ -204,10 +204,11 @@ foreach {T create} { } return $ret } + foreach {bAsc sql} { - 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC} 1 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix} + 0 {SELECT rowid FROM t1 WHERE t1 MATCH $prefix ORDER BY rowid DESC} } { foreach {tn prefix} { 1 {a*} 2 {ab*} 3 {abc*} 4 {abcd*} 5 {abcde*} diff --git a/ext/fts5/test/fts5alter.test b/ext/fts5/test/fts5alter.test index 0ed788b8a9..eae01b7386 100644 --- a/ext/fts5/test/fts5alter.test +++ b/ext/fts5/test/fts5alter.test @@ -81,6 +81,23 @@ do_execsql_test 2.3 { SELECT rowid FROM yy WHERE yy MATCH 'a + b + c'; } {-56 -22} +#------------------------------------------------------------------------- + +do_execsql_test 3.1 { + CREATE VIRTUAL TABLE abc USING fts5(a); + INSERT INTO abc(rowid, a) VALUES(1, 'a'); + BEGIN; + INSERT INTO abc(rowid, a) VALUES(2, 'a'); +} +breakpoint +do_execsql_test 3.2 { + SELECT rowid FROM abc WHERE abc MATCH 'a'; +} {1 2} + +do_execsql_test 3.3 { + COMMIT; + SELECT rowid FROM abc WHERE abc MATCH 'a'; +} {1 2} finish_test diff --git a/manifest b/manifest index 1954e6998c..6b86af9d85 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Do\snot\sallow\srecursive\sCTEs\sthat\suse\saggregate\squeries\sin\sthe\srecursive\spart. -D 2015-07-05T22:15:10.026 +C Speed\sup\sseek\soperations\son\sfts5\sb-tree\sstructures. +D 2015-07-06T20:27:19.997 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 017bf0511d1b2dd1db5e16488fbf75a17b526cbc F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015 F ext/fts5/fts5_config.c b2456e9625bca41c51d54c363e369c6356895c90 F ext/fts5/fts5_expr.c d2e148345639c5a5583e0daa39a639bf298ae6a7 F ext/fts5/fts5_hash.c 219f4edd72e5cf95b19c33f1058809a18fad5229 -F ext/fts5/fts5_index.c 6224454702f852392cae8755c920f93b06b09283 +F ext/fts5/fts5_index.c d81cada8367eae5e5482860ccb6ae574eee3d74a F ext/fts5/fts5_main.c 37b0055cb4036c4b4bb4eb36e30ebd1c21c63939 F ext/fts5/fts5_storage.c 4cae85b5287b159d9d98174a4e70adf872b0930a F ext/fts5/fts5_tcl.c 85eb4e0d0fefa9420b78151496ad4599a1783e20 @@ -123,10 +123,10 @@ F ext/fts5/fts5_vocab.c 4e268a3fcbc099e50e335a1135be985a41ff6f7f F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 9553cce0757092d194307c2168d4edd100eab578 -F ext/fts5/test/fts5aa.test 0be21c89fd66b588db355a6398911fd875bdcc6c +F ext/fts5/test/fts5aa.test f7f057811eb0113d3259e059218bc85cef444280 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad -F ext/fts5/test/fts5ac.test 0990ae7497ebaea2ab5f7fd5caedd93a71a905fc -F ext/fts5/test/fts5ad.test 312f3c8ed9592533499c5b94d2059ae6382913a0 +F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c +F ext/fts5/test/fts5ad.test b2edee8b7de0c21d2c88f8a18c195034aad6952d F ext/fts5/test/fts5ae.test ddc558e3e3b52db0101f7541b2e3849b77052c92 F ext/fts5/test/fts5af.test c2501ec2b61d6b179c305f5d2b8782ab3d4f832a F ext/fts5/test/fts5ag.test ec3e119b728196620a31507ef503c455a7a73505 @@ -135,7 +135,7 @@ F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 F ext/fts5/test/fts5al.test fc60ebeac9d8e366e71309d4c31fa72199d711d7 -F ext/fts5/test/fts5alter.test 78b63e088646dd623cacbdc1899a54d638dcf3d8 +F ext/fts5/test/fts5alter.test 6022c61467a82aa11c70822ccad22b328dcf0d04 F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e @@ -1364,7 +1364,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 8cf02090ce53ec150492d77d9e5e5f27665bd34f -R 2ced95f329e3743e78bec1f4f82122ca -U drh -Z 74748fde014fce363e747a5eecd57396 +P 6d2999afbc25b9c238e4028f637c10eaaf0ec75e +R d5f5e61506b7123fb097865134e2b1bc +U dan +Z 4783766915d09d11d606d5d129590086 diff --git a/manifest.uuid b/manifest.uuid index 0975b167fb..94b18735e0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -6d2999afbc25b9c238e4028f637c10eaaf0ec75e \ No newline at end of file +7b7da1eb435d321fc4283f6aa2161fa1e16f2cf3 \ No newline at end of file