From c51f5b3fd983630a707b710095f6d65f7dca404d Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 22 Apr 2015 20:14:46 +0000 Subject: [PATCH] Fix problems with doclist-indexes involving very large rowids. FossilOrigin-Name: 1e5994097e4c740c5173ea9718c3935728fdb86f --- ext/fts5/fts5_index.c | 16 +++----- ext/fts5/test/fts5dlidx.test | 71 +++++++++++++++++++++--------------- manifest | 14 +++---- manifest.uuid | 2 +- 4 files changed, 55 insertions(+), 48 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 05c8d68319..ee8eb40630 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -2993,8 +2993,9 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ /* Increase the leaves written counter */ pWriter->nLeafWritten++; - /* The new leaf holds no terms */ + /* The new leaf holds no terms or rowids */ pWriter->bFirstTermInPage = 1; + pWriter->bFirstRowidInPage = 1; } /* @@ -3067,7 +3068,6 @@ static void fts5WriteAppendTerm( /* If the current leaf page is full, flush it to disk. */ if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; } } @@ -3106,7 +3106,6 @@ static void fts5WriteAppendRowid( if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; } } } @@ -3121,7 +3120,6 @@ static void fts5WriteAppendPoslistInt( fts5BufferAppendVarint(&p->rc, &pPage->buf, iVal); if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; } } } @@ -3148,7 +3146,6 @@ static void fts5WriteAppendPoslistData( a += nCopy; n -= nCopy; fts5WriteFlushLeaf(p, pWriter); - pWriter->bFirstRowidInPage = 1; } if( n>0 ){ fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); @@ -3667,7 +3664,8 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ i64 iRowid = 0; i64 iDelta = 0; int iOff = 0; - int bFirstDocid = 0; + + writer.bFirstRowidInPage = 0; /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current @@ -3681,10 +3679,10 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ nCopy += nPos; iRowid += iDelta; - if( bFirstDocid ){ + if( writer.bFirstRowidInPage ){ fts5PutU16(&pBuf->p[0], pBuf->n); /* first docid on page */ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iRowid); - bFirstDocid = 0; + writer.bFirstRowidInPage = 0; fts5WriteDlidxAppend(p, &writer, iRowid); }else{ pBuf->n += sqlite3PutVarint(&pBuf->p[pBuf->n], iDelta); @@ -3708,7 +3706,6 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); - assert( n>=nSpace ); } assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); @@ -3719,7 +3716,6 @@ static void fts5FlushOneHash(Fts5Index *p, int iHash, int *pnLeaf){ } if( iPos>=nCopy ) break; } - bFirstDocid = 1; } iOff += nCopy; } diff --git a/ext/fts5/test/fts5dlidx.test b/ext/fts5/test/fts5dlidx.test index 0bfc3f331e..6a23622ce0 100644 --- a/ext/fts5/test/fts5dlidx.test +++ b/ext/fts5/test/fts5dlidx.test @@ -31,50 +31,61 @@ do_execsql_test 1.0 { INSERT INTO t1(t1, rank) VALUES('pgsz', 32); } -foreach {tn spc1 spc2 mul} { - 1 10 100 1000 - 2 1 1 128 -} { +# This test populates the FTS5 table containing $nEntry entries. Rows are +# numbered from 0 to ($nEntry-1). The rowid for row $i is: +# +# ($iFirst + $i*$nStep) +# +# Each document is of the form "a b c a b c a b c...". If the row number ($i) +# is an integer multiple of $spc1, then an "x" token is appended to the +# document. If it is *also* a multiple of $spc2, a "y" token is also appended. +# +proc do_dlidx_test1 {tn spc1 spc2 nEntry iFirst nStep} { + + do_execsql_test $tn.0 { DELETE FROM t1 } + set xdoc [list] set ydoc [list] - - execsql { DELETE FROM t1 } - do_test 1.$tn.1 { - - execsql BEGIN - for {set i 0} {$i < 10000} {incr i} { - set rowid [expr $i * $mul] - set doc "a b c a b c a b c a b c a b c" - if {($i % $spc1)==0} { - lappend xdoc $rowid - append doc " x" - if {($i % $spc2)==0} { - lappend ydoc $rowid - append doc " y" - } + execsql BEGIN + for {set i 0} {$i < $nEntry} {incr i} { + set rowid [expr $i * $nStep] + set doc [string trim [string repeat "a b c " 100]] + if {($i % $spc1)==0} { + lappend xdoc $rowid + append doc " x" + if {($i % $spc2)==0} { + lappend ydoc $rowid + append doc " y" } - execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } } - execsql COMMIT + execsql { INSERT INTO t1(rowid, x) VALUES($rowid, $doc) } + } + execsql COMMIT + + do_test $tn.1 { execsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {} - do_execsql_test 1.$tn.2 { INSERT INTO t1(t1) VALUES('integrity-check') } - - do_fb_test 1.$tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc - do_fb_test 1.$tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc + do_fb_test $tn.3.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND x' } $xdoc + do_fb_test $tn.3.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'x AND a' } $xdoc - do_fb_test 1.$tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc - do_fb_test 1.$tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc + do_fb_test $tn.4.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a AND y' } $ydoc + do_fb_test $tn.4.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'y AND a' } $ydoc - do_fb_test 1.$tn.5.1 { + do_fb_test $tn.5.1 { SELECT rowid FROM t1 WHERE t1 MATCH 'a + b + c + x' } $xdoc - do_fb_test 1.$tn.5.2 { + do_fb_test $tn.5.2 { SELECT rowid FROM t1 WHERE t1 MATCH 'b + c + x + y' } $ydoc - } +do_dlidx_test1 1.1 10 100 10000 0 1000 +do_dlidx_test1 1.2 10 10 10000 0 128 +do_dlidx_test1 1.3 10 10 100 0 36028797018963970 +do_dlidx_test1 1.3 10 10 50 0 150000000000000000 + + + finish_test diff --git a/manifest b/manifest index 151780783a..7b1af793d3 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\sthis\sbranch\swith\slatest\strunk\schanges. -D 2015-04-22T09:40:35.867 +C Fix\sproblems\swith\sdoclist-indexes\sinvolving\svery\slarge\srowids. +D 2015-04-22T20:14:46.893 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in faaf75b89840659d74501bea269c7e33414761c1 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23 F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894 F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98 F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529 -F ext/fts5/fts5_index.c 6ae86ef3f266c303cbf4a04fe63e8da54d91cd09 +F ext/fts5/fts5_index.c 446f70d131e8d12a464f2fb7a4d0348e8334f45f F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541 F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b @@ -136,7 +136,7 @@ F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517 F ext/fts5/test/fts5corrupt.test dbdcfe75749ed2f2eb3915cf68fd55d3dc3b058d -F ext/fts5/test/fts5dlidx.test 710d1eaf44e6fbb09dfa73b7fd488227d8cc751a +F ext/fts5/test/fts5dlidx.test 748a84ceb74a4154725096a26dfa854260b0182f F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c F ext/fts5/test/fts5eb.test 728a1f23f263548f5c29b29dfb851b5f2dbe723e F ext/fts5/test/fts5fault1.test ed71717a479bef32d05f02d9c48691011d160d4d @@ -1300,7 +1300,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a21d60cb2ac6463c012d82d1970d90da5da2a14a 2cb945116e7a5b78741b19839899826b539d5868 -R ce8dc868a2df13b90cea4f2e6b631af2 +P 9797482ded7de985e3b20aedec5e4d81f55065c8 +R 0cda6877e1c22250889225fbc6083da5 U dan -Z da83630e09cb54023a2663ea2cafcf24 +Z 27e6fd1a7355db699b5f202767df6e93 diff --git a/manifest.uuid b/manifest.uuid index 98e5186972..2744268eb0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -9797482ded7de985e3b20aedec5e4d81f55065c8 \ No newline at end of file +1e5994097e4c740c5173ea9718c3935728fdb86f \ No newline at end of file -- 2.47.3