From: shess Date: Tue, 22 Jul 2008 23:32:27 +0000 (+0000) Subject: fts2 functions for testing scripts. X-Git-Tag: version-3.6.10~721 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3d373110f02306dc1fcca7db5d63c3f51c2c4c3e;p=thirdparty%2Fsqlite.git fts2 functions for testing scripts. Backports (5340) from fts3. (CVS 5456) FossilOrigin-Name: 4e47394be9dfbf0f9309e55eb6c6a3a517ea2006 --- diff --git a/ext/fts2/fts2.c b/ext/fts2/fts2.c index 3279f655d8..a7a6fc86b0 100644 --- a/ext/fts2/fts2.c +++ b/ext/fts2/fts2.c @@ -1781,9 +1781,10 @@ typedef enum fulltext_statement { SEGDIR_MAX_INDEX_STMT, SEGDIR_SET_STMT, - SEGDIR_SELECT_STMT, + SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, + SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, MAX_STMT /* Always at end! */ @@ -1806,15 +1807,23 @@ static const char *const fulltext_zStatement[MAX_STMT] = { /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", - /* SEGDIR_SELECT */ + /* SEGDIR_SELECT_LEVEL */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? order by idx", /* SEGDIR_SPAN */ "select min(start_block), max(end_block) from %_segdir " " where level = ? and start_block <> 0", /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", + + /* NOTE(shess): The first three results of the following two + ** statements must match. + */ + /* SEGDIR_SELECT_SEGMENT */ + "select start_block, leaves_end_block, root from %_segdir " + " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ - "select root, leaves_end_block from %_segdir order by level desc, idx", + "select start_block, leaves_end_block, root from %_segdir " + " order by level desc, idx asc", }; /* @@ -5073,7 +5082,7 @@ static void leavesReaderReorder(LeavesReader *pLr, int nLr){ static int leavesReadersInit(fulltext_vtab *v, int iLevel, LeavesReader *pReaders, int *piReaders){ sqlite3_stmt *s; - int i, rc = sql_get_statement(v, SEGDIR_SELECT_STMT, &s); + int i, rc = sql_get_statement(v, SEGDIR_SELECT_LEVEL_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int(s, 1, iLevel); @@ -5611,8 +5620,8 @@ static int termSelect(fulltext_vtab *v, int iColumn, ** elements for given docids overwrite older elements. */ while( (rc = sqlite3_step(s))==SQLITE_ROW ){ - const char *pData = sqlite3_column_blob(s, 0); - const int nData = sqlite3_column_bytes(s, 0); + const char *pData = sqlite3_column_blob(s, 2); + const int nData = sqlite3_column_bytes(s, 2); const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, &doclist); @@ -5881,6 +5890,386 @@ static void snippetOffsetsFunc( } } +#ifdef SQLITE_TEST +/* Generate an error of the form ": ". If msg is NULL, +** pull the error from the context's db handle. +*/ +static void generateError(sqlite3_context *pContext, + const char *prefix, const char *msg){ + char buf[512]; + if( msg==NULL ) msg = sqlite3_errmsg(sqlite3_context_db_handle(pContext)); + sqlite3_snprintf(sizeof(buf), buf, "%s: %s", prefix, msg); + sqlite3_result_error(pContext, buf, -1); +} + +/* Helper function to collect the set of terms in the segment into +** pTerms. The segment is defined by the leaf nodes between +** iStartBlockid and iEndBlockid, inclusive, or by the contents of +** pRootData if iStartBlockid is 0 (in which case the entire segment +** fit in a leaf). +*/ +static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, + fts2Hash *pTerms){ + const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); + const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); + const char *pRootData = sqlite3_column_blob(s, 2); + const int nRootData = sqlite3_column_bytes(s, 2); + LeavesReader reader; + int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, + pRootData, nRootData, &reader); + if( rc!=SQLITE_OK ) return rc; + + while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ + const char *pTerm = leavesReaderTerm(&reader); + const int nTerm = leavesReaderTermBytes(&reader); + void *oldValue = sqlite3Fts2HashFind(pTerms, pTerm, nTerm); + void *newValue = (void *)((char *)oldValue+1); + + /* From the comment before sqlite3Fts2HashInsert in fts2_hash.c, + ** the data value passed is returned in case of malloc failure. + */ + if( newValue==sqlite3Fts2HashInsert(pTerms, pTerm, nTerm, newValue) ){ + rc = SQLITE_NOMEM; + }else{ + rc = leavesReaderStep(v, &reader); + } + } + + leavesReaderDestroy(&reader); + return rc; +} + +/* Helper function to build the result string for dump_terms(). */ +static int generateTermsResult(sqlite3_context *pContext, fts2Hash *pTerms){ + int iTerm, nTerms, nResultBytes, iByte; + char *result; + TermData *pData; + fts2HashElem *e; + + /* Iterate pTerms to generate an array of terms in pData for + ** sorting. + */ + nTerms = fts2HashCount(pTerms); + assert( nTerms>0 ); + pData = sqlite3_malloc(nTerms*sizeof(TermData)); + if( pData==NULL ) return SQLITE_NOMEM; + + nResultBytes = 0; + for(iTerm = 0, e = fts2HashFirst(pTerms); e; iTerm++, e = fts2HashNext(e)){ + nResultBytes += fts2HashKeysize(e)+1; /* Term plus trailing space */ + assert( iTerm0 ); /* nTerms>0, nResultsBytes must be, too. */ + result = sqlite3_malloc(nResultBytes); + if( result==NULL ){ + sqlite3_free(pData); + return SQLITE_NOMEM; + } + + if( nTerms>1 ) qsort(pData, nTerms, sizeof(*pData), termDataCmp); + + /* Read the terms in order to build the result. */ + iByte = 0; + for(iTerm=0; iTerm0 ){ + rc = generateTermsResult(pContext, &terms); + if( rc==SQLITE_NOMEM ){ + generateError(pContext, "dump_terms", "out of memory"); + }else{ + assert( rc==SQLITE_OK ); + } + }else if( argc==3 ){ + /* The specific segment asked for could not be found. */ + generateError(pContext, "dump_terms", "segment not found"); + }else{ + /* No segments found. */ + /* TODO(shess): It should be impossible to reach this. This + ** case can only happen for an empty table, in which case + ** SQLite has no rows to call this function on. + */ + sqlite3_result_null(pContext); + } + } + sqlite3Fts2HashClear(&terms); + } +} + +/* Expand the DL_DEFAULT doclist in pData into a text result in +** pContext. +*/ +static void createDoclistResult(sqlite3_context *pContext, + const char *pData, int nData){ + DataBuffer dump; + DLReader dlReader; + + assert( pData!=NULL && nData>0 ); + + dataBufferInit(&dump, 0); + dlrInit(&dlReader, DL_DEFAULT, pData, nData); + for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){ + char buf[256]; + PLReader plReader; + + plrInit(&plReader, &dlReader); + if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ + sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); + dataBufferAppend(&dump, buf, strlen(buf)); + }else{ + int iColumn = plrColumn(&plReader); + + sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", + dlrDocid(&dlReader), iColumn); + dataBufferAppend(&dump, buf, strlen(buf)); + + for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){ + if( plrColumn(&plReader)!=iColumn ){ + iColumn = plrColumn(&plReader); + sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); + assert( dump.nData>0 ); + dump.nData--; /* Overwrite trailing space. */ + assert( dump.pData[dump.nData]==' '); + dataBufferAppend(&dump, buf, strlen(buf)); + } + if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ + sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", + plrPosition(&plReader), + plrStartOffset(&plReader), plrEndOffset(&plReader)); + }else if( DL_DEFAULT==DL_POSITIONS ){ + sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); + }else{ + assert( NULL=="Unhandled DL_DEFAULT value"); + } + dataBufferAppend(&dump, buf, strlen(buf)); + } + plrDestroy(&plReader); + + assert( dump.nData>0 ); + dump.nData--; /* Overwrite trailing space. */ + assert( dump.pData[dump.nData]==' '); + dataBufferAppend(&dump, "]] ", 3); + } + } + dlrDestroy(&dlReader); + + assert( dump.nData>0 ); + dump.nData--; /* Overwrite trailing space. */ + assert( dump.pData[dump.nData]==' '); + dump.pData[dump.nData] = '\0'; + assert( dump.nData>0 ); + + /* Passes ownership of dump's buffer to pContext. */ + sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); + dump.pData = NULL; + dump.nData = dump.nCapacity = 0; +} + +/* Implements dump_doclist() for use in inspecting the fts2 index from +** tests. TEXT result containing a string representation of the +** doclist for the indicated term. dump_doclist(t, term, level, idx) +** dumps the doclist for term from the segment specified by level, idx +** (in %_segdir), while dump_doclist(t, term) dumps the logical +** doclist for the term across all segments. The per-segment doclist +** can contain deletions, while the full-index doclist will not +** (deletions are omitted). +** +** Result formats differ with the setting of DL_DEFAULTS. Examples: +** +** DL_DOCIDS: [1] [3] [7] +** DL_POSITIONS: [1 0[0 4] 1[17]] [3 1[5]] +** DL_POSITIONS_OFFSETS: [1 0[0,0,3 4,23,26] 1[17,102,105]] [3 1[5,20,23]] +** +** In each case the number after the outer '[' is the docid. In the +** latter two cases, the number before the inner '[' is the column +** associated with the values within. For DL_POSITIONS the numbers +** within are the positions, for DL_POSITIONS_OFFSETS they are the +** position, the start offset, and the end offset. +*/ +static void dumpDoclistFunc( + sqlite3_context *pContext, + int argc, sqlite3_value **argv +){ + fulltext_cursor *pCursor; + if( argc!=2 && argc!=4 ){ + generateError(pContext, "dump_doclist", "incorrect arguments"); + }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || + sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ + generateError(pContext, "dump_doclist", "illegal first argument"); + }else if( sqlite3_value_text(argv[1])==NULL || + sqlite3_value_text(argv[1])[0]=='\0' ){ + generateError(pContext, "dump_doclist", "empty second argument"); + }else{ + const char *pTerm = (const char *)sqlite3_value_text(argv[1]); + const int nTerm = strlen(pTerm); + fulltext_vtab *v; + int rc; + DataBuffer doclist; + + memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); + v = cursor_vtab(pCursor); + + dataBufferInit(&doclist, 0); + + /* termSelect() yields the same logical doclist that queries are + ** run against. + */ + if( argc==2 ){ + rc = termSelect(v, v->nColumn, pTerm, nTerm, 0, DL_DEFAULT, &doclist); + }else{ + sqlite3_stmt *s = NULL; + + /* Get our specific segment's information. */ + rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s); + if( rc==SQLITE_OK ){ + rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[2])); + if( rc==SQLITE_OK ){ + rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[3])); + } + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_step(s); + + if( rc==SQLITE_DONE ){ + dataBufferDestroy(&doclist); + generateError(pContext, "dump_doclist", "segment not found"); + return; + } + + /* Found a segment, load it into doclist. */ + if( rc==SQLITE_ROW ){ + const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); + const char *pData = sqlite3_column_blob(s, 2); + const int nData = sqlite3_column_bytes(s, 2); + + /* loadSegment() is used by termSelect() to load each + ** segment's data. + */ + rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, 0, + &doclist); + if( rc==SQLITE_OK ){ + rc = sqlite3_step(s); + + /* Should not have more than one matching segment. */ + if( rc!=SQLITE_DONE ){ + sqlite3_reset(s); + dataBufferDestroy(&doclist); + generateError(pContext, "dump_doclist", "invalid segdir"); + return; + } + rc = SQLITE_OK; + } + } + } + + sqlite3_reset(s); + } + + if( rc==SQLITE_OK ){ + if( doclist.nData>0 ){ + createDoclistResult(pContext, doclist.pData, doclist.nData); + }else{ + /* TODO(shess): This can happen if the term is not present, or + ** if all instances of the term have been deleted and this is + ** an all-index dump. It may be interesting to distinguish + ** these cases. + */ + sqlite3_result_text(pContext, "", 0, SQLITE_STATIC); + } + }else if( rc==SQLITE_NOMEM ){ + /* Handle out-of-memory cases specially because if they are + ** generated in fts2 code they may not be reflected in the db + ** handle. + */ + /* TODO(shess): Handle this more comprehensively. + ** sqlite3ErrStr() has what I need, but is internal. + */ + generateError(pContext, "dump_doclist", "out of memory"); + }else{ + generateError(pContext, "dump_doclist", NULL); + } + + dataBufferDestroy(&doclist); + } +} +#endif + /* ** This routine implements the xFindFunction method for the FTS2 ** virtual table. @@ -5898,6 +6287,20 @@ static int fulltextFindFunction( }else if( strcmp(zName,"offsets")==0 ){ *pxFunc = snippetOffsetsFunc; return 1; +#ifdef SQLITE_TEST + /* NOTE(shess): These functions are present only for testing + ** purposes. No particular effort is made to optimize their + ** execution or how they build their results. + */ + }else if( strcmp(zName,"dump_terms")==0 ){ + /* fprintf(stderr, "Found dump_terms\n"); */ + *pxFunc = dumpTermsFunc; + return 1; + }else if( strcmp(zName,"dump_doclist")==0 ){ + /* fprintf(stderr, "Found dump_doclist\n"); */ + *pxFunc = dumpDoclistFunc; + return 1; +#endif } return 0; } @@ -6017,6 +6420,10 @@ int sqlite3Fts2Init(sqlite3 *db){ && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) +#ifdef SQLITE_TEST + && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) +#endif ){ return sqlite3_create_module_v2( db, "fts2", &fts2Module, (void *)pHash, hashDestroy diff --git a/manifest b/manifest index 20e990b6fc..7ea48d4c7e 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sprefix\ssearch\sfrom\sO(N*M)\sto\sO(NlogM).\r\nBackports\s(4599)\sfrom\sfts3.\s(CVS\s5455) -D 2008-07-22T23:08:40 +C fts2\sfunctions\sfor\stesting\sscripts.\r\nBackports\s(5340)\sfrom\sfts3.\s(CVS\s5456) +D 2008-07-22T23:32:28 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in 77ff156061bb870aa0a8b3d545c670d08070f7e6 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -39,7 +39,7 @@ F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9 F ext/fts2/README.tokenizers 21e3684ea5a095b55d70f6878b4ce6af5932dfb7 F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts2/fts2.c 7a2e88d110d059c986234c3d7734133d59a709ef +F ext/fts2/fts2.c af6d11365c4ae66be9779dde0948887e92d8b867 F ext/fts2/fts2.h da5f76c65163301d1068a971fd32f4119e3c95fa F ext/fts2/fts2_hash.c 2689e42e1107ea67207f725cf69cf8972d00cf93 F ext/fts2/fts2_hash.h 9a5b1be94664139f93217a0770d7144425cffb3a @@ -315,6 +315,7 @@ F test/fts2l.test 3333336621524cf7d60bb62d6ef6ab69647866ed F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51 F test/fts2n.test 12b9c5352128cebd1c6b8395e43788d4b09087c2 F test/fts2o.test c6a79567d85403dc4d15b89f3f9799a0a0aef065 +F test/fts2p.test 4b48c35c91e6a7dbf5ac8d1e5691823cc999aafb F test/fts2token.test d8070b241a15ff13592a9ae4a8b7c171af6f445a F test/fts3.test 6ee4c38b0864583c80e82a2d4372f63aae8b10c7 F test/fts3aa.test 432d1d5c41939bb5405d4d6c80a9ec759b363393 @@ -609,7 +610,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81 F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 1dbced29de5f59ba2ebf877edcadf171540374d1 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e -P ecf2dec66cb979cb7d8db3b7ce5c64cab57fe2bb -R 89694177ea5ff7c878e6daa15283840d +P 3f614453d2d7c753a5963b027fe8618b50b4f6b9 +R 76720fcdb90c8738e414944367c3b0e7 U shess -Z 48033bdd7abd5465e3195f281a547d67 +Z 807f812f3e691d0e714ba8b89e45124c diff --git a/manifest.uuid b/manifest.uuid index 8e4895f1e4..c80705daff 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3f614453d2d7c753a5963b027fe8618b50b4f6b9 \ No newline at end of file +4e47394be9dfbf0f9309e55eb6c6a3a517ea2006 \ No newline at end of file diff --git a/test/fts2p.test b/test/fts2p.test new file mode 100644 index 0000000000..38a8079d8f --- /dev/null +++ b/test/fts2p.test @@ -0,0 +1,357 @@ +# 2008 June 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file exercises some new testing functions in the FTS2 module, +# and then uses them to do some basic tests that FTS2 is internally +# working as expected. +# +# $Id: fts2p.test,v 1.1 2008/07/22 23:32:28 shess Exp $ +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If SQLITE_ENABLE_FTS2 is not defined, omit this file. +ifcapable !fts2 { + finish_test + return +} + +#************************************************************************* +# Probe to see if support for these functions is compiled in. +# TODO(shess): Change main.mk to do the right thing and remove this test. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts2(c); + INSERT INTO t1 (rowid, c) VALUES (1, 'x'); +} + +set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1} +set r {1 {unable to use function dump_terms in the requested context}} +if {[catchsql $s]==$r} { + finish_test + return +} + +#************************************************************************* +# Test that the new functions give appropriate errors. +do_test fts2p-0.0 { + catchsql { + SELECT dump_terms(t1, 1) FROM t1 LIMIT 1; + } +} {1 {dump_terms: incorrect arguments}} + +do_test fts2p-0.1 { + catchsql { + SELECT dump_terms(t1, 0, 0, 0) FROM t1 LIMIT 1; + } +} {1 {dump_terms: incorrect arguments}} + +do_test fts2p-0.2 { + catchsql { + SELECT dump_terms(1, t1) FROM t1 LIMIT 1; + } +} {1 {unable to use function dump_terms in the requested context}} + +do_test fts2p-0.3 { + catchsql { + SELECT dump_terms(t1, 16, 16) FROM t1 LIMIT 1; + } +} {1 {dump_terms: segment not found}} + +do_test fts2p-0.4 { + catchsql { + SELECT dump_doclist(t1) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: incorrect arguments}} + +do_test fts2p-0.5 { + catchsql { + SELECT dump_doclist(t1, NULL) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: empty second argument}} + +do_test fts2p-0.6 { + catchsql { + SELECT dump_doclist(t1, '') FROM t1 LIMIT 1; + } +} {1 {dump_doclist: empty second argument}} + +do_test fts2p-0.7 { + catchsql { + SELECT dump_doclist(t1, 'a', 0) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: incorrect arguments}} + +do_test fts2p-0.8 { + catchsql { + SELECT dump_doclist(t1, 'a', 0, 0, 0) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: incorrect arguments}} + +do_test fts2p-0.9 { + catchsql { + SELECT dump_doclist(t1, 'a', 16, 16) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: segment not found}} + +#************************************************************************* +# Utility function to check for the expected terms in the segment +# level/index. _all version does same but for entire index. +proc check_terms {test level index terms} { + # TODO(shess): Figure out why uplevel in do_test can't catch + # $level and $index directly. + set ::level $level + set ::index $index + do_test $test.terms { + execsql { + SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1; + } + } [list $terms] +} +proc check_terms_all {test terms} { + do_test $test.terms { + execsql { + SELECT dump_terms(t1) FROM t1 LIMIT 1; + } + } [list $terms] +} + +# Utility function to check for the expected doclist for the term in +# segment level/index. _all version does same for entire index. +proc check_doclist {test level index term doclist} { + # TODO(shess): Again, why can't the non-:: versions work? + set ::term $term + set ::level $level + set ::index $index + do_test $test { + execsql { + SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1; + } + } [list $doclist] +} +proc check_doclist_all {test term doclist} { + set ::term $term + do_test $test { + execsql { + SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1; + } + } [list $doclist] +} + +#************************************************************************* +# Test the segments resulting from straight-forward inserts. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts2(c); + INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test'); + INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test'); + INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test'); +} + +# Check for expected segments and expected matches. +do_test fts2p-1.0.segments { + execsql { + SELECT level, idx FROM t1_segdir ORDER BY level, idx; + } +} {0 0 0 1 0 2} +do_test fts2p-1.0.matches { + execsql { + SELECT OFFSETS(t1) FROM t1 + WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid; + } +} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \ + {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ + {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] + +# Check the specifics of the segments constructed. +# Logical view of entire index. +check_terms_all fts2p-1.0.1 {a is test that this was} +check_doclist_all fts2p-1.0.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]} +check_doclist_all fts2p-1.0.1.2 is {[1 0[1]] [3 0[1]]} +check_doclist_all fts2p-1.0.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]} +check_doclist_all fts2p-1.0.1.4 that {[2 0[0]]} +check_doclist_all fts2p-1.0.1.5 this {[1 0[0]] [3 0[0]]} +check_doclist_all fts2p-1.0.1.6 was {[2 0[1]]} + +# Segment 0,0 +check_terms fts2p-1.0.2 0 0 {a is test this} +check_doclist fts2p-1.0.2.1 0 0 a {[1 0[2]]} +check_doclist fts2p-1.0.2.2 0 0 is {[1 0[1]]} +check_doclist fts2p-1.0.2.3 0 0 test {[1 0[3]]} +check_doclist fts2p-1.0.2.4 0 0 this {[1 0[0]]} + +# Segment 0,1 +check_terms fts2p-1.0.3 0 1 {a test that was} +check_doclist fts2p-1.0.3.1 0 1 a {[2 0[2]]} +check_doclist fts2p-1.0.3.2 0 1 test {[2 0[3]]} +check_doclist fts2p-1.0.3.3 0 1 that {[2 0[0]]} +check_doclist fts2p-1.0.3.4 0 1 was {[2 0[1]]} + +# Segment 0,2 +check_terms fts2p-1.0.4 0 2 {a is test this} +check_doclist fts2p-1.0.4.1 0 2 a {[3 0[2]]} +check_doclist fts2p-1.0.4.2 0 2 is {[3 0[1]]} +check_doclist fts2p-1.0.4.3 0 2 test {[3 0[3]]} +check_doclist fts2p-1.0.4.4 0 2 this {[3 0[0]]} + +#************************************************************************* +# Test the segments resulting from inserts followed by a delete. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts2(c); + INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test'); + INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test'); + INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test'); + DELETE FROM t1 WHERE rowid = 1; +} + +do_test fts2p-1.1.segments { + execsql { + SELECT level, idx FROM t1_segdir ORDER BY level, idx; + } +} {0 0 0 1 0 2 0 3} +do_test fts2p-1.1.matches { + execsql { + SELECT OFFSETS(t1) FROM t1 + WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid; + } +} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}} + +check_terms_all fts2p-1.1.1 {a is test that this was} +check_doclist_all fts2p-1.1.1.1 a {[2 0[2]] [3 0[2]]} +check_doclist_all fts2p-1.1.1.2 is {[3 0[1]]} +check_doclist_all fts2p-1.1.1.3 test {[2 0[3]] [3 0[3]]} +check_doclist_all fts2p-1.1.1.4 that {[2 0[0]]} +check_doclist_all fts2p-1.1.1.5 this {[3 0[0]]} +check_doclist_all fts2p-1.1.1.6 was {[2 0[1]]} + +check_terms fts2p-1.1.2 0 0 {a is test this} +check_doclist fts2p-1.1.2.1 0 0 a {[1 0[2]]} +check_doclist fts2p-1.1.2.2 0 0 is {[1 0[1]]} +check_doclist fts2p-1.1.2.3 0 0 test {[1 0[3]]} +check_doclist fts2p-1.1.2.4 0 0 this {[1 0[0]]} + +check_terms fts2p-1.1.3 0 1 {a test that was} +check_doclist fts2p-1.1.3.1 0 1 a {[2 0[2]]} +check_doclist fts2p-1.1.3.2 0 1 test {[2 0[3]]} +check_doclist fts2p-1.1.3.3 0 1 that {[2 0[0]]} +check_doclist fts2p-1.1.3.4 0 1 was {[2 0[1]]} + +check_terms fts2p-1.1.4 0 2 {a is test this} +check_doclist fts2p-1.1.4.1 0 2 a {[3 0[2]]} +check_doclist fts2p-1.1.4.2 0 2 is {[3 0[1]]} +check_doclist fts2p-1.1.4.3 0 2 test {[3 0[3]]} +check_doclist fts2p-1.1.4.4 0 2 this {[3 0[0]]} + +check_terms fts2p-1.1.5 0 3 {a is test this} +check_doclist fts2p-1.1.5.1 0 3 a {[1]} +check_doclist fts2p-1.1.5.2 0 3 is {[1]} +check_doclist fts2p-1.1.5.3 0 3 test {[1]} +check_doclist fts2p-1.1.5.4 0 3 this {[1]} + +#************************************************************************* +# Test results when all references to certain tokens are deleted. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts2(c); + INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test'); + INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test'); + INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test'); + DELETE FROM t1 WHERE rowid IN (1,3); +} + +# Still 4 segments because 0,3 will contain deletes for rowid 1 and 3. +do_test fts2p-1.2.segments { + execsql { + SELECT level, idx FROM t1_segdir ORDER BY level, idx; + } +} {0 0 0 1 0 2 0 3} +do_test fts2p-1.2.matches { + execsql { + SELECT OFFSETS(t1) FROM t1 + WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid; + } +} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}} + +check_terms_all fts2p-1.2.1 {a is test that this was} +check_doclist_all fts2p-1.2.1.1 a {[2 0[2]]} +check_doclist_all fts2p-1.2.1.2 is {} +check_doclist_all fts2p-1.2.1.3 test {[2 0[3]]} +check_doclist_all fts2p-1.2.1.4 that {[2 0[0]]} +check_doclist_all fts2p-1.2.1.5 this {} +check_doclist_all fts2p-1.2.1.6 was {[2 0[1]]} + +check_terms fts2p-1.2.2 0 0 {a is test this} +check_doclist fts2p-1.2.2.1 0 0 a {[1 0[2]]} +check_doclist fts2p-1.2.2.2 0 0 is {[1 0[1]]} +check_doclist fts2p-1.2.2.3 0 0 test {[1 0[3]]} +check_doclist fts2p-1.2.2.4 0 0 this {[1 0[0]]} + +check_terms fts2p-1.2.3 0 1 {a test that was} +check_doclist fts2p-1.2.3.1 0 1 a {[2 0[2]]} +check_doclist fts2p-1.2.3.2 0 1 test {[2 0[3]]} +check_doclist fts2p-1.2.3.3 0 1 that {[2 0[0]]} +check_doclist fts2p-1.2.3.4 0 1 was {[2 0[1]]} + +check_terms fts2p-1.2.4 0 2 {a is test this} +check_doclist fts2p-1.2.4.1 0 2 a {[3 0[2]]} +check_doclist fts2p-1.2.4.2 0 2 is {[3 0[1]]} +check_doclist fts2p-1.2.4.3 0 2 test {[3 0[3]]} +check_doclist fts2p-1.2.4.4 0 2 this {[3 0[0]]} + +check_terms fts2p-1.2.5 0 3 {a is test this} +check_doclist fts2p-1.2.5.1 0 3 a {[1] [3]} +check_doclist fts2p-1.2.5.2 0 3 is {[1] [3]} +check_doclist fts2p-1.2.5.3 0 3 test {[1] [3]} +check_doclist fts2p-1.2.5.4 0 3 this {[1] [3]} + +#************************************************************************* +# Test results when everything is optimized manually. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts2(c); + INSERT INTO t1 (rowid, c) VALUES (1, 'This is a test'); + INSERT INTO t1 (rowid, c) VALUES (2, 'That was a test'); + INSERT INTO t1 (rowid, c) VALUES (3, 'This is a test'); + DELETE FROM t1 WHERE rowid IN (1,3); + DROP TABLE IF EXISTS t1old; + ALTER TABLE t1 RENAME TO t1old; + CREATE VIRTUAL TABLE t1 USING fts2(c); + INSERT INTO t1 (rowid, c) SELECT rowid, c FROM t1old; + DROP TABLE t1old; +} + +# Should be a single optimal segment with the same logical results. +do_test fts2p-1.3.segments { + execsql { + SELECT level, idx FROM t1_segdir ORDER BY level, idx; + } +} {0 0} +do_test fts2p-1.3.matches { + execsql { + SELECT OFFSETS(t1) FROM t1 + WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY rowid; + } +} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}} + +check_terms_all fts2p-1.3.1 {a test that was} +check_doclist_all fts2p-1.3.1.1 a {[2 0[2]]} +check_doclist_all fts2p-1.3.1.2 test {[2 0[3]]} +check_doclist_all fts2p-1.3.1.3 that {[2 0[0]]} +check_doclist_all fts2p-1.3.1.4 was {[2 0[1]]} + +check_terms fts2p-1.3.2 0 0 {a test that was} +check_doclist fts2p-1.3.2.1 0 0 a {[2 0[2]]} +check_doclist fts2p-1.3.2.2 0 0 test {[2 0[3]]} +check_doclist fts2p-1.3.2.3 0 0 that {[2 0[0]]} +check_doclist fts2p-1.3.2.4 0 0 was {[2 0[1]]} + +finish_test