From: shess Date: Thu, 3 Jul 2008 19:53:21 +0000 (+0000) Subject: fts3 functions for testing scripts. These are a first step towards X-Git-Tag: version-3.6.10~837 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6c106e3f3b3faaf11778ce579a24ae0ba5a5b7be;p=thirdparty%2Fsqlite.git fts3 functions for testing scripts. These are a first step towards being able to write test script which verify that fts3 is internally building indices in the expected way. Both new functions are only defined if fts3.c is compiled with SQLITE_TEST defined, as when building testfixture. These functions are not intended to be part of the exposed fts3 API. dump_terms() generates a TEXT result of all the terms in the index (or a specified segment), sorted and joined with spaces. dump_doclist() generates a TEXT representation of the doclist associated with a given term in the index (or a specified segment). (CVS 5340) FossilOrigin-Name: a48e3d95f7a656285e959cef595cbe6d53428ad9 --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 5915ff622b..da46615952 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -1912,9 +1912,10 @@ typedef enum fulltext_statement { SEGDIR_MAX_INDEX_STMT, SEGDIR_SET_STMT, - SEGDIR_SELECT_STMT, + SEGDIR_SELECT_LEVEL_STMT, SEGDIR_SPAN_STMT, SEGDIR_DELETE_STMT, + SEGDIR_SELECT_SEGMENT_STMT, SEGDIR_SELECT_ALL_STMT, MAX_STMT /* Always at end! */ @@ -1938,15 +1939,24 @@ static const char *const fulltext_zStatement[MAX_STMT] = { /* SEGDIR_MAX_INDEX */ "select max(idx) from %_segdir where level = ?", /* SEGDIR_SET */ "insert into %_segdir values (?, ?, ?, ?, ?, ?)", - /* SEGDIR_SELECT */ + /* SEGDIR_SELECT_LEVEL */ "select start_block, leaves_end_block, root from %_segdir " " where level = ? order by idx", /* SEGDIR_SPAN */ "select min(start_block), max(end_block) from %_segdir " " where level = ? 
and start_block <> 0", /* SEGDIR_DELETE */ "delete from %_segdir where level = ?", + + /* NOTE(shess): The first three results of the following two + ** statements must match. + */ + /* SEGDIR_SELECT_SEGMENT */ + "select start_block, leaves_end_block, root from %_segdir " + " where level = ? and idx = ?", /* SEGDIR_SELECT_ALL */ - "select root, leaves_end_block from %_segdir order by level desc, idx", + "select start_block, leaves_end_block, root from %_segdir " + " order by level desc, idx asc", + }; /* @@ -5409,7 +5419,7 @@ static void leavesReaderReorder(LeavesReader *pLr, int nLr){ static int leavesReadersInit(fulltext_vtab *v, int iLevel, LeavesReader *pReaders, int *piReaders){ sqlite3_stmt *s; - int i, rc = sql_get_statement(v, SEGDIR_SELECT_STMT, &s); + int i, rc = sql_get_statement(v, SEGDIR_SELECT_LEVEL_STMT, &s); if( rc!=SQLITE_OK ) return rc; rc = sqlite3_bind_int(s, 1, iLevel); @@ -5947,8 +5957,8 @@ static int termSelect(fulltext_vtab *v, int iColumn, ** elements for given docids overwrite older elements. */ while( (rc = sqlite3_step(s))==SQLITE_ROW ){ - const char *pData = sqlite3_column_blob(s, 0); - const int nData = sqlite3_column_bytes(s, 0); + const char *pData = sqlite3_column_blob(s, 2); + const int nData = sqlite3_column_bytes(s, 2); const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix, &doclist); @@ -6238,6 +6248,386 @@ static void snippetOffsetsFunc( } } +#ifdef SQLITE_TEST +/* Generate an error of the form "prefix: msg". If msg is NULL, +** pull the error from the context's db handle. +*/ +static void generateError(sqlite3_context *pContext, + const char *prefix, const char *msg){ + char buf[512]; + if( msg==NULL ) msg = sqlite3_errmsg(sqlite3_context_db_handle(pContext)); + sqlite3_snprintf(sizeof(buf), buf, "%s: %s", prefix, msg); + sqlite3_result_error(pContext, buf, -1); +} + +/* Helper function to collect the set of terms in the segment into +** pTerms. 
The segment is defined by the leaf nodes between +** iStartBlockid and iEndBlockid, inclusive, or by the contents of +** pRootData if iStartBlockid is 0 (in which case the entire segment +** fit in a leaf). +*/ +static int collectSegmentTerms(fulltext_vtab *v, sqlite3_stmt *s, + fts3Hash *pTerms){ + const sqlite_int64 iStartBlockid = sqlite3_column_int64(s, 0); + const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1); + const char *pRootData = sqlite3_column_blob(s, 2); + const int nRootData = sqlite3_column_bytes(s, 2); + LeavesReader reader; + int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid, + pRootData, nRootData, &reader); + if( rc!=SQLITE_OK ) return rc; + + while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){ + const char *pTerm = leavesReaderTerm(&reader); + const int nTerm = leavesReaderTermBytes(&reader); + void *oldValue = sqlite3Fts3HashFind(pTerms, pTerm, nTerm); + void *newValue = (void *)((char *)oldValue+1); + + /* From the comment before sqlite3Fts3HashInsert in fts3_hash.c, + ** the data value passed is returned in case of malloc failure. + */ + if( newValue==sqlite3Fts3HashInsert(pTerms, pTerm, nTerm, newValue) ){ + rc = SQLITE_NOMEM; + }else{ + rc = leavesReaderStep(v, &reader); + } + } + + leavesReaderDestroy(&reader); + return rc; +} + +/* Helper function to build the result string for dump_terms(). */ +static int generateTermsResult(sqlite3_context *pContext, fts3Hash *pTerms){ + int iTerm, nTerms, nResultBytes, iByte; + char *result; + TermData *pData; + fts3HashElem *e; + + /* Iterate pTerms to generate an array of terms in pData for + ** sorting. 
+ */ + nTerms = fts3HashCount(pTerms); + assert( nTerms>0 ); + pData = sqlite3_malloc(nTerms*sizeof(TermData)); + if( pData==NULL ) return SQLITE_NOMEM; + + nResultBytes = 0; + for(iTerm = 0, e = fts3HashFirst(pTerms); e; iTerm++, e = fts3HashNext(e)){ + nResultBytes += fts3HashKeysize(e)+1; /* Term plus trailing space */ + assert( iTerm0 ); /* nTerms>0, nResultsBytes must be, too. */ + result = sqlite3_malloc(nResultBytes); + if( result==NULL ){ + sqlite3_free(pData); + return SQLITE_NOMEM; + } + + if( nTerms>1 ) qsort(pData, nTerms, sizeof(*pData), termDataCmp); + + /* Read the terms in order to build the result. */ + iByte = 0; + for(iTerm=0; iTerm0 ){ + rc = generateTermsResult(pContext, &terms); + if( rc==SQLITE_NOMEM ){ + generateError(pContext, "dump_terms", "out of memory"); + }else{ + assert( rc==SQLITE_OK ); + } + }else if( argc==3 ){ + /* The specific segment asked for could not be found. */ + generateError(pContext, "dump_terms", "segment not found"); + }else{ + /* No segments found. */ + /* TODO(shess): It should be impossible to reach this. This + ** case can only happen for an empty table, in which case + ** SQLite has no rows to call this function on. + */ + sqlite3_result_null(pContext); + } + } + sqlite3Fts3HashClear(&terms); + } +} + +/* Expand the DL_DEFAULT doclist in pData into a text result in +** pContext. 
+*/ +static void createDoclistResult(sqlite3_context *pContext, + const char *pData, int nData){ + DataBuffer dump; + DLReader dlReader; + + assert( pData!=NULL && nData>0 ); + + dataBufferInit(&dump, 0); + dlrInit(&dlReader, DL_DEFAULT, pData, nData); + for( ; !dlrAtEnd(&dlReader); dlrStep(&dlReader) ){ + char buf[256]; + PLReader plReader; + + plrInit(&plReader, &dlReader); + if( DL_DEFAULT==DL_DOCIDS || plrAtEnd(&plReader) ){ + sqlite3_snprintf(sizeof(buf), buf, "[%lld] ", dlrDocid(&dlReader)); + dataBufferAppend(&dump, buf, strlen(buf)); + }else{ + int iColumn = plrColumn(&plReader); + + sqlite3_snprintf(sizeof(buf), buf, "[%lld %d[", + dlrDocid(&dlReader), iColumn); + dataBufferAppend(&dump, buf, strlen(buf)); + + for( ; !plrAtEnd(&plReader); plrStep(&plReader) ){ + if( plrColumn(&plReader)!=iColumn ){ + iColumn = plrColumn(&plReader); + sqlite3_snprintf(sizeof(buf), buf, "] %d[", iColumn); + assert( dump.nData>0 ); + dump.nData--; /* Overwrite trailing space. */ + assert( dump.pData[dump.nData]==' '); + dataBufferAppend(&dump, buf, strlen(buf)); + } + if( DL_DEFAULT==DL_POSITIONS_OFFSETS ){ + sqlite3_snprintf(sizeof(buf), buf, "%d,%d,%d ", + plrPosition(&plReader), + plrStartOffset(&plReader), plrEndOffset(&plReader)); + }else if( DL_DEFAULT==DL_POSITIONS ){ + sqlite3_snprintf(sizeof(buf), buf, "%d ", plrPosition(&plReader)); + }else{ + assert( NULL=="Unhandled DL_DEFAULT value"); + } + dataBufferAppend(&dump, buf, strlen(buf)); + } + plrDestroy(&plReader); + + assert( dump.nData>0 ); + dump.nData--; /* Overwrite trailing space. */ + assert( dump.pData[dump.nData]==' '); + dataBufferAppend(&dump, "]] ", 3); + } + } + dlrDestroy(&dlReader); + + assert( dump.nData>0 ); + dump.nData--; /* Overwrite trailing space. */ + assert( dump.pData[dump.nData]==' '); + dump.pData[dump.nData] = '\0'; + assert( dump.nData>0 ); + + /* Passes ownership of dump's buffer to pContext. 
*/ + sqlite3_result_text(pContext, dump.pData, dump.nData, sqlite3_free); + dump.pData = NULL; + dump.nData = dump.nCapacity = 0; +} + +/* Implements dump_doclist() for use in inspecting the fts3 index from +** tests. TEXT result containing a string representation of the +** doclist for the indicated term. dump_doclist(t, term, level, idx) +** dumps the doclist for term from the segment specified by level, idx +** (in %_segdir), while dump_doclist(t, term) dumps the logical +** doclist for the term across all segments. The per-segment doclist +** can contain deletions, while the full-index doclist will not +** (deletions are omitted). +** +** Result formats differ with the setting of DL_DEFAULT. Examples: +** +** DL_DOCIDS: [1] [3] [7] +** DL_POSITIONS: [1 0[0 4] 1[17]] [3 1[5]] +** DL_POSITIONS_OFFSETS: [1 0[0,0,3 4,23,26] 1[17,102,105]] [3 1[5,20,23]] +** +** In each case the number after the outer '[' is the docid. In the +** latter two cases, the number before the inner '[' is the column +** associated with the values within. For DL_POSITIONS the numbers +** within are the positions, for DL_POSITIONS_OFFSETS they are the +** position, the start offset, and the end offset. 
+*/ +static void dumpDoclistFunc( + sqlite3_context *pContext, + int argc, sqlite3_value **argv +){ + fulltext_cursor *pCursor; + if( argc!=2 && argc!=4 ){ + generateError(pContext, "dump_doclist", "incorrect arguments"); + }else if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || + sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ + generateError(pContext, "dump_doclist", "illegal first argument"); + }else if( sqlite3_value_text(argv[1])==NULL || + sqlite3_value_text(argv[1])[0]=='\0' ){ + generateError(pContext, "dump_doclist", "empty second argument"); + }else{ + const char *pTerm = (const char *)sqlite3_value_text(argv[1]); + const int nTerm = strlen(pTerm); + fulltext_vtab *v; + int rc; + DataBuffer doclist; + + memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); + v = cursor_vtab(pCursor); + + dataBufferInit(&doclist, 0); + + /* termSelect() yields the same logical doclist that queries are + ** run against. + */ + if( argc==2 ){ + rc = termSelect(v, v->nColumn, pTerm, nTerm, 0, DL_DEFAULT, &doclist); + }else{ + sqlite3_stmt *s = NULL; + + /* Get our specific segment's information. */ + rc = sql_get_statement(v, SEGDIR_SELECT_SEGMENT_STMT, &s); + if( rc==SQLITE_OK ){ + rc = sqlite3_bind_int(s, 1, sqlite3_value_int(argv[2])); + if( rc==SQLITE_OK ){ + rc = sqlite3_bind_int(s, 2, sqlite3_value_int(argv[3])); + } + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_step(s); + + if( rc==SQLITE_DONE ){ + dataBufferDestroy(&doclist); + generateError(pContext, "dump_doclist", "segment not found"); + return; + } + + /* Found a segment, load it into doclist. */ + if( rc==SQLITE_ROW ){ + const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1); + const char *pData = sqlite3_column_blob(s, 2); + const int nData = sqlite3_column_bytes(s, 2); + + /* loadSegment() is used by termSelect() to load each + ** segment's data. 
+ */ + rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, 0, + &doclist); + if( rc==SQLITE_OK ){ + rc = sqlite3_step(s); + + /* Should not have more than one matching segment. */ + if( rc!=SQLITE_DONE ){ + sqlite3_reset(s); + dataBufferDestroy(&doclist); + generateError(pContext, "dump_doclist", "invalid segdir"); + return; + } + rc = SQLITE_OK; + } + } + } + + sqlite3_reset(s); + } + + if( rc==SQLITE_OK ){ + if( doclist.nData>0 ){ + createDoclistResult(pContext, doclist.pData, doclist.nData); + }else{ + /* TODO(shess): This can happen if the term is not present, or + ** if all instances of the term have been deleted and this is + ** an all-index dump. It may be interesting to distinguish + ** these cases. + */ + sqlite3_result_text(pContext, "", 0, SQLITE_STATIC); + } + }else if( rc==SQLITE_NOMEM ){ + /* Handle out-of-memory cases specially because if they are + ** generated in fts3 code they may not be reflected in the db + ** handle. + */ + /* TODO(shess): Handle this more comprehensively. + ** sqlite3ErrStr() has what I need, but is internal. + */ + generateError(pContext, "dump_doclist", "out of memory"); + }else{ + generateError(pContext, "dump_doclist", NULL); + } + + dataBufferDestroy(&doclist); + } +} +#endif + /* ** This routine implements the xFindFunction method for the FTS3 ** virtual table. @@ -6255,6 +6645,20 @@ static int fulltextFindFunction( }else if( strcmp(zName,"offsets")==0 ){ *pxFunc = snippetOffsetsFunc; return 1; +#ifdef SQLITE_TEST + /* NOTE(shess): These functions are present only for testing + ** purposes. No particular effort is made to optimize their + ** execution or how they build their results. 
+ */ + }else if( strcmp(zName,"dump_terms")==0 ){ + /* fprintf(stderr, "Found dump_terms\n"); */ + *pxFunc = dumpTermsFunc; + return 1; + }else if( strcmp(zName,"dump_doclist")==0 ){ + /* fprintf(stderr, "Found dump_doclist\n"); */ + *pxFunc = dumpDoclistFunc; + return 1; +#endif } return 0; } @@ -6374,6 +6778,10 @@ int sqlite3Fts3Init(sqlite3 *db){ && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1)) +#ifdef SQLITE_TEST + && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_terms", -1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "dump_doclist", -1)) +#endif ){ return sqlite3_create_module_v2( db, "fts3", &fts3Module, (void *)pHash, hashDestroy diff --git a/manifest b/manifest index a264a69c41..0e81398c2a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\smemory\sleak\sthat\scan\soccur\sfollowing\sa\smalloc\sfailure.\s(CVS\s5339) -D 2008-07-02T16:10:46 +C fts3\sfunctions\sfor\stesting\sscripts.\s\sThese\sare\sa\sfirst\sstep\stowards\nbeing\sable\sto\swrite\stest\sscript\swhich\sverify\sthat\sfts3\sis\sinternally\nbuilding\sindices\sin\sthe\sexpected\sway.\s\sBoth\snew\sfunctions\sare\sonly\ndefined\sif\sfts3.c\sis\scompiled\swith\sSQLITE_TEST\sdefined,\sas\swhen\nbuilding\stestfixture.\s\sThese\sfunctions\sare\snot\sintended\sto\sbe\spart\sof\nthe\sexposed\sfts3\sAPI.\n\ndump_terms()\sgenerates\sa\sTEXT\sresult\sof\sall\sthe\sterms\sin\sthe\sindex\s(or\na\sspecified\ssegment),\ssorted\sand\sjoined\swith\sspaces.\n\ndump_doclist()\sgenerates\sa\sTEXT\srepresentation\sof\sthe\sdoclist\nassociated\swith\sa\sgiven\sterm\sin\sthe\sindex\s(or\sa\sspecified\ssegment).\s(CVS\s5340) +D 2008-07-03T19:53:22 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in 325dfac0a0dd1cb4d975f1ace6453157892e6042 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ 
-51,7 +51,7 @@ F ext/fts2/fts2_tokenizer1.c 8a545c232bdffafd117c4eeaf59789691909f26a F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.tokenizers 226644a0eab97724e8de83061912e8bb248461b6 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 9f52ff22ca64172a4e508de30994e9e811b205e0 +F ext/fts3/fts3.c 192a65d39c2904873c13dc7a8e50b00cd190ec3a F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe F ext/fts3/fts3_hash.c 83e7bb4042106b32811681dd2859b4577a7a6b35 F ext/fts3/fts3_hash.h 004b759e1602ff16dfa02fea3ca1c77336ad6798 @@ -327,6 +327,7 @@ F test/fts3an.test 4b4fdab5abe2f308bdc47f6e822df2bcae30361c F test/fts3ao.test 0aa29dd4fc1c8d46b1f7cfe5926f7ac97551bea9 F test/fts3atoken.test 25c2070e1e8755d414bf9c8200427b277a9f99fa F test/fts3b.test b3a25180a633873d37d86e1ccd00ed690d37237a +F test/fts3c.test 4c7ef29b37aca3e8ebb6a39b57910caa6506034e F test/fts3near.test 2d4dadcaac5025ab65bb87e66c45f39e92966194 F test/func.test 7cc20ab84558911cc77e95ae62b2af31b1b48ad2 F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a @@ -596,7 +597,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81 F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 1dbced29de5f59ba2ebf877edcadf171540374d1 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e -P 8b88b64bb37df4e38cbfe31a14c219688b26e2af -R 3c07f799f108d6a630b004c233775cb0 -U danielk1977 -Z 072ebff2803ed385439d23a8fd8e0c8e +P cec4eba1a105396c5fd5d2b664456f6d6be3f215 +R 2994d09a5d3d78a35346e2b1b6bff7bc +U shess +Z 7b70c642c4bfeb309fc7899215c4dbcd diff --git a/manifest.uuid b/manifest.uuid index c0731f4d7b..a8f6b751cb 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -cec4eba1a105396c5fd5d2b664456f6d6be3f215 \ No newline at end of file +a48e3d95f7a656285e959cef595cbe6d53428ad9 \ No newline at end of file diff --git a/test/fts3c.test b/test/fts3c.test new file mode 100644 index 0000000000..2c73d4bcd0 --- 
/dev/null +++ b/test/fts3c.test @@ -0,0 +1,357 @@ +# 2008 June 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file exercises some new testing functions in the FTS3 module, +# and then uses them to do some basic tests that FTS3 is internally +# working as expected. +# +# $Id: fts3c.test,v 1.1 2008/07/03 19:53:22 shess Exp $ +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If SQLITE_ENABLE_FTS3 is not defined, omit this file. +ifcapable !fts3 { + finish_test + return +} + +#************************************************************************* +# Probe to see if support for these functions is compiled in. +# TODO(shess): Change main.mk to do the right thing and remove this test. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts3(c); + INSERT INTO t1 (docid, c) VALUES (1, 'x'); +} + +set s {SELECT dump_terms(t1, 1) FROM t1 LIMIT 1} +set r {1 {unable to use function dump_terms in the requested context}} +if {[catchsql $s]==$r} { + finish_test + return +} + +#************************************************************************* +# Test that the new functions give appropriate errors. 
+do_test fts3c-0.0 { + catchsql { + SELECT dump_terms(t1, 1) FROM t1 LIMIT 1; + } +} {1 {dump_terms: incorrect arguments}} + +do_test fts3c-0.1 { + catchsql { + SELECT dump_terms(t1, 0, 0, 0) FROM t1 LIMIT 1; + } +} {1 {dump_terms: incorrect arguments}} + +do_test fts3c-0.2 { + catchsql { + SELECT dump_terms(1, t1) FROM t1 LIMIT 1; + } +} {1 {unable to use function dump_terms in the requested context}} + +do_test fts3c-0.3 { + catchsql { + SELECT dump_terms(t1, 16, 16) FROM t1 LIMIT 1; + } +} {1 {dump_terms: segment not found}} + +do_test fts3c-0.4 { + catchsql { + SELECT dump_doclist(t1) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: incorrect arguments}} + +do_test fts3c-0.5 { + catchsql { + SELECT dump_doclist(t1, NULL) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: empty second argument}} + +do_test fts3c-0.6 { + catchsql { + SELECT dump_doclist(t1, '') FROM t1 LIMIT 1; + } +} {1 {dump_doclist: empty second argument}} + +do_test fts3c-0.7 { + catchsql { + SELECT dump_doclist(t1, 'a', 0) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: incorrect arguments}} + +do_test fts3c-0.8 { + catchsql { + SELECT dump_doclist(t1, 'a', 0, 0, 0) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: incorrect arguments}} + +do_test fts3c-0.9 { + catchsql { + SELECT dump_doclist(t1, 'a', 16, 16) FROM t1 LIMIT 1; + } +} {1 {dump_doclist: segment not found}} + +#************************************************************************* +# Utility function to check for the expected terms in the segment +# level/index. _all version does same but for entire index. +proc check_terms {test level index terms} { + # TODO(shess): Figure out why uplevel in do_test can't catch + # $level and $index directly. 
+ set ::level $level + set ::index $index + do_test $test.terms { + execsql { + SELECT dump_terms(t1, $::level, $::index) FROM t1 LIMIT 1; + } + } [list $terms] +} +proc check_terms_all {test terms} { + do_test $test.terms { + execsql { + SELECT dump_terms(t1) FROM t1 LIMIT 1; + } + } [list $terms] +} + +# Utility function to check for the expected doclist for the term in +# segment level/index. _all version does same for entire index. +proc check_doclist {test level index term doclist} { + # TODO(shess): Again, why can't the non-:: versions work? + set ::term $term + set ::level $level + set ::index $index + do_test $test { + execsql { + SELECT dump_doclist(t1, $::term, $::level, $::index) FROM t1 LIMIT 1; + } + } [list $doclist] +} +proc check_doclist_all {test term doclist} { + set ::term $term + do_test $test { + execsql { + SELECT dump_doclist(t1, $::term) FROM t1 LIMIT 1; + } + } [list $doclist] +} + +#************************************************************************* +# Test the segments resulting from straight-forward inserts. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts3(c); + INSERT INTO t1 (docid, c) VALUES (1, 'This is a test'); + INSERT INTO t1 (docid, c) VALUES (2, 'That was a test'); + INSERT INTO t1 (docid, c) VALUES (3, 'This is a test'); +} + +# Check for expected segments and expected matches. +do_test fts3c-1.0.segments { + execsql { + SELECT level, idx FROM t1_segdir ORDER BY level, idx; + } +} {0 0 0 1 0 2} +do_test fts3c-1.0.matches { + execsql { + SELECT OFFSETS(t1) FROM t1 + WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; + } +} [list {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4} \ + {0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} \ + {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}] + +# Check the specifics of the segments constructed. +# Logical view of entire index. 
+check_terms_all fts3c-1.0.1 {a is test that this was} +check_doclist_all fts3c-1.0.1.1 a {[1 0[2]] [2 0[2]] [3 0[2]]} +check_doclist_all fts3c-1.0.1.2 is {[1 0[1]] [3 0[1]]} +check_doclist_all fts3c-1.0.1.3 test {[1 0[3]] [2 0[3]] [3 0[3]]} +check_doclist_all fts3c-1.0.1.4 that {[2 0[0]]} +check_doclist_all fts3c-1.0.1.5 this {[1 0[0]] [3 0[0]]} +check_doclist_all fts3c-1.0.1.6 was {[2 0[1]]} + +# Segment 0,0 +check_terms fts3c-1.0.2 0 0 {a is test this} +check_doclist fts3c-1.0.2.1 0 0 a {[1 0[2]]} +check_doclist fts3c-1.0.2.2 0 0 is {[1 0[1]]} +check_doclist fts3c-1.0.2.3 0 0 test {[1 0[3]]} +check_doclist fts3c-1.0.2.4 0 0 this {[1 0[0]]} + +# Segment 0,1 +check_terms fts3c-1.0.3 0 1 {a test that was} +check_doclist fts3c-1.0.3.1 0 1 a {[2 0[2]]} +check_doclist fts3c-1.0.3.2 0 1 test {[2 0[3]]} +check_doclist fts3c-1.0.3.3 0 1 that {[2 0[0]]} +check_doclist fts3c-1.0.3.4 0 1 was {[2 0[1]]} + +# Segment 0,2 +check_terms fts3c-1.0.4 0 2 {a is test this} +check_doclist fts3c-1.0.4.1 0 2 a {[3 0[2]]} +check_doclist fts3c-1.0.4.2 0 2 is {[3 0[1]]} +check_doclist fts3c-1.0.4.3 0 2 test {[3 0[3]]} +check_doclist fts3c-1.0.4.4 0 2 this {[3 0[0]]} + +#************************************************************************* +# Test the segments resulting from inserts followed by a delete. 
+db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts3(c); + INSERT INTO t1 (docid, c) VALUES (1, 'This is a test'); + INSERT INTO t1 (docid, c) VALUES (2, 'That was a test'); + INSERT INTO t1 (docid, c) VALUES (3, 'This is a test'); + DELETE FROM t1 WHERE docid = 1; +} + +do_test fts3c-1.1.segments { + execsql { + SELECT level, idx FROM t1_segdir ORDER BY level, idx; + } +} {0 0 0 1 0 2 0 3} +do_test fts3c-1.1.matches { + execsql { + SELECT OFFSETS(t1) FROM t1 + WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; + } +} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4} {0 0 0 4 0 4 5 2 0 3 8 1 0 5 10 4}} + +check_terms_all fts3c-1.1.1 {a is test that this was} +check_doclist_all fts3c-1.1.1.1 a {[2 0[2]] [3 0[2]]} +check_doclist_all fts3c-1.1.1.2 is {[3 0[1]]} +check_doclist_all fts3c-1.1.1.3 test {[2 0[3]] [3 0[3]]} +check_doclist_all fts3c-1.1.1.4 that {[2 0[0]]} +check_doclist_all fts3c-1.1.1.5 this {[3 0[0]]} +check_doclist_all fts3c-1.1.1.6 was {[2 0[1]]} + +check_terms fts3c-1.1.2 0 0 {a is test this} +check_doclist fts3c-1.1.2.1 0 0 a {[1 0[2]]} +check_doclist fts3c-1.1.2.2 0 0 is {[1 0[1]]} +check_doclist fts3c-1.1.2.3 0 0 test {[1 0[3]]} +check_doclist fts3c-1.1.2.4 0 0 this {[1 0[0]]} + +check_terms fts3c-1.1.3 0 1 {a test that was} +check_doclist fts3c-1.1.3.1 0 1 a {[2 0[2]]} +check_doclist fts3c-1.1.3.2 0 1 test {[2 0[3]]} +check_doclist fts3c-1.1.3.3 0 1 that {[2 0[0]]} +check_doclist fts3c-1.1.3.4 0 1 was {[2 0[1]]} + +check_terms fts3c-1.1.4 0 2 {a is test this} +check_doclist fts3c-1.1.4.1 0 2 a {[3 0[2]]} +check_doclist fts3c-1.1.4.2 0 2 is {[3 0[1]]} +check_doclist fts3c-1.1.4.3 0 2 test {[3 0[3]]} +check_doclist fts3c-1.1.4.4 0 2 this {[3 0[0]]} + +check_terms fts3c-1.1.5 0 3 {a is test this} +check_doclist fts3c-1.1.5.1 0 3 a {[1]} +check_doclist fts3c-1.1.5.2 0 3 is {[1]} +check_doclist fts3c-1.1.5.3 0 3 test {[1]} +check_doclist fts3c-1.1.5.4 0 3 this {[1]} + 
+#************************************************************************* +# Test results when all references to certain tokens are deleted. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts3(c); + INSERT INTO t1 (docid, c) VALUES (1, 'This is a test'); + INSERT INTO t1 (docid, c) VALUES (2, 'That was a test'); + INSERT INTO t1 (docid, c) VALUES (3, 'This is a test'); + DELETE FROM t1 WHERE docid IN (1,3); +} + +# Still 4 segments because 0,3 will contain deletes for docid 1 and 3. +do_test fts3c-1.2.segments { + execsql { + SELECT level, idx FROM t1_segdir ORDER BY level, idx; + } +} {0 0 0 1 0 2 0 3} +do_test fts3c-1.2.matches { + execsql { + SELECT OFFSETS(t1) FROM t1 + WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; + } +} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}} + +check_terms_all fts3c-1.2.1 {a is test that this was} +check_doclist_all fts3c-1.2.1.1 a {[2 0[2]]} +check_doclist_all fts3c-1.2.1.2 is {} +check_doclist_all fts3c-1.2.1.3 test {[2 0[3]]} +check_doclist_all fts3c-1.2.1.4 that {[2 0[0]]} +check_doclist_all fts3c-1.2.1.5 this {} +check_doclist_all fts3c-1.2.1.6 was {[2 0[1]]} + +check_terms fts3c-1.2.2 0 0 {a is test this} +check_doclist fts3c-1.2.2.1 0 0 a {[1 0[2]]} +check_doclist fts3c-1.2.2.2 0 0 is {[1 0[1]]} +check_doclist fts3c-1.2.2.3 0 0 test {[1 0[3]]} +check_doclist fts3c-1.2.2.4 0 0 this {[1 0[0]]} + +check_terms fts3c-1.2.3 0 1 {a test that was} +check_doclist fts3c-1.2.3.1 0 1 a {[2 0[2]]} +check_doclist fts3c-1.2.3.2 0 1 test {[2 0[3]]} +check_doclist fts3c-1.2.3.3 0 1 that {[2 0[0]]} +check_doclist fts3c-1.2.3.4 0 1 was {[2 0[1]]} + +check_terms fts3c-1.2.4 0 2 {a is test this} +check_doclist fts3c-1.2.4.1 0 2 a {[3 0[2]]} +check_doclist fts3c-1.2.4.2 0 2 is {[3 0[1]]} +check_doclist fts3c-1.2.4.3 0 2 test {[3 0[3]]} +check_doclist fts3c-1.2.4.4 0 2 this {[3 0[0]]} + +check_terms fts3c-1.2.5 0 3 {a is test this} +check_doclist fts3c-1.2.5.1 0 3 a {[1] [3]} +check_doclist fts3c-1.2.5.2 0 3 
is {[1] [3]} +check_doclist fts3c-1.2.5.3 0 3 test {[1] [3]} +check_doclist fts3c-1.2.5.4 0 3 this {[1] [3]} + +#************************************************************************* +# Test results when everything is optimized manually. +db eval { + DROP TABLE IF EXISTS t1; + CREATE VIRTUAL TABLE t1 USING fts3(c); + INSERT INTO t1 (docid, c) VALUES (1, 'This is a test'); + INSERT INTO t1 (docid, c) VALUES (2, 'That was a test'); + INSERT INTO t1 (docid, c) VALUES (3, 'This is a test'); + DELETE FROM t1 WHERE docid IN (1,3); + DROP TABLE IF EXISTS t1old; + ALTER TABLE t1 RENAME TO t1old; + CREATE VIRTUAL TABLE t1 USING fts3(c); + INSERT INTO t1 (docid, c) SELECT docid, c FROM t1old; + DROP TABLE t1old; +} + +# Should be a single optimal segment with the same logical results. +do_test fts3c-1.3.segments { + execsql { + SELECT level, idx FROM t1_segdir ORDER BY level, idx; + } +} {0 0} +do_test fts3c-1.3.matches { + execsql { + SELECT OFFSETS(t1) FROM t1 + WHERE t1 MATCH 'this OR that OR was OR a OR is OR test' ORDER BY docid; + } +} {{0 1 0 4 0 2 5 3 0 3 9 1 0 5 11 4}} + +check_terms_all fts3c-1.3.1 {a test that was} +check_doclist_all fts3c-1.3.1.1 a {[2 0[2]]} +check_doclist_all fts3c-1.3.1.2 test {[2 0[3]]} +check_doclist_all fts3c-1.3.1.3 that {[2 0[0]]} +check_doclist_all fts3c-1.3.1.4 was {[2 0[1]]} + +check_terms fts3c-1.3.2 0 0 {a test that was} +check_doclist fts3c-1.3.2.1 0 0 a {[2 0[2]]} +check_doclist fts3c-1.3.2.2 0 0 test {[2 0[3]]} +check_doclist fts3c-1.3.2.3 0 0 that {[2 0[0]]} +check_doclist fts3c-1.3.2.4 0 0 was {[2 0[1]]} + +finish_test