From 3540c1f7b8c5790ff08239ad57e5088a5fb33e29 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 22 Dec 2009 18:56:19 +0000 Subject: [PATCH] Add the experimental FTS3 matchinfo() function. Provides details of the match that may be used for result ranking and other purposes. FossilOrigin-Name: 37a1de02d1d8a34604f1bee896eaf579d4ba149a --- ext/fts3/fts3.c | 305 +++++++++++++++++++++++++++++++++++++- ext/fts3/fts3Int.h | 17 +++ ext/fts3/fts3_expr.c | 2 + ext/fts3/fts3_tokenizer.h | 4 + manifest | 34 ++--- manifest.uuid | 2 +- test/fts3query.test | 27 ++++ test/fts3rnd.test | 38 ++++- 8 files changed, 393 insertions(+), 36 deletions(-) diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 64e00dc2b2..524ec55799 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -957,11 +957,27 @@ static void fts3PutDeltaVarint( *piPrev = iVal; } +/* +** When this function is called, *ppPoslist is assumed to point to the +** start of a position-list. +*/ static void fts3PoslistCopy(char **pp, char **ppPoslist){ char *pEnd = *ppPoslist; char c = 0; + + /* The end of a position list is marked by a zero encoded as an FTS3 + ** varint. A single 0x00 byte. Except, if the 0x00 byte is preceded by + ** a byte with the 0x80 bit set, then it is not a varint 0, but the tail + ** of some other, multi-byte, value. + ** + ** The following block moves pEnd to point to the first byte that is not + ** immediately preceded by a byte with the 0x80 bit set. Then increments + ** pEnd once more so that it points to the byte immediately following the + ** last byte in the position-list. + */ while( *pEnd | c ) c = *pEnd++ & 0x80; pEnd++; + if( pp ){ int n = (int)(pEnd - *ppPoslist); char *p = *pp; @@ -988,6 +1004,25 @@ static void fts3ColumnlistCopy(char **pp, char **ppPoslist){ *ppPoslist = pEnd; } +/* +** This function is used to count the entries in a column-list (delta-encoded +** list of term offsets within a single column of a single row). 
+*/ +static int fts3ColumnlistCount(char **ppCollist){ + char *pEnd = *ppCollist; + char c = 0; + int nEntry = 0; + + /* A column-list is terminated by either a 0x01 or 0x00. */ + while( 0xFE & (*pEnd | c) ){ + c = *pEnd++ & 0x80; + if( !c ) nEntry++; + } + + *ppCollist = pEnd; + return nEntry; +} + /* ** Value used to signify the end of an offset-list. This is safe because ** it is not possible to have a document with 2^31 terms. @@ -1699,7 +1734,8 @@ static int evalFts3Expr( Fts3Table *p, /* Virtual table handle */ Fts3Expr *pExpr, /* Parsed fts3 expression */ char **paOut, /* OUT: Pointer to malloc'd result buffer */ - int *pnOut /* OUT: Size of buffer at *paOut */ + int *pnOut, /* OUT: Size of buffer at *paOut */ + int isReqPos /* Require positions in output buffer */ ){ int rc = SQLITE_OK; /* Return code */ @@ -1708,17 +1744,23 @@ static int evalFts3Expr( *pnOut = 0; if( pExpr ){ + assert( pExpr->eType==FTSQUERY_PHRASE + || pExpr->eType==FTSQUERY_NEAR + || isReqPos==0 + ); if( pExpr->eType==FTSQUERY_PHRASE ){ - int isReqPos = (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR); - rc = fts3PhraseSelect(p, pExpr->pPhrase, isReqPos, paOut, pnOut); + rc = fts3PhraseSelect(p, pExpr->pPhrase, + isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR), + paOut, pnOut + ); }else{ char *aLeft; char *aRight; int nLeft; int nRight; - if( SQLITE_OK==(rc = evalFts3Expr(p, pExpr->pRight, &aRight, &nRight)) - && SQLITE_OK==(rc = evalFts3Expr(p, pExpr->pLeft, &aLeft, &nLeft)) + if( 0==(rc = evalFts3Expr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) + && 0==(rc = evalFts3Expr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) ){ assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT @@ -1727,7 +1769,7 @@ static int evalFts3Expr( case FTSQUERY_NEAR: { Fts3Expr *pLeft; Fts3Expr *pRight; - int mergetype = MERGE_NEAR; + int mergetype = isReqPos ? 
MERGE_POS_NEAR : MERGE_NEAR; int nParam1; int nParam2; char *aBuffer; @@ -1870,7 +1912,7 @@ static int fts3FilterMethod( ); if( rc!=SQLITE_OK ) return rc; - rc = evalFts3Expr(p, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist); + rc = evalFts3Expr(p, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0); pCsr->pNextId = pCsr->aDoclist; pCsr->iPrevId = 0; } @@ -1992,6 +2034,193 @@ static int fts3RollbackMethod(sqlite3_vtab *pVtab){ return SQLITE_OK; } +/* +** The following flags affect the format of the blob of unsigned int values +** returned by the matchinfo() function. The format is defined as follows: +** +** Integer 0: Number of 'simple queries' that make up the FTS3 query. +** Integer 1: Number of columns in queried table. +** +** followed by the data for (query 0, column 0), (query 0, column 1) ... +** (query 1, column 0) and so on. +** +** The first integer in each data is the number of hits that the simple +** query has in the current column. +** +** If the GLOBALCOUNT flag is set, then this is followed by the total +** number of hits the simple query has in the current column of *all* +** selected rows. +** +** If the POSITIONLIST flag is set, then this is followed by +** integers - the positions of each of the hits for the current column/query. +*/ +#define FTS3_MATCHINFO_GLOBALCOUNT 0x00000001 +#define FTS3_MATCHINFO_POSITIONLIST 0x00000002 + +typedef struct MatchInfo MatchInfo; +struct MatchInfo { + int rc; /* Return code. 
SQLITE_OK if no error */ + sqlite3_int64 iDocid; /* Docid of entry to return data for */ + Fts3Table *pTab; /* FTS3 Virtual table */ + int flags; /* Output flags (see above) */ + int nQuery; /* Number of simple queries */ + + /* Malloced output buffer */ + unsigned int *aOut; + int nOut; + int nAlloc; +}; + +static void fts3MatchInfoAppend(MatchInfo *pInfo, unsigned int iVal){ + if( pInfo->rc!=SQLITE_OK ) return; + + if( pInfo->nOut==pInfo->nAlloc ){ + int nNew = pInfo->nAlloc*2+100; + unsigned int *aNew = (unsigned int *)sqlite3_realloc( + pInfo->aOut, nNew * sizeof(unsigned int) + ); + if( !aNew ){ + pInfo->rc = SQLITE_NOMEM; + return; + } + pInfo->aOut = aNew; + pInfo->nAlloc = nNew; + } + + pInfo->aOut[pInfo->nOut++] = iVal; +} + +/* +** Iterate through each simple query that makes up the query expression +** implemented by the cursor passed as the first argument. +*/ +static void fts3ExprMatchInfo( + sqlite3_context *pCtx, + Fts3Expr *pExpr, + MatchInfo *pInfo +){ + int eType = pExpr->eType; + if( eType==FTSQUERY_NOT || pInfo->rc ){ + return; + }else if( eType!=FTSQUERY_PHRASE && eType!=FTSQUERY_NEAR ){ + assert( pExpr->pLeft && pExpr->pRight ); + fts3ExprMatchInfo(pCtx, pExpr->pLeft, pInfo); + if( pInfo->rc==SQLITE_OK ){ + fts3ExprMatchInfo(pCtx, pExpr->pRight, pInfo); + } + }else{ + Fts3Table *pTab = pInfo->pTab; + + /* If it is not loaded already, load the doclist for this simple query + ** from the FTS3 full-text index. + */ + if( pExpr->isLoaded==0 ){ + pInfo->rc = evalFts3Expr(pTab,pExpr,&pExpr->aDoclist,&pExpr->nDoclist,1); + if( pInfo->rc ) return; + pExpr->isLoaded = 1; + } + + /* If aDoclist is not NULL, search for the doclist entry in pExpr->aDoclist + ** associated with the docid pInfo->iDocid. 
+ */ + if( pExpr->aDoclist ){ + char *pEnd = &pExpr->aDoclist[pExpr->nDoclist]; + sqlite3_int64 iSearch = pInfo->iDocid; + + if( pExpr->pCurrent==0 ){ + assert( pExpr->iDocid==0 ); + pExpr->pCurrent = pExpr->aDoclist; + fts3GetDeltaVarint(&pExpr->pCurrent, &pExpr->iDocid); + } + + while( pExpr->iDocidpCurrentpCurrent); + if( pExpr->pCurrentpCurrent, &pExpr->iDocid); + } + } + + if( pExpr->iDocid==iSearch ){ + int i; + for(i=0; inColumn; i++){ + unsigned int iLocalOff; + + /* Add space for the "local-count" field. */ + iLocalOff = pInfo->nOut; + fts3MatchInfoAppend(pInfo, 0); + if( pInfo->rc ) return; + + /* If the GLOBALCOUNT field is required, write the global-count + ** value for this query/column to the output buffer. + */ + if( pInfo->flags&FTS3_MATCHINFO_GLOBALCOUNT ){ + if( !pExpr->aHist ){ + char *pCsr = pExpr->aDoclist; + + /* Allocate a zeroed buffer to store the global-counts + ** corresponding to this simple query for each table column. + */ + int nByte = sizeof(unsigned int)*pTab->nColumn; + pExpr->aHist = (unsigned int *)sqlite3_malloc(nByte); + if( !pExpr->aHist ){ + pInfo->rc = SQLITE_NOMEM; + return; + } + memset(pExpr->aHist, 0, nByte); + + /* Scan the entire doclist to populate Fts3Expr.aHist[]. 
*/ + while( pCsraHist[iCol] += fts3ColumnlistCount(&pCsr); + } + pCsr++; + } + } + + fts3MatchInfoAppend(pInfo, pExpr->aHist[i]); + } + + if( i==0 ){ + if( *pExpr->pCurrent==0x01 ) continue; + }else{ + sqlite3_int64 iCol; + char *pList = pExpr->pCurrent; + if( *pList==0x00 ) continue; + pList++; + pList += sqlite3Fts3GetVarint(pList, &iCol); + if( iCol!=i ) continue; + pExpr->pCurrent = pList; + } + + if( pInfo->flags&FTS3_MATCHINFO_POSITIONLIST ){ + int nLocal = 0; + sqlite3_int64 iOffset = 0; + char *pList = pExpr->pCurrent; + while( *pList&0xFE ){ + fts3GetDeltaVarint(&pList, &iOffset); iOffset -= 2; + fts3MatchInfoAppend(pInfo, iOffset); + nLocal++; + } + pExpr->pCurrent = pList; + pInfo->aOut[iLocalOff] = nLocal; + }else{ + pInfo->aOut[iLocalOff] = fts3ColumnlistCount(&pExpr->pCurrent); + } + } + pExpr->pCurrent++; + if( pExpr->pCurrentpCurrent, &pExpr->iDocid); + } + } + } + pInfo->nQuery++; + } +} + /* ** Helper function used by the implementation of the overloaded snippet(), ** offsets() and optimize() SQL functions. 
@@ -2119,6 +2348,88 @@ static void fts3OptimizeFunc(
   }
 }
 
+/*
+** Implementation of the matchinfo() function for FTS3.
+**
+** The optional second argument is a string of flag characters: 'g' sets
+** FTS3_MATCHINFO_GLOBALCOUNT and 'p' sets FTS3_MATCHINFO_POSITIONLIST. Any
+** other flag character, or an argument count other than 1 or 2, is
+** reported as an SQL error.
+**
+** On success the result is a blob holding an array of unsigned int values
+** in host byte order. Element 0 is the number of simple queries and
+** element 1 is the number of columns in the table.
+*/
+static void fts3MatchinfoFunc(
+  sqlite3_context *pContext,      /* SQLite function call context */
+  int nVal,                       /* Size of argument array */
+  sqlite3_value **apVal           /* Array of arguments */
+){
+  Fts3Cursor *pCsr;               /* Cursor handle passed through apVal[0] */
+  int flags = 0;                  /* Mask of FTS3_MATCHINFO_XXX flags */
+
+  if( nVal==2 ){
+    int i;
+    const unsigned char *zFlags = sqlite3_value_text(apVal[1]);
+
+    /* sqlite3_value_text() returns NULL for an SQL NULL argument. Treat
+    ** that as an empty flag string rather than dereferencing it.
+    */
+    for(i=0; zFlags && zFlags[i]; i++){
+      switch( zFlags[i] ){
+        case 'g': flags |= FTS3_MATCHINFO_GLOBALCOUNT; break;
+        case 'p': flags |= FTS3_MATCHINFO_POSITIONLIST; break;
+        default: {
+          /* Sizing the array from its initializer reserves room for the
+          ** nul-terminator that the -1 length passed below relies on.
+          ** Byte 15 is the '?' placeholder between the double quotes.
+          */
+          char zErr[] = "Unknown flag: \"?\"";
+          zErr[15] = (char)zFlags[i];
+          sqlite3_result_error(pContext, zErr, -1);
+          return;
+        }
+      }
+    }
+  }else if( nVal!=1 ){
+    sqlite3_result_error(pContext,
+        "wrong number of arguments to function matchinfo()", -1);
+    return;
+  }
+
+  if( SQLITE_OK==fts3FunctionArg(pContext, "matchinfo", apVal[0], &pCsr) ){
+    MatchInfo ctx;
+    memset(&ctx, 0, sizeof(ctx));
+    ctx.iDocid = pCsr->iPrevId;
+    ctx.pTab = (Fts3Table *)pCsr->base.pVtab;
+    ctx.flags = flags;
+
+    /* Slot 0 is a placeholder for the simple-query count; it is filled
+    ** in below once the expression has been walked.
+    */
+    fts3MatchInfoAppend(&ctx, 0);
+    fts3MatchInfoAppend(&ctx, ctx.pTab->nColumn);
+
+    /* Iterate through each of the 'simple' queries that make up the query
+    ** expression. A 'simple' query is a phrase (including token and token
+    ** prefix) or NEAR query. fts3ExprMatchInfo() dereferences its
+    ** expression argument unconditionally, so skip the walk if the cursor
+    ** has no expression; the blob then holds only the two header values.
+    */
+    if( pCsr->pExpr ){
+      fts3ExprMatchInfo(pContext, pCsr->pExpr, &ctx);
+    }
+    if( ctx.rc ){
+      sqlite3_free(ctx.aOut);
+      sqlite3_result_error_code(pContext, ctx.rc);
+    }else{
+      int nByte = (int)(ctx.nOut*sizeof(unsigned int));
+      ctx.aOut[0] = ctx.nQuery;
+      sqlite3_result_blob(pContext, ctx.aOut, nByte, sqlite3_free);
+    }
+  }
+}
+
 /*
 ** This routine implements the xFindFunction method for the FTS3
 ** virtual table.
@@ -2137,6 +2426,7 @@ static int fts3FindFunctionMethod( { "snippet", fts3SnippetFunc }, { "offsets", fts3OffsetsFunc }, { "optimize", fts3OptimizeFunc }, + { "matchinfo", fts3MatchinfoFunc }, }; int i; /* Iterator variable */ @@ -2284,6 +2574,7 @@ int sqlite3Fts3Init(sqlite3 *db){ && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1)) + && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1)) ){ return sqlite3_create_module_v2( diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index 64fd32b2c6..e3e18e701b 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -186,6 +186,16 @@ struct Fts3Phrase { /* ** A tree of these objects forms the RHS of a MATCH operator. +** +** If Fts3Expr.eType is either FTSQUERY_NEAR or FTSQUERY_PHRASE and isLoaded +** is true, then aDoclist points to a malloced buffer, size nDoclist bytes, +** containing the results of the NEAR or phrase query in FTS3 doclist +** format. As usual, the initial "Length" field found in doclists stored +** on disk is omitted from this buffer. +** +** Variable pCurrent always points to the start of a docid field within +** aDoclist. Since the doclist is usually scanned in docid order, this can +** be used to accelerate seeking to the required docid within the doclist. 
*/ struct Fts3Expr { int eType; /* One of the FTSQUERY_XXX values defined below */ @@ -194,6 +204,13 @@ struct Fts3Expr { Fts3Expr *pLeft; /* Left operand */ Fts3Expr *pRight; /* Right operand */ Fts3Phrase *pPhrase; /* Valid if eType==FTSQUERY_PHRASE */ + + int isLoaded; + sqlite3_int64 iDocid; + char *aDoclist; + int nDoclist; + char *pCurrent; + unsigned int *aHist; }; /* diff --git a/ext/fts3/fts3_expr.c b/ext/fts3/fts3_expr.c index cae0d2aefa..0841082962 100644 --- a/ext/fts3/fts3_expr.c +++ b/ext/fts3/fts3_expr.c @@ -735,6 +735,8 @@ void sqlite3Fts3ExprFree(Fts3Expr *p){ if( p ){ sqlite3Fts3ExprFree(p->pLeft); sqlite3Fts3ExprFree(p->pRight); + sqlite3_free(p->aDoclist); + sqlite3_free(p->aHist); sqlite3_free(p); } } diff --git a/ext/fts3/fts3_tokenizer.h b/ext/fts3/fts3_tokenizer.h index 906303db4e..615644506c 100644 --- a/ext/fts3/fts3_tokenizer.h +++ b/ext/fts3/fts3_tokenizer.h @@ -145,4 +145,8 @@ struct sqlite3_tokenizer_cursor { /* Tokenizer implementations will typically add additional fields */ }; +int fts3_global_term_cnt(int iTerm, int iCol); +int fts3_term_cnt(int iTerm, int iCol); + + #endif /* _FTS3_TOKENIZER_H_ */ diff --git a/manifest b/manifest index 3d209f05c2..9c034580de 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,5 @@ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -C Add\sa\spragma\sto\sdisable\sthe\squery\sflattener\s-\sfor\suse\sduring\stesting. -D 2009-12-22T00:29:53 +C Add\sthe\sexperimental\sFTS3\smatchinfo()\sfunction.\sProvides\sdetails\sof\sthe\smatch\sthat\smay\sbe\sused\sfor\sresult\sranking\sand\sother\spurposes. 
+D 2009-12-22T18:56:19 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -59,17 +56,17 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 9c4ab6e74b20d9cc3806453f6fbdde6a84e97bb7 +F ext/fts3/fts3.c 0a09deb5525fd7580cfdef4fdbf71fb3147a071d F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h 6fdd41b4f296e5bcc908444dc591397995d4ff5d -F ext/fts3/fts3_expr.c fcf6812dbfd9cb9a2cabaf50e741411794f83e7e +F ext/fts3/fts3Int.h 0d7c8d66ff9be8c79710438a46a7d046fcdedfc2 +F ext/fts3/fts3_expr.c 541de159278cfa694c584c763d23c3e23d796851 F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c a651e287e02b49b565a6ccf9441959d434489156 F ext/fts3/fts3_snippet.c 6c2eb6d872d66b2a9aa5663f2662e993f18a6496 F ext/fts3/fts3_tokenizer.c 1a49ee3d79cbf0b9386250370d9cbfe4bb89c8ff -F ext/fts3/fts3_tokenizer.h 7ff73caa3327589bf6550f60d93ebdd1f6a0fb5c +F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 11a604a53cff5e8c28882727bf794e5252e5227b F ext/fts3/fts3_write.c 2847b13da9e00016b81ea4b80ff5a298e55bc32d F ext/fts3/mkfts3amal.tcl 252ecb7fe6467854f2aa237bf2c390b74e71f100 @@ -406,8 +403,8 @@ F test/fts3expr.test 05dab77387801e4900009917bb18f556037d82da F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a F test/fts3malloc.test d02ee86b21edd2b43044e0d6dfdcd26cb6efddcb F test/fts3near.test dc196dd17b4606f440c580d45b3d23aa975fd077 -F test/fts3query.test 
5561e1feed538d91512e0357f46db6a5aca4bb4d -F test/fts3rnd.test 2a78df5478657fca6444ee03cf930437c47e9a5e +F test/fts3query.test 2cba25181dac298abc10c3086a88b308f90a93c4 +F test/fts3rnd.test 654daa6206f9d63ed3388858c60bba3fd4004a5f F test/func.test af106ed834001738246d276659406823e35cde7b F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/fuzz.test a4174c3009a3e2c2e14b31b364ebf7ddb49de2c9 @@ -785,14 +782,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 4b489ecb250ea0e80d5bf8806f07259e1107f8ad -R 4ff394e3e8f370cfe3bb1a4f517ce068 -U drh -Z a149efdbc300f2ef9c4bcc0e34bf2979 ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.6 (GNU/Linux) - -iD8DBQFLMBMFoxKgR168RlERAq3/AJ9DzTzxkRrhdye4sGLAI1OEQJXZUQCbB/A0 -oHnyJeYGpcPNKCuD7OdyWhQ= -=RmkK ------END PGP SIGNATURE----- +P 1d8550e5c88db0ea76aea579c1a5bf99d48fbe43 +R 41598dc57bec7fe2cae4d45c4e1125f1 +U dan +Z 61e8349bf00893649947578111240ad6 diff --git a/manifest.uuid b/manifest.uuid index a2597f3c4f..bef4ecaa7c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1d8550e5c88db0ea76aea579c1a5bf99d48fbe43 \ No newline at end of file +37a1de02d1d8a34604f1bee896eaf579d4ba149a \ No newline at end of file diff --git a/test/fts3query.test b/test/fts3query.test index 720dd32e3a..8b10f24775 100644 --- a/test/fts3query.test +++ b/test/fts3query.test @@ -89,5 +89,32 @@ do_test fts3query-3.2 { execsql { SELECT docid FROM foobar WHERE description MATCH '"high sp d"' } } {} +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +db func mit mit + +do_test fts3query-3.3 { + execsql { SELECT mit(matchinfo(foobar)) FROM foobar WHERE foobar MATCH 'the' } +} {{1 1 3}} +do_test fts3query-3.4 { + execsql { + SELECT 
mit(matchinfo(foobar, 'g')) FROM foobar WHERE foobar MATCH 'the' + } +} {{1 1 3 3}} +do_test fts3query-3.5 { + execsql { + SELECT mit(matchinfo(foobar, 'p')) FROM foobar WHERE foobar MATCH 'the' + } +} {{1 1 3 27 74 79}} +do_test fts3query-3.5 { + execsql { + SELECT mit(matchinfo(foobar, 'pg')) FROM foobar WHERE foobar MATCH 'the' + } +} {{1 1 3 3 27 74 79}} + finish_test diff --git a/test/fts3rnd.test b/test/fts3rnd.test index 1fc52b70ab..a527214b18 100644 --- a/test/fts3rnd.test +++ b/test/fts3rnd.test @@ -147,15 +147,33 @@ proc simple_phrase {zPrefix} { set reg [string map {* {[^ ]*}} $zPrefix] set reg " $reg " - foreach {key value} [array get ::t1] { + foreach key [lsort -integer [array names ::t1]] { + set value $::t1($key) + set cnt [list] foreach col $value { - if {[regexp $reg " $col "]} {lappend ret $key} + if {[regexp $reg " $col "]} { lappend ret $key ; break } } } - lsort -uniq -integer $ret + #lsort -uniq -integer $ret + set ret } +proc simple_token_matchinfo {zToken} { + foreach key [lsort -integer [array names ::t1]] { + set value $::t1($key) + set cnt [list] + foreach col $value { + lappend cnt [llength [lsearch -all $col $zToken]] + } + if {[lindex [lsort $cnt] end]} { + lappend ret $key [concat 1 3 $cnt] + } + } + + set ret +} + proc simple_near {termlist nNear} { set ret [list] @@ -214,6 +232,14 @@ proc setop_and {A B} { return $ret } +proc mit {blob} { + set scan(littleEndian) i* + set scan(bigEndian) I* + binary scan $blob $scan($::tcl_platform(byteOrder)) r + return $r +} +db func mit mit + set sqlite_fts3_enable_parentheses 1 foreach nodesize {50 500 1000 2000} { @@ -228,7 +254,7 @@ foreach nodesize {50 500 1000 2000} { for {set i 0} {$i < 100} {incr i} { insert_row $i } } - for {set iTest 0} {$iTest <= 100} {incr iTest} { + for {set iTest 1} {$iTest <= 100} {incr iTest} { catchsql COMMIT set DO_MALLOC_TEST 0 @@ -265,8 +291,8 @@ foreach nodesize {50 500 1000 2000} { for {set i 0} {$i < 10} {incr i} { set term [random_term] do_select_test 
fts3rnd-1.$nodesize.$iTest.1.$i { - SELECT docid FROM t1 WHERE t1 MATCH $term - } [simple_phrase $term] + SELECT docid, mit(matchinfo(t1)) FROM t1 WHERE t1 MATCH $term + } [simple_token_matchinfo $term] } # This time, use the first two characters of each term as a term prefix -- 2.47.2