From: dan Date: Wed, 6 Jan 2010 17:19:21 +0000 (+0000) Subject: Change the fts3 snippet function to return (hopefully) more relevant snippets in... X-Git-Tag: version-3.7.2~661 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b023b04fcbc8d52d2f217ece6fcbdaa938166e94;p=thirdparty%2Fsqlite.git Change the fts3 snippet function to return (hopefully) more relevant snippets in less time. FossilOrigin-Name: 8a208223a74d451f60d9cd707d63fb7d157d1737 --- diff --git a/ext/fts3/fts3.c b/ext/fts3/fts3.c index 1d52b687d2..297065dbd7 100644 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@ -1709,6 +1709,74 @@ static int fts3PhraseSelect( return rc; } +static int fts3NearMerge( + int mergetype, /* MERGE_POS_NEAR or MERGE_NEAR */ + int nNear, /* Parameter to NEAR operator */ + int nTokenLeft, /* Number of tokens in LHS phrase arg */ + char *aLeft, /* Doclist for LHS (incl. positions) */ + int nLeft, /* Size of LHS doclist in bytes */ + int nTokenRight, /* As nTokenLeft */ + char *aRight, /* As aLeft */ + int nRight, /* As nRight */ + char **paOut, /* OUT: Results of merge (malloced) */ + int *pnOut /* OUT: Sized of output buffer */ +){ + char *aOut; + int rc; + + assert( mergetype==MERGE_POS_NEAR || MERGE_NEAR ); + + aOut = sqlite3_malloc(nLeft+nRight+1); + if( aOut==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = fts3DoclistMerge(mergetype, nNear+nTokenRight, nNear+nTokenLeft, + aOut, pnOut, aLeft, nLeft, aRight, nRight + ); + if( rc!=SQLITE_OK ){ + sqlite3_free(aOut); + aOut = 0; + } + } + + *paOut = aOut; + return rc; +} + +int sqlite3Fts3ExprNearTrim(Fts3Expr *pLeft, Fts3Expr *pRight, int nNear){ + int rc; + if( pLeft->aDoclist==0 || pRight->aDoclist==0 ){ + sqlite3_free(pLeft->aDoclist); + sqlite3_free(pRight->aDoclist); + pRight->aDoclist = 0; + pLeft->aDoclist = 0; + rc = SQLITE_OK; + }else{ + char *aOut; + int nOut; + + rc = fts3NearMerge(MERGE_POS_NEAR, nNear, + pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist, + pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist, + &aOut, &nOut + ); + if( rc!=SQLITE_OK ) return rc; + sqlite3_free(pRight->aDoclist); + pRight->aDoclist = aOut; + pRight->nDoclist = nOut; + + rc = fts3NearMerge(MERGE_POS_NEAR, nNear, + pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist, + pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist, + &aOut, &nOut + ); + sqlite3_free(pLeft->aDoclist); + pLeft->aDoclist = aOut; + pLeft->nDoclist = nOut; + } + return rc; +} + /* ** Evaluate the full-text expression pExpr against fts3 table pTab. Store ** the resulting doclist in *paOut and *pnOut. @@ -1753,9 +1821,6 @@ static int evalFts3Expr( Fts3Expr *pLeft; Fts3Expr *pRight; int mergetype = isReqPos ? MERGE_POS_NEAR : MERGE_NEAR; - int nParam1; - int nParam2; - char *aBuffer; if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){ mergetype = MERGE_POS_NEAR; @@ -1768,17 +1833,11 @@ static int evalFts3Expr( assert( pRight->eType==FTSQUERY_PHRASE ); assert( pLeft->eType==FTSQUERY_PHRASE ); - nParam1 = pExpr->nNear+1; - nParam2 = nParam1+pLeft->pPhrase->nToken+pRight->pPhrase->nToken-2; - aBuffer = sqlite3_malloc(nLeft+nRight+1); - rc = fts3DoclistMerge(mergetype, nParam1, nParam2, aBuffer, - pnOut, aLeft, nLeft, aRight, nRight + rc = fts3NearMerge(mergetype, pExpr->nNear, + pLeft->pPhrase->nToken, aLeft, nLeft, + pRight->pPhrase->nToken, aRight, nRight, + paOut, pnOut ); - if( rc!=SQLITE_OK ){ - sqlite3_free(aBuffer); - }else{ - *paOut = aBuffer; - } sqlite3_free(aLeft); break; } @@ -2064,7 +2123,7 @@ char *sqlite3Fts3FindPositions( pCsr++; pCsr += sqlite3Fts3GetVarint32(pCsr, &iThis); } - if( iCol==iThis ) return pCsr; + if( iCol==iThis && (*pCsr&0xFE) ) return pCsr; } return 0; } @@ -2116,45 +2175,8 @@ static void fts3SnippetFunc( const char *zStart = ""; const char *zEnd = ""; const char *zEllipsis = "..."; - - /* There must be at least one argument passed to this function (otherwise - ** the non-overloaded version would have been called instead of this one). - */ - assert( nVal>=1 ); - - if( nVal>4 ){ - sqlite3_result_error(pContext, - "wrong number of arguments to function snippet()", -1); - return; - } - if( fts3FunctionArg(pContext, "snippet", apVal[0], &pCsr) ) return; - - switch( nVal ){ - case 4: zEllipsis = (const char*)sqlite3_value_text(apVal[3]); - case 3: zEnd = (const char*)sqlite3_value_text(apVal[2]); - case 2: zStart = (const char*)sqlite3_value_text(apVal[1]); - } - if( !zEllipsis || !zEnd || !zStart ){ - sqlite3_result_error_nomem(pContext); - }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ - sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis); - } -} - -/* -** Implementation of the snippet2() function for FTS3 -*/ -static void fts3Snippet2Func( - sqlite3_context *pContext, /* SQLite function call context */ - int nVal, /* Size of apVal[] array */ - sqlite3_value **apVal /* Array of arguments */ -){ - Fts3Cursor *pCsr; /* Cursor handle passed through apVal[0] */ - const char *zStart = ""; - const char *zEnd = ""; - const char *zEllipsis = "..."; int iCol = -1; - int nToken = 10; + int nToken = 15; /* There must be at least one argument passed to this function (otherwise ** the non-overloaded version would have been called instead of this one). @@ -2178,7 +2200,7 @@ static void fts3Snippet2Func( if( !zEllipsis || !zEnd || !zStart ){ sqlite3_result_error_nomem(pContext); }else if( SQLITE_OK==fts3CursorSeek(pContext, pCsr) ){ - sqlite3Fts3Snippet2(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken); + sqlite3Fts3Snippet(pContext, pCsr, zStart, zEnd, zEllipsis, iCol, nToken); } } @@ -2279,7 +2301,6 @@ static int fts3FindFunctionMethod( void (*xFunc)(sqlite3_context*,int,sqlite3_value**); } aOverload[] = { { "snippet", fts3SnippetFunc }, - { "snippet2", fts3Snippet2Func }, { "offsets", fts3OffsetsFunc }, { "optimize", fts3OptimizeFunc }, { "matchinfo", fts3MatchinfoFunc }, @@ -2429,7 +2450,6 @@ int sqlite3Fts3Init(sqlite3 *db){ if( SQLITE_OK==rc && SQLITE_OK==(rc = sqlite3Fts3InitHashTable(db, pHash, "fts3_tokenizer")) && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1)) - && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet2", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", 1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "matchinfo", -1)) && SQLITE_OK==(rc = sqlite3_overload_function(db, "optimize", 1)) diff --git a/ext/fts3/fts3Int.h b/ext/fts3/fts3Int.h index ceb13ee7d6..22d3885014 100644 --- a/ext/fts3/fts3Int.h +++ b/ext/fts3/fts3Int.h @@ -279,6 +279,7 @@ void sqlite3Fts3Dequote(char *); char *sqlite3Fts3FindPositions(Fts3Expr *, sqlite3_int64, int); int sqlite3Fts3ExprLoadDoclist(Fts3Table *, Fts3Expr *); +int sqlite3Fts3ExprNearTrim(Fts3Expr *, Fts3Expr *, int); /* fts3_tokenizer.c */ const char *sqlite3Fts3NextToken(const char *, int *); @@ -289,10 +290,7 @@ int sqlite3Fts3InitTokenizer(Fts3Hash *pHash, /* fts3_snippet.c */ void sqlite3Fts3Offsets(sqlite3_context*, Fts3Cursor*); -void sqlite3Fts3Snippet(sqlite3_context*, Fts3Cursor*, - const char *, const char *, const char * -); -void sqlite3Fts3Snippet2(sqlite3_context *, Fts3Cursor *, const char *, +void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *, const char *, const char *, int, int ); void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *); diff --git a/ext/fts3/fts3_snippet.c b/ext/fts3/fts3_snippet.c index 287c68b13b..78eb2ecc9f 100644 --- a/ext/fts3/fts3_snippet.c +++ b/ext/fts3/fts3_snippet.c @@ -18,723 +18,6 @@ #include #include -typedef struct Snippet Snippet; - -/* -** An instance of the following structure keeps track of generated -** matching-word offset information and snippets. -*/ -struct Snippet { - int nMatch; /* Total number of matches */ - int nAlloc; /* Space allocated for aMatch[] */ - struct snippetMatch { /* One entry for each matching term */ - char snStatus; /* Status flag for use while constructing snippets */ - short int nByte; /* Number of bytes in the term */ - short int iCol; /* The column that contains the match */ - short int iTerm; /* The index in Query.pTerms[] of the matching term */ - int iToken; /* The index of the matching document token */ - int iStart; /* The offset to the first character of the term */ - } *aMatch; /* Points to space obtained from malloc */ - char *zOffset; /* Text rendering of aMatch[] */ - int nOffset; /* strlen(zOffset) */ - char *zSnippet; /* Snippet text */ - int nSnippet; /* strlen(zSnippet) */ -}; - - -/* It is not safe to call isspace(), tolower(), or isalnum() on -** hi-bit-set characters. This is the same solution used in the -** tokenizer. -*/ -static int fts3snippetIsspace(char c){ - return (c&0x80)==0 ? isspace(c) : 0; -} - - -/* -** A StringBuffer object holds a zero-terminated string that grows -** arbitrarily by appending. Space to hold the string is obtained -** from sqlite3_malloc(). After any memory allocation failure, -** StringBuffer.z is set to NULL and no further allocation is attempted. -*/ -typedef struct StringBuffer { - char *z; /* Text of the string. Space from malloc. */ - int nUsed; /* Number bytes of z[] used, not counting \000 terminator */ - int nAlloc; /* Bytes allocated for z[] */ -} StringBuffer; - - -/* -** Initialize a new StringBuffer. -*/ -static void fts3SnippetSbInit(StringBuffer *p){ - p->nAlloc = 100; - p->nUsed = 0; - p->z = sqlite3_malloc( p->nAlloc ); -} - -/* -** Append text to the string buffer. -*/ -static void fts3SnippetAppend(StringBuffer *p, const char *zNew, int nNew){ - if( p->z==0 ) return; - if( nNew<0 ) nNew = (int)strlen(zNew); - if( p->nUsed + nNew >= p->nAlloc ){ - int nAlloc; - char *zNew; - - nAlloc = p->nUsed + nNew + p->nAlloc; - zNew = sqlite3_realloc(p->z, nAlloc); - if( zNew==0 ){ - sqlite3_free(p->z); - p->z = 0; - return; - } - p->z = zNew; - p->nAlloc = nAlloc; - } - memcpy(&p->z[p->nUsed], zNew, nNew); - p->nUsed += nNew; - p->z[p->nUsed] = 0; -} - -/* If the StringBuffer ends in something other than white space, add a -** single space character to the end. -*/ -static void fts3SnippetAppendWhiteSpace(StringBuffer *p){ - if( p->z && p->nUsed && !fts3snippetIsspace(p->z[p->nUsed-1]) ){ - fts3SnippetAppend(p, " ", 1); - } -} - -/* Remove white space from the end of the StringBuffer */ -static void fts3SnippetTrimWhiteSpace(StringBuffer *p){ - if( p->z ){ - while( p->nUsed && fts3snippetIsspace(p->z[p->nUsed-1]) ){ - p->nUsed--; - } - p->z[p->nUsed] = 0; - } -} - -/* -** Release all memory associated with the Snippet structure passed as -** an argument. -*/ -static void fts3SnippetFree(Snippet *p){ - if( p ){ - sqlite3_free(p->aMatch); - sqlite3_free(p->zOffset); - sqlite3_free(p->zSnippet); - sqlite3_free(p); - } -} - -/* -** Append a single entry to the p->aMatch[] log. -*/ -static int snippetAppendMatch( - Snippet *p, /* Append the entry to this snippet */ - int iCol, int iTerm, /* The column and query term */ - int iToken, /* Matching token in document */ - int iStart, int nByte /* Offset and size of the match */ -){ - int i; - struct snippetMatch *pMatch; - if( p->nMatch+1>=p->nAlloc ){ - struct snippetMatch *pNew; - p->nAlloc = p->nAlloc*2 + 10; - pNew = sqlite3_realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); - if( pNew==0 ){ - p->aMatch = 0; - p->nMatch = 0; - p->nAlloc = 0; - return SQLITE_NOMEM; - } - p->aMatch = pNew; - } - i = p->nMatch++; - pMatch = &p->aMatch[i]; - pMatch->iCol = (short)iCol; - pMatch->iTerm = (short)iTerm; - pMatch->iToken = iToken; - pMatch->iStart = iStart; - pMatch->nByte = (short)nByte; - return SQLITE_OK; -} - -/* -** Sizing information for the circular buffer used in snippetOffsetsOfColumn() -*/ -#define FTS3_ROTOR_SZ (32) -#define FTS3_ROTOR_MASK (FTS3_ROTOR_SZ-1) - -/* -** Function to iterate through the tokens of a compiled expression. -** -** Except, skip all tokens on the right-hand side of a NOT operator. -** This function is used to find tokens as part of snippet and offset -** generation and we do nt want snippets and offsets to report matches -** for tokens on the RHS of a NOT. -*/ -static int fts3NextExprToken(Fts3Expr **ppExpr, int *piToken){ - Fts3Expr *p = *ppExpr; - int iToken = *piToken; - if( iToken<0 ){ - /* In this case the expression p is the root of an expression tree. - ** Move to the first token in the expression tree. - */ - while( p->pLeft ){ - p = p->pLeft; - } - iToken = 0; - }else{ - assert(p && p->eType==FTSQUERY_PHRASE ); - if( iToken<(p->pPhrase->nToken-1) ){ - iToken++; - }else{ - iToken = 0; - while( p->pParent && p->pParent->pLeft!=p ){ - assert( p->pParent->pRight==p ); - p = p->pParent; - } - p = p->pParent; - if( p ){ - assert( p->pRight!=0 ); - p = p->pRight; - while( p->pLeft ){ - p = p->pLeft; - } - } - } - } - - *ppExpr = p; - *piToken = iToken; - return p?1:0; -} - -/* -** Return TRUE if the expression node pExpr is located beneath the -** RHS of a NOT operator. -*/ -static int fts3ExprBeneathNot(Fts3Expr *p){ - Fts3Expr *pParent; - while( p ){ - pParent = p->pParent; - if( pParent && pParent->eType==FTSQUERY_NOT && pParent->pRight==p ){ - return 1; - } - p = pParent; - } - return 0; -} - -/* -** Add entries to pSnippet->aMatch[] for every match that occurs against -** document zDoc[0..nDoc-1] which is stored in column iColumn. -*/ -static int snippetOffsetsOfColumn( - Fts3Cursor *pCur, /* The fulltest search cursor */ - Snippet *pSnippet, /* The Snippet object to be filled in */ - int iColumn, /* Index of fulltext table column */ - const char *zDoc, /* Text of the fulltext table column */ - int nDoc /* Length of zDoc in bytes */ -){ - const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */ - sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */ - sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */ - Fts3Table *pVtab; /* The full text index */ - int nColumn; /* Number of columns in the index */ - int i, j; /* Loop counters */ - int rc; /* Return code */ - unsigned int match, prevMatch; /* Phrase search bitmasks */ - const char *zToken; /* Next token from the tokenizer */ - int nToken; /* Size of zToken */ - int iBegin, iEnd, iPos; /* Offsets of beginning and end */ - - /* The following variables keep a circular buffer of the last - ** few tokens */ - unsigned int iRotor = 0; /* Index of current token */ - int iRotorBegin[FTS3_ROTOR_SZ]; /* Beginning offset of token */ - int iRotorLen[FTS3_ROTOR_SZ]; /* Length of token */ - - pVtab = (Fts3Table *)pCur->base.pVtab; - nColumn = pVtab->nColumn; - pTokenizer = pVtab->pTokenizer; - pTModule = pTokenizer->pModule; - rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); - if( rc ) return rc; - pTCursor->pTokenizer = pTokenizer; - - prevMatch = 0; - while( (rc = pTModule->xNext(pTCursor, &zToken, &nToken, - &iBegin, &iEnd, &iPos))==SQLITE_OK ){ - Fts3Expr *pIter = pCur->pExpr; - int iIter = -1; - iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin; - iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin; - match = 0; - for(i=0; i<(FTS3_ROTOR_SZ-1) && fts3NextExprToken(&pIter, &iIter); i++){ - int nPhrase; /* Number of tokens in current phrase */ - struct PhraseToken *pToken; /* Current token */ - int iCol; /* Column index */ - - if( fts3ExprBeneathNot(pIter) ) continue; - nPhrase = pIter->pPhrase->nToken; - pToken = &pIter->pPhrase->aToken[iIter]; - iCol = pIter->pPhrase->iColumn; - if( iCol>=0 && iColn>nToken ) continue; - if( !pToken->isPrefix && pToken->nn<=nToken ); - if( memcmp(pToken->z, zToken, pToken->n) ) continue; - if( iIter>0 && (prevMatch & (1<=0; j--){ - int k = (iRotor-j) & FTS3_ROTOR_MASK; - rc = snippetAppendMatch(pSnippet, iColumn, i-j, iPos-j, - iRotorBegin[k], iRotorLen[k]); - if( rc ) goto end_offsets_of_column; - } - } - } - prevMatch = match<<1; - iRotor++; - } -end_offsets_of_column: - pTModule->xClose(pTCursor); - return rc==SQLITE_DONE ? SQLITE_OK : rc; -} - -/* -** Remove entries from the pSnippet structure to account for the NEAR -** operator. When this is called, pSnippet contains the list of token -** offsets produced by treating all NEAR operators as AND operators. -** This function removes any entries that should not be present after -** accounting for the NEAR restriction. For example, if the queried -** document is: -** -** "A B C D E A" -** -** and the query is: -** -** A NEAR/0 E -** -** then when this function is called the Snippet contains token offsets -** 0, 4 and 5. This function removes the "0" entry (because the first A -** is not near enough to an E). -** -** When this function is called, the value pointed to by parameter piLeft is -** the integer id of the left-most token in the expression tree headed by -** pExpr. This function increments *piLeft by the total number of tokens -** in the expression tree headed by pExpr. -** -** Return 1 if any trimming occurs. Return 0 if no trimming is required. -*/ -static int trimSnippetOffsets( - Fts3Expr *pExpr, /* The search expression */ - Snippet *pSnippet, /* The set of snippet offsets to be trimmed */ - int *piLeft /* Index of left-most token in pExpr */ -){ - if( pExpr ){ - if( trimSnippetOffsets(pExpr->pLeft, pSnippet, piLeft) ){ - return 1; - } - - switch( pExpr->eType ){ - case FTSQUERY_PHRASE: - *piLeft += pExpr->pPhrase->nToken; - break; - case FTSQUERY_NEAR: { - /* The right-hand-side of a NEAR operator is always a phrase. The - ** left-hand-side is either a phrase or an expression tree that is - ** itself headed by a NEAR operator. The following initializations - ** set local variable iLeft to the token number of the left-most - ** token in the right-hand phrase, and iRight to the right most - ** token in the same phrase. For example, if we had: - ** - ** MATCH '"abc def" NEAR/2 "ghi jkl"' - ** - ** then iLeft will be set to 2 (token number of ghi) and nToken will - ** be set to 4. - */ - Fts3Expr *pLeft = pExpr->pLeft; - Fts3Expr *pRight = pExpr->pRight; - int iLeft = *piLeft; - int nNear = pExpr->nNear; - int nToken = pRight->pPhrase->nToken; - int jj, ii; - if( pLeft->eType==FTSQUERY_NEAR ){ - pLeft = pLeft->pRight; - } - assert( pRight->eType==FTSQUERY_PHRASE ); - assert( pLeft->eType==FTSQUERY_PHRASE ); - nToken += pLeft->pPhrase->nToken; - - for(ii=0; iinMatch; ii++){ - struct snippetMatch *p = &pSnippet->aMatch[ii]; - if( p->iTerm==iLeft ){ - int isOk = 0; - /* Snippet ii is an occurence of query term iLeft in the document. - ** It occurs at position (p->iToken) of the document. We now - ** search for an instance of token (iLeft-1) somewhere in the - ** range (p->iToken - nNear)...(p->iToken + nNear + nToken) within - ** the set of snippetMatch structures. If one is found, proceed. - ** If one cannot be found, then remove snippets ii..(ii+N-1) - ** from the matching snippets, where N is the number of tokens - ** in phrase pRight->pPhrase. - */ - for(jj=0; isOk==0 && jjnMatch; jj++){ - struct snippetMatch *p2 = &pSnippet->aMatch[jj]; - if( p2->iTerm==(iLeft-1) ){ - if( p2->iToken>=(p->iToken-nNear-1) - && p2->iToken<(p->iToken+nNear+nToken) - ){ - isOk = 1; - } - } - } - if( !isOk ){ - int kk; - for(kk=0; kkpPhrase->nToken; kk++){ - pSnippet->aMatch[kk+ii].iTerm = -2; - } - return 1; - } - } - if( p->iTerm==(iLeft-1) ){ - int isOk = 0; - for(jj=0; isOk==0 && jjnMatch; jj++){ - struct snippetMatch *p2 = &pSnippet->aMatch[jj]; - if( p2->iTerm==iLeft ){ - if( p2->iToken<=(p->iToken+nNear+1) - && p2->iToken>(p->iToken-nNear-nToken) - ){ - isOk = 1; - } - } - } - if( !isOk ){ - int kk; - for(kk=0; kkpPhrase->nToken; kk++){ - pSnippet->aMatch[ii-kk].iTerm = -2; - } - return 1; - } - } - } - break; - } - } - - if( trimSnippetOffsets(pExpr->pRight, pSnippet, piLeft) ){ - return 1; - } - } - return 0; -} - -/* -** Compute all offsets for the current row of the query. -** If the offsets have already been computed, this routine is a no-op. -*/ -static int snippetAllOffsets(Fts3Cursor *pCsr, Snippet **ppSnippet){ - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; /* The FTS3 virtual table */ - int nColumn; /* Number of columns. Docid does count */ - int iColumn; /* Index of of a column */ - int i; /* Loop index */ - int iFirst; /* First column to search */ - int iLast; /* Last coumn to search */ - int iTerm = 0; - Snippet *pSnippet; - int rc = SQLITE_OK; - - if( pCsr->pExpr==0 ){ - return SQLITE_OK; - } - - pSnippet = (Snippet *)sqlite3_malloc(sizeof(Snippet)); - *ppSnippet = pSnippet; - if( !pSnippet ){ - return SQLITE_NOMEM; - } - memset(pSnippet, 0, sizeof(Snippet)); - - nColumn = p->nColumn; - iColumn = (pCsr->eSearch - 2); - if( iColumn<0 || iColumn>=nColumn ){ - /* Look for matches over all columns of the full-text index */ - iFirst = 0; - iLast = nColumn-1; - }else{ - /* Look for matches in the iColumn-th column of the index only */ - iFirst = iColumn; - iLast = iColumn; - } - for(i=iFirst; rc==SQLITE_OK && i<=iLast; i++){ - const char *zDoc; - int nDoc; - zDoc = (const char*)sqlite3_column_text(pCsr->pStmt, i+1); - nDoc = sqlite3_column_bytes(pCsr->pStmt, i+1); - if( zDoc==0 && sqlite3_column_type(pCsr->pStmt, i+1)!=SQLITE_NULL ){ - rc = SQLITE_NOMEM; - }else{ - rc = snippetOffsetsOfColumn(pCsr, pSnippet, i, zDoc, nDoc); - } - } - - while( trimSnippetOffsets(pCsr->pExpr, pSnippet, &iTerm) ){ - iTerm = 0; - } - - return rc; -} - -/* -** Convert the information in the aMatch[] array of the snippet -** into the string zOffset[0..nOffset-1]. This string is used as -** the return of the SQL offsets() function. -*/ -static void snippetOffsetText(Snippet *p){ - int i; - int cnt = 0; - StringBuffer sb; - char zBuf[200]; - if( p->zOffset ) return; - fts3SnippetSbInit(&sb); - for(i=0; inMatch; i++){ - struct snippetMatch *pMatch = &p->aMatch[i]; - if( pMatch->iTerm>=0 ){ - /* If snippetMatch.iTerm is less than 0, then the match was - ** discarded as part of processing the NEAR operator (see the - ** trimSnippetOffsetsForNear() function for details). Ignore - ** it in this case - */ - zBuf[0] = ' '; - sqlite3_snprintf(sizeof(zBuf)-1, &zBuf[cnt>0], "%d %d %d %d", - pMatch->iCol, pMatch->iTerm, pMatch->iStart, pMatch->nByte); - fts3SnippetAppend(&sb, zBuf, -1); - cnt++; - } - } - p->zOffset = sb.z; - p->nOffset = sb.z ? sb.nUsed : 0; -} - -/* -** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set -** of matching words some of which might be in zDoc. zDoc is column -** number iCol. -** -** iBreak is suggested spot in zDoc where we could begin or end an -** excerpt. Return a value similar to iBreak but possibly adjusted -** to be a little left or right so that the break point is better. -*/ -static int wordBoundary( - int iBreak, /* The suggested break point */ - const char *zDoc, /* Document text */ - int nDoc, /* Number of bytes in zDoc[] */ - struct snippetMatch *aMatch, /* Matching words */ - int nMatch, /* Number of entries in aMatch[] */ - int iCol /* The column number for zDoc[] */ -){ - int i; - if( iBreak<=10 ){ - return 0; - } - if( iBreak>=nDoc-10 ){ - return nDoc; - } - for(i=0; ALWAYS(i0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ - return aMatch[i-1].iStart; - } - } - for(i=1; i<=10; i++){ - if( fts3snippetIsspace(zDoc[iBreak-i]) ){ - return iBreak - i + 1; - } - if( fts3snippetIsspace(zDoc[iBreak+i]) ){ - return iBreak + i + 1; - } - } - return iBreak; -} - - - -/* -** Allowed values for Snippet.aMatch[].snStatus -*/ -#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */ -#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */ - -/* -** Generate the text of a snippet. -*/ -static void snippetText( - Fts3Cursor *pCursor, /* The cursor we need the snippet for */ - Snippet *pSnippet, - const char *zStartMark, /* Markup to appear before each match */ - const char *zEndMark, /* Markup to appear after each match */ - const char *zEllipsis /* Ellipsis mark */ -){ - int i, j; - struct snippetMatch *aMatch; - int nMatch; - int nDesired; - StringBuffer sb; - int tailCol; - int tailOffset; - int iCol; - int nDoc; - const char *zDoc; - int iStart, iEnd; - int tailEllipsis = 0; - int iMatch; - - - sqlite3_free(pSnippet->zSnippet); - pSnippet->zSnippet = 0; - aMatch = pSnippet->aMatch; - nMatch = pSnippet->nMatch; - fts3SnippetSbInit(&sb); - - for(i=0; i0; i++){ - if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue; - nDesired--; - iCol = aMatch[i].iCol; - zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1); - nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1); - iStart = aMatch[i].iStart - 40; - iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol); - if( iStart<=10 ){ - iStart = 0; - } - if( iCol==tailCol && iStart<=tailOffset+20 ){ - iStart = tailOffset; - } - if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ - fts3SnippetTrimWhiteSpace(&sb); - fts3SnippetAppendWhiteSpace(&sb); - fts3SnippetAppend(&sb, zEllipsis, -1); - fts3SnippetAppendWhiteSpace(&sb); - } - iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; - iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); - if( iEnd>=nDoc-10 ){ - iEnd = nDoc; - tailEllipsis = 0; - }else{ - tailEllipsis = 1; - } - while( iMatchzSnippet = sb.z; - pSnippet->nSnippet = sb.z ? sb.nUsed : 0; -} - -void sqlite3Fts3Offsets( - sqlite3_context *pCtx, /* SQLite function call context */ - Fts3Cursor *pCsr /* Cursor object */ -){ - Snippet *p; /* Snippet structure */ - int rc = snippetAllOffsets(pCsr, &p); - if( rc==SQLITE_OK ){ - snippetOffsetText(p); - if( p->zOffset ){ - sqlite3_result_text(pCtx, p->zOffset, p->nOffset, SQLITE_TRANSIENT); - }else{ - sqlite3_result_error_nomem(pCtx); - } - }else{ - sqlite3_result_error_nomem(pCtx); - } - fts3SnippetFree(p); -} - -void sqlite3Fts3Snippet( - sqlite3_context *pCtx, /* SQLite function call context */ - Fts3Cursor *pCsr, /* Cursor object */ - const char *zStart, /* Snippet start text - "" */ - const char *zEnd, /* Snippet end text - "" */ - const char *zEllipsis /* Snippet ellipsis text - "..." */ -){ - Snippet *p; /* Snippet structure */ - int rc = snippetAllOffsets(pCsr, &p); - if( rc==SQLITE_OK ){ - snippetText(pCsr, p, zStart, zEnd, zEllipsis); - if( p->zSnippet ){ - sqlite3_result_text(pCtx, p->zSnippet, p->nSnippet, SQLITE_TRANSIENT); - }else{ - sqlite3_result_error_nomem(pCtx); - } - }else{ - sqlite3_result_error_nomem(pCtx); - } - fts3SnippetFree(p); -} - -/************************************************************************* -** Below this point is the alternative, experimental snippet() implementation. -*/ - #define SNIPPET_BUFFER_CHUNK 64 #define SNIPPET_BUFFER_SIZE SNIPPET_BUFFER_CHUNK*4 #define SNIPPET_BUFFER_MASK (SNIPPET_BUFFER_SIZE-1) @@ -780,29 +63,81 @@ typedef struct LoadDoclistCtx LoadDoclistCtx; struct LoadDoclistCtx { Fts3Table *pTab; /* FTS3 Table */ int nPhrase; /* Number of phrases so far */ + int nToken; /* Number of tokens so far */ }; -static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, void *ctx){ +static int fts3ExprNearTrim(Fts3Expr *pExpr){ + int rc = SQLITE_OK; + Fts3Expr *pParent = pExpr->pParent; + + assert( pExpr->eType==FTSQUERY_PHRASE ); + while( rc==SQLITE_OK + && pExpr->aDoclist && pParent + && pParent->eType==FTSQUERY_NEAR + && pParent->pRight==pExpr + ){ + /* This expression (pExpr) is the right-hand-side of a NEAR operator. + ** Find the expression to the left of the same operator. + */ + int nNear = pParent->nNear; + Fts3Expr *pLeft = pParent->pLeft; + + if( pLeft->eType!=FTSQUERY_PHRASE ){ + assert( pLeft->eType==FTSQUERY_NEAR ); + assert( pLeft->pRight->eType==FTSQUERY_PHRASE ); + pLeft = pLeft->pRight; + } + + rc = sqlite3Fts3ExprNearTrim(pLeft, pExpr, nNear); + + pExpr = pLeft; + pParent = pExpr->pParent; + } + + return rc; +} + +static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, void *ctx){ int rc = SQLITE_OK; LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; + p->nPhrase++; + p->nToken += pExpr->pPhrase->nToken; + if( pExpr->isLoaded==0 ){ rc = sqlite3Fts3ExprLoadDoclist(p->pTab, pExpr); pExpr->isLoaded = 1; - if( rc==SQLITE_OK && pExpr->aDoclist ){ - pExpr->pCurrent = pExpr->aDoclist; - pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent,&pExpr->iCurrent); + if( rc==SQLITE_OK ){ + fts3ExprNearTrim(pExpr); } } + return rc; } -static int fts3ExprLoadDoclists(Fts3Cursor *pCsr, int *pnPhrase){ +static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, void *ctx){ + if( pExpr->aDoclist ){ + pExpr->pCurrent = pExpr->aDoclist; + pExpr->iCurrent = 0; + pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent, &pExpr->iCurrent); + } + return SQLITE_OK; +} + +static int fts3ExprLoadDoclists( + Fts3Cursor *pCsr, + int *pnPhrase, /* OUT: Number of phrases in query */ + int *pnToken /* OUT: Number of tokens in query */ +){ int rc; - LoadDoclistCtx sCtx = {0, 0}; + LoadDoclistCtx sCtx = {0, 0, 0}; sCtx.pTab = (Fts3Table *)pCsr->base.pVtab; - rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); - *pnPhrase = sCtx.nPhrase; + rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx); + if( rc==SQLITE_OK ){ + (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb2, 0); + } + if( pnPhrase ) *pnPhrase = sCtx.nPhrase; + if( pnToken ) *pnToken = sCtx.nToken; return rc; } @@ -832,20 +167,20 @@ static int fts3LoadSnippetBuffer( int iPrev = aiPrev[i]; char *pList = apList[i]; - if( !pList ){ + if( iPrev<0 || !pList ){ nFin++; continue; } while( iPrev<(iPos+SNIPPET_BUFFER_CHUNK) ){ - if( iPrev>=iPos ){ - aBuffer[iPrev&SNIPPET_BUFFER_MASK] = (u8)(i+1); - } + assert( iPrev>=iPos ); + aBuffer[iPrev&SNIPPET_BUFFER_MASK] = i+1; if( 0==((*pList)&0xFE) ){ - nFin++; + iPrev = -1; break; + }else{ + fts3GetDeltaPosition(&pList, &iPrev); } - fts3GetDeltaPosition(&pList, &iPrev); } aiPrev[i] = iPrev; @@ -892,15 +227,11 @@ static void fts3SnippetCnt( ){ int iSub = (iIdx-1)&SNIPPET_BUFFER_MASK; int iAdd = (iIdx+nSnippet-1)&SNIPPET_BUFFER_MASK; - int iSub2 = (iIdx+(nSnippet/3)-1)&SNIPPET_BUFFER_MASK; - int iAdd2 = (iIdx+(nSnippet*2/3)-1)&SNIPPET_BUFFER_MASK; u64 h = *pHlmask; anCnt[ aBuffer[iSub] ]--; - anCnt[ aBuffer[iSub2] ]--; anCnt[ aBuffer[iAdd] ]++; - anCnt[ aBuffer[iAdd2] ]++; h = h >> 1; if( aBuffer[iAdd] ){ @@ -912,22 +243,46 @@ static void fts3SnippetCnt( *pHlmask = h; } -static int fts3SnippetScore(int n, int *anCnt){ +static int fts3SnippetScore(int n, int *anCnt, u64 covmask){ int j; int iScore = 0; for(j=1; j<=n; j++){ int nCnt = anCnt[j]; - iScore += nCnt + (nCnt ? 1000 : 0); + iScore += nCnt; + if( nCnt && 0==(covmask & ((u64)1 << (j-1))) ){ + iScore += 1000; + } } return iScore; } +static u64 fts3SnippetMask(int n, int *anCnt){ + int j; + u64 mask = 0; + + if( n>64 ) n = 64; + for(j=1; j<=n; j++){ + if( anCnt[j] ) mask |= ((u64)1)<<(j-1); + } + return mask; +} + +typedef struct SnippetFragment SnippetFragment; +struct SnippetFragment { + int iCol; /* Column snippet is extracted from */ + int iPos; /* Index of first token in snippet */ + u64 covered; /* Mask of query phrases covered */ + u64 hlmask; /* Mask of snippet terms to highlight */ +}; + static int fts3BestSnippet( int nSnippet, /* Desired snippet length */ Fts3Cursor *pCsr, /* Cursor to create snippet for */ int iCol, /* Index of column to create snippet from */ - int *piPos, /* OUT: Starting token for best snippet */ - u64 *pHlmask /* OUT: Highlight mask for best snippet */ + u64 mCovered, /* Mask of phrases already covered */ + u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ + SnippetFragment *pFragment, /* OUT: Best snippet found */ + int *piScore /* OUT: Score of snippet pFragment */ ){ int rc; /* Return Code */ u8 aBuffer[SNIPPET_BUFFER_SIZE];/* Circular snippet buffer */ @@ -935,20 +290,21 @@ static int fts3BestSnippet( int *anToken; /* Number of tokens in each phrase */ char **apList; /* Array of position lists */ int *anCnt; /* Running totals of phrase occurences */ - int nList; - - int i; - + int nList; /* Number of phrases in expression */ + int nByte; /* Bytes of dynamic space required */ + int i; /* Loop counter */ u64 hlmask = 0; /* Current mask of highlighted terms */ u64 besthlmask = 0; /* Mask of highlighted terms for iBestPos */ + u64 bestcovmask = 0; /* Mask of terms with at least one hit */ int iBestPos = 0; /* Starting position of 'best' snippet */ int iBestScore = 0; /* Score of best snippet higher->better */ + int iEnd = 0x7FFFFFFF; SnippetCtx sCtx; /* Iterate through the phrases in the expression to count them. The same ** callback makes sure the doclists are loaded for each phrase. */ - rc = fts3ExprLoadDoclists(pCsr, &nList); + rc = fts3ExprLoadDoclists(pCsr, &nList, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -956,16 +312,15 @@ static int fts3BestSnippet( /* Now that it is known how many phrases there are, allocate and zero ** the required arrays using malloc(). */ - apList = sqlite3_malloc( - sizeof(u8*)*nList + /* apList */ + nByte = sizeof(u8*)*nList + /* apList */ sizeof(int)*(nList) + /* anToken */ sizeof(int)*nList + /* aiPrev */ - sizeof(int)*(nList+1) /* anCnt */ - ); + sizeof(int)*(nList+1); /* anCnt */ + apList = (char **)sqlite3_malloc(nByte); if( !apList ){ return SQLITE_NOMEM; } - memset(apList, 0, sizeof(u8*)*nList+sizeof(int)*nList+sizeof(int)*nList); + memset(apList, 0, nByte); anToken = (int *)&apList[nList]; aiPrev = &anToken[nList]; anCnt = &aiPrev[nList]; @@ -979,6 +334,12 @@ static int fts3BestSnippet( sCtx.iPhrase = 0; (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sCtx); + for(i=0; i=0 ){ + *pmSeen |= (u64)1 << i; + } + } + /* Load the first two chunks of data into the buffer. */ memset(aBuffer, 0, SNIPPET_BUFFER_SIZE); fts3LoadSnippetBuffer(0, aBuffer, nList, apList, aiPrev); @@ -988,16 +349,19 @@ static int fts3BestSnippet( for(i=1-nSnippet; i<=0; i++){ fts3SnippetCnt(i, nSnippet, anCnt, aBuffer, anToken, &hlmask); } - iBestScore = fts3SnippetScore(nList, anCnt); + iBestScore = fts3SnippetScore(nList, anCnt, mCovered); besthlmask = hlmask; iBestPos = 0; + bestcovmask = fts3SnippetMask(nList, anCnt); - for(i=1; 1; i++){ + for(i=1; iiBestScore ){ iBestPos = i; iBestScore = iScore; besthlmask = hlmask; + bestcovmask = fts3SnippetMask(nList, anCnt); } } sqlite3_free(apList); - *piPos = iBestPos; - *pHlmask = besthlmask; + + pFragment->iPos = iBestPos; + pFragment->hlmask = besthlmask; + pFragment->iCol = iCol; + pFragment->covered = bestcovmask; + *piScore = iBestScore; return SQLITE_OK; } @@ -1033,7 +402,7 @@ static int fts3StringAppend( int nAppend ){ if( nAppend<0 ){ - nAppend = (int)strlen(zAppend); + nAppend = strlen(zAppend); } if( pStr->n+nAppend+1>=pStr->nAlloc ){ @@ -1053,30 +422,86 @@ static int fts3StringAppend( return SQLITE_OK; } +int fts3SnippetShift( + Fts3Table *pTab, + int nSnippet, + const char *zDoc, + int nDoc, + int *piPos, + u64 *pHlmask +){ + u64 hlmask = *pHlmask; + + if( hlmask ){ + int nLeft; + int nRight; + int nDesired; + + for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); + for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); + + nDesired = (nLeft-nRight)/2; + if( nDesired>0 ){ + int nShift; + int iCurrent = 0; + int rc; + sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer_cursor *pC; + + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; + rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); + pC->pTokenizer = pTab->pTokenizer; + while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ + const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3; + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); + } + pMod->xClose(pC); + if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ + return rc; + } + nShift = iCurrent-nSnippet; + if( nShift>0 ){ + *piPos += nShift; + *pHlmask = hlmask >> nShift; + } + } + } + return SQLITE_OK; +} + static int fts3SnippetText( Fts3Cursor *pCsr, /* FTS3 Cursor */ - const char *zDoc, /* Document to extract snippet from */ - int nDoc, /* Size of zDoc in bytes */ + SnippetFragment *pFragment, /* Snippet to extract */ int nSnippet, /* Number of tokens in extracted snippet */ - int iPos, /* Index of first document token in snippet */ - u64 hlmask, /* Bitmask of terms to highlight in snippet */ const char *zOpen, /* String inserted before highlighted term */ const char *zClose, /* String inserted after highlighted term */ const char *zEllipsis, - char **pzSnippet /* OUT: Snippet text */ + StrBuffer *pOut ){ Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc; /* Return code */ - int iCurrent = 0; - int iStart = 0; - int iEnd; + const char *zDoc; /* Document text to extract snippet from */ + int nDoc; /* Size of zDoc in bytes */ + int iCurrent = 0; /* Current token number of document */ + int iStart = 0; /* Byte offset of current token */ + int iEnd = 0; /* Byte offset of end of current token */ + int isShiftDone = 0; + int iPos = pFragment->iPos; + u64 hlmask = pFragment->hlmask; sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ const char *ZDUMMY; /* Dummy arguments used with tokenizer */ int DUMMY1, DUMMY2, DUMMY3; /* Dummy arguments used with tokenizer */ - - StrBuffer res = {0, 0, 0}; /* Result string */ + + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, pFragment->iCol+1)!=SQLITE_NULL ){ + return SQLITE_NOMEM; + } + return SQLITE_OK; + } + nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1); /* Open a token cursor on the document. Read all tokens up to and ** including token iPos (the first token of the snippet). Set variable @@ -1084,14 +509,10 @@ static int fts3SnippetText( */ pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); - while( rc==SQLITE_OK && iCurrentxNext(pC, &ZDUMMY, &DUMMY1, &iStart, &DUMMY2, &iCurrent); - } - iEnd = iStart; - - if( rc==SQLITE_OK && iStart>0 ){ - rc = fts3StringAppend(&res, zEllipsis, -1); + if( rc!=SQLITE_OK ){ + return rc; } + pC->pTokenizer = pTab->pTokenizer; while( rc==SQLITE_OK ){ int iBegin; @@ -1099,15 +520,26 @@ static int fts3SnippetText( rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); if( rc==SQLITE_OK ){ + if( iCurrent=(iPos+nSnippet) ){ rc = SQLITE_DONE; }else{ iEnd = iFin; if( hlmask & ((u64)1 << (iCurrent-iPos)) ){ - if( fts3StringAppend(&res, &zDoc[iStart], iBegin-iStart) - || fts3StringAppend(&res, zOpen, -1) - || fts3StringAppend(&res, &zDoc[iBegin], iEnd-iBegin) - || fts3StringAppend(&res, zClose, -1) + if( fts3StringAppend(pOut, &zDoc[iStart], iBegin-iStart) + || fts3StringAppend(pOut, zOpen, -1) + || fts3StringAppend(pOut, &zDoc[iBegin], iEnd-iBegin) + || fts3StringAppend(pOut, zClose, -1) ){ rc = SQLITE_NOMEM; } @@ -1118,23 +550,18 @@ static int fts3SnippetText( } assert( rc!=SQLITE_OK ); if( rc==SQLITE_DONE ){ - rc = fts3StringAppend(&res, &zDoc[iStart], iEnd-iStart); + rc = fts3StringAppend(pOut, &zDoc[iStart], iEnd-iStart); if( rc==SQLITE_OK ){ rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); - if( rc==SQLITE_OK ){ - rc = fts3StringAppend(&res, zEllipsis, -1); - }else if( rc==SQLITE_DONE ){ - rc = fts3StringAppend(&res, &zDoc[iEnd], -1); + if( rc==SQLITE_DONE ){ + rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); + }else if( rc==SQLITE_OK && zEllipsis ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); } } } pMod->xClose(pC); - if( rc!=SQLITE_OK ){ - sqlite3_free(res.z); - }else{ - *pzSnippet = res.z; - } return rc; } @@ -1264,7 +691,7 @@ static int fts3GetMatchinfo(Fts3Cursor *pCsr){ g.pTab = pTab; g.nCol = pTab->nColumn; g.iPhrase = 0; - rc = fts3ExprLoadDoclists(pCsr, &nPhrase); + rc = fts3ExprLoadDoclists(pCsr, &nPhrase, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -1299,7 +726,7 @@ static int fts3GetMatchinfo(Fts3Cursor *pCsr){ return SQLITE_OK; } -void sqlite3Fts3Snippet2( +void sqlite3Fts3Snippet( sqlite3_context *pCtx, /* SQLite function call context */ Fts3Cursor *pCsr, /* Cursor object */ const char *zStart, /* Snippet start text - "" */ @@ -1308,27 +735,241 @@ void sqlite3Fts3Snippet2( int iCol, /* Extract snippet from this column */ int nToken /* Approximate number of tokens in snippet */ ){ - int rc; - int iPos = 0; - u64 hlmask = 0; - char *z = 0; - int nDoc; - const char *zDoc; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int i; + StrBuffer res = {0, 0, 0}; + + /* The returned text includes up to four fragments of text extracted from + ** the data in the current row. The first iteration of the for(...) loop + ** below attempts to locate a single fragment of text nToken tokens in + ** size that contains at least one instance of all phrases in the query + ** expression that appear in the current row. If such a fragment of text + ** cannot be found, the second iteration of the loop attempts to locate + ** a pair of fragments, and so on. + */ + int nSnippet = 0; /* Number of fragments in this snippet */ + SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ + int nFToken = -1; /* Number of tokens in each fragment */ - rc = fts3BestSnippet(nToken, pCsr, iCol, &iPos, &hlmask); + do { + int iSnip; /* Loop counter 0..nSnippet-1 */ + u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ + u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ - nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); - zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); + nSnippet++; + nFToken = (nToken+nSnippet-1) / nSnippet; - if( rc==SQLITE_OK ){ - rc = fts3SnippetText( - pCsr, zDoc, nDoc, nToken, iPos, hlmask, zStart, zEnd, zEllipsis, &z); + for(iSnip=0; iSnipnColumn; iRead++){ + SnippetFragment sF; + int iS; + if( iCol>=0 && iRead!=iCol ) continue; + + /* Find the best snippet of nFToken tokens in column iRead. */ + rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); + if( rc!=SQLITE_OK ){ + goto snippet_out; + } + if( iS>iBestScore ){ + *pFragment = sF; + iBestScore = iS; + } + } + + mCovered |= pFragment->covered; + } + + /* If all query phrases seen by fts3BestSnippet() are present in at least + ** one of the nSnippet snippet fragments, break out of the loop. + */ + assert( (mCovered&mSeen)==mCovered ); + if( mSeen==mCovered ) break; + }while( nSnippet0 ); + + for(i=0; i0 || p->iPos>0 ){ + fts3StringAppend(&res, zEllipsis, -1); + } + rc = fts3SnippetText(pCsr, p, nFToken, zStart, zEnd, zTail, &res); } + + snippet_out: if( rc!=SQLITE_OK ){ sqlite3_result_error_code(pCtx, rc); + sqlite3_free(res.z); + }else{ + sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); + } +} + + +typedef struct TermOffset TermOffset; +struct TermOffset { + char *pList; /* Position-list */ + int iPos; /* Position just read from pList */ + int iOff; +}; +typedef struct TermOffsetCtx TermOffsetCtx; + +struct TermOffsetCtx { + int iCol; /* Column of table to populate aTerm for */ + int iTerm; + sqlite3_int64 iDocid; + TermOffset *aTerm; +}; + +/* +** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). +*/ +static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, void *ctx){ + TermOffsetCtx *p = (TermOffsetCtx *)ctx; + int nTerm; /* Number of tokens in phrase */ + int iTerm; /* For looping through nTerm phrase terms */ + char *pList; /* Pointer to position list for phrase */ + int iPos = 0; /* First position in position-list */ + + pList = sqlite3Fts3FindPositions(pExpr, p->iDocid, p->iCol); + nTerm = pExpr->pPhrase->nToken; + if( pList ){ + fts3GetDeltaPosition(&pList, &iPos); + assert( iPos>=0 ); + } + + for(iTerm=0; iTermaTerm[p->iTerm++]; + pT->iOff = nTerm-iTerm-1; + pT->pList = pList; + pT->iPos = iPos; + } + + return SQLITE_OK; +} + +/* +** Implementation of offsets() function. +*/ +void sqlite3Fts3Offsets( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr /* Cursor object */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; + const char *ZDUMMY; + int NDUMMY; + + int rc; /* Return Code */ + int nToken; /* Number of tokens in query */ + int iCol; /* Column currently being processed */ + StrBuffer res = {0, 0, 0}; /* Result string */ + + TermOffsetCtx sCtx; + memset(&sCtx, 0, sizeof(sCtx)); + + assert( pCsr->isRequireSeek==0 ); + + /* Count the number of terms in the query */ + rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); + if( rc!=SQLITE_OK ) goto offsets_out; + + /* Allocate the array of TermOffset iterators. */ + sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); + if( 0==sCtx.aTerm ){ + rc = SQLITE_NOMEM; + goto offsets_out; + } + sCtx.iDocid = pCsr->iPrevId; + + for(iCol=0; iColnColumn; iCol++){ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ + int iStart; + int iEnd; + int iCurrent; + const char *zDoc; + int nDoc; + + /* Initialize the contents of sCtx.aTerm[] for column iCol. */ + sCtx.iCol = iCol; + sCtx.iTerm = 0; + rc = fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); + if( rc!=SQLITE_OK ) goto offsets_out; + + /* Initialize a tokenizer iterator to iterate through column iCol. */ + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC); + if( rc!=SQLITE_OK ) goto offsets_out; + pC->pTokenizer = pTab->pTokenizer; + + rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + while( rc==SQLITE_OK ){ + int i; /* Used to loop through terms */ + int iMinPos = 0x7FFFFFFF; /* Position of next token */ + TermOffset *pTerm = 0; /* TermOffset associated with next token */ + + for(i=0; ipList && (pT->iPos-pT->iOff)iPos-pT->iOff; + pTerm = pT; + } + } + + if( !pTerm ){ + /* All offsets for this column have been gathered. */ + break; + }else{ + assert( iCurrent<=iMinPos ); + if( 0==(0xFE&*pTerm->pList) ){ + pTerm->pList = 0; + }else{ + fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); + } + while( rc==SQLITE_OK && iCurrentxNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + } + if( rc==SQLITE_OK ){ + char aBuffer[64]; + sqlite3_snprintf(sizeof(aBuffer), aBuffer, + "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart + ); + fts3StringAppend(&res, aBuffer, -1); + } + } + } + if( rc==SQLITE_DONE ){ + rc = SQLITE_ERROR; + } + + pMod->xClose(pC); + if( rc!=SQLITE_OK ) goto offsets_out; + } + + offsets_out: + sqlite3_free(sCtx.aTerm); + assert( rc!=SQLITE_DONE ); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + sqlite3_free(res.z); }else{ - sqlite3_result_text(pCtx, z, -1, sqlite3_free); + sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); } + return; } void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){ diff --git a/manifest b/manifest index 959dc6ca24..002e04be4a 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,5 @@ ------BEGIN PGP SIGNED MESSAGE----- -Hash: SHA1 - -C Fix\san\sissue\swith\slemon\sgenerating\sincorrect\sgrammars.\s\sThis\sissue\sdoes\nnot\seffect\sSQLite. -D 2010-01-06T13:07:31 +C Change\sthe\sfts3\ssnippet\sfunction\sto\sreturn\s(hopefully)\smore\srelevant\ssnippets\sin\sless\stime. +D 2010-01-06T17:19:22 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in c5827ead754ab32b9585487177c93bb00b9497b3 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -59,15 +56,15 @@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d -F ext/fts3/fts3.c 15fb87c1f00dfd88c2fbbbd9e50f319ea77834f0 +F ext/fts3/fts3.c 04e95afa45789d7a3da59f458d4a8c1879c31446 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h 9326800fa10e06d8e9d6d519f873b1371252968a +F ext/fts3/fts3Int.h 45bc7e284806042119722c8f4127ee944b77f0dd F ext/fts3/fts3_expr.c f4ff02ebe854e97ac03ff00b38b728a9ab57fd4b F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 F ext/fts3/fts3_porter.c a651e287e02b49b565a6ccf9441959d434489156 -F ext/fts3/fts3_snippet.c 0e38f76c5992dd08d20fc81e1265763370f9ea4f +F ext/fts3/fts3_snippet.c 612b3ad63abf2c5c85b6a46aac94bd90280e905a F ext/fts3/fts3_tokenizer.c 1a49ee3d79cbf0b9386250370d9cbfe4bb89c8ff F ext/fts3/fts3_tokenizer.h 13ffd9fcb397fec32a05ef5cd9e0fa659bf3dbd3 F ext/fts3/fts3_tokenizer1.c 11a604a53cff5e8c28882727bf794e5252e5227b @@ -383,7 +380,7 @@ F test/fts3.test ae0433b09b12def08105640e57693726c4949338 F test/fts3_common.tcl 2a2044688ce3addb1dd58d3d846c574cf4b7bbcd F test/fts3aa.test 5327d4c1d9b6c61021696746cc9a6cdc5bf159c0 F test/fts3ab.test 09aeaa162aee6513d9ff336b6932211008b9d1f9 -F test/fts3ac.test 356280144a2c92aa7b11474afadfe62a437fcd69 +F test/fts3ac.test fc1ac42c33f8a66d48ae41e4728f7ca4b6dfc950 F test/fts3ad.test e40570cb6f74f059129ad48bcef3d7cbc20dda49 F test/fts3ae.test ce32a13b34b0260928e4213b4481acf801533bda F test/fts3af.test d394978c534eabf22dd0837e718b913fd66b499c @@ -392,7 +389,7 @@ F test/fts3ah.test ba181d6a3dee0c929f0d69df67cac9c47cda6bff F test/fts3ai.test d29cee6ed653e30de478066881cec8aa766531b2 F test/fts3aj.test 584facbc9ac4381a7ec624bfde677340ffc2a5a4 F test/fts3ak.test bd14deafe9d1586e8e9bf032411026ac4f8c925d -F test/fts3al.test 6d19619402d2133773262652fc3f185cdf6be667 +F test/fts3al.test 07d64326e79bbdbab20ee87fc3328fbf01641c9f F test/fts3am.test 218aa6ba0dfc50c7c16b2022aac5c6be593d08d8 F test/fts3an.test 931fa21bd80641ca594bfa32e105250a8a07918b F test/fts3ao.test 0aa29dd4fc1c8d46b1f7cfe5926f7ac97551bea9 @@ -405,9 +402,10 @@ F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851 F test/fts3expr.test 05dab77387801e4900009917bb18f556037d82da F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a F test/fts3malloc.test d02ee86b21edd2b43044e0d6dfdcd26cb6efddcb -F test/fts3near.test dc196dd17b4606f440c580d45b3d23aa975fd077 +F test/fts3near.test 2e318ee434d32babd27c167142e2b94ddbab4844 F test/fts3query.test ca21717993f51caa7e36231dba2499868f3f8a6f F test/fts3rnd.test 153b4214bad6084a348814f3dd651a92e2f31d9b +F test/fts3snippet.test bfbceb2e292ddfdc6bb0b1b252ccea78bd6091be F test/func.test af106ed834001738246d276659406823e35cde7b F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f F test/fuzz.test a4174c3009a3e2c2e14b31b364ebf7ddb49de2c9 @@ -786,14 +784,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 28d0d7710761114a44a1a3a425a6883c661f06e7 -R cb14a38f5906a10fa21936447376b66a -U drh -Z 437ad8ccf6b703d6a3f5435217fbeff7 ------BEGIN PGP SIGNATURE----- -Version: GnuPG v1.4.6 (GNU/Linux) - -iD8DBQFLRIsWoxKgR168RlERAko3AJ9cRW4W+hFzWCSEF5rdeL83LKknrgCfQKRR -l/RSoin5yCY/+/3Q1I6oeNA= -=d16B ------END PGP SIGNATURE----- +P 077a6bee2dd4668a5b13c37aa7d4c052350ec782 +R 63513c05ce3003328b753382175b1505 +U dan +Z 7955c05e9b09116e00ebafe15af16394 diff --git a/manifest.uuid b/manifest.uuid index efd93353cd..a4236dd065 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -077a6bee2dd4668a5b13c37aa7d4c052350ec782 \ No newline at end of file +8a208223a74d451f60d9cd707d63fb7d157d1737 \ No newline at end of file diff --git a/test/fts3ac.test b/test/fts3ac.test index 72e5410c7d..34abce44da 100644 --- a/test/fts3ac.test +++ b/test/fts3ac.test @@ -1131,39 +1131,36 @@ do_test fts3ac-4.2 { SELECT snippet(email) FROM email WHERE email MATCH 'christmas candlelight' } -} {{... place.? What do you think about going here Christmas -eve?? They have an 11:00 a.m. service and a candlelight service at 5:00 p.m., -among others. ...}} +} {{...here Christmas +eve?? They have an 11:00 a.m. service and a candlelight service...}} do_test fts3ac-4.3 { execsql { SELECT snippet(email) FROM email WHERE email MATCH 'deal sheet potential reuse' } -} {{EOL-Accenture Deal Sheet ... intent - Review Enron asset base for potential reuse/ licensing - Contract negotiations ...}} +} {{EOL-Accenture Deal Sheet...asset base for potential reuse/ licensing + Contract negotiations...}} do_test fts3ac-4.4 { execsql { SELECT snippet(email,'<<<','>>>',' ') FROM email WHERE email MATCH 'deal sheet potential reuse' } -} {{EOL-Accenture <<>> <<>> intent - Review Enron asset base for <<>> <<>>/ licensing - Contract negotiations }} +} {{EOL-Accenture <<>> <<>> asset base for <<>> <<>>/ licensing + Contract negotiations }} do_test fts3ac-4.5 { execsql { SELECT snippet(email,'<<<','>>>',' ') FROM email WHERE email MATCH 'first things' } -} {{Re: <<>> Polish Deal! Congrats! <<>> seem to be building rapidly now on the }} +} {{Re: <<>> Polish Deal! Congrats! <<>> seem to be building rapidly now }} do_test fts3ac-4.6 { execsql { SELECT snippet(email) FROM email WHERE email MATCH 'chris is here' } -} {{chris.germany@enron.com ... Sounds good to me. I bet this is next to the Warick?? Hotel. ... place.? What do you think about going here Christmas -eve?? They have an 11:00 a.m. ...}} +} {{...chris.germany@enron.com'" <chris...bet this is next to...about going here Christmas +eve...}} do_test fts3ac-4.7 { execsql { SELECT snippet(email) FROM email @@ -1171,19 +1168,15 @@ do_test fts3ac-4.7 { } } {{Erin: -Pursuant to your request, attached are the Schedule to ...}} +Pursuant to your request, attached are the Schedule to the ISDA Master Agreement, together...}} do_test fts3ac-4.8 { execsql { SELECT snippet(email) FROM email WHERE email MATCH 'ancillary load davis' } -} {{pete.davis@enron.com ... Start Date: 4/22/01; HourAhead hour: 3; No ancillary schedules awarded. -Variances detected. -Variances detected in Load schedule. - - LOG MESSAGES: +} {{pete.davis@enron.com...3; No ancillary schedules awarded...detected in Load schedule. -PARSING ...}} + LOG...}} # Combinations of AND and OR operators: # @@ -1192,22 +1185,17 @@ do_test fts3ac-5.1 { SELECT snippet(email) FROM email WHERE email MATCH 'questar enron OR com' } -} {{matt.smith@enron.com ... six reports: - -31 Keystone Receipts +} {{matt.smith@enron.com...31 Keystone Receipts 15 Questar Pipeline -40 Rockies Production -22 West_2 ...}} +40 Rockies...}} + do_test fts3ac-5.2 { execsql { SELECT snippet(email) FROM email WHERE email MATCH 'enron OR com questar' } -} {{matt.smith@enron.com ... six reports: - -31 Keystone Receipts +} {{matt.smith@enron.com...31 Keystone Receipts 15 Questar Pipeline -40 Rockies Production -22 West_2 ...}} +40 Rockies...}} finish_test diff --git a/test/fts3al.test b/test/fts3al.test index be01ecb7c0..02cc0d1695 100644 --- a/test/fts3al.test +++ b/test/fts3al.test @@ -53,6 +53,10 @@ do_test fts3al-1.3 { # # The trailing and leading hi-bit chars help with code which tests for # isspace() to coalesce multiple spaces. +# +# UPDATE: The above is no longer true; there is no such code in fts3. +# But leave the test in just the same. +# set word "\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80xxxxx\x80" set phrase1 "$word $word $word target $word $word $word" @@ -64,6 +68,6 @@ db eval "INSERT INTO t4 (content) VALUES ('$phrase2')" do_test fts3al-1.4 { execsql {SELECT rowid, length(snippet(t4)) FROM t4 WHERE t4 MATCH 'target'} -} {1 111 2 117} +} {1 241 2 247} finish_test diff --git a/test/fts3near.test b/test/fts3near.test index e824133bb5..9c4409e1ed 100644 --- a/test/fts3near.test +++ b/test/fts3near.test @@ -76,6 +76,17 @@ do_test fts3near-1.15 { execsql {SELECT docid FROM t1 WHERE content MATCH 'one NEAR two NEAR one'} } {3} +do_test fts3near-1.16 { + execsql { + SELECT docid FROM t1 WHERE content MATCH '"one three" NEAR/0 "four five"' + } +} {1} +do_test fts3near-1.17 { + execsql { + SELECT docid FROM t1 WHERE content MATCH '"four five" NEAR/0 "one three"' + } +} {1} + # Output format of the offsets() function: # @@ -154,6 +165,7 @@ do_test fts3near-3.6 { SELECT offsets(t1) FROM t1 WHERE content MATCH 'three NEAR/0 "two four"' } } {{0 0 8 5 0 1 14 3 0 2 18 4}} +breakpoint do_test fts3near-3.7 { execsql { SELECT offsets(t1) FROM t1 WHERE content MATCH '"two four" NEAR/0 three'} @@ -170,7 +182,7 @@ do_test fts3near-4.1 { execsql { SELECT snippet(t1) FROM t1 WHERE content MATCH 'specification NEAR supports' } -} {{... devices, handheld devices, etc. This specification also supports content positioning, downloadable fonts, ...}} +} {{...braille devices, handheld devices, etc. This specification also supports content positioning, downloadable fonts, table layout...}} do_test fts3near-5.1 { execsql { diff --git a/test/fts3snippet.test b/test/fts3snippet.test new file mode 100644 index 0000000000..1d388e86d8 --- /dev/null +++ b/test/fts3snippet.test @@ -0,0 +1,68 @@ + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# If SQLITE_ENABLE_FTS3 is defined, omit this file. +ifcapable !fts3 { finish_test ; return } + +do_test fts3snippet-1.1 { + execsql { + CREATE VIRTUAL TABLE ft USING fts3; + INSERT INTO ft VALUES('xxx xxx xxx xxx'); + } +} {} + +proc normalize {L} { + set ret [list] + foreach l $L {lappend ret $l} + return $ret +} + +do_test fts3snippet-1.2 { + execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx' } +} {{0 0 0 3 0 0 4 3 0 0 8 3 0 0 12 3}} + +do_test fts3snippet-1.3 { + execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH '"xxx xxx"' } +} [list [normalize { + 0 0 0 3 + 0 0 4 3 + 0 1 4 3 + 0 0 8 3 + 0 1 8 3 + 0 1 12 3 +}]] + + +do_test fts3snippet-1.4 { + execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH '"xxx xxx" xxx' } +} [list [normalize { + 0 0 0 3 + 0 2 0 3 + 0 0 4 3 + 0 1 4 3 + 0 2 4 3 + 0 0 8 3 + 0 1 8 3 + 0 2 8 3 + 0 1 12 3 + 0 2 12 3 +}]] + +do_test fts3snippet-1.5 { + execsql { SELECT offsets(ft) FROM ft WHERE ft MATCH 'xxx "xxx xxx"' } +} [list [normalize { + 0 0 0 3 + 0 1 0 3 + 0 0 4 3 + 0 1 4 3 + 0 2 4 3 + 0 0 8 3 + 0 1 8 3 + 0 2 8 3 + 0 0 12 3 + 0 2 12 3 +}]] + +finish_test +