From: dan Date: Thu, 3 Jul 2014 20:39:39 +0000 (+0000) Subject: Add support for NEAR expressions to fts5. X-Git-Tag: version-3.8.11~114^2~171 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c18a8fe99c3d6e8ad316ecac66b961a5a9dcb996;p=thirdparty%2Fsqlite.git Add support for NEAR expressions to fts5. FossilOrigin-Name: 250ae8d40115e2e47cc5a1e8a427fa8c0a89124d --- diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 1c0e936cb0..5a29f04709 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -69,7 +69,6 @@ struct Fts5ExprTerm { */ struct Fts5ExprPhrase { Fts5Buffer poslist; /* Current position list */ - i64 iRowid; /* Current rowid */ int nTerm; /* Number of entries in aTerm[] */ Fts5ExprTerm aTerm[0]; /* Terms that make up this phrase */ }; @@ -109,7 +108,7 @@ struct Fts5PoslistIter { i64 iPos; /* (iCol<<32) + iPos */ }; -static void fts5PoslistIterNext(Fts5PoslistIter *pIter){ +static int fts5PoslistIterNext(Fts5PoslistIter *pIter){ if( pIter->i>=pIter->n ){ pIter->bEof = 1; }else{ @@ -122,6 +121,7 @@ static void fts5PoslistIterNext(Fts5PoslistIter *pIter){ } pIter->iPos += (iVal-2); } + return pIter->bEof; } static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){ @@ -130,6 +130,32 @@ static void fts5PoslistIterInit(const u8 *a, int n, Fts5PoslistIter *pIter){ pIter->n = n; fts5PoslistIterNext(pIter); } + +typedef struct Fts5PoslistWriter Fts5PoslistWriter; +struct Fts5PoslistWriter { + int iCol; + int iOff; +}; + +static int fts5PoslistWriterAppend( + Fts5Buffer *pBuf, + Fts5PoslistWriter *pWriter, + i64 iPos +){ + int rc = SQLITE_OK; + int iCol = (int)(iPos >> 32); + int iOff = (iPos & 0x7FFFFFFF); + + if( iCol!=pWriter->iCol ){ + fts5BufferAppendVarint(&rc, pBuf, 1); + fts5BufferAppendVarint(&rc, pBuf, iCol); + pWriter->iCol = iCol; + pWriter->iOff = 0; + } + fts5BufferAppendVarint(&rc, pBuf, (iOff - pWriter->iOff) + 2); + + return rc; +} /* *************************************************************************/ @@ -302,11 +328,14 @@ static int fts5ExprPhraseIsMatch( Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ int *pbMatch /* OUT: Set to true if really a match */ ){ + Fts5PoslistWriter writer = {0, 0}; Fts5PoslistIter aStatic[4]; Fts5PoslistIter *aIter = aStatic; int i; int rc = SQLITE_OK; + /* If the aStatic[] array is not large enough, allocate a large array + ** using sqlite3_malloc(). This approach could be improved upon. */ if( pPhrase->nTerm>(sizeof(aStatic) / sizeof(aStatic[0])) ){ int nByte = sizeof(Fts5PoslistIter) * pPhrase->nTerm; aIter = (Fts5PoslistIter*)sqlite3_malloc(nByte); @@ -320,108 +349,189 @@ static int fts5ExprPhraseIsMatch( fts5PoslistIterInit(a, n, &aIter[i]); } - *pbMatch = 0; + fts5BufferZero(&pPhrase->poslist); while( 1 ){ - - int bMatch = 1; + int bMatch; i64 iPos = aIter[0].iPos; - for(i=1; inTerm; i++){ - Fts5PoslistIter *pPos = &aIter[i]; - i64 iAdj = pPos->iPos-i; - if( (pPos->iPos-i)!=iPos ){ - bMatch = 0; - if( iAdj>iPos ) iPos = iAdj; + do { + bMatch = 1; + for(i=0; inTerm; i++){ + Fts5PoslistIter *pPos = &aIter[i]; + i64 iAdj = iPos + i; + if( pPos->iPos!=iAdj ){ + bMatch = 0; + while( pPos->iPosiPos>iAdj ) iPos = pPos->iPos-i; + } } - } - if( bMatch ){ - *pbMatch = 1; - break; - } + }while( bMatch==0 ); + + /* Append position iPos to the output */ + rc = fts5PoslistWriterAppend(&pPhrase->poslist, &writer, iPos); + if( rc!=SQLITE_OK ) goto ismatch_out; for(i=0; inTerm; i++){ + if( fts5PoslistIterNext(&aIter[i]) ) goto ismatch_out; + } + } + + ismatch_out: + *pbMatch = (pPhrase->poslist.n>0); + if( aIter!=aStatic ) sqlite3_free(aIter); + return rc; +} + + +static int fts5ExprNearIsMatch(Fts5ExprNearset *pNear, int *pbMatch){ + Fts5PoslistIter aStatic[4]; + Fts5PoslistIter *aIter = aStatic; + int i; + int rc = SQLITE_OK; + int bMatch; + i64 iMax; + + /* If the aStatic[] array is not large enough, allocate a large array + ** using sqlite3_malloc(). This approach could be improved upon. */ + if( pNear->nPhrase>(sizeof(aStatic) / sizeof(aStatic[0])) ){ + int nByte = sizeof(Fts5PoslistIter) * pNear->nPhrase; + aIter = (Fts5PoslistIter*)sqlite3_malloc(nByte); + if( !aIter ) return SQLITE_NOMEM; + } + + /* Initialize a term iterator for each phrase */ + for(i=0; inPhrase; i++){ + Fts5Buffer *pPoslist = &pNear->apPhrase[i]->poslist; + fts5PoslistIterInit(pPoslist->p, pPoslist->n, &aIter[i]); + } + + iMax = aIter[0].iPos; + do { + bMatch = 1; + for(i=0; inPhrase; i++){ Fts5PoslistIter *pPos = &aIter[i]; - while( (pPos->iPos-i) < iPos ){ - fts5PoslistIterNext(pPos); - if( pPos->bEof ) goto ismatch_out; + i64 iMin = iMax - pNear->apPhrase[i]->nTerm - pNear->nNear; + if( pPos->iPosiPos>iMax ){ + bMatch = 0; + while( pPos->iPosiPos>iMax ) iMax = pPos->iPos; } } - } + }while( bMatch==0 ); ismatch_out: + *pbMatch = bMatch; if( aIter!=aStatic ) sqlite3_free(aIter); return rc; } /* -** All individual term iterators in pPhrase are guaranteed to be valid when -** this function is called. This function checks if all term iterators -** point to the same rowid, and if not, advances them until they do. -** If an EOF is reached before this happens, *pbEof is set to true before -** returning. -** -** SQLITE_OK is returned if an error occurs, or an SQLite error code -** otherwise. It is not considered an error code if an iterator reaches -** EOF. +** Advance each phrase iterator in phrase pNear. If any reach EOF, set +** output variable *pbEof to true before returning. */ -static int fts5ExprPhraseNextRowidMatch( +static int fts5ExprNearAdvanceAll( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ + Fts5ExprNearset *pNear, /* Near object to advance iterators of */ int *pbEof /* OUT: Set to true if phrase at EOF */ ){ - assert( *pbEof==0 ); - while( 1 ){ - int i; - int bMatch = 1; - i64 iMin = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); - for(i=1; inTerm; i++){ - i64 iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[i].pIter); - if( iRowid!=iMin ){ - bMatch = 0; - if( iRowidnTerm; i++){ - Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; - while( sqlite3Fts5IterRowid(pIter)>iMin ){ - sqlite3Fts5IterNext(pIter, 0); - if( sqlite3Fts5IterEof(pIter) ){ - *pbEof = 1; - return SQLITE_OK; - } + int rc = SQLITE_OK; /* Return code */ + int i, j; /* Phrase and token index, respectively */ + + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + sqlite3Fts5IterNext(pIter, 0); + if( sqlite3Fts5IterEof(pIter) ){ + *pbEof = 1; + return rc; } } } - return SQLITE_OK; + return rc; } -static int fts5ExprPhraseAdvanceAll( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ - int *pbEof /* OUT: Set to true if phrase at EOF */ +/* +** Advance iterator pIter until it points to a value equal to or smaller +** than the initial value of *piMin. If this means the iterator points +** to a value smaller than *piMin, update *piMin to the new smallest value. +** +** If the iterator reaches EOF, set *pbEof to true before returning. If +** an error occurs, set *pRc to an error code. If either *pbEof or *pRc +** are set, return a non-zero value. Otherwise, return zero. +*/ +static int fts5ExprAdvanceto( + Fts5IndexIter *pIter, /* Iterator to advance */ + i64 *piMin, /* IN/OUT: Minimum rowid seen so far */ + int *pRc, /* OUT: Error code */ + int *pbEof /* OUT: Set to true if EOF */ ){ - int i; - int rc = SQLITE_OK; - for(i=0; inTerm; i++){ - Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; + i64 iMin = *piMin; + i64 iRowid; + while( (iRowid = sqlite3Fts5IterRowid(pIter))>iMin ){ sqlite3Fts5IterNext(pIter, 0); if( sqlite3Fts5IterEof(pIter) ){ *pbEof = 1; - break; + return 1; } } + if( iRowidpNear; + int rc = SQLITE_OK; + int i, j; /* Phrase and token index, respectively */ + i64 iMin; /* Smallest rowid any iterator points to */ + int bMatch; + + iMin = sqlite3Fts5IterRowid(pNear->apPhrase[0]->aTerm[0].pIter); + do { + bMatch = 1; + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; + i64 iRowid = sqlite3Fts5IterRowid(pIter); + if( iRowid!=iMin ) bMatch = 0; + if( fts5ExprAdvanceto(pIter, &iMin, &rc, &pNode->bEof) ) return rc; + } + } + }while( bMatch==0 ); + + pNode->iRowid = iMin; return rc; } /* -** Argument pPhrase points to a multi-term phrase object. All individual -** term iterators point to valid entries (not EOF). +** Argument pNode points to a NEAR node. All individual term iterators +** point to valid entries (not EOF). * ** This function tests if the term iterators currently all point to the -** same rowid, and if so, if the rowid matches the phrase constraint. If -** so, the pPhrase->poslist buffer is populated and the pPhrase->iRowid +** same rowid, and if so, if the row matches the phrase and NEAR constraints. +** If so, the pPhrase->poslist buffers are populated and the pNode->iRowid ** variable set before returning. Or, if the current combination of ** iterators is not a match, they are advanced until they are. If one of ** the iterators reaches EOF before a match is found, *pbEof is set to @@ -432,123 +542,106 @@ static int fts5ExprPhraseAdvanceAll( ** otherwise. It is not considered an error code if an iterator reaches ** EOF. */ -static int fts5ExprPhraseNextMatch( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ - int *pbEof /* OUT: Set to true if phrase at EOF */ +static int fts5ExprNearNextMatch( + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode ){ - int i; /* Used to iterate through terms */ - int rc = SQLITE_OK; /* Return code */ - int bMatch = 0; + int rc = SQLITE_OK; + Fts5ExprNearset *pNear = pNode->pNear; + while( 1 ){ + int i; - assert( *pbEof==0 ); + /* Advance the iterators until they are a match */ + rc = fts5ExprNearNextRowidMatch(pExpr, pNode); + if( pNode->bEof || rc!=SQLITE_OK ) break; - while( 1 ){ - rc = fts5ExprPhraseNextRowidMatch(pExpr, pPhrase, pbEof); - if( rc!=SQLITE_OK || *pbEof ) break; + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + if( pPhrase->nTerm>1 ){ + int bMatch = 0; + rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch); + if( rc!=SQLITE_OK ) return rc; + if( bMatch==0 ) break; + }else{ + int n; + u8 *a = sqlite3Fts5IterPoslist(pPhrase->aTerm[0].pIter, &n); + fts5BufferSet(&rc, &pPhrase->poslist, n, a); + } + } - /* At this point, all term iterators are valid and point to the same rowid. - ** The following assert() statements verify this. */ -#ifdef SQLITE_DEBUG - for(i=0; inTerm; i++){ - Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; - Fts5IndexIter *pOne = pPhrase->aTerm[0].pIter; - assert( 0==sqlite3Fts5IterEof(pIter) ); - assert( sqlite3Fts5IterRowid(pOne)==sqlite3Fts5IterRowid(pIter) ); + if( i==pNear->nPhrase ){ + int bMatch = 1; + if( pNear->nPhrase>1 ){ + rc = fts5ExprNearIsMatch(pNear, &bMatch); + } + if( rc!=SQLITE_OK || bMatch ) break; } -#endif - rc = fts5ExprPhraseIsMatch(pExpr, pPhrase, &bMatch); - if( rc!=SQLITE_OK || bMatch ) break; - rc = fts5ExprPhraseAdvanceAll(pExpr, pPhrase, pbEof); - if( rc!=SQLITE_OK || *pbEof ) break; + rc = fts5ExprNearAdvanceAll(pExpr, pNear, &pNode->bEof); + if( pNode->bEof || rc!=SQLITE_OK ) break; } - pPhrase->iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); return rc; } /* -** Advance the phrase iterator pPhrase to the next match. +** Initialize all term iterators in the pNear object. If any term is found +** to match no documents at all, set *pbEof to true and return immediately, +** without initializing any further iterators. */ -static int fts5ExprPhraseNext( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ - int *pbEof /* OUT: Set to true if phrase at EOF */ +static int fts5ExprNearInitAll( + Fts5Expr *pExpr, + Fts5ExprNode *pNode ){ - int i; - for(i=0; inTerm; i++){ - Fts5IndexIter *pIter = pPhrase->aTerm[i].pIter; - sqlite3Fts5IterNext(pIter, 0); - if( sqlite3Fts5IterEof(pIter) ){ - *pbEof = 1; - return SQLITE_OK; + Fts5ExprNearset *pNear = pNode->pNear; + Fts5ExprTerm *pTerm; + Fts5ExprPhrase *pPhrase; + int i, j; + + for(i=0; inPhrase; i++){ + pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + pTerm = &pPhrase->aTerm[j]; + pTerm->pIter = sqlite3Fts5IndexQuery( + pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0) + ); + if( sqlite3Fts5IterEof(pTerm->pIter) ){ + pNode->bEof = 1; + return SQLITE_OK; + } } } - if( pPhrase->nTerm==1 ){ - pPhrase->iRowid = sqlite3Fts5IterRowid(pPhrase->aTerm[0].pIter); - }else{ - fts5ExprPhraseNextMatch(pExpr, pPhrase, pbEof); - } - return SQLITE_OK; } -/* -** Point phrase object pPhrase at the first matching document. Or, if there -** are no matching documents at all, move pPhrase to EOF and set *pbEof to -** true before returning. -** -** If no error occurs, SQLITE_OK is returned. Otherwise, an SQLite error -** code. -*/ -static int fts5ExprPhraseFirst( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprPhrase *pPhrase, /* Phrase object to initialize */ - int *pbEof /* OUT: Set to true if phrase at EOF */ +static int fts5ExprNearNext( + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode ){ - int i; /* Used to iterate through terms */ - int rc = SQLITE_OK; - - for(i=0; inTerm; i++){ - Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; - pTerm->pIter = sqlite3Fts5IndexQuery( - pExpr->pIndex, pTerm->zTerm, strlen(pTerm->zTerm), - (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | - (pExpr->bAsc ? FTS5INDEX_QUERY_ASC : 0) - ); - if( sqlite3Fts5IterEof(pTerm->pIter) ){ - *pbEof = 1; - return SQLITE_OK; - } - } - - if( pPhrase->nTerm==1 ){ - const u8 *a; int n; - Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; - pPhrase->iRowid = sqlite3Fts5IterRowid(pIter); - a = sqlite3Fts5IterPoslist(pIter, &n); - if( a ){ - sqlite3Fts5BufferSet(&rc, &pPhrase->poslist, n, a); - } - }else{ - rc = fts5ExprPhraseNextMatch(pExpr, pPhrase, pbEof); + int rc = fts5ExprNearAdvanceAll(pExpr, pNode->pNear, &pNode->bEof); + if( rc==SQLITE_OK && pNode->bEof==0 ){ + rc = fts5ExprNearNextMatch(pExpr, pNode); } - return rc; } static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; - pNode->bEof = 0; + if( pNode->eType==FTS5_STRING ){ - Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; - assert( pNode->pNear->nPhrase==1 ); - assert( pNode->bEof==0 ); - rc = fts5ExprPhraseFirst(pExpr, pPhrase, &pNode->bEof); - pNode->iRowid = pPhrase->iRowid; + + /* Initialize all term iterators in the NEAR object. */ + rc = fts5ExprNearInitAll(pExpr, pNode); + + /* Attempt to advance to the first match */ + if( rc==SQLITE_OK && pNode->bEof==0 ){ + rc = fts5ExprNearNextMatch(pExpr, pNode); + } + }else{ rc = fts5ExprNodeFirst(pExpr, pNode->pLeft); if( rc==SQLITE_OK ){ @@ -565,10 +658,7 @@ static int fts5ExprNodeNext(Fts5Expr *pExpr, Fts5ExprNode *pNode){ int rc = SQLITE_OK; if( pNode->eType==FTS5_STRING ){ - Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; - assert( pNode->pNear->nPhrase==1 ); - rc = fts5ExprPhraseNext(pExpr, pPhrase, &pNode->bEof); - pNode->iRowid = pPhrase->iRowid; + rc = fts5ExprNearNext(pExpr, pNode); }else{ assert( 0 ); } @@ -806,6 +896,11 @@ Fts5ExprPhrase *sqlite3Fts5ParseTerm( return sCtx.pPhrase; } +/* +** Token pTok has appeared in a MATCH expression where the NEAR operator +** is expected. If token pTok does not contain "NEAR", store an error +** in the pParse object. +*/ void sqlite3Fts5ParseNear(Fts5Parse *pParse, Fts5Token *pTok){ if( pParse->rc==SQLITE_OK ){ if( pTok->n!=4 || memcmp("NEAR", pTok->p, 4) ){ diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index e60ec5f503..af69b4280d 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1156,7 +1156,7 @@ static void fts5SegIterNext( int n = pLeaf->n; iOff = pIter->iLeafOffset; - if( iOff<=n ){ + if( iOff=0 || [string first $pat $y]>=0} { + set res [concat $id $res] + } + } + set n [llength $res] + do_execsql_test 1.2.$tn.$n { + SELECT rowid FROM xx WHERE xx match '"' || $phrase || '"' + } $res +} + + + +finish_test +