From: dan Date: Tue, 2 Feb 2016 17:40:41 +0000 (+0000) Subject: Enhance the performance of fts5 AND and OR queries. X-Git-Tag: version-3.11.0~65 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=735ff4a80947404ab8960e2b6d6d23c0b163661f;p=thirdparty%2Fsqlite.git Enhance the performance of fts5 AND and OR queries. FossilOrigin-Name: 62ea9e5ab8bc1a20245beebceb5ea62dcd7ec84e --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 48ab980921..9e5e52fa41 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -319,8 +319,11 @@ struct Fts5IndexIter { i64 iRowid; const u8 *pData; int nData; + u8 bEof; }; +#define sqlite3Fts5IterEof(x) ((x)->bEof) + /* ** Values used as part of the flags argument passed to IndexQuery(). */ @@ -384,7 +387,6 @@ int sqlite3Fts5IndexQuery( ** The various operations on open token or token prefix iterators opened ** using sqlite3Fts5IndexQuery(). */ -int sqlite3Fts5IterEof(Fts5IndexIter*); int sqlite3Fts5IterNext(Fts5IndexIter*); int sqlite3Fts5IterNextFrom(Fts5IndexIter*, i64 iMatch); i64 sqlite3Fts5IterRowid(Fts5IndexIter*); diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 7c8e3c87c1..1df173cfcc 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -303,7 +303,7 @@ static i64 fts5ExprSynonymRowid(Fts5ExprTerm *pTerm, int bDesc, int *pbEof){ assert( bDesc==0 || bDesc==1 ); for(p=pTerm; p; p=p->pSynonym){ if( 0==sqlite3Fts5IterEof(p->pIter) ){ - i64 iRowid = sqlite3Fts5IterRowid(p->pIter); + i64 iRowid = p->pIter->iRowid; if( bRetValid==0 || (bDesc!=(iRowidpSynonym ); for(p=pTerm; p; p=p->pSynonym){ Fts5IndexIter *pIter = p->pIter; - if( sqlite3Fts5IterEof(pIter)==0 && sqlite3Fts5IterRowid(pIter)==iRowid ){ + if( sqlite3Fts5IterEof(pIter)==0 && pIter->iRowid==iRowid ){ if( pIter->nData==0 ) continue; if( nIter==nAlloc ){ int nByte = sizeof(Fts5PoslistReader) * nAlloc * 2; @@ -631,72 +631,6 @@ static int fts5ExprNearIsMatch(int *pRc, Fts5ExprNearset *pNear){ } } -/* -** Advance the first term iterator in the first phrase of pNear. Set output -** variable *pbEof to true if it reaches EOF or if an error occurs. -** -** Return SQLITE_OK if successful, or an SQLite error code if an error -** occurs. -*/ -static int fts5ExprNearAdvanceFirst( - Fts5Expr *pExpr, /* Expression pPhrase belongs to */ - Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ - int bFromValid, - i64 iFrom -){ - Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; - int rc = SQLITE_OK; - - pNode->bNomatch = 0; - if( pTerm->pSynonym ){ - int bEof = 1; - Fts5ExprTerm *p; - - /* Find the firstest rowid any synonym points to. */ - i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); - - /* Advance each iterator that currently points to iRowid. Or, if iFrom - ** is valid - each iterator that points to a rowid before iFrom. */ - for(p=pTerm; p; p=p->pSynonym){ - if( sqlite3Fts5IterEof(p->pIter)==0 ){ - i64 ii = sqlite3Fts5IterRowid(p->pIter); - if( ii==iRowid - || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) - ){ - if( bFromValid ){ - rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); - }else{ - rc = sqlite3Fts5IterNext(p->pIter); - } - if( rc!=SQLITE_OK ) break; - if( sqlite3Fts5IterEof(p->pIter)==0 ){ - bEof = 0; - } - }else{ - bEof = 0; - } - } - } - - /* Set the EOF flag if either all synonym iterators are at EOF or an - ** error has occurred. */ - pNode->bEof = (rc || bEof); - }else{ - Fts5IndexIter *pIter = pTerm->pIter; - - assert( Fts5NodeIsString(pNode) ); - if( bFromValid ){ - rc = sqlite3Fts5IterNextFrom(pIter, iFrom); - }else{ - rc = sqlite3Fts5IterNext(pIter); - } - - pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); - } - - return rc; -} - /* ** Advance iterator pIter until it points to a value equal to or laster ** than the initial value of *piLast. If this means the iterator points @@ -716,7 +650,7 @@ static int fts5ExprAdvanceto( i64 iLast = *piLast; i64 iRowid; - iRowid = sqlite3Fts5IterRowid(pIter); + iRowid = pIter->iRowid; if( (bDesc==0 && iLast>iRowid) || (bDesc && iLastiRowid; assert( (bDesc==0 && iRowid>=iLast) || (bDesc==1 && iRowid<=iLast) ); } *piLast = iRowid; @@ -745,7 +679,7 @@ static int fts5ExprSynonymAdvanceto( for(p=pTerm; rc==SQLITE_OK && p; p=p->pSynonym){ if( sqlite3Fts5IterEof(p->pIter)==0 ){ - i64 iRowid = sqlite3Fts5IterRowid(p->pIter); + i64 iRowid = p->pIter->iRowid; if( (bDesc==0 && iLast>iRowid) || (bDesc && iLastpIter, iLast); } @@ -809,29 +743,128 @@ static int fts5ExprNearTest( } } -static int fts5ExprTokenTest( - Fts5Expr *pExpr, /* Expression that pNear is a part of */ - Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ + +/* +** Initialize all term iterators in the pNear object. If any term is found +** to match no documents at all, return immediately without initializing any +** further iterators. +*/ +static int fts5ExprNearInitAll( + Fts5Expr *pExpr, + Fts5ExprNode *pNode ){ - /* As this "NEAR" object is actually a single phrase that consists - ** of a single term only, grab pointers into the poslist managed by the - ** fts5_index.c iterator object. This is much faster than synthesizing - ** a new poslist the way we have to for more complicated phrase or NEAR - ** expressions. */ - Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; - Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + Fts5ExprNearset *pNear = pNode->pNear; + int i, j; + int rc = SQLITE_OK; - assert( pNode->eType==FTS5_TERM ); - assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); - assert( pPhrase->aTerm[0].pSynonym==0 ); + assert( pNode->bNomatch==0 ); + for(i=0; rc==SQLITE_OK && inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + for(j=0; jnTerm; j++){ + Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; + Fts5ExprTerm *p; + int bEof = 1; - pPhrase->poslist.n = pIter->nData; - if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){ - pPhrase->poslist.p = (u8*)pIter->pData; + for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){ + if( p->pIter ){ + sqlite3Fts5IterClose(p->pIter); + p->pIter = 0; + } + rc = sqlite3Fts5IndexQuery( + pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), + (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | + (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), + pNear->pColset, + &p->pIter + ); + assert( rc==SQLITE_OK || p->pIter==0 ); + if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){ + bEof = 0; + } + } + + if( bEof ){ + pNode->bEof = 1; + return rc; + } + } } - pNode->iRowid = pIter->iRowid; - pNode->bNomatch = (pPhrase->poslist.n==0); - return SQLITE_OK; + + return rc; +} + +/* +** If pExpr is an ASC iterator, this function returns a value with the +** same sign as: +** +** (iLhs - iRhs) +** +** Otherwise, if this is a DESC iterator, the opposite is returned: +** +** (iRhs - iLhs) +*/ +static int fts5RowidCmp( + Fts5Expr *pExpr, + i64 iLhs, + i64 iRhs +){ + assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); + if( pExpr->bDesc==0 ){ + if( iLhs iRhs); + }else{ + if( iLhs>iRhs ) return -1; + return (iLhs < iRhs); + } +} + +static void fts5ExprSetEof(Fts5ExprNode *pNode){ + int i; + pNode->bEof = 1; + pNode->bNomatch = 0; + for(i=0; inChild; i++){ + fts5ExprSetEof(pNode->apChild[i]); + } +} + +static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ + if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ + Fts5ExprNearset *pNear = pNode->pNear; + int i; + for(i=0; inPhrase; i++){ + Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; + pPhrase->poslist.n = 0; + } + }else{ + int i; + for(i=0; inChild; i++){ + fts5ExprNodeZeroPoslist(pNode->apChild[i]); + } + } +} + + + +/* +** Compare the values currently indicated by the two nodes as follows: +** +** res = (*p1) - (*p2) +** +** Nodes that point to values that come later in the iteration order are +** considered to be larger. Nodes at EOF are the largest of all. +** +** This means that if the iteration order is ASC, then numerically larger +** rowids are considered larger. Or if it is the default DESC, numerically +** smaller rowids are larger. +*/ +static int fts5NodeCompare( + Fts5Expr *pExpr, + Fts5ExprNode *p1, + Fts5ExprNode *p2 +){ + if( p2->bEof ) return -1; + if( p1->bEof ) return +1; + return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); } /* @@ -845,7 +878,7 @@ static int fts5ExprTokenTest( ** otherwise. It is not considered an error code if an iterator reaches ** EOF. */ -static int fts5ExprNearNextMatch( +static int fts5ExprNodeTest_STRING( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pNode ){ @@ -870,7 +903,7 @@ static int fts5ExprNearNextMatch( if( pLeft->aTerm[0].pSynonym ){ iLast = fts5ExprSynonymRowid(&pLeft->aTerm[0], bDesc, 0); }else{ - iLast = sqlite3Fts5IterRowid(pLeft->aTerm[0].pIter); + iLast = pLeft->aTerm[0].pIter->iRowid; } do { @@ -890,8 +923,7 @@ static int fts5ExprNearNextMatch( } }else{ Fts5IndexIter *pIter = pPhrase->aTerm[j].pIter; - i64 iRowid = sqlite3Fts5IterRowid(pIter); - if( iRowid==iLast ) continue; + if( pIter->iRowid==iLast ) continue; bMatch = 0; if( fts5ExprAdvanceto(pIter, bDesc, &iLast, &rc, &pNode->bEof) ){ return rc; @@ -909,113 +941,178 @@ static int fts5ExprNearNextMatch( } /* -** Initialize all term iterators in the pNear object. If any term is found -** to match no documents at all, return immediately without initializing any -** further iterators. +** Advance the first term iterator in the first phrase of pNear. Set output +** variable *pbEof to true if it reaches EOF or if an error occurs. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. */ -static int fts5ExprNearInitAll( - Fts5Expr *pExpr, - Fts5ExprNode *pNode +static int fts5ExprNodeNext_STRING( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprNode *pNode, /* FTS5_STRING or FTS5_TERM node */ + int bFromValid, + i64 iFrom ){ - Fts5ExprNearset *pNear = pNode->pNear; - int i, j; + Fts5ExprTerm *pTerm = &pNode->pNear->apPhrase[0]->aTerm[0]; int rc = SQLITE_OK; - assert( pNode->bNomatch==0 ); - for(i=0; rc==SQLITE_OK && inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - for(j=0; jnTerm; j++){ - Fts5ExprTerm *pTerm = &pPhrase->aTerm[j]; - Fts5ExprTerm *p; - int bEof = 1; + pNode->bNomatch = 0; + if( pTerm->pSynonym ){ + int bEof = 1; + Fts5ExprTerm *p; - for(p=pTerm; p && rc==SQLITE_OK; p=p->pSynonym){ - if( p->pIter ){ - sqlite3Fts5IterClose(p->pIter); - p->pIter = 0; - } - rc = sqlite3Fts5IndexQuery( - pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), - (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | - (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), - pNear->pColset, - &p->pIter - ); - assert( rc==SQLITE_OK || p->pIter==0 ); - if( p->pIter && 0==sqlite3Fts5IterEof(p->pIter) ){ + /* Find the firstest rowid any synonym points to. */ + i64 iRowid = fts5ExprSynonymRowid(pTerm, pExpr->bDesc, 0); + + /* Advance each iterator that currently points to iRowid. Or, if iFrom + ** is valid - each iterator that points to a rowid before iFrom. */ + for(p=pTerm; p; p=p->pSynonym){ + if( sqlite3Fts5IterEof(p->pIter)==0 ){ + i64 ii = p->pIter->iRowid; + if( ii==iRowid + || (bFromValid && ii!=iFrom && (ii>iFrom)==pExpr->bDesc) + ){ + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(p->pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(p->pIter); + } + if( rc!=SQLITE_OK ) break; + if( sqlite3Fts5IterEof(p->pIter)==0 ){ + bEof = 0; + } + }else{ bEof = 0; } } + } - if( bEof ){ - pNode->bEof = 1; - return rc; - } + /* Set the EOF flag if either all synonym iterators are at EOF or an + ** error has occurred. */ + pNode->bEof = (rc || bEof); + }else{ + Fts5IndexIter *pIter = pTerm->pIter; + + assert( Fts5NodeIsString(pNode) ); + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(pIter, iFrom); + }else{ + rc = sqlite3Fts5IterNext(pIter); } + + pNode->bEof = (rc || sqlite3Fts5IterEof(pIter)); + } + + if( pNode->bEof==0 ){ + assert( rc==SQLITE_OK ); + rc = fts5ExprNodeTest_STRING(pExpr, pNode); } return rc; } -/* fts5ExprNodeNext() calls fts5ExprNodeNextMatch(). And vice-versa. */ -static int fts5ExprNodeNextMatch(Fts5Expr*, Fts5ExprNode*); +static int fts5ExprNodeTest_TERM( + Fts5Expr *pExpr, /* Expression that pNear is a part of */ + Fts5ExprNode *pNode /* The "NEAR" node (FTS5_TERM) */ +){ + /* As this "NEAR" object is actually a single phrase that consists + ** of a single term only, grab pointers into the poslist managed by the + ** fts5_index.c iterator object. This is much faster than synthesizing + ** a new poslist the way we have to for more complicated phrase or NEAR + ** expressions. */ + Fts5ExprPhrase *pPhrase = pNode->pNear->apPhrase[0]; + Fts5IndexIter *pIter = pPhrase->aTerm[0].pIter; + + assert( pNode->eType==FTS5_TERM ); + assert( pNode->pNear->nPhrase==1 && pPhrase->nTerm==1 ); + assert( pPhrase->aTerm[0].pSynonym==0 ); + + pPhrase->poslist.n = pIter->nData; + if( pExpr->pConfig->eDetail==FTS5_DETAIL_FULL ){ + pPhrase->poslist.p = (u8*)pIter->pData; + } + pNode->iRowid = pIter->iRowid; + pNode->bNomatch = (pPhrase->poslist.n==0); + return SQLITE_OK; +} /* -** If pExpr is an ASC iterator, this function returns a value with the -** same sign as: -** -** (iLhs - iRhs) -** -** Otherwise, if this is a DESC iterator, the opposite is returned: -** -** (iRhs - iLhs) +** xNext() method for a node of type FTS5_TERM. */ -static int fts5RowidCmp( - Fts5Expr *pExpr, - i64 iLhs, - i64 iRhs +static int fts5ExprNodeNext_TERM( + Fts5Expr *pExpr, + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom ){ - assert( pExpr->bDesc==0 || pExpr->bDesc==1 ); - if( pExpr->bDesc==0 ){ - if( iLhs iRhs); + int rc; + Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; + + assert( pNode->bEof==0 ); + if( bFromValid ){ + rc = sqlite3Fts5IterNextFrom(pIter, iFrom); }else{ - if( iLhs>iRhs ) return -1; - return (iLhs < iRhs); + rc = sqlite3Fts5IterNext(pIter); } + if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ + rc = fts5ExprNodeTest_TERM(pExpr, pNode); + }else{ + pNode->bEof = 1; + pNode->bNomatch = 0; + } + return rc; } -static void fts5ExprSetEof(Fts5ExprNode *pNode){ +static void fts5ExprNodeTest_OR( + Fts5Expr *pExpr, /* Expression of which pNode is a part */ + Fts5ExprNode *pNode /* Expression node to test */ +){ + Fts5ExprNode *pNext = pNode->apChild[0]; int i; - pNode->bEof = 1; - pNode->bNomatch = 0; - for(i=0; inChild; i++){ - fts5ExprSetEof(pNode->apChild[i]); + + for(i=1; inChild; i++){ + Fts5ExprNode *pChild = pNode->apChild[i]; + int cmp = fts5NodeCompare(pExpr, pNext, pChild); + if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ + pNext = pChild; + } } + pNode->iRowid = pNext->iRowid; + pNode->bEof = pNext->bEof; + pNode->bNomatch = pNext->bNomatch; } -static void fts5ExprNodeZeroPoslist(Fts5ExprNode *pNode){ - if( pNode->eType==FTS5_STRING || pNode->eType==FTS5_TERM ){ - Fts5ExprNearset *pNear = pNode->pNear; - int i; - for(i=0; inPhrase; i++){ - Fts5ExprPhrase *pPhrase = pNear->apPhrase[i]; - pPhrase->poslist.n = 0; - } - }else{ - int i; - for(i=0; inChild; i++){ - fts5ExprNodeZeroPoslist(pNode->apChild[i]); +static int fts5ExprNodeNext_OR( + Fts5Expr *pExpr, + Fts5ExprNode *pNode, + int bFromValid, + i64 iFrom +){ + int i; + i64 iLast = pNode->iRowid; + + for(i=0; inChild; i++){ + Fts5ExprNode *p1 = pNode->apChild[i]; + assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 ); + if( p1->bEof==0 ){ + if( (p1->iRowid==iLast) + || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) + ){ + int rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); + if( rc!=SQLITE_OK ) return rc; + } } } -} + fts5ExprNodeTest_OR(pExpr, pNode); + return SQLITE_OK; +} /* ** Argument pNode is an FTS5_AND node. */ -static int fts5ExprAndNextRowid( +static int fts5ExprNodeTest_AND( Fts5Expr *pExpr, /* Expression pPhrase belongs to */ Fts5ExprNode *pAnd /* FTS5_AND node to advance */ ){ @@ -1030,15 +1127,11 @@ static int fts5ExprAndNextRowid( bMatch = 1; for(iChild=0; iChildnChild; iChild++){ Fts5ExprNode *pChild = pAnd->apChild[iChild]; - if( 0 && pChild->eType==FTS5_STRING ){ - /* TODO */ - }else{ - int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); - if( cmp>0 ){ - /* Advance pChild until it points to iLast or laster */ - rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); - if( rc!=SQLITE_OK ) return rc; - } + int cmp = fts5RowidCmp(pExpr, iLast, pChild->iRowid); + if( cmp>0 ){ + /* Advance pChild until it points to iLast or laster */ + rc = fts5ExprNodeNext(pExpr, pChild, 1, iLast); + if( rc!=SQLITE_OK ) return rc; } /* If the child node is now at EOF, so is the parent AND node. Otherwise, @@ -1068,155 +1161,66 @@ static int fts5ExprAndNextRowid( return SQLITE_OK; } - -/* -** Compare the values currently indicated by the two nodes as follows: -** -** res = (*p1) - (*p2) -** -** Nodes that point to values that come later in the iteration order are -** considered to be larger. Nodes at EOF are the largest of all. -** -** This means that if the iteration order is ASC, then numerically larger -** rowids are considered larger. Or if it is the default DESC, numerically -** smaller rowids are larger. -*/ -static int fts5NodeCompare( - Fts5Expr *pExpr, - Fts5ExprNode *p1, - Fts5ExprNode *p2 -){ - if( p2->bEof ) return -1; - if( p1->bEof ) return +1; - return fts5RowidCmp(pExpr, p1->iRowid, p2->iRowid); -} - -/* -** xNext() method for a node of type FTS5_TERM. -*/ -static int fts5ExprNodeNext_Term( +static int fts5ExprNodeNext_AND( Fts5Expr *pExpr, Fts5ExprNode *pNode, int bFromValid, i64 iFrom ){ - int rc; - Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; - - assert( pNode->bEof==0 ); - if( bFromValid ){ - rc = sqlite3Fts5IterNextFrom(pIter, iFrom); - }else{ - rc = sqlite3Fts5IterNext(pIter); + int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeTest_AND(pExpr, pNode); } - if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ - rc = fts5ExprTokenTest(pExpr, pNode); - }else{ - pNode->bEof = 1; - pNode->bNomatch = 0; + return rc; +} + +static int fts5ExprNodeTest_NOT( + Fts5Expr *pExpr, /* Expression pPhrase belongs to */ + Fts5ExprNode *pNode /* FTS5_NOT node to advance */ +){ + int rc = SQLITE_OK; + Fts5ExprNode *p1 = pNode->apChild[0]; + Fts5ExprNode *p2 = pNode->apChild[1]; + assert( pNode->nChild==2 ); + + while( rc==SQLITE_OK && p1->bEof==0 ){ + int cmp = fts5NodeCompare(pExpr, p1, p2); + if( cmp>0 ){ + rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); + cmp = fts5NodeCompare(pExpr, p1, p2); + } + assert( rc!=SQLITE_OK || cmp<=0 ); + if( cmp || p2->bNomatch ) break; + rc = fts5ExprNodeNext(pExpr, p1, 0, 0); + } + pNode->bEof = p1->bEof; + pNode->bNomatch = p1->bNomatch; + pNode->iRowid = p1->iRowid; + if( p1->bEof ){ + fts5ExprNodeZeroPoslist(p2); } return rc; } -/* -** Advance node iterator pNode, part of expression pExpr. If argument -** bFromValid is zero, then pNode is advanced exactly once. Or, if argument -** bFromValid is non-zero, then pNode is advanced until it is at or past -** rowid value iFrom. Whether "past" means "less than" or "greater than" -** depends on whether this is an ASC or DESC iterator. -*/ -static int fts5ExprNodeNext_Fallback( +static int fts5ExprNodeNext_NOT( Fts5Expr *pExpr, Fts5ExprNode *pNode, int bFromValid, i64 iFrom ){ - int rc = SQLITE_OK; - - if( pNode->bEof==0 ){ - switch( pNode->eType ){ - case FTS5_STRING: { - rc = fts5ExprNearAdvanceFirst(pExpr, pNode, bFromValid, iFrom); - break; - }; - - case FTS5_TERM: { - Fts5IndexIter *pIter = pNode->pNear->apPhrase[0]->aTerm[0].pIter; - if( bFromValid ){ - rc = sqlite3Fts5IterNextFrom(pIter, iFrom); - }else{ - rc = sqlite3Fts5IterNext(pIter); - } - if( rc==SQLITE_OK && sqlite3Fts5IterEof(pIter)==0 ){ - assert( rc==SQLITE_OK ); - rc = fts5ExprTokenTest(pExpr, pNode); - }else{ - pNode->bEof = 1; - pNode->bNomatch = 0; - } - return rc; - }; - - case FTS5_AND: { - Fts5ExprNode *pLeft = pNode->apChild[0]; - rc = fts5ExprNodeNext(pExpr, pLeft, bFromValid, iFrom); - break; - } - - case FTS5_OR: { - int i; - i64 iLast = pNode->iRowid; - - for(i=0; rc==SQLITE_OK && inChild; i++){ - Fts5ExprNode *p1 = pNode->apChild[i]; - assert( p1->bEof || fts5RowidCmp(pExpr, p1->iRowid, iLast)>=0 ); - if( p1->bEof==0 ){ - if( (p1->iRowid==iLast) - || (bFromValid && fts5RowidCmp(pExpr, p1->iRowid, iFrom)<0) - ){ - rc = fts5ExprNodeNext(pExpr, p1, bFromValid, iFrom); - } - } - } - - break; - } - - default: assert( pNode->eType==FTS5_NOT ); { - assert( pNode->nChild==2 ); - rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); - break; - } - } - - if( rc==SQLITE_OK ){ - rc = fts5ExprNodeNextMatch(pExpr, pNode); - } + int rc = fts5ExprNodeNext(pExpr, pNode->apChild[0], bFromValid, iFrom); + if( rc==SQLITE_OK ){ + rc = fts5ExprNodeTest_NOT(pExpr, pNode); } - - /* Assert that if bFromValid was true, either: - ** - ** a) an error occurred, or - ** b) the node is now at EOF, or - ** c) the node is now at or past rowid iFrom. - */ - assert( bFromValid==0 - || rc!=SQLITE_OK /* a */ - || pNode->bEof /* b */ - || pNode->iRowid==iFrom || pExpr->bDesc==(pNode->iRowidbNomatch==0 || rc==SQLITE_OK ); return rc; } - /* ** If pNode currently points to a match, this function returns SQLITE_OK ** without modifying it. Otherwise, pNode is advanced until it does point ** to a match or EOF is reached. */ -static int fts5ExprNodeNextMatch( +static int fts5ExprNodeTest( Fts5Expr *pExpr, /* Expression of which pNode is a part */ Fts5ExprNode *pNode /* Expression node to test */ ){ @@ -1225,59 +1229,27 @@ static int fts5ExprNodeNextMatch( switch( pNode->eType ){ case FTS5_STRING: { - /* Advance the iterators until they all point to the same rowid */ - rc = fts5ExprNearNextMatch(pExpr, pNode); + rc = fts5ExprNodeTest_STRING(pExpr, pNode); break; } case FTS5_TERM: { - rc = fts5ExprTokenTest(pExpr, pNode); + rc = fts5ExprNodeTest_TERM(pExpr, pNode); break; } case FTS5_AND: { - rc = fts5ExprAndNextRowid(pExpr, pNode); + rc = fts5ExprNodeTest_AND(pExpr, pNode); break; } case FTS5_OR: { - Fts5ExprNode *pNext = pNode->apChild[0]; - int i; - - for(i=1; inChild; i++){ - Fts5ExprNode *pChild = pNode->apChild[i]; - int cmp = fts5NodeCompare(pExpr, pNext, pChild); - if( cmp>0 || (cmp==0 && pChild->bNomatch==0) ){ - pNext = pChild; - } - } - pNode->iRowid = pNext->iRowid; - pNode->bEof = pNext->bEof; - pNode->bNomatch = pNext->bNomatch; + fts5ExprNodeTest_OR(pExpr, pNode); break; } default: assert( pNode->eType==FTS5_NOT ); { - Fts5ExprNode *p1 = pNode->apChild[0]; - Fts5ExprNode *p2 = pNode->apChild[1]; - assert( pNode->nChild==2 ); - - while( rc==SQLITE_OK && p1->bEof==0 ){ - int cmp = fts5NodeCompare(pExpr, p1, p2); - if( cmp>0 ){ - rc = fts5ExprNodeNext(pExpr, p2, 1, p1->iRowid); - cmp = fts5NodeCompare(pExpr, p1, p2); - } - assert( rc!=SQLITE_OK || cmp<=0 ); - if( cmp || p2->bNomatch ) break; - rc = fts5ExprNodeNext(pExpr, p1, 0, 0); - } - pNode->bEof = p1->bEof; - pNode->bNomatch = p1->bNomatch; - pNode->iRowid = p1->iRowid; - if( p1->bEof ){ - fts5ExprNodeZeroPoslist(p2); - } + rc = fts5ExprNodeTest_NOT(pExpr, pNode); break; } } @@ -1329,7 +1301,7 @@ static int fts5ExprNodeFirst(Fts5Expr *pExpr, Fts5ExprNode *pNode){ } if( rc==SQLITE_OK ){ - rc = fts5ExprNodeNextMatch(pExpr, pNode); + rc = fts5ExprNodeTest(pExpr, pNode); } return rc; } @@ -1690,13 +1662,14 @@ int sqlite3Fts5ExprClonePhrase( pNew->apExprPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->apPhrase[0] = sCtx.pPhrase; pNew->pRoot->pNear->nPhrase = 1; - pNew->pRoot->xNext = fts5ExprNodeNext_Fallback; sCtx.pPhrase->pNode = pNew->pRoot; if( pOrig->nTerm==1 && pOrig->aTerm[0].pSynonym==0 ){ pNew->pRoot->eType = FTS5_TERM; + pNew->pRoot->xNext = fts5ExprNodeNext_TERM; }else{ pNew->pRoot->eType = FTS5_STRING; + pNew->pRoot->xNext = fts5ExprNodeNext_STRING; } }else{ sqlite3Fts5ExprFree(pNew); @@ -1844,6 +1817,38 @@ void sqlite3Fts5ParseSetColset( } } +static void fts5ExprAssignXNext(Fts5ExprNode *pNode){ + switch( pNode->eType ){ + case FTS5_STRING: { + Fts5ExprNearset *pNear = pNode->pNear; + if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 + && pNear->apPhrase[0]->aTerm[0].pSynonym==0 + ){ + pNode->eType = FTS5_TERM; + pNode->xNext = fts5ExprNodeNext_TERM; + }else{ + pNode->xNext = fts5ExprNodeNext_STRING; + } + break; + }; + + case FTS5_OR: { + pNode->xNext = fts5ExprNodeNext_OR; + break; + }; + + case FTS5_AND: { + pNode->xNext = fts5ExprNodeNext_AND; + break; + }; + + default: assert( pNode->eType==FTS5_NOT ); { + pNode->xNext = fts5ExprNodeNext_NOT; + break; + }; + } +} + static void fts5ExprAddChildren(Fts5ExprNode *p, Fts5ExprNode *pSub){ if( p->eType!=FTS5_NOT && pSub->eType==p->eType ){ int nByte = sizeof(Fts5ExprNode*) * pSub->nChild; @@ -1891,20 +1896,18 @@ Fts5ExprNode *sqlite3Fts5ParseNode( pRet = (Fts5ExprNode*)sqlite3Fts5MallocZero(&pParse->rc, nByte); if( pRet ){ - pRet->xNext = fts5ExprNodeNext_Fallback; pRet->eType = eType; pRet->pNear = pNear; + fts5ExprAssignXNext(pRet); if( eType==FTS5_STRING ){ int iPhrase; for(iPhrase=0; iPhrasenPhrase; iPhrase++){ pNear->apPhrase[iPhrase]->pNode = pRet; } - if( pNear->nPhrase==1 && pNear->apPhrase[0]->nTerm==1 ){ - if( pNear->apPhrase[0]->aTerm[0].pSynonym==0 ){ - pRet->eType = FTS5_TERM; - pRet->xNext = fts5ExprNodeNext_Term; - } - }else if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL ){ + + if( pParse->pConfig->eDetail!=FTS5_DETAIL_FULL + && (pNear->nPhrase!=1 || pNear->apPhrase[0]->nTerm!=1) + ){ assert( pParse->rc==SQLITE_OK ); pParse->rc = SQLITE_ERROR; assert( pParse->zErr==0 ); @@ -1915,6 +1918,7 @@ Fts5ExprNode *sqlite3Fts5ParseNode( sqlite3_free(pRet); pRet = 0; } + }else{ fts5ExprAddChildren(pRet, pLeft); fts5ExprAddChildren(pRet, pRight); diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index f30c38d91a..3b627dabad 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -518,7 +518,6 @@ struct Fts5Iter { int nSeg; /* Size of aSeg[] array */ int bRev; /* True to iterate in reverse order */ u8 bSkipEmpty; /* True to skip deleted entries */ - u8 bEof; /* True at EOF */ u8 bFiltered; /* True if column-filter already applied */ i64 iSwitchRowid; /* Firstest rowid of other than aFirst[1] */ @@ -2459,7 +2458,7 @@ static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5Iter *pIter){ Fts5SegIter *pFirst = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; int i; - assert( (pFirst->pLeaf==0)==pIter->bEof ); + assert( (pFirst->pLeaf==0)==pIter->base.bEof ); /* Check that pIter->iSwitchRowid is set correctly. */ for(i=0; inSeg; i++){ @@ -2731,7 +2730,7 @@ static int fts5MultiIterAdvanceRowid( */ static void fts5MultiIterSetEof(Fts5Iter *pIter){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; - pIter->bEof = pSeg->pLeaf==0; + pIter->base.bEof = pSeg->pLeaf==0; pIter->iSwitchRowid = pSeg->iRowid; } @@ -2964,7 +2963,7 @@ static void fts5MultiIterNew2( } pData = 0; }else{ - pNew->bEof = 1; + pNew->base.bEof = 1; } fts5SegIterSetNext(p, pIter); @@ -2980,9 +2979,9 @@ static void fts5MultiIterNew2( */ static int fts5MultiIterEof(Fts5Index *p, Fts5Iter *pIter){ assert( p->rc - || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->bEof + || (pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0)==pIter->base.bEof ); - return (p->rc || pIter->bEof); + return (p->rc || pIter->base.bEof); } /* @@ -5209,11 +5208,6 @@ int sqlite3Fts5IndexQuery( /* ** Return true if the iterator passed as the only argument is at EOF. */ -int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ - assert( ((Fts5Iter*)pIter)->pIndex->rc==SQLITE_OK ); - return ((Fts5Iter*)pIter)->bEof; -} - /* ** Move to the next matching rowid. */ @@ -5239,7 +5233,7 @@ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIndexIter){ if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ fts5DataRelease(pSeg->pLeaf); pSeg->pLeaf = 0; - pIter->bEof = 1; + pIter->base.bEof = 1; } } @@ -5257,13 +5251,6 @@ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIndexIter, i64 iMatch){ return fts5IndexReturn(pIter->pIndex); } -/* -** Return the current rowid. -*/ -i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIndexIter){ - return fts5MultiIterRowid((Fts5Iter*)pIndexIter); -} - /* ** Return the current term. */ @@ -5450,7 +5437,7 @@ static int fts5QueryCksum( int rc = sqlite3Fts5IndexQuery(p, z, n, flags, 0, &pIter); while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIter) ){ - i64 rowid = sqlite3Fts5IterRowid(pIter); + i64 rowid = pIter->iRowid; if( eDetail==FTS5_DETAIL_NONE ){ cksum ^= sqlite3Fts5IndexEntryCksum(rowid, 0, 0, iIdx, z, n); diff --git a/ext/fts5/tool/fts5speed.tcl b/ext/fts5/tool/fts5speed.tcl index f0e96e348d..0f38638c26 100644 --- a/ext/fts5/tool/fts5speed.tcl +++ b/ext/fts5/tool/fts5speed.tcl @@ -12,6 +12,10 @@ set Q { {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t* OR b:t* OR c:t* OR d:t* OR e:t* OR f:t* OR g:t*'"} {1 "SELECT count(*) FROM t1 WHERE t1 MATCH 'a:t*'"} {2 "SELECT count(*) FROM t1 WHERE t1 MATCH 'c:the'"} + + {2 "SELECT count(*) FROM t1 WHERE t1 MATCH 'd:holmes OR e:holmes OR f:holmes OR g:holmes'" } + {2 "SELECT count(*) FROM t1 WHERE t1 MATCH 'd:holmes AND e:holmes AND f:holmes AND g:holmes'" } + {4 "SELECT count(*) FROM t1 WHERE t1 MATCH 'd:holmes NOT e:holmes'" } } proc usage {} { diff --git a/manifest b/manifest index 3952435799..44101197f7 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Enhance\sthe\scomment\son\sthe\ssqlite3_index_constraint\sobject\sto\sbring\sattention\s\nto\sthe\sfact\sthan\siColumn\sfield\scan\sbe\snegative\sfor\sa\srowid. -D 2016-02-02T02:04:21.840 +C Enhance\sthe\sperformance\sof\sfts5\sAND\sand\sOR\squeries. +D 2016-02-02T17:40:41.411 F Makefile.in 027c1603f255390c43a426671055a31c0a65fdb4 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 72b7858f02017611c3ac1ddc965251017fed0845 @@ -98,13 +98,13 @@ F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h ff9c2782e8ed890b0de2f697a8d63971939e70c7 -F ext/fts5/fts5Int.h 6e0f90eb4872654a5b98130dec16965716525c9a +F ext/fts5/fts5Int.h 9505f3bc8d0b2ca4cd2b112e7e042e3c6a3222a7 F ext/fts5/fts5_aux.c b9bcce753ef5b451267b2232f0ca153ddeb3951d F ext/fts5/fts5_buffer.c f6e0c6018ffc8e39fc0b333b5daa8b8d528ae6e4 F ext/fts5/fts5_config.c 0c384ebdd23fd055e2e50a93277b8d59da538238 -F ext/fts5/fts5_expr.c 768d221e592df03b26f46da56aa0a561f00fa4e0 +F ext/fts5/fts5_expr.c 304b22448ee0c3056d4177e327ed62dc80cbe23a F ext/fts5/fts5_hash.c 1b113977296cf4212c6ec667d5e3f2bd18036955 -F ext/fts5/fts5_index.c cd1e4faca8b9adc2d89b367075bf93a7f50c406b +F ext/fts5/fts5_index.c 471ff6935068a4579830474249e1046b57137103 F ext/fts5/fts5_main.c 7e8a5f27d504bc04e3de7f1cba8867f0332aee9d F ext/fts5/fts5_storage.c 2a1f44deae090cd711f02cec0c2af8e660360d24 F ext/fts5/fts5_tcl.c f8731e0508299bd43f1a2eff7dbeaac870768966 @@ -190,7 +190,7 @@ F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680 F ext/fts5/test/fts5update.test 57c7012a7919889048947addae10e0613df45529 F ext/fts5/test/fts5version.test 978f59541d8cef7e8591f8be2115ec5ccb863e2e F ext/fts5/test/fts5vocab.test 480d780aa6b699816c5066225fbd86f3a0239477 -F ext/fts5/tool/fts5speed.tcl f9944a9abb9b7685cfbee8101a3dd772ededca66 +F ext/fts5/tool/fts5speed.tcl b0056f91a55b2d1a3684ec05729de92b042e2f85 F ext/fts5/tool/fts5txt2db.tcl 1343745b89ca2a1e975c23f836d0cee410052975 F ext/fts5/tool/loadfts5.tcl 95b03429ee6b138645703c6ca192c3ac96eaf093 F ext/fts5/tool/mkfts5c.tcl d1c2a9ab8e0ec690a52316f33dd9b1d379942f45 @@ -1422,7 +1422,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 6d7d4703ebf3754bec74123d5ba7e861a705f90f -R a0fd58918162f72722cea72a597641d9 -U drh -Z 8a77cde1543981fcb216eb719b02e019 +P d8b7b1996eefae7768bfcb82d4ff22c69392aa63 +R e61034725c181be1c4702281e2c951bc +U dan +Z e59ee5dbb82eef632e47f5b2b9e1cde6 diff --git a/manifest.uuid b/manifest.uuid index dae04066fb..0a6e70dd4e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d8b7b1996eefae7768bfcb82d4ff22c69392aa63 \ No newline at end of file +62ea9e5ab8bc1a20245beebceb5ea62dcd7ec84e \ No newline at end of file