From: dan Date: Tue, 14 Jun 2011 07:22:30 +0000 (+0000) Subject: Merge recent trunk changes into fts3-prefix-search branch. X-Git-Tag: version-3.7.7~62^2~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c72f9d98cba00feb7062b785942c506fff39481a;p=thirdparty%2Fsqlite.git Merge recent trunk changes into fts3-prefix-search branch. FossilOrigin-Name: 135ce30f62ebd6a1b239c18dbbd9c926ea507db4 --- c72f9d98cba00feb7062b785942c506fff39481a diff --cc ext/fts3/fts3.c index 60b8595e84,0aba0546c3..e057eb3825 --- a/ext/fts3/fts3.c +++ b/ext/fts3/fts3.c @@@ -3145,1269 -3840,4 +3145,1278 @@@ int sqlite3_extension_init } #endif + +/* +** Allocate an Fts3MultiSegReader for each token in the expression headed +** by pExpr. +** +** An Fts3SegReader object is a cursor that can seek or scan a range of +** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple +** Fts3SegReader objects internally to provide an interface to seek or scan +** within the union of all segments of a b-tree. Hence the name. +** +** If the allocated Fts3MultiSegReader just seeks to a single entry in a +** segment b-tree (if the term is not a prefix or it is a prefix for which +** there exists prefix b-tree of the right length) then it may be traversed +** and merged incrementally. Otherwise, it has to be merged into an in-memory +** doclist and then traversed. +*/ +static void fts3EvalAllocateReaders( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pnToken, /* OUT: Total number of tokens in phrase. */ + int *pnOr, /* OUT: Total number of OR nodes in expr. */ + int *pRc +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int i; + int nToken = pExpr->pPhrase->nToken; + *pnToken += nToken; + for(i=0; ipPhrase->aToken[i]; + int rc = sqlite3Fts3TermSegReaderCursor(pCsr, + pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr + ); + if( rc!=SQLITE_OK ){ + *pRc = rc; + return; + } + } + }else{ + *pnOr += (pExpr->eType==FTSQUERY_OR); + fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); + fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); + } + } +} + +static int fts3EvalPhraseLoad( + Fts3Cursor *pCsr, + Fts3Phrase *p +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int iToken; + int rc = SQLITE_OK; + + char *aDoclist = 0; + int nDoclist = 0; + int iPrev = -1; + + for(iToken=0; rc==SQLITE_OK && iTokennToken; iToken++){ + Fts3PhraseToken *pToken = &p->aToken[iToken]; + assert( pToken->pSegcsr || pToken->pDeferred ); + + if( pToken->pDeferred==0 ){ + int nThis = 0; + char *pThis = 0; + rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis); + if( rc==SQLITE_OK ){ + if( pThis==0 ){ + sqlite3_free(aDoclist); + aDoclist = 0; + nDoclist = 0; + break; + }else if( aDoclist==0 ){ + aDoclist = pThis; + nDoclist = nThis; + }else{ + assert( iPrev>=0 ); + fts3DoclistPhraseMerge(pTab->bDescIdx, + iToken-iPrev, aDoclist, nDoclist, pThis, &nThis + ); + sqlite3_free(aDoclist); + aDoclist = pThis; + nDoclist = nThis; + } + iPrev = iToken; + } + } + } + + if( rc==SQLITE_OK ){ + p->doclist.aAll = aDoclist; + p->doclist.nAll = nDoclist; + }else{ + sqlite3_free(aDoclist); + } + return rc; +} + +static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ + int iToken; + int rc = SQLITE_OK; + + int nMaxUndeferred = -1; + char *aPoslist = 0; + int nPoslist = 0; + int iPrev = -1; + + assert( pPhrase->doclist.bFreeList==0 ); + + for(iToken=0; rc==SQLITE_OK && iTokennToken; iToken++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + Fts3DeferredToken *pDeferred = pToken->pDeferred; + + if( pDeferred ){ + char *pList; + int nList; + rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); + if( rc!=SQLITE_OK ) return rc; + + if( pList==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + + }else if( aPoslist==0 ){ + aPoslist = pList; + nPoslist = nList; + + }else{ + assert( iPrev>=0 ); + + char *aOut = pList; + char *p1 = aPoslist; + char *p2 = aOut; + + fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); + sqlite3_free(aPoslist); + aPoslist = pList; + nPoslist = aOut - aPoslist; + if( nPoslist==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + } + } + iPrev = iToken; + }else{ + nMaxUndeferred = iToken; + } + } + + if( iPrev>=0 ){ + if( nMaxUndeferred<0 ){ + pPhrase->doclist.pList = aPoslist; + pPhrase->doclist.nList = nPoslist; + pPhrase->doclist.iDocid = pCsr->iPrevId; + pPhrase->doclist.bFreeList = 1; + }else{ + int nDistance; + char *p1; + char *p2; + char *aOut; + + if( nMaxUndeferred>iPrev ){ + p1 = aPoslist; + p2 = pPhrase->doclist.pList; + nDistance = nMaxUndeferred - iPrev; + }else{ + p1 = pPhrase->doclist.pList; + p2 = aPoslist; + nDistance = iPrev - nMaxUndeferred; + } + + aOut = (char *)sqlite3_malloc(nPoslist+8); + if( !aOut ){ + sqlite3_free(aPoslist); + return SQLITE_NOMEM; + } + + pPhrase->doclist.pList = aOut; + if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ + pPhrase->doclist.bFreeList = 1; + pPhrase->doclist.nList = (aOut - pPhrase->doclist.pList); + }else{ + sqlite3_free(aOut); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + } + sqlite3_free(aPoslist); + } + } + + return SQLITE_OK; +} + +/* +** This function is called for each Fts3Phrase in a full-text query +** expression to initialize the mechanism for returning rows. Once this +** function has been called successfully on an Fts3Phrase, it may be +** used with fts3EvalPhraseNext() to iterate through the matching docids. +*/ +static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ + int rc; + Fts3PhraseToken *pFirst = &p->aToken[0]; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + + assert( p->doclist.aAll==0 ); + if( pCsr->bDesc==pTab->bDescIdx && bOptOk==1 && p->nToken==1 + && pFirst->pSegcsr && pFirst->pSegcsr->bLookup + ){ + /* Use the incremental approach. */ + int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); + rc = sqlite3Fts3MsrIncrStart( + pTab, pFirst->pSegcsr, iCol, pFirst->z, pFirst->n); + p->bIncr = 1; + + }else{ + /* Load the full doclist for the phrase into memory. */ + rc = fts3EvalPhraseLoad(pCsr, p); + p->bIncr = 0; + } + + assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); + return rc; +} + +/* +** This function is used to iterate backwards (from the end to start) +** through doclists. +*/ +void sqlite3Fts3DoclistPrev( + int bDescIdx, /* True if the doclist is desc */ + char *aDoclist, /* Pointer to entire doclist */ + int nDoclist, /* Length of aDoclist in bytes */ + char **ppIter, /* IN/OUT: Iterator pointer */ + sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ + int *pnList, /* IN/OUT: List length pointer */ + u8 *pbEof /* OUT: End-of-file flag */ +){ + char *p = *ppIter; + int iMul = (bDescIdx ? -1 : 1); + + assert( nDoclist>0 ); + assert( *pbEof==0 ); + assert( p || *piDocid==0 ); + assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) ); + + if( p==0 ){ + sqlite3_int64 iDocid = 0; + char *pNext = 0; + char *pDocid = aDoclist; + char *pEnd = &aDoclist[nDoclist]; + + pDocid += sqlite3Fts3GetVarint(pDocid, &iDocid); + pNext = pDocid; + fts3PoslistCopy(0, &pDocid); + while( pDociddoclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + + if( p->bIncr ){ + assert( p->nToken==1 ); + assert( pDL->pNextDocid==0 ); + rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, + &pDL->iDocid, &pDL->pList, &pDL->nList + ); + if( rc==SQLITE_OK && !pDL->pList ){ + *pbEof = 1; + } + }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ + sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, + &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof + ); + pDL->pList = pDL->pNextDocid; + }else{ + char *pIter; /* Used to iterate through aAll */ + char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ + if( pDL->pNextDocid ){ + pIter = pDL->pNextDocid; + }else{ + pIter = pDL->aAll; + } + + if( pIter>=pEnd ){ + /* We have already reached the end of this doclist. EOF. */ + *pbEof = 1; + }else{ + sqlite3_int64 iDelta; + pIter += sqlite3Fts3GetVarint(pIter, &iDelta); + if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ + pDL->iDocid += iDelta; + }else{ + pDL->iDocid -= iDelta; + } + pDL->pList = pIter; + fts3PoslistCopy(0, &pIter); + pDL->nList = (pIter - pDL->pList); + + /* pIter now points just past the 0x00 that terminates the position- + ** list for document pDL->iDocid. However, if this position-list was + ** edited in place by fts3EvalNearTrim2(), then pIter may not actually + ** point to the start of the next docid value. The following line deals + ** with this case by advancing pIter past the zero-padding added by + ** fts3EvalNearTrim2(). */ + while( pIterpNextDocid = pIter; + assert( *pIter || pIter>=&pDL->aAll[pDL->nAll] ); + *pbEof = 0; + } + } + + return rc; +} + +static void fts3EvalStartReaders( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int bOptOk, + int *pRc +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int i; + int nToken = pExpr->pPhrase->nToken; + for(i=0; ipPhrase->aToken[i].pDeferred==0 ) break; + } + pExpr->bDeferred = (i==nToken); + *pRc = fts3EvalPhraseStart(pCsr, bOptOk, pExpr->pPhrase); + }else{ + fts3EvalStartReaders(pCsr, pExpr->pLeft, bOptOk, pRc); + fts3EvalStartReaders(pCsr, pExpr->pRight, bOptOk, pRc); + pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); + } + } +} + + +typedef struct Fts3TokenAndCost Fts3TokenAndCost; +struct Fts3TokenAndCost { + Fts3PhraseToken *pToken; + Fts3Expr *pRoot; + int nOvfl; + int iCol; +}; + +static void fts3EvalTokenCosts( + Fts3Cursor *pCsr, + Fts3Expr *pRoot, + Fts3Expr *pExpr, + Fts3TokenAndCost **ppTC, + Fts3Expr ***ppOr, + int *pRc +){ + if( *pRc==SQLITE_OK && pExpr ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + int i; + for(i=0; *pRc==SQLITE_OK && inToken; i++){ + Fts3TokenAndCost *pTC = (*ppTC)++; + pTC->pRoot = pRoot; + pTC->pToken = &pPhrase->aToken[i]; + pTC->iCol = pPhrase->iColumn; + *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); + } + }else if( pExpr->eType!=FTSQUERY_NOT ){ + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pLeft; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pRight; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); + } + } +} + +static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ + if( pCsr->nRowAvg==0 ){ + /* The average document size, which is required to calculate the cost + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ + int rc; + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + sqlite3_stmt *pStmt; + sqlite3_int64 nDoc = 0; + sqlite3_int64 nByte = 0; + const char *pEnd; + const char *a; + + rc = sqlite3Fts3SelectDoctotal(p, &pStmt); + if( rc!=SQLITE_OK ) return rc; + a = sqlite3_column_blob(pStmt, 0); + assert( a ); + + pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; + a += sqlite3Fts3GetVarint(a, &nDoc); + while( anDoc = nDoc; + pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); + assert( pCsr->nRowAvg>0 ); + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ) return rc; + } + + *pnPage = pCsr->nRowAvg; + return SQLITE_OK; +} + +static int fts3EvalSelectDeferred( + Fts3Cursor *pCsr, + Fts3Expr *pRoot, + Fts3TokenAndCost *aTC, + int nTC +){ + int nDocSize = 0; + int nDocEst = 0; + int rc = SQLITE_OK; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int ii; + + int nOvfl = 0; + int nTerm = 0; + + for(ii=0; iinOvfl) + ){ + pTC = &aTC[jj]; + } + } + assert( pTC ); + + /* At this point pTC points to the cheapest remaining token. */ + if( ii==0 ){ + if( pTC->nOvfl ){ + nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10; + }else{ + /* TODO: Fix this so that the doclist need not be read twice. */ + Fts3PhraseToken *pToken = pTC->pToken; + int nList = 0; + char *pList = 0; + rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList); + if( rc==SQLITE_OK ){ + nDocEst = fts3DoclistCountDocids(1, pList, nList); + } + sqlite3_free(pList); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3TermSegReaderCursor(pCsr, + pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr + ); + } + } + }else{ + if( pTC->nOvfl>=(nDocEst*nDocSize) ){ + Fts3PhraseToken *pToken = pTC->pToken; + rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); + fts3SegReaderCursorFree(pToken->pSegcsr); + pToken->pSegcsr = 0; + } + nDocEst = 1 + (nDocEst/4); + } + pTC->pToken = 0; + } + + return rc; +} + +int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int nToken = 0; + int nOr = 0; + + /* Allocate a MultiSegReader for each token in the expression. */ + fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc); + + /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO: + ** This call will eventually also be responsible for determining which + ** tokens are 'deferred' until the document text is loaded into memory. + ** + ** Each token in each phrase is dealt with using one of the following + ** three strategies: + ** + ** 1. Entire doclist loaded into memory as part of the + ** fts3EvalStartReaders() call. + ** + ** 2. Doclist loaded into memory incrementally, as part of each + ** sqlite3Fts3EvalNext() call. + ** + ** 3. Token doclist is never loaded. Instead, documents are loaded into + ** memory and scanned for the token as part of the sqlite3Fts3EvalNext() + ** call. This is known as a "deferred" token. + */ + + /* If bOptOk is true, check if there are any tokens that should be deferred. + */ + if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){ + Fts3TokenAndCost *aTC; + Fts3Expr **apOr; + aTC = (Fts3TokenAndCost *)sqlite3_malloc( + sizeof(Fts3TokenAndCost) * nToken + + sizeof(Fts3Expr *) * nOr * 2 + ); + apOr = (Fts3Expr **)&aTC[nToken]; + + if( !aTC ){ + rc = SQLITE_NOMEM; + }else{ + int ii; + Fts3TokenAndCost *pTC = aTC; + Fts3Expr **ppOr = apOr; + + fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc); + nToken = pTC-aTC; + nOr = ppOr-apOr; + + if( rc==SQLITE_OK ){ + rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); + for(ii=0; rc==SQLITE_OK && iidoclist.bFreeList ){ + sqlite3_free(pPhrase->doclist.pList); + } + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + pPhrase->doclist.bFreeList = 0; +} + +static int fts3EvalNearTrim2( + int nNear, + char *aTmp, /* Temporary space to use */ + char **paPoslist, /* IN/OUT: Position list */ + int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ + Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ +){ + int nParam1 = nNear + pPhrase->nToken; + int nParam2 = nNear + *pnToken; + int nNew; + char *p2; + char *pOut; + int res; + + assert( pPhrase->doclist.pList ); + + p2 = pOut = pPhrase->doclist.pList; + res = fts3PoslistNearMerge( + &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 + ); + if( res ){ + nNew = (pOut - pPhrase->doclist.pList) - 1; + assert( pPhrase->doclist.pList[nNew]=='\0' ); + assert( nNew<=pPhrase->doclist.nList && nNew>0 ); + memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); + pPhrase->doclist.nList = nNew; + *paPoslist = pPhrase->doclist.pList; + *pnToken = pPhrase->nToken; + } + + return res; +} + +static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ + int res = 1; + + /* The following block runs if pExpr is the root of a NEAR query. + ** For example, the query: + ** + ** "w" NEAR "x" NEAR "y" NEAR "z" + ** + ** which is represented in tree form as: + ** + ** | + ** +--NEAR--+ <-- root of NEAR query + ** | | + ** +--NEAR--+ "z" + ** | | + ** +--NEAR--+ "y" + ** | | + ** "w" "x" + ** + ** The right-hand child of a NEAR node is always a phrase. The + ** left-hand child may be either a phrase or a NEAR node. There are + ** no exceptions to this. + */ + if( *pRc==SQLITE_OK + && pExpr->eType==FTSQUERY_NEAR + && pExpr->bEof==0 + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + int nTmp = 0; /* Bytes of temp space */ + char *aTmp; /* Temp space for PoslistNearMerge() */ + + /* Allocate temporary working space. */ + for(p=pExpr; p->pLeft; p=p->pLeft){ + nTmp += p->pRight->pPhrase->doclist.nList; + } + nTmp += p->pPhrase->doclist.nList; + aTmp = sqlite3_malloc(nTmp*2); + if( !aTmp ){ + *pRc = SQLITE_NOMEM; + res = 0; + }else{ + char *aPoslist = p->pPhrase->doclist.pList; + int nToken = p->pPhrase->nToken; + + for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ + Fts3Phrase *pPhrase = p->pRight->pPhrase; + int nNear = p->nNear; + res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + + aPoslist = pExpr->pRight->pPhrase->doclist.pList; + nToken = pExpr->pRight->pPhrase->nToken; + for(p=pExpr->pLeft; p && res; p=p->pLeft){ + int nNear = p->pParent->nNear; + Fts3Phrase *pPhrase = ( + p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase + ); + res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + } + + sqlite3_free(aTmp); + } + + return res; +} + +/* +** This macro is used by the fts3EvalNext() function. The two arguments are +** 64-bit docid values. If the current query is "ORDER BY docid ASC", then +** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then +** it returns (i2 - i1). This allows the same code to be used for merging +** doclists in ascending or descending order. +*/ +#define DOCID_CMP(i1, i2) ((pCsr->bDesc?-1:1) * (i1-i2)) + +static void fts3EvalNext( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pRc +){ + if( *pRc==SQLITE_OK ){ + assert( pExpr->bEof==0 ); + pExpr->bStart = 1; + + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + assert( !pLeft->bDeferred || !pRight->bDeferred ); + if( pLeft->bDeferred ){ + fts3EvalNext(pCsr, pRight, pRc); + pExpr->iDocid = pRight->iDocid; + pExpr->bEof = pRight->bEof; + }else if( pRight->bDeferred ){ + fts3EvalNext(pCsr, pLeft, pRc); + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + }else{ + fts3EvalNext(pCsr, pLeft, pRc); + fts3EvalNext(pCsr, pRight, pRc); + + while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ + sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( iDiff==0 ) break; + if( iDiff<0 ){ + fts3EvalNext(pCsr, pLeft, pRc); + }else{ + fts3EvalNext(pCsr, pRight, pRc); + } + } + + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = (pLeft->bEof || pRight->bEof); + } + break; + } + + case FTSQUERY_OR: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + + assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); + assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); + + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + fts3EvalNext(pCsr, pLeft, pRc); + }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ + fts3EvalNext(pCsr, pRight, pRc); + }else{ + fts3EvalNext(pCsr, pLeft, pRc); + fts3EvalNext(pCsr, pRight, pRc); + } + + pExpr->bEof = (pLeft->bEof && pRight->bEof); + iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + pExpr->iDocid = pLeft->iDocid; + }else{ + pExpr->iDocid = pRight->iDocid; + } + + break; + } + + case FTSQUERY_NOT: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + + if( pRight->bStart==0 ){ + fts3EvalNext(pCsr, pRight, pRc); + assert( *pRc!=SQLITE_OK || pRight->bStart ); + } + + fts3EvalNext(pCsr, pLeft, pRc); + if( pLeft->bEof==0 ){ + while( !*pRc + && !pRight->bEof + && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 + ){ + fts3EvalNext(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + break; + } + + default: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + fts3EvalZeroPoslist(pPhrase); + *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); + pExpr->iDocid = pPhrase->doclist.iDocid; + break; + } + } + } +} + +static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){ + int bHit = 1; + if( *pRc==SQLITE_OK ){ + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: + bHit = ( + fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) + && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) + && fts3EvalNearTest(pExpr, pRc) + ); + + /* If the NEAR expression does not match any rows, zero the doclist for + ** all phrases involved in the NEAR. This is because the snippet(), + ** offsets() and matchinfo() functions are not supposed to recognize + ** any instances of phrases that are part of unmatched NEAR queries. + ** For example if this expression: + ** + ** ... MATCH 'a OR (b NEAR c)' + ** + ** is matched against a row containing: + ** + ** 'a b d e' + ** + ** then any snippet() should ony highlight the "a" term, not the "b" + ** (as "b" is part of a non-matching NEAR clause). + */ + if( bHit==0 + && pExpr->eType==FTSQUERY_NEAR + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + for(p=pExpr; p->pPhrase==0; p=p->pLeft){ + if( p->pRight->iDocid==pCsr->iPrevId ){ + fts3EvalZeroPoslist(p->pRight->pPhrase); + } + } + if( p->iDocid==pCsr->iPrevId ){ + fts3EvalZeroPoslist(p->pPhrase); + } + } + + break; + + case FTSQUERY_OR: { + int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc); + int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc); + bHit = bHit1 || bHit2; + break; + } + + case FTSQUERY_NOT: + bHit = ( + fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc) + && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc) + ); + break; + + default: { + if( pCsr->pDeferred + && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) + ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); + if( pExpr->bDeferred ){ + fts3EvalZeroPoslist(pPhrase); + } + *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); + bHit = (pPhrase->doclist.pList!=0); + pExpr->iDocid = pCsr->iPrevId; + }else{ + bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); + } + break; + } + } + } + return bHit; +} + +/* +** Return 1 if both of the following are true: +** +** 1. *pRc is SQLITE_OK when this function returns, and +** +** 2. After scanning the current FTS table row for the deferred tokens, +** it is determined that the row does not match the query. +** +** Or, if no error occurs and it seems the current row does match the FTS +** query, return 0. +*/ +static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){ + int rc = *pRc; + int bMiss = 0; + if( rc==SQLITE_OK ){ + if( pCsr->pDeferred ){ + rc = fts3CursorSeek(0, pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3CacheDeferredDoclists(pCsr); + } + } + bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc)); + sqlite3Fts3FreeDeferredDoclists(pCsr); + *pRc = rc; + } + return (rc==SQLITE_OK && bMiss); +} + +/* +** Advance to the next document that matches the FTS expression in +** Fts3Cursor.pExpr. +*/ +int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return Code */ + Fts3Expr *pExpr = pCsr->pExpr; + assert( pCsr->isEof==0 ); + if( pExpr==0 ){ + pCsr->isEof = 1; + }else{ + do { + if( pCsr->isRequireSeek==0 ){ + sqlite3_reset(pCsr->pStmt); + } + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + fts3EvalNext(pCsr, pExpr, &rc); + pCsr->isEof = pExpr->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pExpr->iDocid; + }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) ); + } + return rc; +} + ++/* ++** Restart interation for expression pExpr so that the next call to ++** sqlite3Fts3EvalNext() visits the first row. Do not allow incremental ++** loading or merging of phrase doclists for this iteration. ++** ++** If *pRc is other than SQLITE_OK when this function is called, it is ++** a no-op. If an error occurs within this function, *pRc is set to an ++** SQLite error code before returning. ++*/ +static void fts3EvalRestart( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pRc +){ + if( pExpr && *pRc==SQLITE_OK ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + + if( pPhrase ){ + fts3EvalZeroPoslist(pPhrase); + if( pPhrase->bIncr ){ + sqlite3Fts3EvalPhraseCleanup(pPhrase); + memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); + *pRc = sqlite3Fts3EvalStart(pCsr, pExpr, 0); + }else{ + pPhrase->doclist.pNextDocid = 0; + pPhrase->doclist.iDocid = 0; + } + } + + pExpr->iDocid = 0; + pExpr->bEof = 0; + pExpr->bStart = 0; + + fts3EvalRestart(pCsr, pExpr->pLeft, pRc); + fts3EvalRestart(pCsr, pExpr->pRight, pRc); + } +} + +/* +** After allocating the Fts3Expr.aMI[] array for each phrase in the +** expression rooted at pExpr, the cursor iterates through all rows matched +** by pExpr, calling this function for each row. This function increments +** the values in Fts3Expr.aMI[] according to the position-list currently +** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase +** expression nodes. +*/ +static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ + if( pExpr ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + if( pPhrase && pPhrase->doclist.pList ){ + int iCol = 0; + char *p = pPhrase->doclist.pList; + + assert( *p ); + while( 1 ){ + u8 c = 0; + int iCnt = 0; + while( 0xFE & (*p | c) ){ + if( (c&0x80)==0 ) iCnt++; + c = *p++ & 0x80; + } + + /* aMI[iCol*3 + 1] = Number of occurrences + ** aMI[iCol*3 + 2] = Number of rows containing at least one instance + */ + pExpr->aMI[iCol*3 + 1] += iCnt; + pExpr->aMI[iCol*3 + 2] += (iCnt>0); + if( *p==0x00 ) break; + p++; + p += sqlite3Fts3GetVarint32(p, &iCol); + } + } + + fts3EvalUpdateCounts(pExpr->pLeft); + fts3EvalUpdateCounts(pExpr->pRight); + } +} + +/* +** Expression pExpr must be of type FTSQUERY_PHRASE. +** +** If it is not already allocated and populated, this function allocates and +** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part +** of a NEAR expression, then it also allocates and populates the same array +** for all other phrases that are part of the NEAR expression. +** +** SQLITE_OK is returned if the aMI[] array is successfully allocated and +** populated. Otherwise, if an error occurs, an SQLite error code is returned. +*/ +static int fts3EvalGatherStats( + Fts3Cursor *pCsr, /* Cursor object */ + Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */ +){ + int rc = SQLITE_OK; /* Return code */ + + assert( pExpr->eType==FTSQUERY_PHRASE ); + if( pExpr->aMI==0 ){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + Fts3Expr *pRoot; /* Root of NEAR expression */ + Fts3Expr *p; /* Iterator used for several purposes */ + + sqlite3_int64 iPrevId = pCsr->iPrevId; + sqlite3_int64 iDocid; + u8 bEof; + + /* Find the root of the NEAR expression */ + pRoot = pExpr; + while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ + pRoot = pRoot->pParent; + } + iDocid = pRoot->iDocid; + bEof = pRoot->bEof; + assert( pRoot->bStart ); + + /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ + for(p=pRoot; p; p=p->pLeft){ + Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); + assert( pE->aMI==0 ); + pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); + if( !pE->aMI ) return SQLITE_NOMEM; + memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); + } + + fts3EvalRestart(pCsr, pRoot, &rc); + + while( pCsr->isEof==0 && rc==SQLITE_OK ){ + + do { + /* Ensure the %_content statement is reset. */ + if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + + /* Advance to the next document */ + fts3EvalNext(pCsr, pRoot, &rc); + pCsr->isEof = pRoot->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pRoot->iDocid; + }while( pCsr->isEof==0 + && pRoot->eType==FTSQUERY_NEAR + && fts3EvalLoadDeferred(pCsr, &rc) + ); + + if( rc==SQLITE_OK && pCsr->isEof==0 ){ + fts3EvalUpdateCounts(pRoot); + } + } + + pCsr->isEof = 0; + pCsr->iPrevId = iPrevId; + + if( bEof ){ + pRoot->bEof = bEof; + }else{ + /* Caution: pRoot may iterate through docids in ascending or descending + ** order. For this reason, even though it seems more defensive, the + ** do loop can not be written: + ** + ** do {...} while( pRoot->iDocidbEof==0 ); + }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); + fts3EvalLoadDeferred(pCsr, &rc); + } + } + return rc; +} + +/* +** This function is used by the matchinfo() module to query a phrase +** expression node for the following information: +** +** 1. The total number of occurrences of the phrase in each column of +** the FTS table (considering all rows), and +** +** 2. For each column, the number of rows in the table for which the +** column contains at least one instance of the phrase. +** +** If no error occurs, SQLITE_OK is returned and the values for each column +** written into the array aiOut as follows: +** +** aiOut[iCol*3 + 1] = Number of occurrences +** aiOut[iCol*3 + 2] = Number of rows containing at least one instance +** +** Caveats: +** +** * If a phrase consists entirely of deferred tokens, then all output +** values are set to the number of documents in the table. In other +** words we assume that very common tokens occur exactly once in each +** column of each row of the table. +** +** * If a phrase contains some deferred tokens (and some non-deferred +** tokens), count the potential occurrence identified by considering +** the non-deferred tokens instead of actual phrase occurrences. +** +** * If the phrase is part of a NEAR expression, then only phrase instances +** that meet the NEAR constraint are included in the counts. +*/ +int sqlite3Fts3EvalPhraseStats( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Phrase expression */ + u32 *aiOut /* Array to write results into (see above) */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int iCol; + + if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){ + assert( pCsr->nDoc>0 ); + for(iCol=0; iColnColumn; iCol++){ + aiOut[iCol*3 + 1] = pCsr->nDoc; + aiOut[iCol*3 + 2] = pCsr->nDoc; + } + }else{ + rc = fts3EvalGatherStats(pCsr, pExpr); + if( rc==SQLITE_OK ){ + assert( pExpr->aMI ); + for(iCol=0; iColnColumn; iCol++){ + aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; + aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; + } + } + } + + return rc; +} + +/* +** The expression pExpr passed as the second argument to this function +** must be of type FTSQUERY_PHRASE. +** +** The returned value is either NULL or a pointer to a buffer containing +** a position-list indicating the occurrences of the phrase in column iCol +** of the current row. +** +** More specifically, the returned buffer contains 1 varint for each +** occurence of the phrase in the column, stored using the normal (delta+2) +** compression and is terminated by either an 0x01 or 0x00 byte. For example, +** if the requested column contains "a b X c d X X" and the position-list +** for 'X' is requested, the buffer returned may contain: +** +** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00 +** +** This function works regardless of whether or not the phrase is deferred, +** incremental, or neither. +*/ +char *sqlite3Fts3EvalPhrasePoslist( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + Fts3Expr *pExpr, /* Phrase to return doclist for */ + int iCol /* Column to return position list for */ +){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + char *pIter = pPhrase->doclist.pList; + int iThis; + + assert( iCol>=0 && iColnColumn ); + if( !pIter + || pExpr->bEof + || pExpr->iDocid!=pCsr->iPrevId + || (pPhrase->iColumnnColumn && pPhrase->iColumn!=iCol) + ){ + return 0; + } + + assert( pPhrase->doclist.nList>0 ); + if( *pIter==0x01 ){ + pIter++; + pIter += sqlite3Fts3GetVarint32(pIter, &iThis); + }else{ + iThis = 0; + } + while( iThisdoclist, and +** * any Fts3MultiSegReader objects held by phrase tokens. +*/ +void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ + if( pPhrase ){ + int i; + sqlite3_free(pPhrase->doclist.aAll); + fts3EvalZeroPoslist(pPhrase); + memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); + for(i=0; inToken; i++){ + fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); + pPhrase->aToken[i].pSegcsr = 0; + } + } +} + #endif diff --cc manifest index 037d939c2d,404f96248e..0a91abe00f --- a/manifest +++ b/manifest @@@ -1,7 -1,7 +1,7 @@@ - C Remove\sunused\sparameters\sfrom\sinternal\sfts3\sfunction. - D 2011-06-14T07:14:43.149 -C Use\sonly\sunsigned\svalues\sin\sthe\simplementatin\sof\sLIKE\sand\sGLOB\sso\sthat\nvalues\swon't\soverflow\sto\snegative\swhen\sdealing\swith\smalformed\sUTF8. -D 2011-06-13T12:19:21.072 ++C Merge\srecent\strunk\schanges\sinto\sfts3-prefix-search\sbranch. ++D 2011-06-14T07:22:30.078 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f -F Makefile.in 11dcc00a8d0e5202def00e81732784fb0cc4fe1d +F Makefile.in c1d7a7f4fd8da6b1815032efca950e3d5125407e F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 F Makefile.vxworks c85ec1d8597fe2f7bc225af12ac1666e21379151 F README cd04a36fbc7ea56932a4052d7d0b7f09f27c33d6 @@@ -61,11 -61,11 +61,11 @@@ F ext/fts2/mkfts2amal.tcl 974d5d438cb3f F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9 F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d - F ext/fts3/fts3.c f4f65273121386349993d600c4c5f710d4b3e956 -F ext/fts3/fts3.c b3a10a1a320aaeb56a1dd6710bf09eb5c2370839 ++F ext/fts3/fts3.c 5d59c015d60379d399bbd1e1a597624237fd4583 F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe -F ext/fts3/fts3Int.h c8c0011c5e5b3a7703376ea6cd7deb91cfb96a06 -F ext/fts3/fts3_aux.c 97c960b1b0d371c08eae6b8565dfac619eb9d979 -F ext/fts3/fts3_expr.c 5f49e0deaf723724b08100bb3ff40aab02ad0c93 +F ext/fts3/fts3Int.h a999cfbf605efec293a88519f74192f5204c84d6 +F ext/fts3/fts3_aux.c baed9dab7fb4604ae8cafdb2d7700abe93beffbe +F ext/fts3/fts3_expr.c b95f0d76bcf4507c73a838f3178c4ed8c42dc2bb F ext/fts3/fts3_hash.c 3c8f6387a4a7f5305588b203fa7c887d753e1f1c F ext/fts3/fts3_hash.h 8331fb2206c609f9fc4c4735b9ab5ad6137c88ec F ext/fts3/fts3_icu.c ac494aed69835008185299315403044664bda295 @@@ -945,7 -942,7 +945,7 @@@ F tool/split-sqlite3c.tcl d9be87f1c3402 F tool/symbols.sh bc2a3709940d47c8ac8e0a1fdf17ec801f015a00 F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings.sh 347d974d143cf132f953b565fbc03026f19fcb4d - P 2c20129297b64f4113b8edb551385eb918279471 - R 24738ea4e4f82770b7e9961a6f31bf0f -P 2b3d9996a829c62fbaf7c92d50e44636340b07c6 -R 901f548a763eaba41771c9603ccfd6ef -U drh -Z 700a00fc01c336d2e1728d856de33900 ++P 06de3f2cbc27cdfd9f83218c9ea576f74f60d07b 77f01578bb565d1bc884b374b68bae10ce34a084 ++R 4b70a0fb05718888a014edb1bc0cc1e6 +U dan - Z 096f5065b2f90702df92a748235d8620 ++Z 363a76362b64a627be0e398551d0d1e2 diff --cc manifest.uuid index 6cb1ded47e,37f82214ef..2e05a59352 --- a/manifest.uuid +++ b/manifest.uuid @@@ -1,1 -1,1 +1,1 @@@ - 06de3f2cbc27cdfd9f83218c9ea576f74f60d07b -77f01578bb565d1bc884b374b68bae10ce34a084 ++135ce30f62ebd6a1b239c18dbbd9c926ea507db4