From: dan Date: Fri, 25 Jul 2014 20:30:47 +0000 (+0000) Subject: Add extension apis xRowCount, xQueryPhrase, xSetAuxdata and xGetAuxdata. And a rankin... X-Git-Tag: version-3.8.11~114^2~156 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=700b33d7a5665a24b3a806d195f0dfd248f8257b;p=thirdparty%2Fsqlite.git Add extension apis xRowCount, xQueryPhrase, xSetAuxdata and xGetAuxdata. And a ranking function that uses all of the above. FossilOrigin-Name: c4d50428ab97f77e6721c4f8d03eaaf3ea91f3eb --- diff --git a/ext/fts5/fts5.c b/ext/fts5/fts5.c index 14da56ec40..e89817d04e 100644 --- a/ext/fts5/fts5.c +++ b/ext/fts5/fts5.c @@ -19,6 +19,7 @@ typedef struct Fts5Table Fts5Table; typedef struct Fts5Cursor Fts5Cursor; typedef struct Fts5Global Fts5Global; typedef struct Fts5Auxiliary Fts5Auxiliary; +typedef struct Fts5Auxdata Fts5Auxdata; /* ** A single object of this type is allocated when the FTS5 module is @@ -75,7 +76,8 @@ struct Fts5Cursor { /* Variables used by auxiliary functions */ i64 iCsrId; /* Cursor id */ - Fts5Auxiliary *pAux; /* Currently executing function */ + Fts5Auxiliary *pAux; /* Currently executing extension function */ + Fts5Auxdata *pAuxdata; /* First in linked list of aux-data */ int *aColumnSize; /* Values for xColumnSize() */ }; @@ -93,6 +95,13 @@ struct Fts5Cursor { #define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag)) #define CsrFlagTest(pCsr, flag) ((pCsr)->csrflags & (flag)) +struct Fts5Auxdata { + Fts5Auxiliary *pAux; /* Extension to which this belongs */ + void *pPtr; /* Pointer value */ + void(*xDelete)(void*); /* Destructor */ + Fts5Auxdata *pNext; /* Next object in linked list */ +}; + /* ** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy ** argument is non-zero, attempt delete the shadow tables from teh database @@ -325,12 +334,21 @@ static int fts5CloseMethod(sqlite3_vtab_cursor *pCursor){ Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab); Fts5Cursor *pCsr = (Fts5Cursor*)pCursor; Fts5Cursor **pp; + Fts5Auxdata *pData; + Fts5Auxdata *pNext; + if( pCsr->pStmt ){ int eStmt = fts5StmtType(pCsr->idxNum); sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt); } sqlite3Fts5ExprFree(pCsr->pExpr); + for(pData=pCsr->pAuxdata; pData; pData=pNext){ + pNext = pData->pNext; + if( pData->xDelete ) pData->xDelete(pData->pPtr); + sqlite3_free(pData); + } + /* Remove the cursor from the Fts5Global.pCsr list */ for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext); *pp = pCsr->pNext; @@ -372,6 +390,16 @@ static int fts5NextMethod(sqlite3_vtab_cursor *pCursor){ return rc; } +static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){ + int rc; + rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); + if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ + CsrFlagSet(pCsr, FTS5CSR_EOF); + } + CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + return rc; +} + /* ** This is the xFilter interface for the virtual table. See ** the virtual table xFilter method documentation for additional @@ -403,11 +431,7 @@ static int fts5FilterMethod( const char *zExpr = (const char*)sqlite3_value_text(apVal[0]); rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc); - if( sqlite3Fts5ExprEof(pCsr->pExpr) ){ - CsrFlagSet(pCsr, FTS5CSR_EOF); - } - CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE ); + rc = fts5CursorFirst(pTab, pCsr, bAsc); } }else{ if( ePlan==FTS5_PLAN_ROWID ){ @@ -618,10 +642,20 @@ static int fts5ApiColumnCount(Fts5Context *pCtx){ return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol; } -static int fts5ApiColumnAvgSize(Fts5Context *pCtx, int iCol, int *pnToken){ +static int fts5ApiColumnTotalSize( + Fts5Context *pCtx, + int iCol, + sqlite3_int64 *pnToken +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken); +} + +static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); - return sqlite3Fts5StorageAvgsize(pTab->pStorage, iCol, pnToken); + return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); } static int fts5ApiTokenize( @@ -694,24 +728,123 @@ static int fts5ApiPoslist( return sqlite3Fts5PoslistNext64(a, n, pi, piPos); } +static int fts5ApiSetAuxdata( + Fts5Context *pCtx, /* Fts5 context */ + void *pPtr, /* Pointer to save as auxdata */ + void(*xDelete)(void*) /* Destructor for pPtr (or NULL) */ +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Auxdata *pData; + + for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ + if( pData->pAux==pCsr->pAux ) break; + } + + if( pData ){ + if( pData->xDelete ){ + pData->xDelete(pData->pPtr); + } + }else{ + pData = (Fts5Auxdata*)sqlite3_malloc(sizeof(Fts5Auxdata)); + if( pData==0 ) return SQLITE_NOMEM; + memset(pData, 0, sizeof(Fts5Auxdata)); + pData->pAux = pCsr->pAux; + pData->pNext = pCsr->pAuxdata; + pCsr->pAuxdata = pData; + } + + pData->xDelete = xDelete; + pData->pPtr = pPtr; + return SQLITE_OK; +} + +static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Auxdata *pData; + void *pRet = 0; + + for(pData=pCsr->pAuxdata; pData; pData=pData->pNext){ + if( pData->pAux==pCsr->pAux ) break; + } + + if( pData ){ + pRet = pData->pPtr; + if( bClear ){ + pData->pPtr = 0; + pData->xDelete = 0; + } + } + + return pRet; +} + +static int fts5ApiQueryPhrase(Fts5Context*, int, void*, + int(*)(const Fts5ExtensionApi*, Fts5Context*, void*) +); + +static const Fts5ExtensionApi sFts5Api = { + 1, /* iVersion */ + fts5ApiUserData, + fts5ApiColumnCount, + fts5ApiRowCount, + fts5ApiColumnTotalSize, + fts5ApiTokenize, + fts5ApiPhraseCount, + fts5ApiPhraseSize, + fts5ApiRowid, + fts5ApiColumnText, + fts5ApiColumnSize, + fts5ApiPoslist, + fts5ApiQueryPhrase, + fts5ApiSetAuxdata, + fts5ApiGetAuxdata, +}; + + +/* +** Implementation of API function xQueryPhrase(). +*/ +static int fts5ApiQueryPhrase( + Fts5Context *pCtx, + int iPhrase, + void *pUserData, + int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*) +){ + Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; + Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); + int rc; + Fts5Cursor *pNew = 0; + + rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew); + if( rc==SQLITE_OK ){ + Fts5Config *pConf = pTab->pConfig; + pNew->idxNum = FTS5_PLAN_MATCH; + pNew->base.pVtab = (sqlite3_vtab*)pTab; + rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr); + } + + if( rc==SQLITE_OK ){ + for(rc = fts5CursorFirst(pTab, pNew, 0); + rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0; + rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew) + ){ + rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + break; + } + } + } + + fts5CloseMethod((sqlite3_vtab_cursor*)pNew); + return rc; +} + static void fts5ApiCallback( sqlite3_context *context, int argc, sqlite3_value **argv ){ - static const Fts5ExtensionApi sApi = { - 1, /* iVersion */ - fts5ApiUserData, - fts5ApiColumnCount, - fts5ApiColumnAvgSize, - fts5ApiTokenize, - fts5ApiPhraseCount, - fts5ApiPhraseSize, - fts5ApiRowid, - fts5ApiColumnText, - fts5ApiColumnSize, - fts5ApiPoslist, - }; Fts5Auxiliary *pAux; Fts5Cursor *pCsr; @@ -730,7 +863,7 @@ static void fts5ApiCallback( }else{ assert( pCsr->pAux==0 ); pCsr->pAux = pAux; - pAux->xFunc(&sApi, (Fts5Context*)pCsr, context, argc-1, &argv[1]); + pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc-1, &argv[1]); pCsr->pAux = 0; } } diff --git a/ext/fts5/fts5.h b/ext/fts5/fts5.h index 4d45ee60b0..82d4884dc7 100644 --- a/ext/fts5/fts5.h +++ b/ext/fts5/fts5.h @@ -43,10 +43,18 @@ typedef void (*fts5_extension_function)( /* ** -** xUserData: +** xUserData(pFts): +** ** Return a copy of the context pointer the extension function was ** registered with. ** +** +** xColumnTotalSize(pFts, iCol, pnToken): +** +** Returns the total number of tokens in column iCol, considering all +** rows in the FTS5 table. +** +** ** xColumnCount: ** Returns the number of columns in the FTS5 table. ** @@ -73,6 +81,68 @@ typedef void (*fts5_extension_function)( ** ** xTokenize: ** Tokenize text using the tokenizer belonging to the FTS5 table. +** +** +** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback): +** +** This API function is used to query the FTS table for phrase iPhrase +** of the current query. Specifically, a query equivalent to: +** +** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY DESC +** +** with $p set to a phrase equivalent to the phrase iPhrase of the +** current query is executed. For each row visited, the callback function +** passed as the fourth argument is invoked. The context and API objects +** passed to the callback function may be used to access the properties of +** each matched row. Invoking Api.xUserData() returns a copy of the pointer +** passed as the third argument to pUserData. +** +** If the callback function returns any value other than SQLITE_OK, the +** query is abandoned and the xQueryPhrase function returns immediately. +** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK. +** Otherwise, the error code is propagated upwards. +** +** If the query runs to completion without incident, SQLITE_OK is returned. +** Or, if some error occurs before the query completes or is aborted by +** the callback, an SQLite error code is returned. +** +** +** xSetAuxdata(pFts5, pAux, xDelete) +** +** Save the pointer passed as the second argument as the extension functions +** "auxiliary data". The pointer may then be retrieved by the current or any +** future invocation of the same fts5 extension function made as part of +** of the same MATCH query using the xGetAuxdata() API. +** +** Each extension function is allocated a single auxiliary data slot per +** query. If the extension function is invoked more than once by the SQL +** query, then all invocations share a single auxiliary data context. +** +** If there is already an auxiliary data pointer when this function is +** invoked, then it is replaced by the new pointer. If an xDelete callback +** was specified along with the original pointer, it is invoked at this +** point. +** +** The xDelete callback, if one is specified, is also invoked on the +** auxiliary data pointer after the FTS5 query has finished. +** +** +** xGetAuxdata(pFts5, bClear) +** +** Returns the current auxiliary data pointer for the fts5 extension +** function. See the xSetAuxdata() method for details. +** +** If the bClear argument is non-zero, then the auxiliary data is cleared +** (set to NULL) before this function returns. In this case the xDelete, +** if any, is not invoked. +** +** +** xRowCount(pFts5, pnRow) +** +** This function is used to retrieve the total number of rows in the table. +** In other words, the same value that would be returned by: +** +** SELECT count(*) FROM ftstable; */ struct Fts5ExtensionApi { int iVersion; /* Currently always set to 1 */ @@ -80,7 +150,9 @@ struct Fts5ExtensionApi { void *(*xUserData)(Fts5Context*); int (*xColumnCount)(Fts5Context*); - int (*xColumnAvgSize)(Fts5Context*, int iCol, int *pnToken); + int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow); + int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken); + int (*xTokenize)(Fts5Context*, const char *pText, int nText, /* Text to tokenize */ void *pCtx, /* Context passed to xToken() */ @@ -94,6 +166,12 @@ struct Fts5ExtensionApi { int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn); int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken); int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, sqlite3_int64 *piPos); + + int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData, + int(*)(const Fts5ExtensionApi*,Fts5Context*,void*) + ); + int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*)); + void *(*xGetAuxdata)(Fts5Context*, int bClear); }; #define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index fc78d64589..21c6d459d9 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -300,7 +300,8 @@ int sqlite3Fts5StorageStmt(Fts5Storage *p, int eStmt, sqlite3_stmt **); void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*); int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol); -int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int iCol, int *pnAvg); +int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnAvg); +int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow); /* @@ -354,6 +355,8 @@ int sqlite3Fts5ExprPhraseCount(Fts5Expr*); int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase); int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **); +int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**); + /******************************************* ** The fts5_expr.c API above this point is used by the other hand-written ** C code in this module. The interfaces below this point are called by diff --git a/ext/fts5/fts5_aux.c b/ext/fts5/fts5_aux.c index faee34c82d..6281cf60d0 100644 --- a/ext/fts5/fts5_aux.c +++ b/ext/fts5/fts5_aux.c @@ -12,6 +12,7 @@ */ #include "fts5Int.h" +#include typedef struct SnippetPhrase SnippetPhrase; typedef struct SnippetIter SnippetIter; @@ -267,7 +268,6 @@ static int fts5SnippetText( int iPrint; int iMatchto; - int iBit0; int iLast; int *aiStart = ctx.aiStart - ctx.iFirst; @@ -367,7 +367,6 @@ static void fts5SnippetFunction( const char *zEllip = "..."; int nToken = -15; int nAbs; - int nFrag; /* Number of fragments to return */ int rc; SnippetIter *pIter = 0; @@ -384,8 +383,6 @@ static void fts5SnippetFunction( if( rc==SQLITE_OK ){ Fts5Buffer buf; /* Result buffer */ int nBestScore = 0; /* Score of best snippet found */ - int n; /* Size of column snippet is from in bytes */ - int i; /* Used to iterate through phrases */ for(fts5SnippetIterFirst(pIter); pIter->iLast>=0; @@ -414,6 +411,93 @@ static void fts5SnippetFunction( } } +typedef struct Fts5GatherCtx Fts5GatherCtx; +struct Fts5GatherCtx { + int nCol; + int iPhrase; + int *anVal; +}; + +static int fts5GatherCallback( + const Fts5ExtensionApi *pApi, + Fts5Context *pFts, + void *pUserData +){ + Fts5GatherCtx *p = (Fts5GatherCtx*)pUserData; + int i = 0; + int iPrev = -1; + i64 iPos = 0; + + while( 0==pApi->xPoslist(pFts, 0, &i, &iPos) ){ + int iCol = FTS5_POS2COLUMN(iPos); + if( iCol!=iPrev ){ + p->anVal[p->iPhrase * p->nCol + iCol]++; + iPrev = iCol; + } + } + + return SQLITE_OK; +} + +/* +** This function returns a pointer to an array of integers containing entries +** indicating the number of rows in the table for which each phrase features +** at least once in each column. +** +** If nCol is the number of matchable columns in the table, and nPhrase is +** the number of phrases in the query, the array contains a total of +** (nPhrase*nCol) entries. +** +** For phrase iPhrase and column iCol: +** +** anVal[iPhrase * nCol + iCol] +** +** is set to the number of rows in the table for which column iCol contains +** at least one instance of phrase iPhrase. +*/ +static int fts5GatherTotals( + const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ + Fts5Context *pFts, /* First arg to pass to pApi functions */ + int **panVal +){ + int rc = SQLITE_OK; + int *anVal = 0; + int i; /* For iterating through expression phrases */ + int nPhrase = pApi->xPhraseCount(pFts); + int nCol = pApi->xColumnCount(pFts); + int nByte = nCol * nPhrase * sizeof(int); + Fts5GatherCtx sCtx; + + sCtx.nCol = nCol; + anVal = sCtx.anVal = (int*)sqlite3_malloc(nByte); + if( anVal==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(anVal, 0, nByte); + } + + for(i=0; ixQueryPhrase(pFts, i, (void*)&sCtx, fts5GatherCallback); + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(anVal); + anVal = 0; + } + + *panVal = anVal; + return rc; +} + +typedef struct Fts5Bm25Context Fts5Bm25Context; +struct Fts5Bm25Context { + int nPhrase; + int nCol; + double *aIDF; /* Array of IDF values */ + double *aAvg; /* Average size of each column in tokens */ +}; + static void fts5Bm25Function( const Fts5ExtensionApi *pApi, /* API offered by current FTS version */ Fts5Context *pFts, /* First arg to pass to pApi functions */ @@ -421,7 +505,99 @@ static void fts5Bm25Function( int nVal, /* Number of values in apVal[] array */ sqlite3_value **apVal /* Array of trailing arguments */ ){ - assert( 0 ); + const double k1 = 1.2; + const double B = 0.75; + + int rc = SQLITE_OK; + Fts5Bm25Context *p; + + p = pApi->xGetAuxdata(pFts, 0); + if( p==0 ){ + int *anVal = 0; + int ic; /* For iterating through columns */ + int ip; /* For iterating through phrases */ + i64 nRow; /* Total number of rows in table */ + int nPhrase = pApi->xPhraseCount(pFts); + int nCol = pApi->xColumnCount(pFts); + int nByte = sizeof(Fts5Bm25Context) + + sizeof(double) * nPhrase * nCol /* aIDF[] */ + + sizeof(double) * nCol; /* aAvg[] */ + + p = (Fts5Bm25Context*)sqlite3_malloc(nByte); + if( p==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(p, 0, nByte); + p->aAvg = (double*)&p[1]; + p->aIDF = (double*)&p->aAvg[nCol]; + } + + if( rc==SQLITE_OK ){ + rc = pApi->xRowCount(pFts, &nRow); + assert( nRow>0 || rc!=SQLITE_OK ); + } + + for(ic=0; rc==SQLITE_OK && icxColumnTotalSize(pFts, ic, &nToken); + p->aAvg[ic] = (double)nToken / (double)nRow; + } + + if( rc==SQLITE_OK ){ + rc = fts5GatherTotals(pApi, pFts, &anVal); + } + for(ic=0; icaIDF[idx] = log( (0.5 + nRow - anVal[idx]) / (0.5 + anVal[idx]) ); + if( p->aIDF[idx]<0.0 ) p->aIDF[idx] = 0.0; + } + } + + sqlite3_free(anVal); + if( rc==SQLITE_OK ){ + rc = pApi->xSetAuxdata(pFts, p, sqlite3_free); + } + if( rc!=SQLITE_OK ){ + sqlite3_free(p); + } + } + + if( rc==SQLITE_OK ){ + int ip; + double score = 0.0; + + for(ip=0; rc==SQLITE_OK && ipnPhrase; ip++){ + int iPrev = 0; + int nHit = 0; + int i = 0; + i64 iPos = 0; + + while( rc==SQLITE_OK && 0==pApi->xPoslist(pFts, ip, &i, &iPos) ){ + int iCol = FTS5_POS2COLUMN(iPos); + if( iCol!=iPrev && nHit>0 ){ + int sz = 0; + int idx = ip * p->nCol + iPrev; + rc = pApi->xColumnSize(pFts, iPrev, &sz); + + score += p->aIDF[idx] * nHit * (k1+1.0) / + (nHit + k1 * (1.0 - B + B * sz / p->aAvg[iCol])); + nHit = 0; + } + nHit++; + iPrev = iCol; + } + } + + if( rc==SQLITE_OK ){ + sqlite3_result_double(pCtx, score); + } + + } + + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + } } static int fts5TestCallback( @@ -465,13 +641,13 @@ static void fts5TestFunction( nCol = pApi->xColumnCount(pFts); if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, "columnavgsize "); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "columntotalsize "); } - if( 0==zReq || 0==sqlite3_stricmp(zReq, "columnavgsize") ){ + if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntotalsize") ){ if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{"); for(i=0; rc==SQLITE_OK && ixColumnAvgSize(pFts, i, &colsz); + i64 colsz = 0; + rc = pApi->xColumnTotalSize(pFts, i, &colsz); sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", i==0?"":" ", colsz); } if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}"); @@ -580,7 +756,44 @@ static void fts5TestFunction( } if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " rowid "); + sqlite3Fts5BufferAppendPrintf(&rc, &s, " queryphrase "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "queryphrase") ){ + int ic, ip; + int *anVal = 0; + Fts5Buffer buf1; + memset(&buf1, 0, sizeof(Fts5Buffer)); + + if( rc==SQLITE_OK ){ + anVal = (int*)pApi->xGetAuxdata(pFts, 0); + if( anVal==0 ){ + rc = fts5GatherTotals(pApi, pFts, &anVal); + if( rc==SQLITE_OK ){ + rc = pApi->xSetAuxdata(pFts, (void*)anVal, sqlite3_free); + } + } + } + + for(ip=0; rc==SQLITE_OK && ip0 ) sqlite3Fts5BufferAppendString(&rc, &buf1, " "); + if( nCol>1 ) sqlite3Fts5BufferAppendString(&rc, &buf1, "{"); + for(ic=0; ic1 ) sqlite3Fts5BufferAppendString(&rc, &buf1, "}"); + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendListElem(&rc, &s, (const char*)buf1.p, buf1.n); + }else{ + sqlite3Fts5BufferAppendString(&rc, &s, (const char*)buf1.p); + } + sqlite3_free(buf1.p); + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendString(&rc, &s, " rowid "); } if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowid") ){ iRowid = pApi->xRowid(pFts); @@ -588,7 +801,16 @@ static void fts5TestFunction( } if( zReq==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &s, " tokenize "); + sqlite3Fts5BufferAppendString(&rc, &s, " rowcount "); + } + if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowcount") ){ + i64 nRow; + rc = pApi->xRowCount(pFts, &nRow); + sqlite3Fts5BufferAppendPrintf(&rc, &s, "%lld", nRow); + } + + if( zReq==0 ){ + sqlite3Fts5BufferAppendString(&rc, &s, " tokenize "); } if( 0==zReq || 0==sqlite3_stricmp(zReq, "tokenize") ){ Fts5Buffer buf; @@ -629,8 +851,9 @@ int sqlite3Fts5AuxInit(Fts5Global *pGlobal){ fts5_extension_function xFunc;/* Callback function */ void (*xDestroy)(void*); /* Destructor function */ } aBuiltin [] = { - { "snippet", 0, fts5SnippetFunction, 0 }, - { "fts5_test", 0, fts5TestFunction, 0 }, + { "bm25", 0, fts5Bm25Function, 0 }, + { "snippet", 0, fts5SnippetFunction, 0 }, + { "fts5_test", 0, fts5TestFunction, 0 }, }; int rc = SQLITE_OK; /* Return code */ diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index 9eea4552bf..ab9b307bc2 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -227,6 +227,106 @@ int sqlite3Fts5ExprNew( return sParse.rc; } +static char *fts5ExprStrdup(int *pRc, const char *zIn){ + char *zRet = 0; + if( *pRc==SQLITE_OK ){ + int nByte = strlen(zIn) + 1; + zRet = sqlite3_malloc(nByte); + if( zRet ){ + memcpy(zRet, zIn, nByte); + }else{ + *pRc = SQLITE_NOMEM; + } + } + return zRet; +} + +static void *fts5ExprMalloc(int *pRc, int nByte){ + void *pRet = 0; + if( *pRc==SQLITE_OK ){ + pRet = sqlite3_malloc(nByte); + if( pRet ){ + memset(pRet, 0, nByte); + }else{ + *pRc = SQLITE_NOMEM; + } + } + return pRet; +} + +/* +** Create a new FTS5 expression by cloning phrase iPhrase of the +** expression passed as the second argument. +*/ +int sqlite3Fts5ExprPhraseExpr( + Fts5Config *pConfig, + Fts5Expr *pExpr, + int iPhrase, + Fts5Expr **ppNew +){ + int rc = SQLITE_OK; /* Return code */ + Fts5ExprPhrase *pOrig = 0; /* The phrase extracted from pExpr */ + int i; /* Used to iterate through phrase terms */ + + /* Components of the new expression object */ + Fts5Expr *pNew; + Fts5ExprPhrase **apPhrase; + Fts5ExprNode *pNode; + Fts5ExprNearset *pNear; + Fts5ExprPhrase *pCopy; + + pOrig = pExpr->apPhrase[iPhrase]; + pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr)); + apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*)); + pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode)); + pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc, + sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*) + ); + pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc, + sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm + ); + + for(i=0; rc==SQLITE_OK && inTerm; i++){ + pCopy->aTerm[i].zTerm = fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm); + pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix; + } + + if( rc==SQLITE_OK ){ + /* All the allocations succeeded. Put the expression object together. */ + pNew->pIndex = pExpr->pIndex; + pNew->pRoot = pNode; + pNew->nPhrase = 1; + pNew->apPhrase = apPhrase; + pNew->apPhrase[0] = pCopy; + + pNode->eType = FTS5_STRING; + pNode->pNear = pNear; + + pNear->iCol = -1; + pNear->nPhrase = 1; + pNear->apPhrase[0] = pCopy; + + pCopy->nTerm = pOrig->nTerm; + pCopy->pNode = pNode; + }else{ + /* At least one allocation failed. Free them all. */ + if( pCopy ){ + for(i=0; inTerm; i++){ + sqlite3_free(pCopy->aTerm[i].zTerm); + } + sqlite3_free(pCopy); + sqlite3_free(pNear); + sqlite3_free(pNode); + sqlite3_free(apPhrase); + sqlite3_free(pNew); + pNew = 0; + } + } + + *ppNew = pNew; + return rc; +} + /* ** Free the expression node object passed as the only argument. */ diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index c56c2d038a..22cb427e05 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -705,15 +705,18 @@ int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol){ return rc; } -int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int iCol, int *pnAvg){ +int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ int rc = fts5StorageLoadTotals(p); if( rc==SQLITE_OK ){ - int nAvg = 1; - if( p->nTotalRow ){ - nAvg = (int)((p->aTotalSize[iCol] + (p->nTotalRow/2)) / p->nTotalRow); - if( nAvg<1 ) nAvg = 1; - *pnAvg = nAvg; - } + *pnToken = p->aTotalSize[iCol]; + } + return rc; +} + +int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ + int rc = fts5StorageLoadTotals(p); + if( rc==SQLITE_OK ){ + *pnRow = p->nTotalRow; } return rc; } diff --git a/manifest b/manifest index ec56664116..e50b5af932 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\ssnippet()\sfunction\sto\sfts5. -D 2014-07-23T19:31:56.454 +C Add\sextension\sapis\sxRowCount,\sxQueryPhrase,\sxSetAuxdata\sand\sxGetAuxdata.\sAnd\sa\sranking\sfunction\sthat\suses\sall\sof\sthe\sabove. +D 2014-07-25T20:30:47.445 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -103,15 +103,15 @@ F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197 F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7 F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368 -F ext/fts5/fts5.c 6f859d444eb8be46cb3f7aba3aaae369c5b26809 -F ext/fts5/fts5.h 57325b418b26dcd60be5bc8aab05b33767d81590 -F ext/fts5/fts5Int.h 12d03496152b716e63a5380e396b776fbefa2065 -F ext/fts5/fts5_aux.c cba929fb13931c9b8be7d572991e648b98f14cf2 +F ext/fts5/fts5.c 1496aff16dd9b0a013d14b6c8cf5b7df8c170abe +F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a +F ext/fts5/fts5Int.h 92fb9c4f759674ef569aebc338f363e167a8933c +F ext/fts5/fts5_aux.c f8bed7a86b65cb07cffdafbf4f0611f127b36274 F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00 F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef -F ext/fts5/fts5_expr.c 2911813db174afa28b69ccc7031b6dd80293b241 +F ext/fts5/fts5_expr.c 65c1918002f2ec1755e4c0c28bf007659409fbd8 F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997 -F ext/fts5/fts5_storage.c 9a2744f492413395a0e75f20c19b797c801a7308 +F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7 F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9 F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb @@ -599,8 +599,8 @@ F test/fts5aa.test a2c7bbc18f25f0b57ea8fc483c8a8830273b9ed4 F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397 F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85 F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07 -F test/fts5ae.test fe9db78201bbb87c6f82b72a14b946d0f7fc3026 -F test/fts5af.test a2980528a04b67ac4690e6c02ebe9455f45c9a35 +F test/fts5ae.test 1424ec557d543ace1f3cf6d231b247bc7b9f337c +F test/fts5af.test 5f53d0a52280b63caf5a519d6994c4d428835155 F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef @@ -1196,7 +1196,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P d44d3a8518ff7a1a3e2c0ab97493aa590676ee8c -R 638d6826a594d773b5778bd6943c3d96 +P bdc58fd28a63ac9632c3df6c7768a9a236566605 +R 2e8cb20122478987f116ef8ff9f6144b U dan -Z 30db824dafb73f9c4c6895383aa25ed9 +Z 5dd5c36b8a0e52d63a87d23e7179571f diff --git a/manifest.uuid b/manifest.uuid index 990c947a9e..8319bdf8c4 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -bdc58fd28a63ac9632c3df6c7768a9a236566605 \ No newline at end of file +c4d50428ab97f77e6721c4f8d03eaaf3ea91f3eb \ No newline at end of file diff --git a/test/fts5ae.test b/test/fts5ae.test index c1eabef3f8..4480c081df 100644 --- a/test/fts5ae.test +++ b/test/fts5ae.test @@ -157,22 +157,22 @@ do_execsql_test 5.2 { } do_execsql_test 5.3 { - SELECT rowid, fts5_test(t5, 'columnavgsize') FROM t5 WHERE t5 MATCH 'a' + SELECT rowid, fts5_test(t5, 'columntotalsize') FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { - 3 {2 2} - 2 {2 2} - 1 {2 2} + 3 {5 7} + 2 {5 7} + 1 {5 7} } do_execsql_test 5.4 { INSERT INTO t5 VALUES('x y z', 'v w x y z'); - SELECT rowid, fts5_test(t5, 'columnavgsize') FROM t5 WHERE t5 MATCH 'a' + SELECT rowid, fts5_test(t5, 'columntotalsize') FROM t5 WHERE t5 MATCH 'a' ORDER BY rowid DESC; } { - 3 {2 3} - 2 {2 3} - 1 {2 3} + 3 {8 12} + 2 {8 12} + 1 {8 12} } #------------------------------------------------------------------------- @@ -192,5 +192,44 @@ do_execsql_test 6.2 { 1 {{there are more} {things in heaven and earth}} } +#------------------------------------------------------------------------- +# Test the xQueryPhrase() API +# +reset_db +do_execsql_test 7.1 { + CREATE VIRTUAL TABLE t7 USING fts5(x, y); +} +do_test 7.2 { + foreach {x y} { + {q i b w s a a e l o} {i b z a l f p t e u} + {b a z t a l o x d i} {b p a d b f h d w y} + {z m h n p p u i e g} {v h d v b x j j c z} + {a g i m v a u c b i} {p k s o t l r t b m} + {v v c j o d a s c p} {f f v o k p o f o g} + } { + execsql {INSERT INTO t7 VALUES($x, $y)} + } + execsql { SELECT count(*) FROM t7 } +} {5} + +foreach {tn q res} { + 1 a {{4 2}} + 2 b {{3 4}} + 3 c {{2 1}} + 4 d {{2 2}} + 5 {a AND b} {{4 2} {3 4}} + 6 {a OR b OR c OR d} {{4 2} {3 4} {2 1} {2 2}} +} { + do_execsql_test 7.3.$tn { + SELECT fts5_test(t7, 'queryphrase') FROM t7 WHERE t7 MATCH $q LIMIT 1 + } [list $res] +} + +do_execsql_test 7.4 { + SELECT fts5_test(t7, 'rowcount') FROM t7 WHERE t7 MATCH 'a'; +} {5 5 5 5} + + + finish_test diff --git a/test/fts5af.test b/test/fts5af.test index cd5f91f13e..2412b4a7a4 100644 --- a/test/fts5af.test +++ b/test/fts5af.test @@ -30,6 +30,25 @@ do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x, y); } +proc do_snippet_test {tn doc match res} { + + uplevel #0 [list set v1 $doc] + uplevel #0 [list set v2 $match] + + do_execsql_test $tn.1 { + DELETE FROM t1; + INSERT INTO t1 VALUES($v1, NULL); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; + } [list $res] + + do_execsql_test $tn.2 { + DELETE FROM t1; + INSERT INTO t1 VALUES(NULL, $v1); + SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2; + } [list $res] + +} + foreach {tn doc res} { @@ -82,17 +101,7 @@ foreach {tn doc res} { 7.5 {o o o o X o o X o} {...o o [X] o o [X] o} 7.6 {o o o o o X o o X} {...o o o [X] o o [X]} } { - do_execsql_test 1.$tn.1 { - DELETE FROM t1; - INSERT INTO t1 VALUES($doc, NULL); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X'; - } [list $res] - - do_execsql_test 1.$tn.2 { - DELETE FROM t1; - INSERT INTO t1 VALUES(NULL, $doc); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X'; - } [list $res] + do_snippet_test 1.$tn $doc X $res } foreach {tn doc res} { @@ -121,17 +130,7 @@ foreach {tn doc res} { 3.8 {o o o o o o o X Y} {...o o o o o [X Y]} } { - do_execsql_test 2.$tn.1 { - DELETE FROM t1; - INSERT INTO t1 VALUES($doc, NULL); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y'; - } [list $res] - - do_execsql_test 2.$tn.2 { - DELETE FROM t1; - INSERT INTO t1 VALUES(NULL, $doc); - SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y'; - } [list $res] + do_snippet_test 1.$tn $doc "X + Y" $res } finish_test