typedef struct Fts5Cursor Fts5Cursor;
typedef struct Fts5Global Fts5Global;
typedef struct Fts5Auxiliary Fts5Auxiliary;
+typedef struct Fts5Auxdata Fts5Auxdata;
/*
** A single object of this type is allocated when the FTS5 module is
/* Variables used by auxiliary functions */
i64 iCsrId; /* Cursor id */
- Fts5Auxiliary *pAux; /* Currently executing function */
+ Fts5Auxiliary *pAux; /* Currently executing extension function */
+ Fts5Auxdata *pAuxdata; /* First in linked list of aux-data */
int *aColumnSize; /* Values for xColumnSize() */
};
#define CsrFlagClear(pCsr, flag) ((pCsr)->csrflags &= ~(flag))
#define CsrFlagTest(pCsr, flag) ((pCsr)->csrflags & (flag))
+struct Fts5Auxdata { /* One auxiliary-data slot; slots hang off Fts5Cursor.pAuxdata */
+ Fts5Auxiliary *pAux; /* Extension function to which this slot belongs */
+ void *pPtr; /* Pointer saved by xSetAuxdata() (0 once cleared) */
+ void(*xDelete)(void*); /* Destructor for pPtr, or 0 if there is none */
+ Fts5Auxdata *pNext; /* Next object in linked list */
+};
+
/*
** Close a virtual table handle opened by fts5InitVtab(). If the bDestroy
** argument is non-zero, attempt delete the shadow tables from teh database
Fts5Table *pTab = (Fts5Table*)(pCursor->pVtab);
Fts5Cursor *pCsr = (Fts5Cursor*)pCursor;
Fts5Cursor **pp;
+ Fts5Auxdata *pData;
+ Fts5Auxdata *pNext;
+
if( pCsr->pStmt ){
int eStmt = fts5StmtType(pCsr->idxNum);
sqlite3Fts5StorageStmtRelease(pTab->pStorage, eStmt, pCsr->pStmt);
}
sqlite3Fts5ExprFree(pCsr->pExpr);
+ for(pData=pCsr->pAuxdata; pData; pData=pNext){
+ pNext = pData->pNext;
+ if( pData->xDelete ) pData->xDelete(pData->pPtr);
+ sqlite3_free(pData);
+ }
+
/* Remove the cursor from the Fts5Global.pCsr list */
for(pp=&pTab->pGlobal->pCsr; (*pp)!=pCsr; pp=&(*pp)->pNext);
*pp = pCsr->pNext;
return rc;
}
+/*
+** Move the expression belonging to cursor pCsr to its first match. The
+** bAsc flag is passed straight through to sqlite3Fts5ExprFirst().
+**
+** If the expression is at EOF afterwards the FTS5CSR_EOF flag is set. The
+** REQUIRE_CONTENT and REQUIRE_DOCSIZE flags are always set. The value
+** returned is the return code of sqlite3Fts5ExprFirst().
+*/
+static int fts5CursorFirst(Fts5Table *pTab, Fts5Cursor *pCsr, int bAsc){
+  Fts5Expr *pExpr = pCsr->pExpr;
+  int rc = sqlite3Fts5ExprFirst(pExpr, pTab->pIndex, bAsc);
+
+  if( sqlite3Fts5ExprEof(pExpr) ){
+    CsrFlagSet(pCsr, FTS5CSR_EOF);
+  }
+  CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
+
+  return rc;
+}
+
/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
const char *zExpr = (const char*)sqlite3_value_text(apVal[0]);
rc = sqlite3Fts5ExprNew(pTab->pConfig, zExpr, &pCsr->pExpr, pzErr);
if( rc==SQLITE_OK ){
- rc = sqlite3Fts5ExprFirst(pCsr->pExpr, pTab->pIndex, bAsc);
- if( sqlite3Fts5ExprEof(pCsr->pExpr) ){
- CsrFlagSet(pCsr, FTS5CSR_EOF);
- }
- CsrFlagSet(pCsr, FTS5CSR_REQUIRE_CONTENT | FTS5CSR_REQUIRE_DOCSIZE );
+ rc = fts5CursorFirst(pTab, pCsr, bAsc);
}
}else{
if( ePlan==FTS5_PLAN_ROWID ){
return ((Fts5Table*)(pCsr->base.pVtab))->pConfig->nCol;
}
-static int fts5ApiColumnAvgSize(Fts5Context *pCtx, int iCol, int *pnToken){
+/*
+** Implementation of API function xColumnTotalSize(). The context handle
+** is the cursor; the request is forwarded to the storage layer of the
+** table that the cursor belongs to.
+*/
+static int fts5ApiColumnTotalSize(
+  Fts5Context *pCtx,
+  int iCol,
+  sqlite3_int64 *pnToken
+){
+  Fts5Table *pTab = (Fts5Table*)(((Fts5Cursor*)pCtx)->base.pVtab);
+  return sqlite3Fts5StorageSize(pTab->pStorage, iCol, pnToken);
+}
+
+static int fts5ApiRowCount(Fts5Context *pCtx, i64 *pnRow){ /* xRowCount(): row count is written to *pnRow */
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx; /* The context handle is the cursor object */
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
- return sqlite3Fts5StorageAvgsize(pTab->pStorage, iCol, pnToken);
+ return sqlite3Fts5StorageRowCount(pTab->pStorage, pnRow); /* Storage-layer return code is passed back unchanged */
}
static int fts5ApiTokenize(
return sqlite3Fts5PoslistNext64(a, n, pi, piPos);
}
+/*
+** Implementation of API function xSetAuxdata().
+**
+** Each cursor keeps a linked list of slots, one per extension function
+** (identified by Fts5Cursor.pAux, the function currently executing). If
+** the running function already owns a slot, its old pointer is destroyed
+** with the old xDelete (if any) before being replaced. Otherwise a new
+** slot is allocated and pushed onto the head of the list.
+**
+** Returns SQLITE_OK, or SQLITE_NOMEM if a new slot cannot be allocated.
+** In the latter case neither pPtr nor xDelete is stored or invoked.
+*/
+static int fts5ApiSetAuxdata(
+  Fts5Context *pCtx,              /* Fts5 context */
+  void *pPtr,                     /* Pointer to save as auxdata */
+  void(*xDelete)(void*)           /* Destructor for pPtr (or NULL) */
+){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Auxdata *pSlot = pCsr->pAuxdata;
+
+  /* Search for the slot owned by the running extension function. */
+  while( pSlot && pSlot->pAux!=pCsr->pAux ){
+    pSlot = pSlot->pNext;
+  }
+
+  if( pSlot==0 ){
+    pSlot = (Fts5Auxdata*)sqlite3_malloc(sizeof(Fts5Auxdata));
+    if( pSlot==0 ) return SQLITE_NOMEM;
+    memset(pSlot, 0, sizeof(Fts5Auxdata));
+    pSlot->pAux = pCsr->pAux;
+    pSlot->pNext = pCsr->pAuxdata;
+    pCsr->pAuxdata = pSlot;
+  }else if( pSlot->xDelete ){
+    pSlot->xDelete(pSlot->pPtr);
+  }
+
+  pSlot->xDelete = xDelete;
+  pSlot->pPtr = pPtr;
+  return SQLITE_OK;
+}
+
+/*
+** Implementation of API function xGetAuxdata().
+**
+** Return the pointer stored in the slot owned by the extension function
+** currently executing on this cursor, or 0 if it has no slot. If bClear
+** is non-zero the slot's pointer and destructor are both zeroed without
+** the destructor being invoked; the slot itself stays in the list.
+*/
+static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Auxdata *pSlot;
+
+  for(pSlot=pCsr->pAuxdata; pSlot; pSlot=pSlot->pNext){
+    if( pSlot->pAux==pCsr->pAux ){
+      void *pSaved = pSlot->pPtr;
+      if( bClear ){
+        pSlot->pPtr = 0;
+        pSlot->xDelete = 0;
+      }
+      return pSaved;
+    }
+  }
+
+  return 0;
+}
+
+static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
+ int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
+);
+
+static const Fts5ExtensionApi sFts5Api = { /* Positional initializer: entries must follow Fts5ExtensionApi member order */
+ 1, /* iVersion */
+ fts5ApiUserData,
+ fts5ApiColumnCount,
+ fts5ApiRowCount,
+ fts5ApiColumnTotalSize,
+ fts5ApiTokenize,
+ fts5ApiPhraseCount,
+ fts5ApiPhraseSize,
+ fts5ApiRowid,
+ fts5ApiColumnText,
+ fts5ApiColumnSize,
+ fts5ApiPoslist,
+ fts5ApiQueryPhrase, /* Forward-declared: its definition passes &sFts5Api to the callback */
+ fts5ApiSetAuxdata,
+ fts5ApiGetAuxdata,
+};
+
+
+/*
+** Implementation of API function xQueryPhrase().
+**
+** A temporary cursor is opened on the same table as pCtx and given an
+** expression consisting of a copy of phrase iPhrase of the current query.
+** xCallback is then invoked once for each row the temporary cursor visits,
+** with the temporary cursor as its Fts5Context and pUserData as its third
+** argument.
+**
+** If xCallback returns SQLITE_DONE the scan stops and SQLITE_OK is
+** returned. Any other non-zero value stops the scan and is returned to
+** the caller, as is any error from opening or advancing the cursor.
+*/
+static int fts5ApiQueryPhrase(
+  Fts5Context *pCtx,
+  int iPhrase,
+  void *pUserData,
+  int(*xCallback)(const Fts5ExtensionApi*, Fts5Context*, void*)
+){
+  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
+  Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
+  int rc;
+  Fts5Cursor *pNew = 0;
+
+  rc = fts5OpenMethod(pCsr->base.pVtab, (sqlite3_vtab_cursor**)&pNew);
+  if( rc==SQLITE_OK ){
+    Fts5Config *pConf = pTab->pConfig;
+    pNew->idxNum = FTS5_PLAN_MATCH;
+    pNew->base.pVtab = (sqlite3_vtab*)pTab;
+    rc = sqlite3Fts5ExprPhraseExpr(pConf, pCsr->pExpr, iPhrase, &pNew->pExpr);
+  }
+
+  if( rc==SQLITE_OK ){
+    for(rc = fts5CursorFirst(pTab, pNew, 0);
+        rc==SQLITE_OK && CsrFlagTest(pNew, FTS5CSR_EOF)==0;
+        rc = fts5NextMethod((sqlite3_vtab_cursor*)pNew)
+    ){
+      rc = xCallback(&sFts5Api, (Fts5Context*)pNew, pUserData);
+      if( rc!=SQLITE_OK ){
+        /* SQLITE_DONE is a request to stop early, not an error. */
+        if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+        break;
+      }
+    }
+  }
+
+  /* If fts5OpenMethod() failed there is no cursor to close. Passing a
+  ** NULL cursor to fts5CloseMethod() would dereference it. */
+  if( pNew ){
+    fts5CloseMethod((sqlite3_vtab_cursor*)pNew);
+  }
+  return rc;
+}
+
static void fts5ApiCallback(
sqlite3_context *context,
int argc,
sqlite3_value **argv
){
- static const Fts5ExtensionApi sApi = {
- 1, /* iVersion */
- fts5ApiUserData,
- fts5ApiColumnCount,
- fts5ApiColumnAvgSize,
- fts5ApiTokenize,
- fts5ApiPhraseCount,
- fts5ApiPhraseSize,
- fts5ApiRowid,
- fts5ApiColumnText,
- fts5ApiColumnSize,
- fts5ApiPoslist,
- };
Fts5Auxiliary *pAux;
Fts5Cursor *pCsr;
}else{
assert( pCsr->pAux==0 );
pCsr->pAux = pAux;
- pAux->xFunc(&sApi, (Fts5Context*)pCsr, context, argc-1, &argv[1]);
+ pAux->xFunc(&sFts5Api, (Fts5Context*)pCsr, context, argc-1, &argv[1]);
pCsr->pAux = 0;
}
}
/*
**
-** xUserData:
+** xUserData(pFts):
+**
** Return a copy of the context pointer the extension function was
** registered with.
**
+**
+** xColumnTotalSize(pFts, iCol, pnToken):
+**
+** Returns the total number of tokens in column iCol, considering all
+** rows in the FTS5 table.
+**
+**
** xColumnCount:
** Returns the number of columns in the FTS5 table.
**
**
** xTokenize:
** Tokenize text using the tokenizer belonging to the FTS5 table.
+**
+**
+** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
+**
+** This API function is used to query the FTS table for phrase iPhrase
+** of the current query. Specifically, a query equivalent to:
+**
+** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid DESC
+**
+** with $p set to a phrase equivalent to the phrase iPhrase of the
+** current query is executed. For each row visited, the callback function
+** passed as the fourth argument is invoked. The context and API objects
+** passed to the callback function may be used to access the properties of
+** each matched row. Invoking Api.xUserData() returns a copy of the pointer
+** passed as the third argument to pUserData.
+**
+** If the callback function returns any value other than SQLITE_OK, the
+** query is abandoned and the xQueryPhrase function returns immediately.
+** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
+** Otherwise, the error code is propagated upwards.
+**
+** If the query runs to completion without incident, SQLITE_OK is returned.
+** Or, if some error occurs before the query completes or is aborted by
+** the callback, an SQLite error code is returned.
+**
+**
+** xSetAuxdata(pFts5, pAux, xDelete)
+**
+** Save the pointer passed as the second argument as the extension function's
+** "auxiliary data". The pointer may then be retrieved by the current or any
+** future invocation of the same fts5 extension function made as part of
+** the same MATCH query using the xGetAuxdata() API.
+**
+** Each extension function is allocated a single auxiliary data slot per
+** query. If the extension function is invoked more than once by the SQL
+** query, then all invocations share a single auxiliary data context.
+**
+** If there is already an auxiliary data pointer when this function is
+** invoked, then it is replaced by the new pointer. If an xDelete callback
+** was specified along with the original pointer, it is invoked at this
+** point.
+**
+** The xDelete callback, if one is specified, is also invoked on the
+** auxiliary data pointer after the FTS5 query has finished.
+**
+**
+** xGetAuxdata(pFts5, bClear)
+**
+** Returns the current auxiliary data pointer for the fts5 extension
+** function. See the xSetAuxdata() method for details.
+**
+** If the bClear argument is non-zero, then the auxiliary data is cleared
+** (set to NULL) before this function returns. In this case the xDelete,
+** if any, is not invoked.
+**
+**
+** xRowCount(pFts5, pnRow)
+**
+** This function is used to retrieve the total number of rows in the table.
+** In other words, the same value that would be returned by:
+**
+** SELECT count(*) FROM ftstable;
*/
struct Fts5ExtensionApi {
int iVersion; /* Currently always set to 1 */
void *(*xUserData)(Fts5Context*);
int (*xColumnCount)(Fts5Context*);
- int (*xColumnAvgSize)(Fts5Context*, int iCol, int *pnToken);
+ int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
+ int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
+
int (*xTokenize)(Fts5Context*,
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
int (*xPoslist)(Fts5Context*, int iPhrase, int *pi, sqlite3_int64 *piPos);
+
+ int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
+ int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
+ );
+ int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
+ void *(*xGetAuxdata)(Fts5Context*, int bClear);
};
#define FTS5_POS2COLUMN(iPos) (int)(iPos >> 32)
void sqlite3Fts5StorageStmtRelease(Fts5Storage *p, int eStmt, sqlite3_stmt*);
int sqlite3Fts5StorageDocsize(Fts5Storage *p, i64 iRowid, int *aCol);
-int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int iCol, int *pnAvg);
+int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken); /* Total (not average) size of column iCol */
+int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow);
/*
int sqlite3Fts5ExprPhraseSize(Fts5Expr*, int iPhrase);
int sqlite3Fts5ExprPoslist(Fts5Expr*, int, const u8 **);
+int sqlite3Fts5ExprPhraseExpr(Fts5Config*, Fts5Expr*, int, Fts5Expr**);
+
/*******************************************
** The fts5_expr.c API above this point is used by the other hand-written
** C code in this module. The interfaces below this point are called by
*/
#include "fts5Int.h"
+#include <math.h>
typedef struct SnippetPhrase SnippetPhrase;
typedef struct SnippetIter SnippetIter;
int iPrint;
int iMatchto;
- int iBit0;
int iLast;
int *aiStart = ctx.aiStart - ctx.iFirst;
const char *zEllip = "<b>...</b>";
int nToken = -15;
int nAbs;
- int nFrag; /* Number of fragments to return */
int rc;
SnippetIter *pIter = 0;
if( rc==SQLITE_OK ){
Fts5Buffer buf; /* Result buffer */
int nBestScore = 0; /* Score of best snippet found */
- int n; /* Size of column snippet is from in bytes */
- int i; /* Used to iterate through phrases */
for(fts5SnippetIterFirst(pIter);
pIter->iLast>=0;
}
}
+typedef struct Fts5GatherCtx Fts5GatherCtx;
+struct Fts5GatherCtx { /* State shared between fts5GatherTotals() and its callback */
+ int nCol; /* Number of columns, as reported by xColumnCount() */
+ int iPhrase; /* Phrase currently being queried */
+ int *anVal; /* Counters, indexed by (iPhrase*nCol + iCol) */
+};
+
+/*
+** xQueryPhrase() callback used by fts5GatherTotals(). It is invoked once
+** for each row visited by the single-phrase query. For every distinct
+** column that appears in the position list of phrase 0 of that query, the
+** counter for (current phrase, column) is incremented once.
+**
+** Consecutive positions in the same column are counted a single time, so
+** each counter ends up holding a number of rows rather than a number of
+** phrase instances. Always returns SQLITE_OK.
+*/
+static int fts5GatherCallback(
+  const Fts5ExtensionApi *pApi,
+  Fts5Context *pFts,
+  void *pUserData
+){
+  Fts5GatherCtx *pGather = (Fts5GatherCtx*)pUserData;
+  int iOff = 0;                   /* Iterator state for xPoslist() */
+  int iLastCol = -1;              /* Column of the previous position */
+  i64 iPos = 0;                   /* Current position */
+
+  while( pApi->xPoslist(pFts, 0, &iOff, &iPos)==0 ){
+    int iCol = FTS5_POS2COLUMN(iPos);
+    if( iCol==iLastCol ) continue;
+    pGather->anVal[pGather->iPhrase * pGather->nCol + iCol]++;
+    iLastCol = iCol;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Build an array of integers recording, for every phrase of the current
+** query and every column of the table, how many rows contain at least
+** one instance of that phrase in that column.
+**
+** With nCol the value returned by xColumnCount() and nPhrase the value
+** returned by xPhraseCount(), the array has (nPhrase*nCol) entries and
+** the count for phrase iPhrase and column iCol is stored at:
+**
+**     anVal[iPhrase * nCol + iCol]
+**
+** On success SQLITE_OK is returned and *panVal points to the new array,
+** which the caller must release using sqlite3_free(). If the allocation
+** fails (SQLITE_NOMEM) or any xQueryPhrase() call returns an error, that
+** error code is returned and *panVal is set to 0.
+*/
+static int fts5GatherTotals(
+  const Fts5ExtensionApi *pApi,   /* API offered by current FTS version */
+  Fts5Context *pFts,              /* First arg to pass to pApi functions */
+  int **panVal
+){
+  int nPhrase = pApi->xPhraseCount(pFts);
+  int nCol = pApi->xColumnCount(pFts);
+  int nByte = nCol * nPhrase * sizeof(int);
+  int rc = SQLITE_OK;
+  int iPhrase;                    /* For iterating through expression phrases */
+  Fts5GatherCtx sCtx;
+
+  sCtx.nCol = nCol;
+  sCtx.anVal = (int*)sqlite3_malloc(nByte);
+  if( sCtx.anVal ){
+    memset(sCtx.anVal, 0, nByte);
+
+    /* Run one single-phrase query per phrase, stopping at the first error. */
+    for(iPhrase=0; iPhrase<nPhrase && rc==SQLITE_OK; iPhrase++){
+      sCtx.iPhrase = iPhrase;
+      rc = pApi->xQueryPhrase(
+          pFts, iPhrase, (void*)&sCtx, fts5GatherCallback
+      );
+    }
+
+    if( rc!=SQLITE_OK ){
+      sqlite3_free(sCtx.anVal);
+      sCtx.anVal = 0;
+    }
+  }else{
+    rc = SQLITE_NOMEM;
+  }
+
+  *panVal = sCtx.anVal;
+  return rc;
+}
+
+typedef struct Fts5Bm25Context Fts5Bm25Context;
+struct Fts5Bm25Context {
+  int nPhrase;                    /* Number of phrases in the query */
+  int nCol;                       /* Number of columns in the table */
+  double *aIDF;                   /* IDF values, indexed (iPhrase*nCol+iCol) */
+  double *aAvg;                   /* Average size of each column in tokens */
+};
+
static void fts5Bm25Function(
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
){
- assert( 0 );
+  /* Implementation of the bm25() ranking function.
+  **
+  ** The first invocation within a query computes the per-column average
+  ** sizes and the per-(phrase,column) IDF values and caches them with
+  ** xSetAuxdata(). Every invocation then sums, over each phrase and each
+  ** column in which the phrase occurs in the current row:
+  **
+  **     IDF * nHit * (k1+1) / (nHit + k1 * (1 - B + B * sz/avg))
+  **
+  ** where nHit is the number of phrase instances in the column, sz is the
+  ** size of the column in the current row and avg is the average size of
+  ** the column. The sum is returned as the SQL result. Any error is
+  ** reported using sqlite3_result_error_code().
+  */
+  const double k1 = 1.2;
+  const double B = 0.75;
+
+  int rc = SQLITE_OK;
+  Fts5Bm25Context *p;
+
+  p = pApi->xGetAuxdata(pFts, 0);
+  if( p==0 ){
+    int *anVal = 0;
+    int ic;                       /* For iterating through columns */
+    int ip;                       /* For iterating through phrases */
+    i64 nRow = 0;                 /* Total number of rows in table */
+    int nPhrase = pApi->xPhraseCount(pFts);
+    int nCol = pApi->xColumnCount(pFts);
+    int nByte = sizeof(Fts5Bm25Context)
+              + sizeof(double) * nPhrase * nCol   /* aIDF[] */
+              + sizeof(double) * nCol;            /* aAvg[] */
+
+    p = (Fts5Bm25Context*)sqlite3_malloc(nByte);
+    if( p==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      memset(p, 0, nByte);
+      /* Record the dimensions. The scoring loop below iterates over
+      ** p->nPhrase and indexes aIDF[] using p->nCol, so leaving these at
+      ** zero would make every score 0.0. */
+      p->nPhrase = nPhrase;
+      p->nCol = nCol;
+      p->aAvg = (double*)&p[1];
+      p->aIDF = (double*)&p->aAvg[nCol];
+    }
+
+    if( rc==SQLITE_OK ){
+      rc = pApi->xRowCount(pFts, &nRow);
+      assert( nRow>0 || rc!=SQLITE_OK );
+    }
+
+    for(ic=0; rc==SQLITE_OK && ic<nCol; ic++){
+      i64 nToken = 0;
+      rc = pApi->xColumnTotalSize(pFts, ic, &nToken);
+      /* aAvg[ic] stays 0.0 rather than dividing by a zero row count. */
+      if( rc==SQLITE_OK && nRow>0 ){
+        p->aAvg[ic] = (double)nToken / (double)nRow;
+      }
+    }
+
+    if( rc==SQLITE_OK ){
+      rc = fts5GatherTotals(pApi, pFts, &anVal);
+    }
+    for(ic=0; rc==SQLITE_OK && ic<nCol; ic++){
+      for(ip=0; ip<nPhrase; ip++){
+        int idx = ip * nCol + ic;
+        double idf = log( (0.5 + nRow - anVal[idx]) / (0.5 + anVal[idx]) );
+        /* Clamp negative (and NaN) values to zero. */
+        p->aIDF[idx] = (idf>0.0) ? idf : 0.0;
+      }
+    }
+
+    sqlite3_free(anVal);
+    if( rc==SQLITE_OK ){
+      rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
+    }
+    if( rc!=SQLITE_OK ){
+      sqlite3_free(p);
+      p = 0;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    int ip;
+    double score = 0.0;
+
+    for(ip=0; rc==SQLITE_OK && ip<p->nPhrase; ip++){
+      int iPrev = 0;              /* Column that the nHit hits belong to */
+      int nHit = 0;               /* Hits counted so far in column iPrev */
+      int i = 0;                  /* Iterator state for xPoslist() */
+      i64 iPos = 0;
+      int bEof = 0;               /* True once xPoslist() is exhausted */
+
+      while( rc==SQLITE_OK && bEof==0 ){
+        int iCol = -1;
+        bEof = pApi->xPoslist(pFts, ip, &i, &iPos);
+        if( bEof==0 ) iCol = FTS5_POS2COLUMN(iPos);
+
+        /* Flush the pending hits when the column changes and also at the
+        ** end of the position list, so that the final column contributes
+        ** to the score as well. */
+        if( nHit>0 && iCol!=iPrev ){
+          int sz = 0;
+          int idx = ip * p->nCol + iPrev;
+          rc = pApi->xColumnSize(pFts, iPrev, &sz);
+          if( rc==SQLITE_OK ){
+            /* Normalise by the average size of the column the hits are
+            ** in (iPrev), not the column of the next position. */
+            double avg = p->aAvg[iPrev];
+            double len = (avg>0.0) ? ((double)sz / avg) : 1.0;
+            score += p->aIDF[idx] * nHit * (k1+1.0) /
+                     (nHit + k1 * (1.0 - B + B * len));
+          }
+          nHit = 0;
+        }
+
+        if( bEof==0 ){
+          nHit++;
+          iPrev = iCol;
+        }
+      }
+    }
+
+    if( rc==SQLITE_OK ){
+      sqlite3_result_double(pCtx, score);
+    }
+
+  }
+
+  if( rc!=SQLITE_OK ){
+    sqlite3_result_error_code(pCtx, rc);
+  }
}
static int fts5TestCallback(
nCol = pApi->xColumnCount(pFts);
if( zReq==0 ){
- sqlite3Fts5BufferAppendPrintf(&rc, &s, "columnavgsize ");
+ sqlite3Fts5BufferAppendPrintf(&rc, &s, "columntotalsize ");
}
- if( 0==zReq || 0==sqlite3_stricmp(zReq, "columnavgsize") ){
+ if( 0==zReq || 0==sqlite3_stricmp(zReq, "columntotalsize") ){
if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "{");
for(i=0; rc==SQLITE_OK && i<nCol; i++){
- int colsz = 0;
- rc = pApi->xColumnAvgSize(pFts, i, &colsz);
+ i64 colsz = 0;
+ rc = pApi->xColumnTotalSize(pFts, i, &colsz);
sqlite3Fts5BufferAppendPrintf(&rc, &s, "%s%d", i==0?"":" ", colsz);
}
if( zReq==0 && nCol>1 ) sqlite3Fts5BufferAppendPrintf(&rc, &s, "}");
}
if( zReq==0 ){
- sqlite3Fts5BufferAppendPrintf(&rc, &s, " rowid ");
+ sqlite3Fts5BufferAppendPrintf(&rc, &s, " queryphrase ");
+ }
+ if( 0==zReq || 0==sqlite3_stricmp(zReq, "queryphrase") ){
+ int ic, ip;
+ int *anVal = 0;
+ Fts5Buffer buf1;
+ memset(&buf1, 0, sizeof(Fts5Buffer));
+
+ if( rc==SQLITE_OK ){
+ anVal = (int*)pApi->xGetAuxdata(pFts, 0);
+ if( anVal==0 ){
+ rc = fts5GatherTotals(pApi, pFts, &anVal);
+ if( rc==SQLITE_OK ){
+ rc = pApi->xSetAuxdata(pFts, (void*)anVal, sqlite3_free);
+ }
+ }
+ }
+
+ for(ip=0; rc==SQLITE_OK && ip<nPhrase; ip++){
+ if( ip>0 ) sqlite3Fts5BufferAppendString(&rc, &buf1, " ");
+ if( nCol>1 ) sqlite3Fts5BufferAppendString(&rc, &buf1, "{");
+ for(ic=0; ic<nCol; ic++){
+ int iVal = anVal[ip * nCol + ic];
+ sqlite3Fts5BufferAppendPrintf(&rc, &buf1, "%s%d", ic==0?"":" ", iVal);
+ }
+ if( nCol>1 ) sqlite3Fts5BufferAppendString(&rc, &buf1, "}");
+ }
+
+ if( zReq==0 ){
+ sqlite3Fts5BufferAppendListElem(&rc, &s, (const char*)buf1.p, buf1.n);
+ }else{
+ sqlite3Fts5BufferAppendString(&rc, &s, (const char*)buf1.p);
+ }
+ sqlite3_free(buf1.p);
+ }
+
+ if( zReq==0 ){
+ sqlite3Fts5BufferAppendString(&rc, &s, " rowid ");
}
if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowid") ){
iRowid = pApi->xRowid(pFts);
}
if( zReq==0 ){
- sqlite3Fts5BufferAppendPrintf(&rc, &s, " tokenize ");
+ sqlite3Fts5BufferAppendString(&rc, &s, " rowcount ");
+ }
+ if( 0==zReq || 0==sqlite3_stricmp(zReq, "rowcount") ){
+ i64 nRow;
+ rc = pApi->xRowCount(pFts, &nRow);
+ sqlite3Fts5BufferAppendPrintf(&rc, &s, "%lld", nRow);
+ }
+
+ if( zReq==0 ){
+ sqlite3Fts5BufferAppendString(&rc, &s, " tokenize ");
}
if( 0==zReq || 0==sqlite3_stricmp(zReq, "tokenize") ){
Fts5Buffer buf;
fts5_extension_function xFunc;/* Callback function */
void (*xDestroy)(void*); /* Destructor function */
} aBuiltin [] = {
- { "snippet", 0, fts5SnippetFunction, 0 },
- { "fts5_test", 0, fts5TestFunction, 0 },
+ { "bm25", 0, fts5Bm25Function, 0 },
+ { "snippet", 0, fts5SnippetFunction, 0 },
+ { "fts5_test", 0, fts5TestFunction, 0 },
};
int rc = SQLITE_OK; /* Return code */
return sParse.rc;
}
+/*
+** Return a copy of nul-terminated string zIn in memory obtained from
+** sqlite3_malloc(). This is a no-op returning 0 if *pRc is anything other
+** than SQLITE_OK when it is called. If the allocation fails, *pRc is set
+** to SQLITE_NOMEM and 0 is returned.
+*/
+static char *fts5ExprStrdup(int *pRc, const char *zIn){
+  char *zCopy;
+  int nByte;
+
+  if( *pRc!=SQLITE_OK ) return 0;
+
+  nByte = strlen(zIn) + 1;
+  zCopy = sqlite3_malloc(nByte);
+  if( zCopy==0 ){
+    *pRc = SQLITE_NOMEM;
+    return 0;
+  }
+  memcpy(zCopy, zIn, nByte);
+  return zCopy;
+}
+
+/*
+** Allocate and return nByte bytes of zeroed memory from sqlite3_malloc().
+** This is a no-op returning 0 if *pRc is anything other than SQLITE_OK
+** when it is called. If the allocation fails, *pRc is set to SQLITE_NOMEM
+** and 0 is returned.
+*/
+static void *fts5ExprMalloc(int *pRc, int nByte){
+  void *pNew;
+
+  if( *pRc!=SQLITE_OK ) return 0;
+
+  pNew = sqlite3_malloc(nByte);
+  if( pNew==0 ){
+    *pRc = SQLITE_NOMEM;
+  }else{
+    memset(pNew, 0, nByte);
+  }
+  return pNew;
+}
+
+/*
+** Create a new FTS5 expression by cloning phrase iPhrase of the
+** expression passed as the second argument.
+**
+** If successful, SQLITE_OK is returned and *ppNew is set to point to the
+** new expression, which consists of a single FTS5_STRING node holding a
+** copy of the phrase. If iPhrase is not a valid phrase number for pExpr
+** (or pExpr is NULL), SQLITE_RANGE is returned. If an allocation fails,
+** SQLITE_NOMEM is returned. In both error cases *ppNew is set to 0 and
+** nothing is left allocated.
+*/
+int sqlite3Fts5ExprPhraseExpr(
+  Fts5Config *pConfig,
+  Fts5Expr *pExpr,
+  int iPhrase,
+  Fts5Expr **ppNew
+){
+  int rc = SQLITE_OK;             /* Return code */
+  Fts5ExprPhrase *pOrig = 0;      /* The phrase extracted from pExpr */
+  int i;                          /* Used to iterate through phrase terms */
+
+  /* Components of the new expression object */
+  Fts5Expr *pNew;
+  Fts5ExprPhrase **apPhrase;
+  Fts5ExprNode *pNode;
+  Fts5ExprNearset *pNear;
+  Fts5ExprPhrase *pCopy;
+
+  /* iPhrase is supplied by extension-function code. Refuse to index
+  ** outside of the apPhrase[] array. */
+  if( pExpr==0 || iPhrase<0 || iPhrase>=pExpr->nPhrase ){
+    *ppNew = 0;
+    return SQLITE_RANGE;
+  }
+
+  /* Once rc is set to an error code, each subsequent fts5ExprMalloc()
+  ** call is a no-op that returns 0. */
+  pOrig = pExpr->apPhrase[iPhrase];
+  pNew = (Fts5Expr*)fts5ExprMalloc(&rc, sizeof(Fts5Expr));
+  apPhrase = (Fts5ExprPhrase**)fts5ExprMalloc(&rc, sizeof(Fts5ExprPhrase*));
+  pNode = (Fts5ExprNode*)fts5ExprMalloc(&rc, sizeof(Fts5ExprNode));
+  pNear = (Fts5ExprNearset*)fts5ExprMalloc(&rc,
+      sizeof(Fts5ExprNearset) + sizeof(Fts5ExprPhrase*)
+  );
+  pCopy = (Fts5ExprPhrase*)fts5ExprMalloc(&rc,
+      sizeof(Fts5ExprPhrase) + sizeof(Fts5ExprTerm) * pOrig->nTerm
+  );
+
+  for(i=0; rc==SQLITE_OK && i<pOrig->nTerm; i++){
+    pCopy->aTerm[i].zTerm = fts5ExprStrdup(&rc, pOrig->aTerm[i].zTerm);
+    pCopy->aTerm[i].bPrefix = pOrig->aTerm[i].bPrefix;
+  }
+
+  if( rc==SQLITE_OK ){
+    /* All the allocations succeeded. Put the expression object together. */
+    pNew->pIndex = pExpr->pIndex;
+    pNew->pRoot = pNode;
+    pNew->nPhrase = 1;
+    pNew->apPhrase = apPhrase;
+    pNew->apPhrase[0] = pCopy;
+
+    pNode->eType = FTS5_STRING;
+    pNode->pNear = pNear;
+
+    pNear->iCol = -1;
+    pNear->nPhrase = 1;
+    pNear->apPhrase[0] = pCopy;
+
+    pCopy->nTerm = pOrig->nTerm;
+    pCopy->pNode = pNode;
+  }else{
+    /* At least one allocation failed. Free whatever was allocated. The
+    ** objects must be released even when pCopy itself is NULL, otherwise
+    ** the earlier allocations leak and a half-built pNew is handed back
+    ** together with an error code. sqlite3_free(0) is a harmless no-op.
+    ** pCopy was zeroed when allocated, so any zTerm not yet copied is 0. */
+    if( pCopy ){
+      for(i=0; i<pOrig->nTerm; i++){
+        sqlite3_free(pCopy->aTerm[i].zTerm);
+      }
+    }
+    sqlite3_free(pCopy);
+    sqlite3_free(pNear);
+    sqlite3_free(pNode);
+    sqlite3_free(apPhrase);
+    sqlite3_free(pNew);
+    pNew = 0;
+  }
+
+  *ppNew = pNew;
+  return rc;
+}
+
/*
** Free the expression node object passed as the only argument.
*/
return rc;
}
-int sqlite3Fts5StorageAvgsize(Fts5Storage *p, int iCol, int *pnAvg){
+int sqlite3Fts5StorageSize(Fts5Storage *p, int iCol, i64 *pnToken){ /* Report p->aTotalSize[iCol] via *pnToken */
int rc = fts5StorageLoadTotals(p);
if( rc==SQLITE_OK ){
- int nAvg = 1;
- if( p->nTotalRow ){
- nAvg = (int)((p->aTotalSize[iCol] + (p->nTotalRow/2)) / p->nTotalRow);
- if( nAvg<1 ) nAvg = 1;
- *pnAvg = nAvg;
- }
+ *pnToken = p->aTotalSize[iCol]; /* Written only on success; iCol is not range-checked here */
+ }
+ return rc;
+}
+
+int sqlite3Fts5StorageRowCount(Fts5Storage *p, i64 *pnRow){ /* Report p->nTotalRow via *pnRow */
+ int rc = fts5StorageLoadTotals(p); /* NOTE(review): presumably refreshes nTotalRow/aTotalSize - confirm */
+ if( rc==SQLITE_OK ){
+ *pnRow = p->nTotalRow; /* Written only on success; *pnRow is left untouched on error */
}
return rc;
}
-C Add\sa\ssnippet()\sfunction\sto\sfts5.
-D 2014-07-23T19:31:56.454
+C Add\sextension\sapis\sxRowCount,\sxQueryPhrase,\sxSetAuxdata\sand\sxGetAuxdata.\sAnd\sa\sranking\sfunction\sthat\suses\sall\sof\sthe\sabove.
+D 2014-07-25T20:30:47.445
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368
-F ext/fts5/fts5.c 6f859d444eb8be46cb3f7aba3aaae369c5b26809
-F ext/fts5/fts5.h 57325b418b26dcd60be5bc8aab05b33767d81590
-F ext/fts5/fts5Int.h 12d03496152b716e63a5380e396b776fbefa2065
-F ext/fts5/fts5_aux.c cba929fb13931c9b8be7d572991e648b98f14cf2
+F ext/fts5/fts5.c 1496aff16dd9b0a013d14b6c8cf5b7df8c170abe
+F ext/fts5/fts5.h 8ace10d5b249a3baa983c79e7a1306d2a79cfd6a
+F ext/fts5/fts5Int.h 92fb9c4f759674ef569aebc338f363e167a8933c
+F ext/fts5/fts5_aux.c f8bed7a86b65cb07cffdafbf4f0611f127b36274
F ext/fts5/fts5_buffer.c 248c61ac9fec001602efc72a45704f3b8d367c00
F ext/fts5/fts5_config.c 94f1b4cb4de6a7cd5780c14adb0198e289df8cef
-F ext/fts5/fts5_expr.c 2911813db174afa28b69ccc7031b6dd80293b241
+F ext/fts5/fts5_expr.c 65c1918002f2ec1755e4c0c28bf007659409fbd8
F ext/fts5/fts5_index.c 68d2d41b5c6d2f8838c3d6ebdc8b242718b8e997
-F ext/fts5/fts5_storage.c 9a2744f492413395a0e75f20c19b797c801a7308
+F ext/fts5/fts5_storage.c 2866e7e1de9dc851756c3a9c76b6e1d75e0facb7
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb
F test/fts5ab.test dc04ed48cf93ca957d174406e6c192f2ff4f3397
F test/fts5ac.test 9be418d037763f4cc5d86f4239db41fc86bb4f85
F test/fts5ad.test 2ed38bbc865678cb2905247120d02ebba7f20e07
-F test/fts5ae.test fe9db78201bbb87c6f82b72a14b946d0f7fc3026
-F test/fts5af.test a2980528a04b67ac4690e6c02ebe9455f45c9a35
+F test/fts5ae.test 1424ec557d543ace1f3cf6d231b247bc7b9f337c
+F test/fts5af.test 5f53d0a52280b63caf5a519d6994c4d428835155
F test/fts5ea.test ff43b40f8879ba50b82def70f2ab67c195d1a1d4
F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d
F test/func.test ae97561957aba6ca9e3a7b8a13aac41830d701ef
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P d44d3a8518ff7a1a3e2c0ab97493aa590676ee8c
-R 638d6826a594d773b5778bd6943c3d96
+P bdc58fd28a63ac9632c3df6c7768a9a236566605
+R 2e8cb20122478987f116ef8ff9f6144b
U dan
-Z 30db824dafb73f9c4c6895383aa25ed9
+Z 5dd5c36b8a0e52d63a87d23e7179571f
-bdc58fd28a63ac9632c3df6c7768a9a236566605
\ No newline at end of file
+c4d50428ab97f77e6721c4f8d03eaaf3ea91f3eb
\ No newline at end of file
}
do_execsql_test 5.3 {
- SELECT rowid, fts5_test(t5, 'columnavgsize') FROM t5 WHERE t5 MATCH 'a'
+ SELECT rowid, fts5_test(t5, 'columntotalsize') FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
- 3 {2 2}
- 2 {2 2}
- 1 {2 2}
+ 3 {5 7}
+ 2 {5 7}
+ 1 {5 7}
}
do_execsql_test 5.4 {
INSERT INTO t5 VALUES('x y z', 'v w x y z');
- SELECT rowid, fts5_test(t5, 'columnavgsize') FROM t5 WHERE t5 MATCH 'a'
+ SELECT rowid, fts5_test(t5, 'columntotalsize') FROM t5 WHERE t5 MATCH 'a'
ORDER BY rowid DESC;
} {
- 3 {2 3}
- 2 {2 3}
- 1 {2 3}
+ 3 {8 12}
+ 2 {8 12}
+ 1 {8 12}
}
#-------------------------------------------------------------------------
1 {{there are more} {things in heaven and earth}}
}
+#-------------------------------------------------------------------------
+# Test the xQueryPhrase() API
+#
+reset_db
+do_execsql_test 7.1 {
+ CREATE VIRTUAL TABLE t7 USING fts5(x, y);
+}
+do_test 7.2 {
+ foreach {x y} {
+ {q i b w s a a e l o} {i b z a l f p t e u}
+ {b a z t a l o x d i} {b p a d b f h d w y}
+ {z m h n p p u i e g} {v h d v b x j j c z}
+ {a g i m v a u c b i} {p k s o t l r t b m}
+ {v v c j o d a s c p} {f f v o k p o f o g}
+ } {
+ execsql {INSERT INTO t7 VALUES($x, $y)}
+ }
+ execsql { SELECT count(*) FROM t7 }
+} {5}
+
+foreach {tn q res} {
+ 1 a {{4 2}}
+ 2 b {{3 4}}
+ 3 c {{2 1}}
+ 4 d {{2 2}}
+ 5 {a AND b} {{4 2} {3 4}}
+ 6 {a OR b OR c OR d} {{4 2} {3 4} {2 1} {2 2}}
+} {
+ do_execsql_test 7.3.$tn {
+ SELECT fts5_test(t7, 'queryphrase') FROM t7 WHERE t7 MATCH $q LIMIT 1
+ } [list $res]
+}
+
+do_execsql_test 7.4 {
+ SELECT fts5_test(t7, 'rowcount') FROM t7 WHERE t7 MATCH 'a';
+} {5 5 5 5}
+
+
+
finish_test
CREATE VIRTUAL TABLE t1 USING fts5(x, y);
}
+proc do_snippet_test {tn doc match res} {
+  # Run two tests, $tn.1 and $tn.2: $doc is stored in the first column of t1, then in the second.
+ uplevel #0 [list set v1 $doc]
+ uplevel #0 [list set v2 $match]
+  # v1/v2 are set at global level; presumably the SQL below is evaluated there - confirm.
+ do_execsql_test $tn.1 {
+ DELETE FROM t1;
+ INSERT INTO t1 VALUES($v1, NULL);
+ SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
+ } [list $res]
+  # Same document and query, with the text in the other column.
+ do_execsql_test $tn.2 {
+ DELETE FROM t1;
+ INSERT INTO t1 VALUES(NULL, $v1);
+ SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH $v2;
+ } [list $res]
+
+}
+
foreach {tn doc res} {
7.5 {o o o o X o o X o} {...o o [X] o o [X] o}
7.6 {o o o o o X o o X} {...o o o [X] o o [X]}
} {
- do_execsql_test 1.$tn.1 {
- DELETE FROM t1;
- INSERT INTO t1 VALUES($doc, NULL);
- SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X';
- } [list $res]
-
- do_execsql_test 1.$tn.2 {
- DELETE FROM t1;
- INSERT INTO t1 VALUES(NULL, $doc);
- SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X';
- } [list $res]
+ do_snippet_test 1.$tn $doc X $res
}
foreach {tn doc res} {
3.8 {o o o o o o o X Y} {...o o o o o [X Y]}
} {
- do_execsql_test 2.$tn.1 {
- DELETE FROM t1;
- INSERT INTO t1 VALUES($doc, NULL);
- SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y';
- } [list $res]
-
- do_execsql_test 2.$tn.2 {
- DELETE FROM t1;
- INSERT INTO t1 VALUES(NULL, $doc);
- SELECT snippet(t1, '[', ']', '...', 7) FROM t1 WHERE t1 MATCH 'X+Y';
- } [list $res]
+  # Keep the "2." prefix that these phrase tests had before the refactor.
+  # The single-term loop already uses "1.", so reusing it would produce
+  # duplicate test names.
+  do_snippet_test 2.$tn $doc "X + Y" $res
}
finish_test