#include <assert.h>
#include <ctype.h>
-#define SNIPPET_BUFFER_CHUNK 64
-#define SNIPPET_BUFFER_SIZE SNIPPET_BUFFER_CHUNK*4
-#define SNIPPET_BUFFER_MASK (SNIPPET_BUFFER_SIZE-1)
+/*
+** Used as an fts3ExprIterate() context when loading phrase doclists into
+** Fts3Expr.aDoclist[]/nDoclist.
+*/
+typedef struct LoadDoclistCtx LoadDoclistCtx;
+struct LoadDoclistCtx {
+ Fts3Table *pTab; /* FTS3 Table */
+ int nPhrase; /* Number of phrases seen so far */
+ int nToken; /* Number of tokens seen so far */
+};
+
+/*
+** The following types are used as part of the implementation of the
+** fts3BestSnippet() routine.
+*/
+typedef struct SnippetIter SnippetIter;
+typedef struct SnippetPhrase SnippetPhrase;
+typedef struct SnippetFragment SnippetFragment;
+
+struct SnippetIter {
+ Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
+ int iCol; /* Extract snippet from this column */
+ int nSnippet; /* Requested snippet length (in tokens) */
+ int nPhrase; /* Number of phrases in query */
+ SnippetPhrase *aPhrase; /* Array of size nPhrase */
+ int iCurrent; /* First token of current snippet */
+};
+
+struct SnippetPhrase {
+ int nToken; /* Number of tokens in phrase */
+ char *pList; /* Pointer to start of phrase position list */
+ int iHead; /* Next value in position list */
+ char *pHead; /* Position list data following iHead */
+ int iTail; /* Next value in trailing position list */
+ char *pTail; /* Position list data following iTail */
+};
+
+struct SnippetFragment {
+ int iCol; /* Column snippet is extracted from */
+ int iPos; /* Index of first token in snippet */
+ u64 covered; /* Mask of query phrases covered */
+ u64 hlmask; /* Mask of snippet terms to highlight */
+};
+
+/*
+** This type is used as an fts3ExprIterate() context object while
+** accumulating the data returned by the matchinfo() function.
+*/
+typedef struct MatchInfo MatchInfo;
+struct MatchInfo {
+ Fts3Cursor *pCursor; /* FTS3 Cursor */
+ int nCol; /* Number of columns in table */
+ u32 *aMatchinfo; /* Pre-allocated buffer */
+};
+
+
+
+/*
+** The snippet() and offsets() functions both return text values. An instance
+** of the following structure is used to accumulate those values while the
+** functions are running. See fts3StringAppend() for details.
+*/
+typedef struct StrBuffer StrBuffer;
+struct StrBuffer {
+ char *z; /* Pointer to buffer containing string */
+ int n; /* Length of z in bytes (excl. nul-term) */
+ int nAlloc; /* Allocated size of buffer z in bytes */
+};
+
+
+/*
+** This function is used to help iterate through a position-list. A position
+** list is a list of unique integers, sorted from smallest to largest. Each
+** element of the list is represented by an FTS3 varint that takes the value
+** of the difference between the current element and the previous one plus
+** two. For example, to store the position-list:
+**
+** 4 9 113
+**
+** the three varints:
+**
+** 6 7 106
+**
+** are encoded.
+**
+** When this function is called, *pp points to the start of an element of
+** the list. *piPos contains the value of the previous entry in the list.
+** After it returns, *piPos contains the value of the next element of the
+** list and *pp is advanced to the following varint.
+*/
static void fts3GetDeltaPosition(char **pp, int *piPos){
  int iEncoded;                   /* Raw varint value read from the list */
  int nByte;                      /* Number of bytes the varint occupied */

  /* Decode the varint at *pp, then step the read pointer past it. */
  nByte = sqlite3Fts3GetVarint32(*pp, &iEncoded);
  *pp = *pp + nByte;

  /* The stored value is biased by two; subtract the bias and add the
  ** result to the running position held in *piPos.
  */
  *piPos = *piPos + (iEncoded - 2);
}
+/*
+** Helper function for fts3ExprIterate() (see below).
+*/
static int fts3ExprIterate2(
Fts3Expr *pExpr, /* Expression to iterate phrases of */
int *piPhrase, /* Pointer to phrase counter */
int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
void *pCtx /* Second argument to pass to callback */
){
- int rc;
- int eType = pExpr->eType;
+ int rc; /* Return code */
+ int eType = pExpr->eType; /* Type of expression node pExpr */
+
if( eType!=FTSQUERY_PHRASE ){
assert( pExpr->pLeft && pExpr->pRight );
rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx);
int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */
void *pCtx /* Second argument to pass to callback */
){
- int iPhrase = 0;
+ int iPhrase = 0; /* Variable used as the phrase counter */
return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx);
}
-typedef struct LoadDoclistCtx LoadDoclistCtx;
-struct LoadDoclistCtx {
- Fts3Table *pTab; /* FTS3 Table */
- int nPhrase; /* Number of phrases so far */
- int nToken; /* Number of tokens so far */
-};
-
+/*
+** The argument to this function is always a phrase node. Its doclist
+** (Fts3Expr.aDoclist[]) and the doclists associated with all phrase nodes
+** to the left of this one in the query tree have already been loaded.
+**
+** If this phrase node is part of a series of phrase nodes joined by
+** NEAR operators (and is not the left-most of said series), then elements are
+** removed from the phrase's doclist consistent with the NEAR restriction. If
+** required, elements may be removed from the doclists of phrases to the
+** left of this one that are part of the same series of NEAR operator
+** connected phrases.
+**
+** If an OOM error occurs, SQLITE_NOMEM is returned. Otherwise, SQLITE_OK.
+*/
static int fts3ExprNearTrim(Fts3Expr *pExpr){
int rc = SQLITE_OK; /* Return code */
Fts3Expr *pParent = pExpr->pParent; /* Parent node of pExpr */
return rc;
}
+/*
+** This is an fts3ExprIterate() callback used while loading the doclists
+** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
+** fts3ExprLoadDoclists().
+*/
static int fts3ExprLoadDoclistsCb1(Fts3Expr *pExpr, int iPhrase, void *ctx){
int rc = SQLITE_OK; /* Return code */
LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; /* Callback context, cast from ctx */
return rc;
}
+/*
+** This is an fts3ExprIterate() callback used while loading the doclists
+** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also
+** fts3ExprLoadDoclists().
+*/
static int fts3ExprLoadDoclistsCb2(Fts3Expr *pExpr, int iPhrase, void *ctx){
if( pExpr->aDoclist ){
pExpr->pCurrent = pExpr->aDoclist;
return SQLITE_OK;
}
+/*
+** Load the doclists for each phrase in the query associated with FTS3 cursor
+** pCsr.
+**
+** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable
+** phrases in the expression (all phrases except those directly or
+** indirectly descended from the right-hand-side of a NOT operator). If
+** pnToken is not NULL, then it is set to the number of tokens in all
+** matchable phrases of the expression.
+*/
static int fts3ExprLoadDoclists(
- Fts3Cursor *pCsr,
+ Fts3Cursor *pCsr, /* Fts3 cursor for current query */
int *pnPhrase, /* OUT: Number of phrases in query */
int *pnToken /* OUT: Number of tokens in query */
){
- int rc;
- LoadDoclistCtx sCtx = {0, 0, 0};
+ int rc; /* Return Code */
+ LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */
sCtx.pTab = (Fts3Table *)pCsr->base.pVtab;
rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb1, (void *)&sCtx);
if( rc==SQLITE_OK ){
return rc;
}
-/*
-** The following types are used as part of the implementation of the
-** fts3BestSnippet() routine.
-*/
-typedef struct SnippetCtx SnippetCtx;
-typedef struct SnippetPhrase SnippetPhrase;
-
-struct SnippetCtx {
- Fts3Cursor *pCsr; /* Cursor snippet is being generated from */
- int iCol; /* Extract snippet from this column */
- int nSnippet; /* Requested snippet length (in tokens) */
- int nPhrase; /* Number of phrases in query */
- SnippetPhrase *aPhrase; /* Array of size nPhrase */
- int iCurrent; /* First token of current snippet */
-};
-struct SnippetPhrase {
- int nToken; /* Number of tokens in phrase */
- char *pList; /* Pointer to start of phrase position list */
- int iHead; /* Next value in position list */
- char *pHead; /* Position list data following iHead */
- int iTail; /* Next value in trailing position list */
- char *pTail; /* Position list data following iTail */
-};
-
/*
** Advance the position list iterator specified by the first two
** arguments so that it points to the first element with a value greater
}
}
-static int fts3SnippetNextCandidate(SnippetCtx *pIter){
+/*
+** Advance the snippet iterator to the next candidate snippet.
+*/
+static int fts3SnippetNextCandidate(SnippetIter *pIter){
int i; /* Loop counter */
if( pIter->iCurrent<0 ){
- /* The SnippetCtx object has just been initialized. The first snippet
+ /* The SnippetIter object has just been initialized. The first snippet
** candidate always starts at offset 0 (even if this candidate has a
** score of 0.0).
*/
return 0;
}
+/*
+** Retrieve information about the current candidate snippet of snippet
+** iterator pIter.
+*/
static void fts3SnippetDetails(
- SnippetCtx *pIter, /* Snippet iterator */
+ SnippetIter *pIter, /* Snippet iterator */
u64 mCovered, /* Bitmask of phrases already covered */
int *piToken, /* OUT: First token of proposed snippet */
int *piScore, /* OUT: "Score" for this snippet */
u64 *pmHighlight /* OUT: Bitmask of terms to highlight */
){
int iStart = pIter->iCurrent; /* First token of snippet */
-
- int iScore = 0;
- int i;
- u64 mCover = 0;
- u64 mHighlight = 0;
+ int iScore = 0; /* Score of this snippet */
+ int i; /* Loop counter */
+ u64 mCover = 0; /* Mask of phrases covered by this snippet */
+ u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */
for(i=0; i<pIter->nPhrase; i++){
SnippetPhrase *pPhrase = &pIter->aPhrase[i];
}
}
+ /* Set the output variables before returning. */
*piToken = iStart;
*piScore = iScore;
*pmCover = mCover;
/*
** This function is an fts3ExprIterate() callback used by fts3BestSnippet().
-** Each invocation populates an element of the SnippetCtx.aPhrase[] array.
+** Each invocation populates an element of the SnippetIter.aPhrase[] array.
*/
static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
- SnippetCtx *p = (SnippetCtx *)ctx;
+ SnippetIter *p = (SnippetIter *)ctx;
SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
char *pCsr;
return SQLITE_OK;
}
-#define BITMASK_SIZE 64
-
-typedef struct SnippetFragment SnippetFragment;
-struct SnippetFragment {
- int iCol; /* Column snippet is extracted from */
- int iPos; /* Index of first token in snippet */
- u64 covered; /* Mask of query phrases covered */
- u64 hlmask; /* Mask of snippet terms to highlight */
-};
-
+/*
+** Select the fragment of text consisting of nFragment contiguous tokens
+** from column iCol that represent the "best" snippet. The best snippet
+** is the snippet with the highest score, where scores are calculated
+** by adding:
+**
+** (a) +1 point for each occurrence of a matchable phrase in the snippet.
+**
+** (b) +1000 points for the first occurrence of each matchable phrase in
+** the snippet for which the corresponding mCovered bit is not set.
+**
+** The selected snippet parameters are stored in structure *pFragment before
+** returning. The score of the selected snippet is stored in *piScore
+** before returning.
+*/
static int fts3BestSnippet(
int nSnippet, /* Desired snippet length */
Fts3Cursor *pCsr, /* Cursor to create snippet for */
){
int rc; /* Return Code */
int nList; /* Number of phrases in expression */
- SnippetCtx sCtx; /* Snippet context object */
+ SnippetIter sIter; /* Iterates through snippet candidates */
int nByte; /* Number of bytes of space to allocate */
- int iBestScore = -1;
- int i;
+ int iBestScore = -1; /* Best snippet score found so far */
+ int i; /* Loop counter */
- memset(&sCtx, 0, sizeof(sCtx));
+ memset(&sIter, 0, sizeof(sIter));
/* Iterate through the phrases in the expression to count them. The same
** callback makes sure the doclists are loaded for each phrase.
** the required space using malloc().
*/
nByte = sizeof(SnippetPhrase) * nList;
- sCtx.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
- if( !sCtx.aPhrase ){
+ sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte);
+ if( !sIter.aPhrase ){
return SQLITE_NOMEM;
}
- memset(sCtx.aPhrase, 0, nByte);
+ memset(sIter.aPhrase, 0, nByte);
- /* Initialize the contents of the SnippetCtx object. Then iterate through
+ /* Initialize the contents of the SnippetIter object. Then iterate through
** the set of phrases in the expression to populate the aPhrase[] array.
*/
- sCtx.pCsr = pCsr;
- sCtx.iCol = iCol;
- sCtx.nSnippet = nSnippet;
- sCtx.nPhrase = nList;
- sCtx.iCurrent = -1;
- (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sCtx);
-
+ sIter.pCsr = pCsr;
+ sIter.iCol = iCol;
+ sIter.nSnippet = nSnippet;
+ sIter.nPhrase = nList;
+ sIter.iCurrent = -1;
+ (void)fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter);
+
+ /* Set the *pmSeen output variable. */
for(i=0; i<nList; i++){
- if( sCtx.aPhrase[i].pHead ){
+ if( sIter.aPhrase[i].pHead ){
*pmSeen |= (u64)1 << i;
}
}
+ /* Loop through all candidate snippets. Store the best snippet in
+ ** *pFragment. Store its associated 'score' in iBestScore.
+ */
pFragment->iCol = iCol;
- while( !fts3SnippetNextCandidate(&sCtx) ){
+ while( !fts3SnippetNextCandidate(&sIter) ){
int iPos;
int iScore;
u64 mCover;
u64 mHighlight;
- fts3SnippetDetails(&sCtx, mCovered, &iPos, &iScore, &mCover, &mHighlight);
-
+ fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover, &mHighlight);
assert( iScore>=0 );
if( iScore>iBestScore ){
pFragment->iPos = iPos;
}
}
- sqlite3_free(sCtx.aPhrase);
+ sqlite3_free(sIter.aPhrase);
*piScore = iBestScore;
return SQLITE_OK;
}
-typedef struct StrBuffer StrBuffer;
-struct StrBuffer {
- char *z;
- int n;
- int nAlloc;
-};
-
+/*
+** Append a string to the string-buffer passed as the first argument.
+**
+** If nAppend is negative, then the length of the string zAppend is
+** determined using strlen().
+*/
static int fts3StringAppend(
- StrBuffer *pStr,
- const char *zAppend,
- int nAppend
+ StrBuffer *pStr, /* Buffer to append to */
+ const char *zAppend, /* Pointer to data to append to buffer */
+ int nAppend /* Size of zAppend in bytes (or -1) */
){
if( nAppend<0 ){
nAppend = strlen(zAppend);
}
+ /* If there is insufficient space allocated at StrBuffer.z, use realloc()
+ ** to grow the buffer so that it is big enough to accommodate the
+ ** appended data.
+ */
if( pStr->n+nAppend+1>=pStr->nAlloc ){
int nAlloc = pStr->nAlloc+nAppend+100;
char *zNew = sqlite3_realloc(pStr->z, nAlloc);
pStr->nAlloc = nAlloc;
}
+ /* Append the data to the string buffer. */
memcpy(&pStr->z[pStr->n], zAppend, nAppend);
pStr->n += nAppend;
pStr->z[pStr->n] = '\0';
return SQLITE_OK;
}
+/*
+** The fts3BestSnippet() function often selects snippets that end with a
+** query term. That is, the final term of the snippet is always a term
+** that requires highlighting. For example, if 'X' is a highlighted term
+** and '.' is a non-highlighted term, BestSnippet() may select:
+**
+** ........X.....X
+**
+** This function "shifts" the beginning of the snippet forward in the
+** document so that there are approximately the same number of
+** non-highlighted terms to the right of the final highlighted term as there
+** are to the left of the first highlighted term. For example, to this:
+**
+** ....X.....X....
+**
+** This is done as part of extracting the snippet text, not when selecting
+** the snippet. Snippet selection is done based on doclists only, so there
+** is no way for fts3BestSnippet() to know whether or not the document
+** actually contains terms that follow the final highlighted term.
+*/
int fts3SnippetShift(
- Fts3Table *pTab,
- int nSnippet,
- const char *zDoc,
- int nDoc,
- int *piPos,
- u64 *pHlmask
+ Fts3Table *pTab, /* FTS3 table snippet comes from */
+ int nSnippet, /* Number of tokens desired for snippet */
+ const char *zDoc, /* Document text to extract snippet from */
+ int nDoc, /* Size of buffer zDoc in bytes */
+ int *piPos, /* IN/OUT: First token of snippet */
+ u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */
){
- u64 hlmask = *pHlmask;
+ u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */
if( hlmask ){
- int nLeft;
- int nRight;
- int nDesired;
+ int nLeft; /* Tokens to the left of first highlight */
+ int nRight; /* Tokens to the right of last highlight */
+ int nDesired; /* Ideal number of tokens to shift forward */
for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++);
for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++);
-
nDesired = (nLeft-nRight)/2;
+
+ /* Ideally, the start of the snippet should be pushed forward in the
+ ** document nDesired tokens. This block checks if there are actually
+ ** nDesired tokens to the right of the snippet. If so, *piPos and
+ ** *pHlmask are updated to shift the snippet nDesired tokens to the
+ ** right. Otherwise, the snippet is shifted by the number of tokens
+ ** available.
+ */
if( nDesired>0 ){
- int nShift;
- int iCurrent = 0;
- int rc;
+ int nShift; /* Number of tokens to shift snippet by */
+ int iCurrent = 0; /* Token counter */
+ int rc; /* Return Code */
sqlite3_tokenizer_module *pMod;
sqlite3_tokenizer_cursor *pC;
-
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
+
+ /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
+ ** or more tokens in zDoc/nDoc.
+ */
rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
if( rc!=SQLITE_OK ){
return rc;
return SQLITE_OK;
}
+/*
+** Extract the snippet text for fragment pFragment from cursor pCsr and
+** append it to string buffer pOut.
+*/
static int fts3SnippetText(
Fts3Cursor *pCsr, /* FTS3 Cursor */
SnippetFragment *pFragment, /* Snippet to extract */
int nSnippet, /* Number of tokens in extracted snippet */
const char *zOpen, /* String inserted before highlighted term */
const char *zClose, /* String inserted after highlighted term */
- const char *zEllipsis,
- StrBuffer *pOut
+ const char *zEllipsis, /* String inserted between snippets */
+ StrBuffer *pOut /* Write output here */
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
int rc; /* Return code */
int nDoc; /* Size of zDoc in bytes */
int iCurrent = 0; /* Current token number of document */
int iEnd = 0; /* Byte offset of end of current token */
- int isShiftDone = 0;
- int iPos = pFragment->iPos;
- u64 hlmask = pFragment->hlmask;
-
+ int isShiftDone = 0; /* True after snippet is shifted */
+ int iPos = pFragment->iPos; /* First token of snippet */
+ u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */
+ int iCol = pFragment->iCol+1; /* Query column to extract text from */
sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
- const char *ZDUMMY; /* Dummy arguments used with tokenizer */
- int DUMMY1; /* Dummy arguments used with tokenizer */
+ const char *ZDUMMY; /* Dummy argument used with tokenizer */
+ int DUMMY1; /* Dummy argument used with tokenizer */
- zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, pFragment->iCol+1);
+ zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
if( zDoc==0 ){
- if( sqlite3_column_type(pCsr->pStmt, pFragment->iCol+1)!=SQLITE_NULL ){
+ if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){
return SQLITE_NOMEM;
}
return SQLITE_OK;
}
- nDoc = sqlite3_column_bytes(pCsr->pStmt, pFragment->iCol+1);
+ nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol);
/* Open a token cursor on the document. */
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
while( rc==SQLITE_OK ){
int iBegin; /* Offset in zDoc of start of token */
int iFin; /* Offset in zDoc of end of token */
- int isHighlight;
+ int isHighlight; /* True for highlighted terms */
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
if( rc!=SQLITE_OK ){
/*
-** An instance of this structure is used to collect the 'global' part of
-** the matchinfo statistics. The 'global' part consists of the following:
-**
-** 1. The number of phrases in the query (nPhrase).
+** This function is used to count the entries in a column-list (a
+** delta-encoded list of term offsets within a single column of a single
+** row). When this function is called, *ppCollist should point to the
+** beginning of the first varint in the column-list (the varint that
+** contains the position of the first matching term in the column data).
+** Before returning, *ppCollist is set to point to the first byte after
+** the last varint in the column-list (either the 0x00 signifying the end
+** of the position-list, or the 0x01 that precedes the column number of
+** the next column in the position-list).
**
-** 2. The number of columns in the FTS3 table (nCol).
-**
-** 3. A matrix of (nPhrase*nCol) integers containing the sum of the
-** number of hits for each phrase in each column across all rows
-** of the table.
-**
-** The total size of the global matchinfo array, assuming the number of
-** columns is N and the number of phrases is P is:
-**
-** 2 + P*(N+1)
-**
-** The number of hits for the 3rd phrase in the second column is found
-** using the expression:
-**
-** aGlobal[2 + P*(1+2) + 1]
-*/
-typedef struct MatchInfo MatchInfo;
-struct MatchInfo {
- Fts3Table *pTab; /* FTS3 Table */
- Fts3Cursor *pCursor; /* FTS3 Cursor */
- int iPhrase; /* Number of phrases so far */
- int nCol; /* Number of columns in table */
- u32 *aGlobal; /* Pre-allocated buffer */
-};
-
-/*
-** This function is used to count the entries in a column-list (delta-encoded
-** list of term offsets within a single column of a single row).
+** The number of elements in the column-list is returned.
*/
static int fts3ColumnlistCount(char **ppCollist){
char *pEnd = *ppCollist;
return nEntry;
}
-static void fts3LoadColumnlistCounts(char **pp, u32 *aOut){
+static void fts3LoadColumnlistCounts(char **pp, u32 *aOut, int isGlobal){
char *pCsr = *pp;
while( *pCsr ){
+ int nHit;
sqlite3_int64 iCol = 0;
if( *pCsr==0x01 ){
pCsr++;
pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
}
- aOut[iCol] += fts3ColumnlistCount(&pCsr);
+ nHit = fts3ColumnlistCount(&pCsr);
+ if( isGlobal ){
+ if( nHit ) aOut[iCol*3+1]++;
+ }
+ aOut[iCol*3] += nHit;
}
pCsr++;
*pp = pCsr;
/*
** fts3ExprIterate() callback used to collect the "global" matchinfo stats
-** for a single query.
+** for a single query. The "global" stats are those elements of the matchinfo
+** array that are constant for all rows returned by the current query.
*/
static int fts3ExprGlobalMatchinfoCb(
Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase,
+ int iPhrase, /* Phrase number (numbered from zero) */
void *pCtx /* Pointer to MatchInfo structure */
){
MatchInfo *p = (MatchInfo *)pCtx;
char *pCsr;
char *pEnd;
- const int iStart = 2 + p->nCol*p->iPhrase;
+ const int iStart = 2 + (iPhrase * p->nCol * 3) + 1;
assert( pExpr->isLoaded );
pCsr = pExpr->aDoclist;
pEnd = &pExpr->aDoclist[pExpr->nDoclist];
while( pCsr<pEnd ){
- while( *pCsr++ & 0x80 );
- fts3LoadColumnlistCounts(&pCsr, &p->aGlobal[iStart]);
+ while( *pCsr++ & 0x80 ); /* Skip past docid. */
+ fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 1);
}
- p->iPhrase++;
return SQLITE_OK;
}
+/*
+** fts3ExprIterate() callback used to collect the "local" matchinfo stats
+** for a single query. The "local" stats are those elements of the matchinfo
+** array that are different for each row returned by the query.
+*/
static int fts3ExprLocalMatchinfoCb(
Fts3Expr *pExpr, /* Phrase expression node */
- int iPhrase,
+ int iPhrase, /* Phrase number */
void *pCtx /* Pointer to MatchInfo structure */
){
MatchInfo *p = (MatchInfo *)pCtx;
- p->iPhrase++;
if( pExpr->aDoclist ){
char *pCsr;
- int iOffset = 2 + p->nCol*(p->aGlobal[0]+iPhrase);
+ int iStart = 2 + (iPhrase * p->nCol * 3);
+ int i;
+
+ for(i=0; i<p->nCol; i++) p->aMatchinfo[iStart+i*3] = 0;
- memset(&p->aGlobal[iOffset], 0, p->nCol*sizeof(u32));
pCsr = sqlite3Fts3FindPositions(pExpr, p->pCursor->iPrevId, -1);
- if( pCsr ) fts3LoadColumnlistCounts(&pCsr, &p->aGlobal[iOffset]);
+ if( pCsr ){
+ fts3LoadColumnlistCounts(&pCsr, &p->aMatchinfo[iStart], 0);
+ }
}
return SQLITE_OK;
}
/*
-** Populate pCsr->aMatchinfo[] with data for the current row. The 'matchinfo'
-** data is an array of 32-bit unsigned integers (C type u32).
+** Populate pCsr->aMatchinfo[] with data for the current row. The
+** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32).
*/
static int fts3GetMatchinfo(Fts3Cursor *pCsr){
- MatchInfo g;
+ MatchInfo sInfo;
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
+
+ sInfo.pCursor = pCsr;
+ sInfo.nCol = pTab->nColumn;
+
if( pCsr->aMatchinfo==0 ){
- int rc;
- int nPhrase;
- int nMatchinfo;
+ /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the
+ ** matchinfo function has been called for this query. In this case
+ ** allocate the array used to accumulate the matchinfo data and
+ ** initialize those elements that are constant for every row.
+ */
+ int rc; /* Return Code */
+ int nPhrase; /* Number of phrases */
+ int nMatchinfo; /* Number of u32 elements in match-info */
- g.pTab = pTab;
- g.nCol = pTab->nColumn;
- g.iPhrase = 0;
+ /* Load doclists for each phrase in the query. */
rc = fts3ExprLoadDoclists(pCsr, &nPhrase, 0);
if( rc!=SQLITE_OK ){
return rc;
}
+ nMatchinfo = 2 + 3*sInfo.nCol*nPhrase;
- nMatchinfo = 2 + 2*g.nCol*nPhrase;
-
- g.iPhrase = 0;
- g.aGlobal = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo);
- if( !g.aGlobal ){
+ sInfo.aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo);
+ if( !sInfo.aMatchinfo ){
return SQLITE_NOMEM;
}
- memset(g.aGlobal, 0, sizeof(u32)*nMatchinfo);
+ memset(sInfo.aMatchinfo, 0, sizeof(u32)*nMatchinfo);
- g.aGlobal[0] = nPhrase;
- g.aGlobal[1] = g.nCol;
- (void)fts3ExprIterate(pCsr->pExpr, fts3ExprGlobalMatchinfoCb, (void *)&g);
- pCsr->aMatchinfo = g.aGlobal;
- }
+ /* The first two elements of match-info are the number of phrases in the
+ ** query and the number of columns in the table. */
+ sInfo.aMatchinfo[0] = nPhrase;
+ sInfo.aMatchinfo[1] = sInfo.nCol;
+ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprGlobalMatchinfoCb,(void*)&sInfo);
- g.pTab = pTab;
- g.pCursor = pCsr;
- g.nCol = pTab->nColumn;
- g.iPhrase = 0;
- g.aGlobal = pCsr->aMatchinfo;
+ pCsr->aMatchinfo = sInfo.aMatchinfo;
+ }
+ sInfo.aMatchinfo = pCsr->aMatchinfo;
if( pCsr->isMatchinfoOk ){
- (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void *)&g);
+ (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLocalMatchinfoCb, (void*)&sInfo);
pCsr->isMatchinfoOk = 0;
}
return SQLITE_OK;
}
+/*
+** Implementation of snippet() function.
+*/
void sqlite3Fts3Snippet(
sqlite3_context *pCtx, /* SQLite function call context */
Fts3Cursor *pCsr, /* Cursor object */
typedef struct TermOffset TermOffset;
+typedef struct TermOffsetCtx TermOffsetCtx;
+
struct TermOffset {
char *pList; /* Position-list */
int iPos; /* Position just read from pList */
- int iOff;
+ int iOff; /* Offset of this term from read positions */
};
-typedef struct TermOffsetCtx TermOffsetCtx;
struct TermOffsetCtx {
int iCol; /* Column of table to populate aTerm for */
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
- const char *ZDUMMY;
- int NDUMMY;
-
+ const char *ZDUMMY; /* Dummy argument used with xNext() */
+ int NDUMMY; /* Dummy argument used with xNext() */
int rc; /* Return Code */
int nToken; /* Number of tokens in query */
int iCol; /* Column currently being processed */
StrBuffer res = {0, 0, 0}; /* Result string */
+ TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */
- TermOffsetCtx sCtx;
memset(&sCtx, 0, sizeof(sCtx));
-
assert( pCsr->isRequireSeek==0 );
/* Count the number of terms in the query */
}
sCtx.iDocid = pCsr->iPrevId;
+ /* Loop through the table columns, appending offset information to
+ ** string-buffer res for each column.
+ */
for(iCol=0; iCol<pTab->nColumn; iCol++){
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
int iStart;
return;
}
+/*
+** Implementation of matchinfo() function.
+*/
void sqlite3Fts3Matchinfo(sqlite3_context *pContext, Fts3Cursor *pCsr){
int rc = fts3GetMatchinfo(pCsr);
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pContext, rc);
}else{
- int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*2);
+ int n = sizeof(u32)*(2+pCsr->aMatchinfo[0]*pCsr->aMatchinfo[1]*3);
sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT);
}
}