From 2670a173ed0c2ad508efca74a5f6884c739e09a2 Mon Sep 17 00:00:00 2001 From: shess Date: Tue, 10 Oct 2006 17:37:14 +0000 Subject: [PATCH] Copy fts1/ to fts2/, changing reference from fts1 to fts2. For future reference, the source versions copied were: README.txt r1.1 fts1.c r1.37 fts1.h r1.2 fts1_hash.c r1.1 fts1_hash.h r1.1 fts1_porter.c r1.1 fts1_tokenizer.h r1.4 fts1_tokenizer1.c r1.6 (CVS 3471) FossilOrigin-Name: d0d1e7cdcc1dd085f1e359ce35c441699d517b02 --- ext/fts2/README.txt | 4 + ext/fts2/fts2.c | 3264 ++++++++++++++++++++++++++++++++++++ ext/fts2/fts2.h | 11 + ext/fts2/fts2_hash.c | 369 ++++ ext/fts2/fts2_hash.h | 112 ++ ext/fts2/fts2_porter.c | 645 +++++++ ext/fts2/fts2_tokenizer.h | 90 + ext/fts2/fts2_tokenizer1.c | 220 +++ manifest | 20 +- manifest.uuid | 2 +- 10 files changed, 4730 insertions(+), 7 deletions(-) create mode 100644 ext/fts2/README.txt create mode 100644 ext/fts2/fts2.c create mode 100644 ext/fts2/fts2.h create mode 100644 ext/fts2/fts2_hash.c create mode 100644 ext/fts2/fts2_hash.h create mode 100644 ext/fts2/fts2_porter.c create mode 100644 ext/fts2/fts2_tokenizer.h create mode 100644 ext/fts2/fts2_tokenizer1.c diff --git a/ext/fts2/README.txt b/ext/fts2/README.txt new file mode 100644 index 0000000000..517a2a0434 --- /dev/null +++ b/ext/fts2/README.txt @@ -0,0 +1,4 @@ +This folder contains source code to the second full-text search +extension for SQLite. While the API is the same, this version uses a +substantially different storage schema from fts1, so tables will need +to be rebuilt. diff --git a/ext/fts2/fts2.c b/ext/fts2/fts2.c new file mode 100644 index 0000000000..e3fb5b3daa --- /dev/null +++ b/ext/fts2/fts2.c @@ -0,0 +1,3264 @@ +/* The author disclaims copyright to this source code. + * + * This is an SQLite module implementing full-text search. + */ + +/* +** The code in this file is only compiled if: +** +** * The FTS2 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS2 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). +*/ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) + +#if defined(SQLITE_ENABLE_FTS2) && !defined(SQLITE_CORE) +# define SQLITE_CORE 1 +#endif + +#include +#if !defined(__APPLE__) +#include +#else +#include +#endif +#include +#include +#include + +#include "fts2.h" +#include "fts2_hash.h" +#include "fts2_tokenizer.h" +#include "sqlite3.h" +#include "sqlite3ext.h" +SQLITE_EXTENSION_INIT1 + + +#if 0 +# define TRACE(A) printf A; fflush(stdout) +#else +# define TRACE(A) +#endif + +/* utility functions */ + +typedef struct StringBuffer { + int len; /* length, not including null terminator */ + int alloced; /* Space allocated for s[] */ + char *s; /* Content of the string */ +} StringBuffer; + +void initStringBuffer(StringBuffer *sb){ + sb->len = 0; + sb->alloced = 100; + sb->s = malloc(100); + sb->s[0] = '\0'; +} + +void nappend(StringBuffer *sb, const char *zFrom, int nFrom){ + if( sb->len + nFrom >= sb->alloced ){ + sb->alloced = sb->len + nFrom + 100; + sb->s = realloc(sb->s, sb->alloced+1); + if( sb->s==0 ){ + initStringBuffer(sb); + return; + } + } + memcpy(sb->s + sb->len, zFrom, nFrom); + sb->len += nFrom; + sb->s[sb->len] = 0; +} +void append(StringBuffer *sb, const char *zFrom){ + nappend(sb, zFrom, strlen(zFrom)); +} + +/* We encode variable-length integers in little-endian order using seven bits + * per byte as follows: +** +** KEY: +** A = 0xxxxxxx 7 bits of data and one flag bit +** B = 1xxxxxxx 7 bits of data and one flag bit +** +** 7 bits - A +** 14 bits - BA +** 21 bits - BBA +** and so on. +*/ + +/* We may need up to VARINT_MAX bytes to store an encoded 64-bit integer. */ +#define VARINT_MAX 10 + +/* Write a 64-bit variable-length integer to memory starting at p[0]. + * The length of data written will be between 1 and VARINT_MAX bytes. + * The number of bytes written is returned. */ +static int putVarint(char *p, sqlite_int64 v){ + unsigned char *q = (unsigned char *) p; + sqlite_uint64 vu = v; + do{ + *q++ = (unsigned char) ((vu & 0x7f) | 0x80); + vu >>= 7; + }while( vu!=0 ); + q[-1] &= 0x7f; /* turn off high bit in final byte */ + assert( q - (unsigned char *)p <= VARINT_MAX ); + return (int) (q - (unsigned char *)p); +} + +/* Read a 64-bit variable-length integer from memory starting at p[0]. + * Return the number of bytes read, or 0 on error. + * The value is stored in *v. */ +static int getVarint(const char *p, sqlite_int64 *v){ + const unsigned char *q = (const unsigned char *) p; + sqlite_uint64 x = 0, y = 1; + while( (*q & 0x80) == 0x80 ){ + x += y * (*q++ & 0x7f); + y <<= 7; + if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */ + assert( 0 ); + return 0; + } + } + x += y * (*q++); + *v = (sqlite_int64) x; + return (int) (q - (unsigned char *)p); +} + +static int getVarint32(const char *p, int *pi){ + sqlite_int64 i; + int ret = getVarint(p, &i); + *pi = (int) i; + assert( *pi==i ); + return ret; +} + +/*** Document lists *** + * + * A document list holds a sorted list of varint-encoded document IDs. + * + * A doclist with type DL_POSITIONS_OFFSETS is stored like this: + * + * array { + * varint docid; + * array { + * varint position; (delta from previous position plus POS_BASE) + * varint startOffset; (delta from previous startOffset) + * varint endOffset; (delta from startOffset) + * } + * } + * + * Here, array { X } means zero or more occurrences of X, adjacent in memory. + * + * A position list may hold positions for text in multiple columns. A position + * POS_COLUMN is followed by a varint containing the index of the column for + * following positions in the list. Any positions appearing before any + * occurrences of POS_COLUMN are for column 0. + * + * A doclist with type DL_POSITIONS is like the above, but holds only docids + * and positions without offset information. + * + * A doclist with type DL_DOCIDS is like the above, but holds only docids + * without positions or offset information. + * + * On disk, every document list has positions and offsets, so we don't bother + * to serialize a doclist's type. + * + * We don't yet delta-encode document IDs; doing so will probably be a + * modest win. + * + * NOTE(shess) I've thought of a slightly (1%) better offset encoding. + * After the first offset, estimate the next offset by using the + * current token position and the previous token position and offset, + * offset to handle some variance. So the estimate would be + * (iPosition*w->iStartOffset/w->iPosition-64), which is delta-encoded + * as normal. Offsets more than 64 chars from the estimate are + * encoded as the delta to the previous start offset + 128. An + * additional tiny increment can be gained by using the end offset of + * the previous token to make the estimate a tiny bit more precise. +*/ + +typedef enum DocListType { + DL_DOCIDS, /* docids only */ + DL_POSITIONS, /* docids + positions */ + DL_POSITIONS_OFFSETS /* docids + positions + offsets */ +} DocListType; + +/* +** By default, only positions and not offsets are stored in the doclists. +** To change this so that offsets are stored too, compile with +** +** -DDL_DEFAULT=DL_POSITIONS_OFFSETS +** +*/ +#ifndef DL_DEFAULT +# define DL_DEFAULT DL_POSITIONS +#endif + +typedef struct DocList { + char *pData; + int nData; + DocListType iType; + int iLastColumn; /* the last column written */ + int iLastPos; /* the last position written */ + int iLastOffset; /* the last start offset written */ +} DocList; + +enum { + POS_END = 0, /* end of this position list */ + POS_COLUMN, /* followed by new column number */ + POS_BASE +}; + +/* Initialize a new DocList to hold the given data. */ +static void docListInit(DocList *d, DocListType iType, + const char *pData, int nData){ + d->nData = nData; + if( nData>0 ){ + d->pData = malloc(nData); + memcpy(d->pData, pData, nData); + } else { + d->pData = NULL; + } + d->iType = iType; + d->iLastColumn = 0; + d->iLastPos = d->iLastOffset = 0; +} + +/* Create a new dynamically-allocated DocList. */ +static DocList *docListNew(DocListType iType){ + DocList *d = (DocList *) malloc(sizeof(DocList)); + docListInit(d, iType, 0, 0); + return d; +} + +static void docListDestroy(DocList *d){ + free(d->pData); +#ifndef NDEBUG + memset(d, 0x55, sizeof(*d)); +#endif +} + +static void docListDelete(DocList *d){ + docListDestroy(d); + free(d); +} + +static char *docListEnd(DocList *d){ + return d->pData + d->nData; +} + +/* Append a varint to a DocList's data. */ +static void appendVarint(DocList *d, sqlite_int64 i){ + char c[VARINT_MAX]; + int n = putVarint(c, i); + d->pData = realloc(d->pData, d->nData + n); + memcpy(d->pData + d->nData, c, n); + d->nData += n; +} + +static void docListAddDocid(DocList *d, sqlite_int64 iDocid){ + appendVarint(d, iDocid); + if( d->iType>=DL_POSITIONS ){ + appendVarint(d, POS_END); /* initially empty position list */ + d->iLastColumn = 0; + d->iLastPos = d->iLastOffset = 0; + } +} + +/* helper function for docListAddPos and docListAddPosOffset */ +static void addPos(DocList *d, int iColumn, int iPos){ + assert( d->nData>0 ); + --d->nData; /* remove previous terminator */ + if( iColumn!=d->iLastColumn ){ + assert( iColumn>d->iLastColumn ); + appendVarint(d, POS_COLUMN); + appendVarint(d, iColumn); + d->iLastColumn = iColumn; + d->iLastPos = d->iLastOffset = 0; + } + assert( iPos>=d->iLastPos ); + appendVarint(d, iPos-d->iLastPos+POS_BASE); + d->iLastPos = iPos; +} + +/* Add a position to the last position list in a doclist. */ +static void docListAddPos(DocList *d, int iColumn, int iPos){ + assert( d->iType==DL_POSITIONS ); + addPos(d, iColumn, iPos); + appendVarint(d, POS_END); /* add new terminator */ +} + +/* +** Add a position and starting and ending offsets to a doclist. +** +** If the doclist is setup to handle only positions, then insert +** the position only and ignore the offsets. +*/ +static void docListAddPosOffset( + DocList *d, /* Doclist under construction */ + int iColumn, /* Column the inserted term is part of */ + int iPos, /* Position of the inserted term */ + int iStartOffset, /* Starting offset of inserted term */ + int iEndOffset /* Ending offset of inserted term */ +){ + assert( d->iType>=DL_POSITIONS ); + addPos(d, iColumn, iPos); + if( d->iType==DL_POSITIONS_OFFSETS ){ + assert( iStartOffset>=d->iLastOffset ); + appendVarint(d, iStartOffset-d->iLastOffset); + d->iLastOffset = iStartOffset; + assert( iEndOffset>=iStartOffset ); + appendVarint(d, iEndOffset-iStartOffset); + } + appendVarint(d, POS_END); /* add new terminator */ +} + +/* +** A DocListReader object is a cursor into a doclist. Initialize +** the cursor to the beginning of the doclist by calling readerInit(). +** Then use routines +** +** peekDocid() +** readDocid() +** readPosition() +** skipPositionList() +** and so forth... +** +** to read information out of the doclist. When we reach the end +** of the doclist, atEnd() returns TRUE. +*/ +typedef struct DocListReader { + DocList *pDoclist; /* The document list we are stepping through */ + char *p; /* Pointer to next unread byte in the doclist */ + int iLastColumn; + int iLastPos; /* the last position read, or -1 when not in a position list */ +} DocListReader; + +/* +** Initialize the DocListReader r to point to the beginning of pDoclist. +*/ +static void readerInit(DocListReader *r, DocList *pDoclist){ + r->pDoclist = pDoclist; + if( pDoclist!=NULL ){ + r->p = pDoclist->pData; + } + r->iLastColumn = -1; + r->iLastPos = -1; +} + +/* +** Return TRUE if we have reached then end of pReader and there is +** nothing else left to read. +*/ +static int atEnd(DocListReader *pReader){ + return pReader->pDoclist==0 || (pReader->p >= docListEnd(pReader->pDoclist)); +} + +/* Peek at the next docid without advancing the read pointer. +*/ +static sqlite_int64 peekDocid(DocListReader *pReader){ + sqlite_int64 ret; + assert( !atEnd(pReader) ); + assert( pReader->iLastPos==-1 ); + getVarint(pReader->p, &ret); + return ret; +} + +/* Read the next docid. See also nextDocid(). +*/ +static sqlite_int64 readDocid(DocListReader *pReader){ + sqlite_int64 ret; + assert( !atEnd(pReader) ); + assert( pReader->iLastPos==-1 ); + pReader->p += getVarint(pReader->p, &ret); + if( pReader->pDoclist->iType>=DL_POSITIONS ){ + pReader->iLastColumn = 0; + pReader->iLastPos = 0; + } + return ret; +} + +/* Read the next position and column index from a position list. + * Returns the position, or -1 at the end of the list. */ +static int readPosition(DocListReader *pReader, int *iColumn){ + int i; + int iType = pReader->pDoclist->iType; + + if( pReader->iLastPos==-1 ){ + return -1; + } + assert( !atEnd(pReader) ); + + if( iTypep += getVarint32(pReader->p, &i); + if( i==POS_END ){ + pReader->iLastColumn = pReader->iLastPos = -1; + *iColumn = -1; + return -1; + } + if( i==POS_COLUMN ){ + pReader->p += getVarint32(pReader->p, &pReader->iLastColumn); + pReader->iLastPos = 0; + pReader->p += getVarint32(pReader->p, &i); + assert( i>=POS_BASE ); + } + pReader->iLastPos += ((int) i)-POS_BASE; + if( iType>=DL_POSITIONS_OFFSETS ){ + /* Skip over offsets, ignoring them for now. */ + int iStart, iEnd; + pReader->p += getVarint32(pReader->p, &iStart); + pReader->p += getVarint32(pReader->p, &iEnd); + } + *iColumn = pReader->iLastColumn; + return pReader->iLastPos; +} + +/* Skip past the end of a position list. */ +static void skipPositionList(DocListReader *pReader){ + DocList *p = pReader->pDoclist; + if( p && p->iType>=DL_POSITIONS ){ + int iColumn; + while( readPosition(pReader, &iColumn)!=-1 ){} + } +} + +/* Skip over a docid, including its position list if the doclist has + * positions. */ +static void skipDocument(DocListReader *pReader){ + readDocid(pReader); + skipPositionList(pReader); +} + +/* Skip past all docids which are less than [iDocid]. Returns 1 if a docid + * matching [iDocid] was found. */ +static int skipToDocid(DocListReader *pReader, sqlite_int64 iDocid){ + sqlite_int64 d = 0; + while( !atEnd(pReader) && (d=peekDocid(pReader))iType>=DL_POSITIONS ){ + int iPos, iCol; + const char *zDiv = ""; + printf("("); + while( (iPos = readPosition(&r, &iCol))>=0 ){ + printf("%s%d:%d", zDiv, iCol, iPos); + zDiv = ":"; + } + printf(")"); + } + } + printf("\n"); + fflush(stdout); +} +#endif /* SQLITE_DEBUG */ + +/* Trim the given doclist to contain only positions in column + * [iRestrictColumn]. */ +static void docListRestrictColumn(DocList *in, int iRestrictColumn){ + DocListReader r; + DocList out; + + assert( in->iType>=DL_POSITIONS ); + readerInit(&r, in); + docListInit(&out, DL_POSITIONS, NULL, 0); + + while( !atEnd(&r) ){ + sqlite_int64 iDocid = readDocid(&r); + int iPos, iColumn; + + docListAddDocid(&out, iDocid); + while( (iPos = readPosition(&r, &iColumn)) != -1 ){ + if( iColumn==iRestrictColumn ){ + docListAddPos(&out, iColumn, iPos); + } + } + } + + docListDestroy(in); + *in = out; +} + +/* Trim the given doclist by discarding any docids without any remaining + * positions. */ +static void docListDiscardEmpty(DocList *in) { + DocListReader r; + DocList out; + + /* TODO: It would be nice to implement this operation in place; that + * could save a significant amount of memory in queries with long doclists. */ + assert( in->iType>=DL_POSITIONS ); + readerInit(&r, in); + docListInit(&out, DL_POSITIONS, NULL, 0); + + while( !atEnd(&r) ){ + sqlite_int64 iDocid = readDocid(&r); + int match = 0; + int iPos, iColumn; + while( (iPos = readPosition(&r, &iColumn)) != -1 ){ + if( !match ){ + docListAddDocid(&out, iDocid); + match = 1; + } + docListAddPos(&out, iColumn, iPos); + } + } + + docListDestroy(in); + *in = out; +} + +/* Helper function for docListUpdate() and docListAccumulate(). +** Splices a doclist element into the doclist represented by r, +** leaving r pointing after the newly spliced element. +*/ +static void docListSpliceElement(DocListReader *r, sqlite_int64 iDocid, + const char *pSource, int nSource){ + DocList *d = r->pDoclist; + char *pTarget; + int nTarget, found; + + found = skipToDocid(r, iDocid); + + /* Describe slice in d to place pSource/nSource. */ + pTarget = r->p; + if( found ){ + skipDocument(r); + nTarget = r->p-pTarget; + }else{ + nTarget = 0; + } + + /* The sense of the following is that there are three possibilities. + ** If nTarget==nSource, we should not move any memory nor realloc. + ** If nTarget>nSource, trim target and realloc. + ** If nTargetnSource ){ + memmove(pTarget+nSource, pTarget+nTarget, docListEnd(d)-(pTarget+nTarget)); + } + if( nTarget!=nSource ){ + int iDoclist = pTarget-d->pData; + d->pData = realloc(d->pData, d->nData+nSource-nTarget); + pTarget = d->pData+iDoclist; + } + if( nTargetnData += nSource-nTarget; + r->p = pTarget+nSource; +} + +/* Insert/update pUpdate into the doclist. */ +static void docListUpdate(DocList *d, DocList *pUpdate){ + DocListReader reader; + + assert( d!=NULL && pUpdate!=NULL ); + assert( d->iType==pUpdate->iType); + + readerInit(&reader, d); + docListSpliceElement(&reader, firstDocid(pUpdate), + pUpdate->pData, pUpdate->nData); +} + +/* Propagate elements from pUpdate to pAcc, overwriting elements with +** matching docids. +*/ +static void docListAccumulate(DocList *pAcc, DocList *pUpdate){ + DocListReader accReader, updateReader; + + /* Handle edge cases where one doclist is empty. */ + assert( pAcc!=NULL ); + if( pUpdate==NULL || pUpdate->nData==0 ) return; + if( pAcc->nData==0 ){ + pAcc->pData = malloc(pUpdate->nData); + memcpy(pAcc->pData, pUpdate->pData, pUpdate->nData); + pAcc->nData = pUpdate->nData; + return; + } + + readerInit(&accReader, pAcc); + readerInit(&updateReader, pUpdate); + + while( !atEnd(&updateReader) ){ + char *pSource = updateReader.p; + sqlite_int64 iDocid = readDocid(&updateReader); + skipPositionList(&updateReader); + docListSpliceElement(&accReader, iDocid, pSource, updateReader.p-pSource); + } +} + +/* +** Read the next docid off of pIn. Return 0 if we reach the end. +* +* TODO: This assumes that docids are never 0, but they may actually be 0 since +* users can choose docids when inserting into a full-text table. Fix this. +*/ +static sqlite_int64 nextDocid(DocListReader *pIn){ + skipPositionList(pIn); + return atEnd(pIn) ? 0 : readDocid(pIn); +} + +/* +** pLeft and pRight are two DocListReaders that are pointing to +** positions lists of the same document: iDocid. +** +** If there are no instances in pLeft or pRight where the position +** of pLeft is one less than the position of pRight, then this +** routine adds nothing to pOut. +** +** If there are one or more instances where positions from pLeft +** are exactly one less than positions from pRight, then add a new +** document record to pOut. If pOut wants to hold positions, then +** include the positions from pRight that are one more than a +** position in pLeft. In other words: pRight.iPos==pLeft.iPos+1. +** +** pLeft and pRight are left pointing at the next document record. +*/ +static void mergePosList( + DocListReader *pLeft, /* Left position list */ + DocListReader *pRight, /* Right position list */ + sqlite_int64 iDocid, /* The docid from pLeft and pRight */ + DocList *pOut /* Write the merged document record here */ +){ + int iLeftCol, iLeftPos = readPosition(pLeft, &iLeftCol); + int iRightCol, iRightPos = readPosition(pRight, &iRightCol); + int match = 0; + + /* Loop until we've reached the end of both position lists. */ + while( iLeftPos!=-1 && iRightPos!=-1 ){ + if( iLeftCol==iRightCol && iLeftPos+1==iRightPos ){ + if( !match ){ + docListAddDocid(pOut, iDocid); + match = 1; + } + if( pOut->iType>=DL_POSITIONS ){ + docListAddPos(pOut, iRightCol, iRightPos); + } + iLeftPos = readPosition(pLeft, &iLeftCol); + iRightPos = readPosition(pRight, &iRightCol); + }else if( iRightCol=0 ) skipPositionList(pLeft); + if( iRightPos>=0 ) skipPositionList(pRight); +} + +/* We have two doclists: pLeft and pRight. +** Write the phrase intersection of these two doclists into pOut. +** +** A phrase intersection means that two documents only match +** if pLeft.iPos+1==pRight.iPos. +** +** The output pOut may or may not contain positions. If pOut +** does contain positions, they are the positions of pRight. +*/ +static void docListPhraseMerge( + DocList *pLeft, /* Doclist resulting from the words on the left */ + DocList *pRight, /* Doclist for the next word to the right */ + DocList *pOut /* Write the combined doclist here */ +){ + DocListReader left, right; + sqlite_int64 docidLeft, docidRight; + + readerInit(&left, pLeft); + readerInit(&right, pRight); + docidLeft = nextDocid(&left); + docidRight = nextDocid(&right); + + while( docidLeft>0 && docidRight>0 ){ + if( docidLeftiType0 && docidRight>0 ){ + if( docidLeft0 && docidRight>0 ){ + if( docidLeft<=docidRight ){ + docListAddDocid(pOut, docidLeft); + }else{ + docListAddDocid(pOut, docidRight); + } + priorLeft = docidLeft; + if( docidLeft<=docidRight ){ + docidLeft = nextDocid(&left); + } + if( docidRight>0 && docidRight<=priorLeft ){ + docidRight = nextDocid(&right); + } + } + while( docidLeft>0 ){ + docListAddDocid(pOut, docidLeft); + docidLeft = nextDocid(&left); + } + while( docidRight>0 ){ + docListAddDocid(pOut, docidRight); + docidRight = nextDocid(&right); + } +} + +/* We have two doclists: pLeft and pRight. +** Write into pOut all documents that occur in pLeft but not +** in pRight. +** +** Only docids are matched. Position information is ignored. +** +** The output pOut never holds positions. +*/ +static void docListExceptMerge( + DocList *pLeft, /* Doclist resulting from the words on the left */ + DocList *pRight, /* Doclist for the next word to the right */ + DocList *pOut /* Write the combined doclist here */ +){ + DocListReader left, right; + sqlite_int64 docidLeft, docidRight, priorLeft; + + readerInit(&left, pLeft); + readerInit(&right, pRight); + docidLeft = nextDocid(&left); + docidRight = nextDocid(&right); + + while( docidLeft>0 && docidRight>0 ){ + priorLeft = docidLeft; + if( docidLeft0 && docidRight<=priorLeft ){ + docidRight = nextDocid(&right); + } + } + while( docidLeft>0 ){ + docListAddDocid(pOut, docidLeft); + docidLeft = nextDocid(&left); + } +} + +static char *string_dup_n(const char *s, int n){ + char *str = malloc(n + 1); + memcpy(str, s, n); + str[n] = '\0'; + return str; +} + +/* Duplicate a string; the caller must free() the returned string. + * (We don't use strdup() since it's not part of the standard C library and + * may not be available everywhere.) */ +static char *string_dup(const char *s){ + return string_dup_n(s, strlen(s)); +} + +/* Format a string, replacing each occurrence of the % character with + * zName. This may be more convenient than sqlite_mprintf() + * when one string is used repeatedly in a format string. + * The caller must free() the returned string. */ +static char *string_format(const char *zFormat, const char *zName){ + const char *p; + size_t len = 0; + size_t nName = strlen(zName); + char *result; + char *r; + + /* first compute length needed */ + for(p = zFormat ; *p ; ++p){ + len += (*p=='%' ? nName : 1); + } + len += 1; /* for null terminator */ + + r = result = malloc(len); + for(p = zFormat; *p; ++p){ + if( *p=='%' ){ + memcpy(r, zName, nName); + r += nName; + } else { + *r++ = *p; + } + } + *r++ = '\0'; + assert( r == result + len ); + return result; +} + +static int sql_exec(sqlite3 *db, const char *zName, const char *zFormat){ + char *zCommand = string_format(zFormat, zName); + int rc; + TRACE(("FTS2 sql: %s\n", zCommand)); + rc = sqlite3_exec(db, zCommand, NULL, 0, NULL); + free(zCommand); + return rc; +} + +static int sql_prepare(sqlite3 *db, const char *zName, sqlite3_stmt **ppStmt, + const char *zFormat){ + char *zCommand = string_format(zFormat, zName); + int rc; + TRACE(("FTS2 prepare: %s\n", zCommand)); + rc = sqlite3_prepare(db, zCommand, -1, ppStmt, NULL); + free(zCommand); + return rc; +} + +/* end utility functions */ + +/* Forward reference */ +typedef struct fulltext_vtab fulltext_vtab; + +/* A single term in a query is represented by an instances of +** the following structure. +*/ +typedef struct QueryTerm { + short int nPhrase; /* How many following terms are part of the same phrase */ + short int iPhrase; /* This is the i-th term of a phrase. */ + short int iColumn; /* Column of the index that must match this term */ + signed char isOr; /* this term is preceded by "OR" */ + signed char isNot; /* this term is preceded by "-" */ + char *pTerm; /* text of the term. '\000' terminated. malloced */ + int nTerm; /* Number of bytes in pTerm[] */ +} QueryTerm; + + +/* A query string is parsed into a Query structure. + * + * We could, in theory, allow query strings to be complicated + * nested expressions with precedence determined by parentheses. + * But none of the major search engines do this. (Perhaps the + * feeling is that an parenthesized expression is two complex of + * an idea for the average user to grasp.) Taking our lead from + * the major search engines, we will allow queries to be a list + * of terms (with an implied AND operator) or phrases in double-quotes, + * with a single optional "-" before each non-phrase term to designate + * negation and an optional OR connector. + * + * OR binds more tightly than the implied AND, which is what the + * major search engines seem to do. So, for example: + * + * [one two OR three] ==> one AND (two OR three) + * [one OR two three] ==> (one OR two) AND three + * + * A "-" before a term matches all entries that lack that term. + * The "-" must occur immediately before the term with in intervening + * space. This is how the search engines do it. + * + * A NOT term cannot be the right-hand operand of an OR. If this + * occurs in the query string, the NOT is ignored: + * + * [one OR -two] ==> one OR two + * + */ +typedef struct Query { + fulltext_vtab *pFts; /* The full text index */ + int nTerms; /* Number of terms in the query */ + QueryTerm *pTerms; /* Array of terms. Space obtained from malloc() */ + int nextIsOr; /* Set the isOr flag on the next inserted term */ + int nextColumn; /* Next word parsed must be in this column */ + int dfltColumn; /* The default column */ +} Query; + + +/* +** An instance of the following structure keeps track of generated +** matching-word offset information and snippets. +*/ +typedef struct Snippet { + int nMatch; /* Total number of matches */ + int nAlloc; /* Space allocated for aMatch[] */ + struct snippetMatch { /* One entry for each matching term */ + char snStatus; /* Status flag for use while constructing snippets */ + short int iCol; /* The column that contains the match */ + short int iTerm; /* The index in Query.pTerms[] of the matching term */ + short int nByte; /* Number of bytes in the term */ + int iStart; /* The offset to the first character of the term */ + } *aMatch; /* Points to space obtained from malloc */ + char *zOffset; /* Text rendering of aMatch[] */ + int nOffset; /* strlen(zOffset) */ + char *zSnippet; /* Snippet text */ + int nSnippet; /* strlen(zSnippet) */ +} Snippet; + + +typedef enum QueryType { + QUERY_GENERIC, /* table scan */ + QUERY_ROWID, /* lookup by rowid */ + QUERY_FULLTEXT /* QUERY_FULLTEXT + [i] is a full-text search for column i*/ +} QueryType; + +/* TODO(shess) CHUNK_MAX controls how much data we allow in segment 0 +** before we start aggregating into larger segments. Lower CHUNK_MAX +** means that for a given input we have more individual segments per +** term, which means more rows in the table and a bigger index (due to +** both more rows and bigger rowids). But it also reduces the average +** cost of adding new elements to the segment 0 doclist, and it seems +** to reduce the number of pages read and written during inserts. 256 +** was chosen by measuring insertion times for a certain input (first +** 10k documents of Enron corpus), though including query performance +** in the decision may argue for a larger value. +*/ +#define CHUNK_MAX 256 + +typedef enum fulltext_statement { + CONTENT_INSERT_STMT, + CONTENT_SELECT_STMT, + CONTENT_UPDATE_STMT, + CONTENT_DELETE_STMT, + + TERM_SELECT_STMT, + TERM_SELECT_ALL_STMT, + TERM_INSERT_STMT, + TERM_UPDATE_STMT, + TERM_DELETE_STMT, + + MAX_STMT /* Always at end! */ +} fulltext_statement; + +/* These must exactly match the enum above. */ +/* TODO(adam): Is there some risk that a statement (in particular, +** pTermSelectStmt) will be used in two cursors at once, e.g. if a +** query joins a virtual table to itself? If so perhaps we should +** move some of these to the cursor object. +*/ +static const char *const fulltext_zStatement[MAX_STMT] = { + /* CONTENT_INSERT */ NULL, /* generated in contentInsertStatement() */ + /* CONTENT_SELECT */ "select * from %_content where rowid = ?", + /* CONTENT_UPDATE */ NULL, /* generated in contentUpdateStatement() */ + /* CONTENT_DELETE */ "delete from %_content where rowid = ?", + + /* TERM_SELECT */ + "select rowid, doclist from %_term where term = ? and segment = ?", + /* TERM_SELECT_ALL */ + "select doclist from %_term where term = ? order by segment", + /* TERM_INSERT */ + "insert into %_term (rowid, term, segment, doclist) values (?, ?, ?, ?)", + /* TERM_UPDATE */ "update %_term set doclist = ? where rowid = ?", + /* TERM_DELETE */ "delete from %_term where rowid = ?", +}; + +/* +** A connection to a fulltext index is an instance of the following +** structure. The xCreate and xConnect methods create an instance +** of this structure and xDestroy and xDisconnect free that instance. +** All other methods receive a pointer to the structure as one of their +** arguments. +*/ +struct fulltext_vtab { + sqlite3_vtab base; /* Base class used by SQLite core */ + sqlite3 *db; /* The database connection */ + const char *zName; /* virtual table name */ + int nColumn; /* number of columns in virtual table */ + char **azColumn; /* column names. malloced */ + char **azContentColumn; /* column names in content table; malloced */ + sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */ + + /* Precompiled statements which we keep as long as the table is + ** open. + */ + sqlite3_stmt *pFulltextStatements[MAX_STMT]; +}; + +/* +** When the core wants to do a query, it create a cursor using a +** call to xOpen. This structure is an instance of a cursor. It +** is destroyed by xClose. +*/ +typedef struct fulltext_cursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + QueryType iCursorType; /* Copy of sqlite3_index_info.idxNum */ + sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */ + int eof; /* True if at End Of Results */ + Query q; /* Parsed query string */ + Snippet snippet; /* Cached snippet for the current row */ + int iColumn; /* Column being searched */ + DocListReader result; /* used when iCursorType == QUERY_FULLTEXT */ +} fulltext_cursor; + +static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){ + return (fulltext_vtab *) c->base.pVtab; +} + +static const sqlite3_module fulltextModule; /* forward declaration */ + +/* Append a list of strings separated by commas to a StringBuffer. */ +static void appendList(StringBuffer *sb, int nString, char **azString){ + int i; + for(i=0; i0 ) append(sb, ", "); + append(sb, azString[i]); + } +} + +/* Return a dynamically generated statement of the form + * insert into %_content (rowid, ...) values (?, ...) + */ +static const char *contentInsertStatement(fulltext_vtab *v){ + StringBuffer sb; + int i; + + initStringBuffer(&sb); + append(&sb, "insert into %_content (rowid, "); + appendList(&sb, v->nColumn, v->azContentColumn); + append(&sb, ") values (?"); + for(i=0; inColumn; ++i) + append(&sb, ", ?"); + append(&sb, ")"); + return sb.s; +} + +/* Return a dynamically generated statement of the form + * update %_content set [col_0] = ?, [col_1] = ?, ... + * where rowid = ? + */ +static const char *contentUpdateStatement(fulltext_vtab *v){ + StringBuffer sb; + int i; + + initStringBuffer(&sb); + append(&sb, "update %_content set "); + for(i=0; inColumn; ++i) { + if( i>0 ){ + append(&sb, ", "); + } + append(&sb, v->azContentColumn[i]); + append(&sb, " = ?"); + } + append(&sb, " where rowid = ?"); + return sb.s; +} + +/* Puts a freshly-prepared statement determined by iStmt in *ppStmt. +** If the indicated statement has never been prepared, it is prepared +** and cached, otherwise the cached version is reset. +*/ +static int sql_get_statement(fulltext_vtab *v, fulltext_statement iStmt, + sqlite3_stmt **ppStmt){ + assert( iStmtpFulltextStatements[iStmt]==NULL ){ + const char *zStmt; + int rc; + switch( iStmt ){ + case CONTENT_INSERT_STMT: + zStmt = contentInsertStatement(v); break; + case CONTENT_UPDATE_STMT: + zStmt = contentUpdateStatement(v); break; + default: + zStmt = fulltext_zStatement[iStmt]; + } + rc = sql_prepare(v->db, v->zName, &v->pFulltextStatements[iStmt], + zStmt); + if( zStmt != fulltext_zStatement[iStmt]) free((void *) zStmt); + if( rc!=SQLITE_OK ) return rc; + } else { + int rc = sqlite3_reset(v->pFulltextStatements[iStmt]); + if( rc!=SQLITE_OK ) return rc; + } + + *ppStmt = v->pFulltextStatements[iStmt]; + return SQLITE_OK; +} + +/* Step the indicated statement, handling errors SQLITE_BUSY (by +** retrying) and SQLITE_SCHEMA (by re-preparing and transferring +** bindings to the new statement). +** TODO(adam): We should extend this function so that it can work with +** statements declared locally, not only globally cached statements. +*/ +static int sql_step_statement(fulltext_vtab *v, fulltext_statement iStmt, + sqlite3_stmt **ppStmt){ + int rc; + sqlite3_stmt *s = *ppStmt; + assert( iStmtpFulltextStatements[iStmt] ); + + while( (rc=sqlite3_step(s))!=SQLITE_DONE && rc!=SQLITE_ROW ){ + sqlite3_stmt *pNewStmt; + + if( rc==SQLITE_BUSY ) continue; + if( rc!=SQLITE_ERROR ) return rc; + + rc = sqlite3_reset(s); + if( rc!=SQLITE_SCHEMA ) return SQLITE_ERROR; + + v->pFulltextStatements[iStmt] = NULL; /* Still in s */ + rc = sql_get_statement(v, iStmt, &pNewStmt); + if( rc!=SQLITE_OK ) goto err; + *ppStmt = pNewStmt; + + rc = sqlite3_transfer_bindings(s, pNewStmt); + if( rc!=SQLITE_OK ) goto err; + + rc = sqlite3_finalize(s); + if( rc!=SQLITE_OK ) return rc; + s = pNewStmt; + } + return rc; + + err: + sqlite3_finalize(s); + return rc; +} + +/* Like sql_step_statement(), but convert SQLITE_DONE to SQLITE_OK. +** Useful for statements like UPDATE, where we expect no results. +*/ +static int sql_single_step_statement(fulltext_vtab *v, + fulltext_statement iStmt, + sqlite3_stmt **ppStmt){ + int rc = sql_step_statement(v, iStmt, ppStmt); + return (rc==SQLITE_DONE) ? SQLITE_OK : rc; +} + +/* insert into %_content (rowid, ...) values ([rowid], [pValues]) */ +static int content_insert(fulltext_vtab *v, sqlite3_value *rowid, + sqlite3_value **pValues){ + sqlite3_stmt *s; + int i; + int rc = sql_get_statement(v, CONTENT_INSERT_STMT, &s); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_value(s, 1, rowid); + if( rc!=SQLITE_OK ) return rc; + + for(i=0; inColumn; ++i){ + rc = sqlite3_bind_value(s, 2+i, pValues[i]); + if( rc!=SQLITE_OK ) return rc; + } + + return sql_single_step_statement(v, CONTENT_INSERT_STMT, &s); +} + +/* update %_content set col0 = pValues[0], col1 = pValues[1], ... + * where rowid = [iRowid] */ +static int content_update(fulltext_vtab *v, sqlite3_value **pValues, + sqlite_int64 iRowid){ + sqlite3_stmt *s; + int i; + int rc = sql_get_statement(v, CONTENT_UPDATE_STMT, &s); + if( rc!=SQLITE_OK ) return rc; + + for(i=0; inColumn; ++i){ + rc = sqlite3_bind_value(s, 1+i, pValues[i]); + if( rc!=SQLITE_OK ) return rc; + } + + rc = sqlite3_bind_int64(s, 1+v->nColumn, iRowid); + if( rc!=SQLITE_OK ) return rc; + + return sql_single_step_statement(v, CONTENT_UPDATE_STMT, &s); +} + +void freeStringArray(int nString, const char **pString){ + int i; + + for (i=0 ; i < nString ; ++i) { + free((void *) pString[i]); + } + free((void *) pString); +} + +/* select * from %_content where rowid = [iRow] + * The caller must delete the returned array and all strings in it. + * + * TODO: Perhaps we should return pointer/length strings here for consistency + * with other code which uses pointer/length. */ +static int content_select(fulltext_vtab *v, sqlite_int64 iRow, + const char ***pValues){ + sqlite3_stmt *s; + const char **values; + int i; + int rc; + + *pValues = NULL; + + rc = sql_get_statement(v, CONTENT_SELECT_STMT, &s); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_int64(s, 1, iRow); + if( rc!=SQLITE_OK ) return rc; + + rc = sql_step_statement(v, CONTENT_SELECT_STMT, &s); + if( rc!=SQLITE_ROW ) return rc; + + values = (const char **) malloc(v->nColumn * sizeof(const char *)); + for(i=0; inColumn; ++i){ + values[i] = string_dup((char*)sqlite3_column_text(s, i)); + } + + /* We expect only one row. We must execute another sqlite3_step() + * to complete the iteration; otherwise the table will remain locked. */ + rc = sqlite3_step(s); + if( rc==SQLITE_DONE ){ + *pValues = values; + return SQLITE_OK; + } + + freeStringArray(v->nColumn, values); + return rc; +} + +/* delete from %_content where rowid = [iRow ] */ +static int content_delete(fulltext_vtab *v, sqlite_int64 iRow){ + sqlite3_stmt *s; + int rc = sql_get_statement(v, CONTENT_DELETE_STMT, &s); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_int64(s, 1, iRow); + if( rc!=SQLITE_OK ) return rc; + + return sql_single_step_statement(v, CONTENT_DELETE_STMT, &s); +} + +/* select rowid, doclist from %_term + * where term = [pTerm] and segment = [iSegment] + * If found, returns SQLITE_ROW; the caller must free the + * returned doclist. If no rows found, returns SQLITE_DONE. */ +static int term_select(fulltext_vtab *v, const char *pTerm, int nTerm, + int iSegment, + sqlite_int64 *rowid, DocList *out){ + sqlite3_stmt *s; + int rc = sql_get_statement(v, TERM_SELECT_STMT, &s); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_text(s, 1, pTerm, nTerm, SQLITE_STATIC); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_int(s, 2, iSegment); + if( rc!=SQLITE_OK ) return rc; + + rc = sql_step_statement(v, TERM_SELECT_STMT, &s); + if( rc!=SQLITE_ROW ) return rc; + + *rowid = sqlite3_column_int64(s, 0); + docListInit(out, DL_DEFAULT, + sqlite3_column_blob(s, 1), sqlite3_column_bytes(s, 1)); + + /* We expect only one row. We must execute another sqlite3_step() + * to complete the iteration; otherwise the table will remain locked. */ + rc = sqlite3_step(s); + return rc==SQLITE_DONE ? SQLITE_ROW : rc; +} + +/* Load the segment doclists for term pTerm and merge them in +** appropriate order into out. Returns SQLITE_OK if successful. If +** there are no segments for pTerm, successfully returns an empty +** doclist in out. +** +** Each document consists of 1 or more "columns". The number of +** columns is v->nColumn. If iColumn==v->nColumn, then return +** position information about all columns. If iColumnnColumn, +** then only return position information about the iColumn-th column +** (where the first column is 0). +*/ +static int term_select_all( + fulltext_vtab *v, /* The fulltext index we are querying against */ + int iColumn, /* If nColumn ){ /* querying a single column */ + docListRestrictColumn(&old, iColumn); + } + + /* doclist contains the newer data, so write it over old. Then + ** steal accumulated result for doclist. + */ + docListAccumulate(&old, &doclist); + docListDestroy(&doclist); + doclist = old; + } + if( rc!=SQLITE_DONE ){ + docListDestroy(&doclist); + return rc; + } + + docListDiscardEmpty(&doclist); + *out = doclist; + return SQLITE_OK; +} + +/* insert into %_term (rowid, term, segment, doclist) + values ([piRowid], [pTerm], [iSegment], [doclist]) +** Lets sqlite select rowid if piRowid is NULL, else uses *piRowid. +** +** NOTE(shess) piRowid is IN, with values of "space of int64" plus +** null, it is not used to pass data back to the caller. +*/ +static int term_insert(fulltext_vtab *v, sqlite_int64 *piRowid, + const char *pTerm, int nTerm, + int iSegment, DocList *doclist){ + sqlite3_stmt *s; + int rc = sql_get_statement(v, TERM_INSERT_STMT, &s); + if( rc!=SQLITE_OK ) return rc; + + if( piRowid==NULL ){ + rc = sqlite3_bind_null(s, 1); + }else{ + rc = sqlite3_bind_int64(s, 1, *piRowid); + } + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_text(s, 2, pTerm, nTerm, SQLITE_STATIC); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_int(s, 3, iSegment); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_blob(s, 4, doclist->pData, doclist->nData, SQLITE_STATIC); + if( rc!=SQLITE_OK ) return rc; + + return sql_single_step_statement(v, TERM_INSERT_STMT, &s); +} + +/* update %_term set doclist = [doclist] where rowid = [rowid] */ +static int term_update(fulltext_vtab *v, sqlite_int64 rowid, + DocList *doclist){ + sqlite3_stmt *s; + int rc = sql_get_statement(v, TERM_UPDATE_STMT, &s); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_blob(s, 1, doclist->pData, doclist->nData, SQLITE_STATIC); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_int64(s, 2, rowid); + if( rc!=SQLITE_OK ) return rc; + + return sql_single_step_statement(v, TERM_UPDATE_STMT, &s); +} + +static int term_delete(fulltext_vtab *v, sqlite_int64 rowid){ + sqlite3_stmt *s; + int rc = sql_get_statement(v, TERM_DELETE_STMT, &s); + if( rc!=SQLITE_OK ) return rc; + + rc = sqlite3_bind_int64(s, 1, rowid); + if( rc!=SQLITE_OK ) return rc; + + return sql_single_step_statement(v, TERM_DELETE_STMT, &s); +} + +/* +** Free the memory used to contain a fulltext_vtab structure. +*/ +static void fulltext_vtab_destroy(fulltext_vtab *v){ + int iStmt, i; + + TRACE(("FTS2 Destroy %p\n", v)); + for( iStmt=0; iStmtpFulltextStatements[iStmt]!=NULL ){ + sqlite3_finalize(v->pFulltextStatements[iStmt]); + v->pFulltextStatements[iStmt] = NULL; + } + } + + if( v->pTokenizer!=NULL ){ + v->pTokenizer->pModule->xDestroy(v->pTokenizer); + v->pTokenizer = NULL; + } + + free(v->azColumn); + for(i = 0; i < v->nColumn; ++i) { + sqlite3_free(v->azContentColumn[i]); + } + free(v->azContentColumn); + free(v); +} + +/* +** Token types for parsing the arguments to xConnect or xCreate. +*/ +#define TOKEN_EOF 0 /* End of file */ +#define TOKEN_SPACE 1 /* Any kind of whitespace */ +#define TOKEN_ID 2 /* An identifier */ +#define TOKEN_STRING 3 /* A string literal */ +#define TOKEN_PUNCT 4 /* A single punctuation character */ + +/* +** If X is a character that can be used in an identifier then +** IdChar(X) will be true. Otherwise it is false. +** +** For ASCII, any character with the high-order bit set is +** allowed in an identifier. For 7-bit characters, +** sqlite3IsIdChar[X] must be 1. +** +** Ticket #1066. the SQL standard does not allow '$' in the +** middle of identfiers. But many SQL implementations do. +** SQLite will allow '$' in identifiers for compatibility. +** But the feature is undocumented. +*/ +static const char isIdChar[] = { +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ +}; +#define IdChar(C) (((c=C)&0x80)!=0 || (c>0x1f && isIdChar[c-0x20])) + + +/* +** Return the length of the token that begins at z[0]. +** Store the token type in *tokenType before returning. +*/ +static int getToken(const char *z, int *tokenType){ + int i, c; + switch( *z ){ + case 0: { + *tokenType = TOKEN_EOF; + return 0; + } + case ' ': case '\t': case '\n': case '\f': case '\r': { + for(i=1; isspace(z[i]); i++){} + *tokenType = TOKEN_SPACE; + return i; + } + case '\'': + case '"': { + int delim = z[0]; + for(i=1; (c=z[i])!=0; i++){ + if( c==delim ){ + if( z[i+1]==delim ){ + i++; + }else{ + break; + } + } + } + *tokenType = TOKEN_STRING; + return i + (c!=0); + } + case '[': { + for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} + *tokenType = TOKEN_ID; + return i; + } + default: { + if( !IdChar(*z) ){ + break; + } + for(i=1; IdChar(z[i]); i++){} + *tokenType = TOKEN_ID; + return i; + } + } + *tokenType = TOKEN_PUNCT; + return 1; +} + +/* +** A token extracted from a string is an instance of the following +** structure. +*/ +typedef struct Token { + const char *z; /* Pointer to token text. Not '\000' terminated */ + short int n; /* Length of the token text in bytes. */ +} Token; + +/* +** Given a input string (which is really one of the argv[] parameters +** passed into xConnect or xCreate) split the string up into tokens. +** Return an array of pointers to '\000' terminated strings, one string +** for each non-whitespace token. +** +** The returned array is terminated by a single NULL pointer. +** +** Space to hold the returned array is obtained from a single +** malloc and should be freed by passing the return value to free(). +** The individual strings within the token list are all a part of +** the single memory allocation and will all be freed at once. +*/ +static char **tokenizeString(const char *z, int *pnToken){ + int nToken = 0; + Token *aToken = malloc( strlen(z) * sizeof(aToken[0]) ); + int n = 1; + int e, i; + int totalSize = 0; + char **azToken; + char *zCopy; + while( n>0 ){ + n = getToken(z, &e); + if( e!=TOKEN_SPACE ){ + aToken[nToken].z = z; + aToken[nToken].n = n; + nToken++; + totalSize += n+1; + } + z += n; + } + azToken = (char**)malloc( nToken*sizeof(char*) + totalSize ); + zCopy = (char*)&azToken[nToken]; + nToken--; + for(i=0; i=0 ){ + azIn[j] = azIn[i]; + } + j++; + } + } + azIn[j] = 0; + } +} + + +/* +** Find the first alphanumeric token in the string zIn. Null-terminate +** this token. Remove any quotation marks. And return a pointer to +** the result. +*/ +static char *firstToken(char *zIn, char **pzTail){ + int i, n, ttype; + i = 0; + while(1){ + n = getToken(zIn, &ttype); + if( ttype==TOKEN_SPACE ){ + zIn += n; + }else if( ttype==TOKEN_EOF ){ + *pzTail = zIn; + return 0; + }else{ + zIn[n] = 0; + *pzTail = &zIn[1]; + dequoteString(zIn); + return zIn; + } + } + /*NOTREACHED*/ +} + +/* Return true if... +** +** * s begins with the string t, ignoring case +** * s is longer than t +** * The first character of s beyond t is not a alphanumeric +** +** Ignore leading space in *s. +** +** To put it another way, return true if the first token of +** s[] is t[]. +*/ +static int startsWith(const char *s, const char *t){ + while( isspace(*s) ){ s++; } + while( *t ){ + if( tolower(*s++)!=tolower(*t++) ) return 0; + } + return *s!='_' && !isalnum(*s); +} + +/* +** An instance of this structure defines the "spec" of a +** full text index. This structure is populated by parseSpec +** and use by fulltextConnect and fulltextCreate. +*/ +typedef struct TableSpec { + const char *zName; /* Name of the full-text index */ + int nColumn; /* Number of columns to be indexed */ + char **azColumn; /* Original names of columns to be indexed */ + char **azContentColumn; /* Column names for %_content */ + char **azTokenizer; /* Name of tokenizer and its arguments */ +} TableSpec; + +/* +** Reclaim all of the memory used by a TableSpec +*/ +void clearTableSpec(TableSpec *p) { + free(p->azColumn); + free(p->azContentColumn); + free(p->azTokenizer); +} + +/* Parse a CREATE VIRTUAL TABLE statement, which looks like this: + * + * CREATE VIRTUAL TABLE email + * USING fts2(subject, body, tokenize mytokenizer(myarg)) + * + * We return parsed information in a TableSpec structure. + * + */ +int parseSpec(TableSpec *pSpec, int argc, const char *const*argv, char**pzErr){ + int i, j, n; + char *z, *zDummy; + char **azArg; + const char *zTokenizer = 0; /* argv[] entry describing the tokenizer */ + + assert( argc>=3 ); + /* Current interface: + ** argv[0] - module name + ** argv[1] - database name + ** argv[2] - table name + ** argv[3..] - columns, optionally followed by tokenizer specification + ** and snippet delimiters specification. + */ + + /* Make a copy of the complete argv[][] array in a single allocation. + ** The argv[][] array is read-only and transient. We can write to the + ** copy in order to modify things and the copy is persistent. + */ + memset(pSpec, 0, sizeof(*pSpec)); + for(i=n=0; izName = azArg[2]; + pSpec->nColumn = 0; + pSpec->azColumn = azArg; + zTokenizer = "tokenize simple"; + for(i=3, j=0; inColumn] = firstToken(azArg[i], &zDummy); + pSpec->nColumn++; + } + } + if( pSpec->nColumn==0 ){ + azArg[0] = "content"; + pSpec->nColumn = 1; + } + + /* + ** Construct the list of content column names. + ** + ** Each content column name will be of the form cNNAAAA + ** where NN is the column number and AAAA is the sanitized + ** column name. "sanitized" means that special characters are + ** converted to "_". The cNN prefix guarantees that all column + ** names are unique. + ** + ** The AAAA suffix is not strictly necessary. It is included + ** for the convenience of people who might examine the generated + ** %_content table and wonder what the columns are used for. + */ + pSpec->azContentColumn = malloc( pSpec->nColumn * sizeof(char *) ); + if( pSpec->azContentColumn==0 ){ + clearTableSpec(pSpec); + return SQLITE_NOMEM; + } + for(i=0; inColumn; i++){ + char *p; + pSpec->azContentColumn[i] = sqlite3_mprintf("c%d%s", i, azArg[i]); + for (p = pSpec->azContentColumn[i]; *p ; ++p) { + if( !isalnum(*p) ) *p = '_'; + } + } + + /* + ** Parse the tokenizer specification string. + */ + pSpec->azTokenizer = tokenizeString(zTokenizer, &n); + tokenListToIdList(pSpec->azTokenizer); + + return SQLITE_OK; +} + +/* +** Generate a CREATE TABLE statement that describes the schema of +** the virtual table. Return a pointer to this schema string. +** +** Space is obtained from sqlite3_mprintf() and should be freed +** using sqlite3_free(). +*/ +static char *fulltextSchema( + int nColumn, /* Number of columns */ + const char *const* azColumn, /* List of columns */ + const char *zTableName /* Name of the table */ +){ + int i; + char *zSchema, *zNext; + const char *zSep = "("; + zSchema = sqlite3_mprintf("CREATE TABLE x"); + for(i=0; ibase */ + v->db = db; + v->zName = spec->zName; /* Freed when azColumn is freed */ + v->nColumn = spec->nColumn; + v->azContentColumn = spec->azContentColumn; + spec->azContentColumn = 0; + v->azColumn = spec->azColumn; + spec->azColumn = 0; + + if( spec->azTokenizer==0 ){ + return SQLITE_NOMEM; + } + /* TODO(shess) For now, add new tokenizers as else if clauses. */ + if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){ + sqlite3Fts2SimpleTokenizerModule(&m); + }else if( startsWith(spec->azTokenizer[0], "porter") ){ + sqlite3Fts2PorterTokenizerModule(&m); + }else{ + *pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]); + rc = SQLITE_ERROR; + goto err; + } + for(n=0; spec->azTokenizer[n]; n++){} + if( n ){ + rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1], + &v->pTokenizer); + }else{ + rc = m->xCreate(0, 0, &v->pTokenizer); + } + if( rc!=SQLITE_OK ) goto err; + v->pTokenizer->pModule = m; + + /* TODO: verify the existence of backing tables foo_content, foo_term */ + + schema = fulltextSchema(v->nColumn, (const char*const*)v->azColumn, + spec->zName); + rc = sqlite3_declare_vtab(db, schema); + sqlite3_free(schema); + if( rc!=SQLITE_OK ) goto err; + + memset(v->pFulltextStatements, 0, sizeof(v->pFulltextStatements)); + + *ppVTab = &v->base; + TRACE(("FTS2 Connect %p\n", v)); + + return rc; + +err: + fulltext_vtab_destroy(v); + return rc; +} + +static int fulltextConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVTab, + char **pzErr +){ + TableSpec spec; + int rc = parseSpec(&spec, argc, argv, pzErr); + if( rc!=SQLITE_OK ) return rc; + + rc = constructVtab(db, &spec, ppVTab, pzErr); + clearTableSpec(&spec); + return rc; +} + + /* The %_content table holds the text of each document, with + ** the rowid used as the docid. + ** + ** The %_term table maps each term to a document list blob + ** containing elements sorted by ascending docid, each element + ** encoded as: + ** + ** docid varint-encoded + ** token elements: + ** position+1 varint-encoded as delta from previous position + ** start offset varint-encoded as delta from previous start offset + ** end offset varint-encoded as delta from start offset + ** + ** The sentinel position of 0 indicates the end of the token list. + ** + ** Additionally, doclist blobs are chunked into multiple segments, + ** using segment to order the segments. New elements are added to + ** the segment at segment 0, until it exceeds CHUNK_MAX. Then + ** segment 0 is deleted, and the doclist is inserted at segment 1. + ** If there is already a doclist at segment 1, the segment 0 doclist + ** is merged with it, the segment 1 doclist is deleted, and the + ** merged doclist is inserted at segment 2, repeating those + ** operations until an insert succeeds. + ** + ** Since this structure doesn't allow us to update elements in place + ** in case of deletion or update, these are simply written to + ** segment 0 (with an empty token list in case of deletion), with + ** docListAccumulate() taking care to retain lower-segment + ** information in preference to higher-segment information. + */ + /* TODO(shess) Provide a VACUUM type operation which both removes + ** deleted elements which are no longer necessary, and duplicated + ** elements. I suspect this will probably not be necessary in + ** practice, though. + */ +static int fulltextCreate(sqlite3 *db, void *pAux, + int argc, const char * const *argv, + sqlite3_vtab **ppVTab, char **pzErr){ + int rc; + TableSpec spec; + StringBuffer schema; + TRACE(("FTS2 Create\n")); + + rc = parseSpec(&spec, argc, argv, pzErr); + if( rc!=SQLITE_OK ) return rc; + + initStringBuffer(&schema); + append(&schema, "CREATE TABLE %_content("); + appendList(&schema, spec.nColumn, spec.azContentColumn); + append(&schema, ")"); + rc = sql_exec(db, spec.zName, schema.s); + free(schema.s); + if( rc!=SQLITE_OK ) goto out; + + rc = sql_exec(db, spec.zName, + "create table %_term(term text, segment integer, doclist blob, " + "primary key(term, segment));"); + if( rc!=SQLITE_OK ) goto out; + + rc = constructVtab(db, &spec, ppVTab, pzErr); + +out: + clearTableSpec(&spec); + return rc; +} + +/* Decide how to handle an SQL query. */ +static int fulltextBestIndex(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ + int i; + + for(i=0; inConstraint; ++i){ + const struct sqlite3_index_constraint *pConstraint; + pConstraint = &pInfo->aConstraint[i]; + if( pConstraint->usable ) { + if( pConstraint->iColumn==-1 && + pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ ){ + pInfo->idxNum = QUERY_ROWID; /* lookup by rowid */ + } else if( pConstraint->iColumn>=0 && + pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + /* full-text search */ + pInfo->idxNum = QUERY_FULLTEXT + pConstraint->iColumn; + } else continue; + + pInfo->aConstraintUsage[i].argvIndex = 1; + pInfo->aConstraintUsage[i].omit = 1; + + /* An arbitrary value for now. + * TODO: Perhaps rowid matches should be considered cheaper than + * full-text searches. */ + pInfo->estimatedCost = 1.0; + + return SQLITE_OK; + } + } + pInfo->idxNum = QUERY_GENERIC; + TRACE(("FTS2 BestIndex\n")); + return SQLITE_OK; +} + +static int fulltextDisconnect(sqlite3_vtab *pVTab){ + TRACE(("FTS2 Disconnect %p\n", pVTab)); + fulltext_vtab_destroy((fulltext_vtab *)pVTab); + return SQLITE_OK; +} + +static int fulltextDestroy(sqlite3_vtab *pVTab){ + fulltext_vtab *v = (fulltext_vtab *)pVTab; + int rc; + + TRACE(("FTS2 Destroy %p\n", pVTab)); + rc = sql_exec(v->db, v->zName, + "drop table %_content; drop table %_term"); + if( rc!=SQLITE_OK ) return rc; + + fulltext_vtab_destroy((fulltext_vtab *)pVTab); + return SQLITE_OK; +} + +static int fulltextOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + fulltext_cursor *c; + + c = (fulltext_cursor *) calloc(sizeof(fulltext_cursor), 1); + /* sqlite will initialize c->base */ + *ppCursor = &c->base; + TRACE(("FTS2 Open %p: %p\n", pVTab, c)); + + return SQLITE_OK; +} + + +/* Free all of the dynamically allocated memory held by *q +*/ +static void queryClear(Query *q){ + int i; + for(i = 0; i < q->nTerms; ++i){ + free(q->pTerms[i].pTerm); + } + free(q->pTerms); + memset(q, 0, sizeof(*q)); +} + +/* Free all of the dynamically allocated memory held by the +** Snippet +*/ +static void snippetClear(Snippet *p){ + free(p->aMatch); + free(p->zOffset); + free(p->zSnippet); + memset(p, 0, sizeof(*p)); +} +/* +** Append a single entry to the p->aMatch[] log. +*/ +static void snippetAppendMatch( + Snippet *p, /* Append the entry to this snippet */ + int iCol, int iTerm, /* The column and query term */ + int iStart, int nByte /* Offset and size of the match */ +){ + int i; + struct snippetMatch *pMatch; + if( p->nMatch+1>=p->nAlloc ){ + p->nAlloc = p->nAlloc*2 + 10; + p->aMatch = realloc(p->aMatch, p->nAlloc*sizeof(p->aMatch[0]) ); + if( p->aMatch==0 ){ + p->nMatch = 0; + p->nAlloc = 0; + return; + } + } + i = p->nMatch++; + pMatch = &p->aMatch[i]; + pMatch->iCol = iCol; + pMatch->iTerm = iTerm; + pMatch->iStart = iStart; + pMatch->nByte = nByte; +} + +/* +** Sizing information for the circular buffer used in snippetOffsetsOfColumn() +*/ +#define FTS2_ROTOR_SZ (32) +#define FTS2_ROTOR_MASK (FTS2_ROTOR_SZ-1) + +/* +** Add entries to pSnippet->aMatch[] for every match that occurs against +** document zDoc[0..nDoc-1] which is stored in column iColumn. +*/ +static void snippetOffsetsOfColumn( + Query *pQuery, + Snippet *pSnippet, + int iColumn, + const char *zDoc, + int nDoc +){ + const sqlite3_tokenizer_module *pTModule; /* The tokenizer module */ + sqlite3_tokenizer *pTokenizer; /* The specific tokenizer */ + sqlite3_tokenizer_cursor *pTCursor; /* Tokenizer cursor */ + fulltext_vtab *pVtab; /* The full text index */ + int nColumn; /* Number of columns in the index */ + const QueryTerm *aTerm; /* Query string terms */ + int nTerm; /* Number of query string terms */ + int i, j; /* Loop counters */ + int rc; /* Return code */ + unsigned int match, prevMatch; /* Phrase search bitmasks */ + const char *zToken; /* Next token from the tokenizer */ + int nToken; /* Size of zToken */ + int iBegin, iEnd, iPos; /* Offsets of beginning and end */ + + /* The following variables keep a circular buffer of the last + ** few tokens */ + unsigned int iRotor = 0; /* Index of current token */ + int iRotorBegin[FTS2_ROTOR_SZ]; /* Beginning offset of token */ + int iRotorLen[FTS2_ROTOR_SZ]; /* Length of token */ + + pVtab = pQuery->pFts; + nColumn = pVtab->nColumn; + pTokenizer = pVtab->pTokenizer; + pTModule = pTokenizer->pModule; + rc = pTModule->xOpen(pTokenizer, zDoc, nDoc, &pTCursor); + if( rc ) return; + pTCursor->pTokenizer = pTokenizer; + aTerm = pQuery->pTerms; + nTerm = pQuery->nTerms; + if( nTerm>=FTS2_ROTOR_SZ ){ + nTerm = FTS2_ROTOR_SZ - 1; + } + prevMatch = 0; + while(1){ + rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); + if( rc ) break; + iRotorBegin[iRotor&FTS2_ROTOR_MASK] = iBegin; + iRotorLen[iRotor&FTS2_ROTOR_MASK] = iEnd-iBegin; + match = 0; + for(i=0; i=0 && iCol1 && (prevMatch & (1<=0; j--){ + int k = (iRotor-j) & FTS2_ROTOR_MASK; + snippetAppendMatch(pSnippet, iColumn, i-j, + iRotorBegin[k], iRotorLen[k]); + } + } + } + prevMatch = match<<1; + iRotor++; + } + pTModule->xClose(pTCursor); +} + + +/* +** Compute all offsets for the current row of the query. +** If the offsets have already been computed, this routine is a no-op. +*/ +static void snippetAllOffsets(fulltext_cursor *p){ + int nColumn; + int iColumn, i; + int iFirst, iLast; + fulltext_vtab *pFts; + + if( p->snippet.nMatch ) return; + if( p->q.nTerms==0 ) return; + pFts = p->q.pFts; + nColumn = pFts->nColumn; + iColumn = p->iCursorType; + if( iColumn<0 || iColumn>=nColumn ){ + iFirst = 0; + iLast = nColumn-1; + }else{ + iFirst = iColumn; + iLast = iColumn; + } + for(i=iFirst; i<=iLast; i++){ + const char *zDoc; + int nDoc; + zDoc = (const char*)sqlite3_column_text(p->pStmt, i+1); + nDoc = sqlite3_column_bytes(p->pStmt, i+1); + snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc); + } +} + +/* +** Convert the information in the aMatch[] array of the snippet +** into the string zOffset[0..nOffset-1]. +*/ +static void snippetOffsetText(Snippet *p){ + int i; + int cnt = 0; + StringBuffer sb; + char zBuf[200]; + if( p->zOffset ) return; + initStringBuffer(&sb); + for(i=0; inMatch; i++){ + struct snippetMatch *pMatch = &p->aMatch[i]; + zBuf[0] = ' '; + sprintf(&zBuf[cnt>0], "%d %d %d %d", pMatch->iCol, + pMatch->iTerm, pMatch->iStart, pMatch->nByte); + append(&sb, zBuf); + cnt++; + } + p->zOffset = sb.s; + p->nOffset = sb.len; +} + +/* +** zDoc[0..nDoc-1] is phrase of text. aMatch[0..nMatch-1] are a set +** of matching words some of which might be in zDoc. zDoc is column +** number iCol. +** +** iBreak is suggested spot in zDoc where we could begin or end an +** excerpt. Return a value similar to iBreak but possibly adjusted +** to be a little left or right so that the break point is better. +*/ +static int wordBoundary( + int iBreak, /* The suggested break point */ + const char *zDoc, /* Document text */ + int nDoc, /* Number of bytes in zDoc[] */ + struct snippetMatch *aMatch, /* Matching words */ + int nMatch, /* Number of entries in aMatch[] */ + int iCol /* The column number for zDoc[] */ +){ + int i; + if( iBreak<=10 ){ + return 0; + } + if( iBreak>=nDoc-10 ){ + return nDoc; + } + for(i=0; i0 && aMatch[i-1].iStart+aMatch[i-1].nByte>=iBreak ){ + return aMatch[i-1].iStart; + } + } + for(i=1; i<=10; i++){ + if( isspace(zDoc[iBreak-i]) ){ + return iBreak - i + 1; + } + if( isspace(zDoc[iBreak+i]) ){ + return iBreak + i + 1; + } + } + return iBreak; +} + +/* +** If the StringBuffer does not end in white space, add a single +** space character to the end. +*/ +static void appendWhiteSpace(StringBuffer *p){ + if( p->len==0 ) return; + if( isspace(p->s[p->len-1]) ) return; + append(p, " "); +} + +/* +** Remove white space from teh end of the StringBuffer +*/ +static void trimWhiteSpace(StringBuffer *p){ + while( p->len>0 && isspace(p->s[p->len-1]) ){ + p->len--; + } +} + + + +/* +** Allowed values for Snippet.aMatch[].snStatus +*/ +#define SNIPPET_IGNORE 0 /* It is ok to omit this match from the snippet */ +#define SNIPPET_DESIRED 1 /* We want to include this match in the snippet */ + +/* +** Generate the text of a snippet. +*/ +static void snippetText( + fulltext_cursor *pCursor, /* The cursor we need the snippet for */ + const char *zStartMark, /* Markup to appear before each match */ + const char *zEndMark, /* Markup to appear after each match */ + const char *zEllipsis /* Ellipsis mark */ +){ + int i, j; + struct snippetMatch *aMatch; + int nMatch; + int nDesired; + StringBuffer sb; + int tailCol; + int tailOffset; + int iCol; + int nDoc; + const char *zDoc; + int iStart, iEnd; + int tailEllipsis = 0; + int iMatch; + + + free(pCursor->snippet.zSnippet); + pCursor->snippet.zSnippet = 0; + aMatch = pCursor->snippet.aMatch; + nMatch = pCursor->snippet.nMatch; + initStringBuffer(&sb); + + for(i=0; iq.nTerms; i++){ + for(j=0; j0; i++){ + if( aMatch[i].snStatus!=SNIPPET_DESIRED ) continue; + nDesired--; + iCol = aMatch[i].iCol; + zDoc = (const char*)sqlite3_column_text(pCursor->pStmt, iCol+1); + nDoc = sqlite3_column_bytes(pCursor->pStmt, iCol+1); + iStart = aMatch[i].iStart - 40; + iStart = wordBoundary(iStart, zDoc, nDoc, aMatch, nMatch, iCol); + if( iStart<=10 ){ + iStart = 0; + } + if( iCol==tailCol && iStart<=tailOffset+20 ){ + iStart = tailOffset; + } + if( (iCol!=tailCol && tailCol>=0) || iStart!=tailOffset ){ + trimWhiteSpace(&sb); + appendWhiteSpace(&sb); + append(&sb, zEllipsis); + appendWhiteSpace(&sb); + } + iEnd = aMatch[i].iStart + aMatch[i].nByte + 40; + iEnd = wordBoundary(iEnd, zDoc, nDoc, aMatch, nMatch, iCol); + if( iEnd>=nDoc-10 ){ + iEnd = nDoc; + tailEllipsis = 0; + }else{ + tailEllipsis = 1; + } + while( iMatchsnippet.zSnippet = sb.s; + pCursor->snippet.nSnippet = sb.len; +} + + +/* +** Close the cursor. For additional information see the documentation +** on the xClose method of the virtual table interface. +*/ +static int fulltextClose(sqlite3_vtab_cursor *pCursor){ + fulltext_cursor *c = (fulltext_cursor *) pCursor; + TRACE(("FTS2 Close %p\n", c)); + sqlite3_finalize(c->pStmt); + queryClear(&c->q); + snippetClear(&c->snippet); + if( c->result.pDoclist!=NULL ){ + docListDelete(c->result.pDoclist); + } + free(c); + return SQLITE_OK; +} + +static int fulltextNext(sqlite3_vtab_cursor *pCursor){ + fulltext_cursor *c = (fulltext_cursor *) pCursor; + sqlite_int64 iDocid; + int rc; + + TRACE(("FTS2 Next %p\n", pCursor)); + snippetClear(&c->snippet); + if( c->iCursorType < QUERY_FULLTEXT ){ + /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ + rc = sqlite3_step(c->pStmt); + switch( rc ){ + case SQLITE_ROW: + c->eof = 0; + return SQLITE_OK; + case SQLITE_DONE: + c->eof = 1; + return SQLITE_OK; + default: + c->eof = 1; + return rc; + } + } else { /* full-text query */ + rc = sqlite3_reset(c->pStmt); + if( rc!=SQLITE_OK ) return rc; + + iDocid = nextDocid(&c->result); + if( iDocid==0 ){ + c->eof = 1; + return SQLITE_OK; + } + rc = sqlite3_bind_int64(c->pStmt, 1, iDocid); + if( rc!=SQLITE_OK ) return rc; + /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */ + rc = sqlite3_step(c->pStmt); + if( rc==SQLITE_ROW ){ /* the case we expect */ + c->eof = 0; + return SQLITE_OK; + } + /* an error occurred; abort */ + return rc==SQLITE_DONE ? SQLITE_ERROR : rc; + } +} + + +/* Return a DocList corresponding to the query term *pTerm. If *pTerm +** is the first term of a phrase query, go ahead and evaluate the phrase +** query and return the doclist for the entire phrase query. +** +** The result is stored in pTerm->doclist. +*/ +static int docListOfTerm( + fulltext_vtab *v, /* The full text index */ + int iColumn, /* column to restrict to. No restrition if >=nColumn */ + QueryTerm *pQTerm, /* Term we are looking for, or 1st term of a phrase */ + DocList **ppResult /* Write the result here */ +){ + DocList *pLeft, *pRight, *pNew; + int i, rc; + + pLeft = docListNew(DL_POSITIONS); + rc = term_select_all(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pLeft); + if( rc ) return rc; + for(i=1; i<=pQTerm->nPhrase; i++){ + pRight = docListNew(DL_POSITIONS); + rc = term_select_all(v, iColumn, pQTerm[i].pTerm, pQTerm[i].nTerm, pRight); + if( rc ){ + docListDelete(pLeft); + return rc; + } + pNew = docListNew(inPhrase ? DL_POSITIONS : DL_DOCIDS); + docListPhraseMerge(pLeft, pRight, pNew); + docListDelete(pLeft); + docListDelete(pRight); + pLeft = pNew; + } + *ppResult = pLeft; + return SQLITE_OK; +} + +/* Add a new term pTerm[0..nTerm-1] to the query *q. +*/ +static void queryAdd(Query *q, const char *pTerm, int nTerm){ + QueryTerm *t; + ++q->nTerms; + q->pTerms = realloc(q->pTerms, q->nTerms * sizeof(q->pTerms[0])); + if( q->pTerms==0 ){ + q->nTerms = 0; + return; + } + t = &q->pTerms[q->nTerms - 1]; + memset(t, 0, sizeof(*t)); + t->pTerm = malloc(nTerm+1); + memcpy(t->pTerm, pTerm, nTerm); + t->pTerm[nTerm] = 0; + t->nTerm = nTerm; + t->isOr = q->nextIsOr; + q->nextIsOr = 0; + t->iColumn = q->nextColumn; + q->nextColumn = q->dfltColumn; +} + +/* +** Check to see if the string zToken[0...nToken-1] matches any +** column name in the virtual table. If it does, +** return the zero-indexed column number. If not, return -1. +*/ +static int checkColumnSpecifier( + fulltext_vtab *pVtab, /* The virtual table */ + const char *zToken, /* Text of the token */ + int nToken /* Number of characters in the token */ +){ + int i; + for(i=0; inColumn; i++){ + if( memcmp(pVtab->azColumn[i], zToken, nToken)==0 + && pVtab->azColumn[i][nToken]==0 ){ + return i; + } + } + return -1; +} + +/* +** Parse the text at pSegment[0..nSegment-1]. Add additional terms +** to the query being assemblied in pQuery. +** +** inPhrase is true if pSegment[0..nSegement-1] is contained within +** double-quotes. If inPhrase is true, then the first term +** is marked with the number of terms in the phrase less one and +** OR and "-" syntax is ignored. If inPhrase is false, then every +** term found is marked with nPhrase=0 and OR and "-" syntax is significant. +*/ +static int tokenizeSegment( + sqlite3_tokenizer *pTokenizer, /* The tokenizer to use */ + const char *pSegment, int nSegment, /* Query expression being parsed */ + int inPhrase, /* True if within "..." */ + Query *pQuery /* Append results here */ +){ + const sqlite3_tokenizer_module *pModule = pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCursor; + int firstIndex = pQuery->nTerms; + int iCol; + int nTerm = 1; + + int rc = pModule->xOpen(pTokenizer, pSegment, nSegment, &pCursor); + if( rc!=SQLITE_OK ) return rc; + pCursor->pTokenizer = pTokenizer; + + while( 1 ){ + const char *pToken; + int nToken, iBegin, iEnd, iPos; + + rc = pModule->xNext(pCursor, + &pToken, &nToken, + &iBegin, &iEnd, &iPos); + if( rc!=SQLITE_OK ) break; + if( !inPhrase && + pSegment[iEnd]==':' && + (iCol = checkColumnSpecifier(pQuery->pFts, pToken, nToken))>=0 ){ + pQuery->nextColumn = iCol; + continue; + } + if( !inPhrase && pQuery->nTerms>0 && nToken==2 + && pSegment[iBegin]=='O' && pSegment[iBegin+1]=='R' ){ + pQuery->nextIsOr = 1; + continue; + } + queryAdd(pQuery, pToken, nToken); + if( !inPhrase && iBegin>0 && pSegment[iBegin-1]=='-' ){ + pQuery->pTerms[pQuery->nTerms-1].isNot = 1; + } + pQuery->pTerms[pQuery->nTerms-1].iPhrase = nTerm; + if( inPhrase ){ + nTerm++; + } + } + + if( inPhrase && pQuery->nTerms>firstIndex ){ + pQuery->pTerms[firstIndex].nPhrase = pQuery->nTerms - firstIndex - 1; + } + + return pModule->xClose(pCursor); +} + +/* Parse a query string, yielding a Query object pQuery. +** +** The calling function will need to queryClear() to clean up +** the dynamically allocated memory held by pQuery. +*/ +static int parseQuery( + fulltext_vtab *v, /* The fulltext index */ + const char *zInput, /* Input text of the query string */ + int nInput, /* Size of the input text */ + int dfltColumn, /* Default column of the index to match against */ + Query *pQuery /* Write the parse results here. */ +){ + int iInput, inPhrase = 0; + + if( zInput==0 ) nInput = 0; + if( nInput<0 ) nInput = strlen(zInput); + pQuery->nTerms = 0; + pQuery->pTerms = NULL; + pQuery->nextIsOr = 0; + pQuery->nextColumn = dfltColumn; + pQuery->dfltColumn = dfltColumn; + pQuery->pFts = v; + + for(iInput=0; iInputiInput ){ + tokenizeSegment(v->pTokenizer, zInput+iInput, i-iInput, inPhrase, + pQuery); + } + iInput = i; + if( i=nColumn +** they are allowed to match against any column. +*/ +static int fulltextQuery( + fulltext_vtab *v, /* The full text index */ + int iColumn, /* Match against this column by default */ + const char *zInput, /* The query string */ + int nInput, /* Number of bytes in zInput[] */ + DocList **pResult, /* Write the result doclist here */ + Query *pQuery /* Put parsed query string here */ +){ + int i, iNext, rc; + DocList *pLeft = NULL; + DocList *pRight, *pNew, *pOr; + int nNot = 0; + QueryTerm *aTerm; + + rc = parseQuery(v, zInput, nInput, iColumn, pQuery); + if( rc!=SQLITE_OK ) return rc; + + /* Merge AND terms. */ + aTerm = pQuery->pTerms; + for(i = 0; inTerms; i=iNext){ + if( aTerm[i].isNot ){ + /* Handle all NOT terms in a separate pass */ + nNot++; + iNext = i + aTerm[i].nPhrase+1; + continue; + } + iNext = i + aTerm[i].nPhrase + 1; + rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight); + if( rc ){ + queryClear(pQuery); + return rc; + } + while( iNextnTerms && aTerm[iNext].isOr ){ + rc = docListOfTerm(v, aTerm[iNext].iColumn, &aTerm[iNext], &pOr); + iNext += aTerm[iNext].nPhrase + 1; + if( rc ){ + queryClear(pQuery); + return rc; + } + pNew = docListNew(DL_DOCIDS); + docListOrMerge(pRight, pOr, pNew); + docListDelete(pRight); + docListDelete(pOr); + pRight = pNew; + } + if( pLeft==0 ){ + pLeft = pRight; + }else{ + pNew = docListNew(DL_DOCIDS); + docListAndMerge(pLeft, pRight, pNew); + docListDelete(pRight); + docListDelete(pLeft); + pLeft = pNew; + } + } + + if( nNot && pLeft==0 ){ + /* We do not yet know how to handle a query of only NOT terms */ + return SQLITE_ERROR; + } + + /* Do the EXCEPT terms */ + for(i=0; inTerms; i += aTerm[i].nPhrase + 1){ + if( !aTerm[i].isNot ) continue; + rc = docListOfTerm(v, aTerm[i].iColumn, &aTerm[i], &pRight); + if( rc ){ + queryClear(pQuery); + docListDelete(pLeft); + return rc; + } + pNew = docListNew(DL_DOCIDS); + docListExceptMerge(pLeft, pRight, pNew); + docListDelete(pRight); + docListDelete(pLeft); + pLeft = pNew; + } + + *pResult = pLeft; + return rc; +} + +/* +** This is the xFilter interface for the virtual table. See +** the virtual table xFilter method documentation for additional +** information. +** +** If idxNum==QUERY_GENERIC then do a full table scan against +** the %_content table. +** +** If idxNum==QUERY_ROWID then do a rowid lookup for a single entry +** in the %_content table. +** +** If idxNum>=QUERY_FULLTEXT then use the full text index. The +** column on the left-hand side of the MATCH operator is column +** number idxNum-QUERY_FULLTEXT, 0 indexed. argv[0] is the right-hand +** side of the MATCH operator. +*/ +static int fulltextFilter( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, const char *idxStr, /* Which indexing scheme to use */ + int argc, sqlite3_value **argv /* Arguments for the indexing scheme */ +){ + fulltext_cursor *c = (fulltext_cursor *) pCursor; + fulltext_vtab *v = cursor_vtab(c); + int rc; + char *zSql; + + TRACE(("FTS2 Filter %p\n",pCursor)); + + zSql = sqlite3_mprintf("select rowid, * from %%_content %s", + idxNum==QUERY_GENERIC ? "" : "where rowid=?"); + rc = sql_prepare(v->db, v->zName, &c->pStmt, zSql); + sqlite3_free(zSql); + if( rc!=SQLITE_OK ) goto out; + + c->iCursorType = idxNum; + switch( idxNum ){ + case QUERY_GENERIC: + break; + + case QUERY_ROWID: + rc = sqlite3_bind_int64(c->pStmt, 1, sqlite3_value_int64(argv[0])); + if( rc!=SQLITE_OK ) goto out; + break; + + default: /* full-text search */ + { + const char *zQuery = (const char *)sqlite3_value_text(argv[0]); + DocList *pResult; + assert( idxNum<=QUERY_FULLTEXT+v->nColumn); + assert( argc==1 ); + queryClear(&c->q); + rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &pResult, &c->q); + if( rc!=SQLITE_OK ) goto out; + readerInit(&c->result, pResult); + break; + } + } + + rc = fulltextNext(pCursor); + +out: + return rc; +} + +/* This is the xEof method of the virtual table. The SQLite core +** calls this routine to find out if it has reached the end of +** a query's results set. +*/ +static int fulltextEof(sqlite3_vtab_cursor *pCursor){ + fulltext_cursor *c = (fulltext_cursor *) pCursor; + return c->eof; +} + +/* This is the xColumn method of the virtual table. The SQLite +** core calls this method during a query when it needs the value +** of a column from the virtual table. This method needs to use +** one of the sqlite3_result_*() routines to store the requested +** value back in the pContext. +*/ +static int fulltextColumn(sqlite3_vtab_cursor *pCursor, + sqlite3_context *pContext, int idxCol){ + fulltext_cursor *c = (fulltext_cursor *) pCursor; + fulltext_vtab *v = cursor_vtab(c); + + if( idxColnColumn ){ + sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1); + sqlite3_result_value(pContext, pVal); + }else if( idxCol==v->nColumn ){ + /* The extra column whose name is the same as the table. + ** Return a blob which is a pointer to the cursor + */ + sqlite3_result_blob(pContext, &c, sizeof(c), SQLITE_TRANSIENT); + } + return SQLITE_OK; +} + +/* This is the xRowid method. The SQLite core calls this routine to +** retrive the rowid for the current row of the result set. The +** rowid should be written to *pRowid. +*/ +static int fulltextRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ + fulltext_cursor *c = (fulltext_cursor *) pCursor; + + *pRowid = sqlite3_column_int64(c->pStmt, 0); + return SQLITE_OK; +} + +/* Add all terms in [zText] to the given hash table. If [iColumn] > 0, + * we also store positions and offsets in the hash table using the given + * column number. */ +static int buildTerms(fulltext_vtab *v, fts2Hash *terms, sqlite_int64 iDocid, + const char *zText, int iColumn){ + sqlite3_tokenizer *pTokenizer = v->pTokenizer; + sqlite3_tokenizer_cursor *pCursor; + const char *pToken; + int nTokenBytes; + int iStartOffset, iEndOffset, iPosition; + int rc; + + rc = pTokenizer->pModule->xOpen(pTokenizer, zText, -1, &pCursor); + if( rc!=SQLITE_OK ) return rc; + + pCursor->pTokenizer = pTokenizer; + while( SQLITE_OK==pTokenizer->pModule->xNext(pCursor, + &pToken, &nTokenBytes, + &iStartOffset, &iEndOffset, + &iPosition) ){ + DocList *p; + + /* Positions can't be negative; we use -1 as a terminator internally. */ + if( iPosition<0 ){ + pTokenizer->pModule->xClose(pCursor); + return SQLITE_ERROR; + } + + p = fts2HashFind(terms, pToken, nTokenBytes); + if( p==NULL ){ + p = docListNew(DL_DEFAULT); + docListAddDocid(p, iDocid); + fts2HashInsert(terms, pToken, nTokenBytes, p); + } + if( iColumn>=0 ){ + docListAddPosOffset(p, iColumn, iPosition, iStartOffset, iEndOffset); + } + } + + /* TODO(shess) Check return? Should this be able to cause errors at + ** this point? Actually, same question about sqlite3_finalize(), + ** though one could argue that failure there means that the data is + ** not durable. *ponder* + */ + pTokenizer->pModule->xClose(pCursor); + return rc; +} + +/* Update the %_terms table to map the term [pTerm] to the given rowid. */ +static int index_insert_term(fulltext_vtab *v, const char *pTerm, int nTerm, + DocList *d){ + sqlite_int64 iIndexRow; + DocList doclist; + int iSegment = 0, rc; + + rc = term_select(v, pTerm, nTerm, iSegment, &iIndexRow, &doclist); + if( rc==SQLITE_DONE ){ + docListInit(&doclist, DL_DEFAULT, 0, 0); + docListUpdate(&doclist, d); + /* TODO(shess) Consider length(doclist)>CHUNK_MAX? */ + rc = term_insert(v, NULL, pTerm, nTerm, iSegment, &doclist); + goto err; + } + if( rc!=SQLITE_ROW ) return SQLITE_ERROR; + + docListUpdate(&doclist, d); + if( doclist.nData<=CHUNK_MAX ){ + rc = term_update(v, iIndexRow, &doclist); + goto err; + } + + /* Doclist doesn't fit, delete what's there, and accumulate + ** forward. + */ + rc = term_delete(v, iIndexRow); + if( rc!=SQLITE_OK ) goto err; + + /* Try to insert the doclist into a higher segment bucket. On + ** failure, accumulate existing doclist with the doclist from that + ** bucket, and put results in the next bucket. + */ + iSegment++; + while( (rc=term_insert(v, &iIndexRow, pTerm, nTerm, iSegment, + &doclist))!=SQLITE_OK ){ + sqlite_int64 iSegmentRow; + DocList old; + int rc2; + + /* Retain old error in case the term_insert() error was really an + ** error rather than a bounced insert. + */ + rc2 = term_select(v, pTerm, nTerm, iSegment, &iSegmentRow, &old); + if( rc2!=SQLITE_ROW ) goto err; + + rc = term_delete(v, iSegmentRow); + if( rc!=SQLITE_OK ) goto err; + + /* Reusing lowest-number deleted row keeps the index smaller. */ + if( iSegmentRownColumn ; ++i){ + char *zText = (char*)sqlite3_value_text(pValues[i]); + int rc = buildTerms(v, terms, iRowid, zText, i); + if( rc!=SQLITE_OK ) return rc; + } + return SQLITE_OK; +} + +/* Add empty doclists for all terms in the given row's content to the hash + * table [pTerms]. */ +static int deleteTerms(fulltext_vtab *v, fts2Hash *pTerms, sqlite_int64 iRowid){ + const char **pValues; + int i; + + int rc = content_select(v, iRowid, &pValues); + if( rc!=SQLITE_OK ) return rc; + + for(i = 0 ; i < v->nColumn; ++i) { + rc = buildTerms(v, pTerms, iRowid, pValues[i], -1); + if( rc!=SQLITE_OK ) break; + } + + freeStringArray(v->nColumn, pValues); + return SQLITE_OK; +} + +/* Insert a row into the %_content table; set *piRowid to be the ID of the + * new row. Fill [pTerms] with new doclists for the %_term table. */ +static int index_insert(fulltext_vtab *v, sqlite3_value *pRequestRowid, + sqlite3_value **pValues, + sqlite_int64 *piRowid, fts2Hash *pTerms){ + int rc; + + rc = content_insert(v, pRequestRowid, pValues); /* execute an SQL INSERT */ + if( rc!=SQLITE_OK ) return rc; + *piRowid = sqlite3_last_insert_rowid(v->db); + return insertTerms(v, pTerms, *piRowid, pValues); +} + +/* Delete a row from the %_content table; fill [pTerms] with empty doclists + * to be written to the %_term table. */ +static int index_delete(fulltext_vtab *v, sqlite_int64 iRow, fts2Hash *pTerms){ + int rc = deleteTerms(v, pTerms, iRow); + if( rc!=SQLITE_OK ) return rc; + return content_delete(v, iRow); /* execute an SQL DELETE */ +} + +/* Update a row in the %_content table; fill [pTerms] with new doclists for the + * %_term table. */ +static int index_update(fulltext_vtab *v, sqlite_int64 iRow, + sqlite3_value **pValues, fts2Hash *pTerms){ + /* Generate an empty doclist for each term that previously appeared in this + * row. */ + int rc = deleteTerms(v, pTerms, iRow); + if( rc!=SQLITE_OK ) return rc; + + /* Now add positions for terms which appear in the updated row. */ + rc = insertTerms(v, pTerms, iRow, pValues); + if( rc!=SQLITE_OK ) return rc; + + return content_update(v, pValues, iRow); /* execute an SQL UPDATE */ +} + +/* This function implements the xUpdate callback; it's the top-level entry + * point for inserting, deleting or updating a row in a full-text table. */ +static int fulltextUpdate(sqlite3_vtab *pVtab, int nArg, sqlite3_value **ppArg, + sqlite_int64 *pRowid){ + fulltext_vtab *v = (fulltext_vtab *) pVtab; + fts2Hash terms; /* maps term string -> PosList */ + int rc; + fts2HashElem *e; + + TRACE(("FTS2 Update %p\n", pVtab)); + + fts2HashInit(&terms, FTS2_HASH_STRING, 1); + + if( nArg<2 ){ + rc = index_delete(v, sqlite3_value_int64(ppArg[0]), &terms); + } else if( sqlite3_value_type(ppArg[0]) != SQLITE_NULL ){ + /* An update: + * ppArg[0] = old rowid + * ppArg[1] = new rowid + * ppArg[2..2+v->nColumn-1] = values + * ppArg[2+v->nColumn] = value for magic column (we ignore this) + */ + sqlite_int64 rowid = sqlite3_value_int64(ppArg[0]); + if( sqlite3_value_type(ppArg[1]) != SQLITE_INTEGER || + sqlite3_value_int64(ppArg[1]) != rowid ){ + rc = SQLITE_ERROR; /* we don't allow changing the rowid */ + } else { + assert( nArg==2+v->nColumn+1); + rc = index_update(v, rowid, &ppArg[2], &terms); + } + } else { + /* An insert: + * ppArg[1] = requested rowid + * ppArg[2..2+v->nColumn-1] = values + * ppArg[2+v->nColumn] = value for magic column (we ignore this) + */ + assert( nArg==2+v->nColumn+1); + rc = index_insert(v, ppArg[1], &ppArg[2], pRowid, &terms); + } + + if( rc==SQLITE_OK ){ + /* Write updated doclists to disk. */ + for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){ + DocList *p = fts2HashData(e); + rc = index_insert_term(v, fts2HashKey(e), fts2HashKeysize(e), p); + if( rc!=SQLITE_OK ) break; + } + } + + /* clean up */ + for(e=fts2HashFirst(&terms); e; e=fts2HashNext(e)){ + DocList *p = fts2HashData(e); + docListDelete(p); + } + fts2HashClear(&terms); + + return rc; +} + +/* +** Implementation of the snippet() function for FTS2 +*/ +static void snippetFunc( + sqlite3_context *pContext, + int argc, + sqlite3_value **argv +){ + fulltext_cursor *pCursor; + if( argc<1 ) return; + if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || + sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ + sqlite3_result_error(pContext, "illegal first argument to html_snippet",-1); + }else{ + const char *zStart = ""; + const char *zEnd = ""; + const char *zEllipsis = "..."; + memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); + if( argc>=2 ){ + zStart = (const char*)sqlite3_value_text(argv[1]); + if( argc>=3 ){ + zEnd = (const char*)sqlite3_value_text(argv[2]); + if( argc>=4 ){ + zEllipsis = (const char*)sqlite3_value_text(argv[3]); + } + } + } + snippetAllOffsets(pCursor); + snippetText(pCursor, zStart, zEnd, zEllipsis); + sqlite3_result_text(pContext, pCursor->snippet.zSnippet, + pCursor->snippet.nSnippet, SQLITE_STATIC); + } +} + +/* +** Implementation of the offsets() function for FTS2 +*/ +static void snippetOffsetsFunc( + sqlite3_context *pContext, + int argc, + sqlite3_value **argv +){ + fulltext_cursor *pCursor; + if( argc<1 ) return; + if( sqlite3_value_type(argv[0])!=SQLITE_BLOB || + sqlite3_value_bytes(argv[0])!=sizeof(pCursor) ){ + sqlite3_result_error(pContext, "illegal first argument to offsets",-1); + }else{ + memcpy(&pCursor, sqlite3_value_blob(argv[0]), sizeof(pCursor)); + snippetAllOffsets(pCursor); + snippetOffsetText(&pCursor->snippet); + sqlite3_result_text(pContext, + pCursor->snippet.zOffset, pCursor->snippet.nOffset, + SQLITE_STATIC); + } +} + +/* +** This routine implements the xFindFunction method for the FTS2 +** virtual table. +*/ +static int fulltextFindFunction( + sqlite3_vtab *pVtab, + int nArg, + const char *zName, + void (**pxFunc)(sqlite3_context*,int,sqlite3_value**), + void **ppArg +){ + if( strcmp(zName,"snippet")==0 ){ + *pxFunc = snippetFunc; + return 1; + }else if( strcmp(zName,"offsets")==0 ){ + *pxFunc = snippetOffsetsFunc; + return 1; + } + return 0; +} + +static const sqlite3_module fulltextModule = { + /* iVersion */ 0, + /* xCreate */ fulltextCreate, + /* xConnect */ fulltextConnect, + /* xBestIndex */ fulltextBestIndex, + /* xDisconnect */ fulltextDisconnect, + /* xDestroy */ fulltextDestroy, + /* xOpen */ fulltextOpen, + /* xClose */ fulltextClose, + /* xFilter */ fulltextFilter, + /* xNext */ fulltextNext, + /* xEof */ fulltextEof, + /* xColumn */ fulltextColumn, + /* xRowid */ fulltextRowid, + /* xUpdate */ fulltextUpdate, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindFunction */ fulltextFindFunction, +}; + +int sqlite3Fts2Init(sqlite3 *db){ + sqlite3_overload_function(db, "snippet", -1); + sqlite3_overload_function(db, "offsets", -1); + return sqlite3_create_module(db, "fts2", &fulltextModule, 0); +} + +#if !SQLITE_CORE +int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg, + const sqlite3_api_routines *pApi){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3Fts2Init(db); +} +#endif + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2.h b/ext/fts2/fts2.h new file mode 100644 index 0000000000..d52979ad40 --- /dev/null +++ b/ext/fts2/fts2.h @@ -0,0 +1,11 @@ +#include "sqlite3.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +int sqlite3Fts2Init(sqlite3 *db); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ diff --git a/ext/fts2/fts2_hash.c b/ext/fts2/fts2_hash.c new file mode 100644 index 0000000000..30feed2526 --- /dev/null +++ b/ext/fts2/fts2_hash.c @@ -0,0 +1,369 @@ +/* +** 2001 September 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the implementation of generic hash-tables used in SQLite. +** We've modified it slightly to serve as a standalone hash table +** implementation for the full-text indexing module. +*/ +#include +#include +#include + +/* +** The code in this file is only compiled if: +** +** * The FTS2 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS2 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). +*/ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) + + +#include "fts2_hash.h" + +static void *malloc_and_zero(int n){ + void *p = malloc(n); + if( p ){ + memset(p, 0, n); + } + return p; +} + +/* Turn bulk memory into a hash table object by initializing the +** fields of the Hash structure. +** +** "pNew" is a pointer to the hash table that is to be initialized. +** keyClass is one of the constants +** FTS2_HASH_BINARY or FTS2_HASH_STRING. The value of keyClass +** determines what kind of key the hash table will use. "copyKey" is +** true if the hash table should make its own private copy of keys and +** false if it should just use the supplied pointer. +*/ +void sqlite3Fts2HashInit(fts2Hash *pNew, int keyClass, int copyKey){ + assert( pNew!=0 ); + assert( keyClass>=FTS2_HASH_STRING && keyClass<=FTS2_HASH_BINARY ); + pNew->keyClass = keyClass; + pNew->copyKey = copyKey; + pNew->first = 0; + pNew->count = 0; + pNew->htsize = 0; + pNew->ht = 0; + pNew->xMalloc = malloc_and_zero; + pNew->xFree = free; +} + +/* Remove all entries from a hash table. Reclaim all memory. +** Call this routine to delete a hash table or to reset a hash table +** to the empty state. +*/ +void sqlite3Fts2HashClear(fts2Hash *pH){ + fts2HashElem *elem; /* For looping over all elements of the table */ + + assert( pH!=0 ); + elem = pH->first; + pH->first = 0; + if( pH->ht ) pH->xFree(pH->ht); + pH->ht = 0; + pH->htsize = 0; + while( elem ){ + fts2HashElem *next_elem = elem->next; + if( pH->copyKey && elem->pKey ){ + pH->xFree(elem->pKey); + } + pH->xFree(elem); + elem = next_elem; + } + pH->count = 0; +} + +/* +** Hash and comparison functions when the mode is FTS2_HASH_STRING +*/ +static int strHash(const void *pKey, int nKey){ + const char *z = (const char *)pKey; + int h = 0; + if( nKey<=0 ) nKey = (int) strlen(z); + while( nKey > 0 ){ + h = (h<<3) ^ h ^ *z++; + nKey--; + } + return h & 0x7fffffff; +} +static int strCompare(const void *pKey1, int n1, const void *pKey2, int n2){ + if( n1!=n2 ) return 1; + return strncmp((const char*)pKey1,(const char*)pKey2,n1); +} + +/* +** Hash and comparison functions when the mode is FTS2_HASH_BINARY +*/ +static int binHash(const void *pKey, int nKey){ + int h = 0; + const char *z = (const char *)pKey; + while( nKey-- > 0 ){ + h = (h<<3) ^ h ^ *(z++); + } + return h & 0x7fffffff; +} +static int binCompare(const void *pKey1, int n1, const void *pKey2, int n2){ + if( n1!=n2 ) return 1; + return memcmp(pKey1,pKey2,n1); +} + +/* +** Return a pointer to the appropriate hash function given the key class. +** +** The C syntax in this function definition may be unfamilar to some +** programmers, so we provide the following additional explanation: +** +** The name of the function is "hashFunction". The function takes a +** single parameter "keyClass". The return value of hashFunction() +** is a pointer to another function. Specifically, the return value +** of hashFunction() is a pointer to a function that takes two parameters +** with types "const void*" and "int" and returns an "int". +*/ +static int (*hashFunction(int keyClass))(const void*,int){ + if( keyClass==FTS2_HASH_STRING ){ + return &strHash; + }else{ + assert( keyClass==FTS2_HASH_BINARY ); + return &binHash; + } +} + +/* +** Return a pointer to the appropriate hash function given the key class. +** +** For help in interpreted the obscure C code in the function definition, +** see the header comment on the previous function. +*/ +static int (*compareFunction(int keyClass))(const void*,int,const void*,int){ + if( keyClass==FTS2_HASH_STRING ){ + return &strCompare; + }else{ + assert( keyClass==FTS2_HASH_BINARY ); + return &binCompare; + } +} + +/* Link an element into the hash table +*/ +static void insertElement( + fts2Hash *pH, /* The complete hash table */ + struct _fts2ht *pEntry, /* The entry into which pNew is inserted */ + fts2HashElem *pNew /* The element to be inserted */ +){ + fts2HashElem *pHead; /* First element already in pEntry */ + pHead = pEntry->chain; + if( pHead ){ + pNew->next = pHead; + pNew->prev = pHead->prev; + if( pHead->prev ){ pHead->prev->next = pNew; } + else { pH->first = pNew; } + pHead->prev = pNew; + }else{ + pNew->next = pH->first; + if( pH->first ){ pH->first->prev = pNew; } + pNew->prev = 0; + pH->first = pNew; + } + pEntry->count++; + pEntry->chain = pNew; +} + + +/* Resize the hash table so that it cantains "new_size" buckets. +** "new_size" must be a power of 2. The hash table might fail +** to resize if sqliteMalloc() fails. +*/ +static void rehash(fts2Hash *pH, int new_size){ + struct _fts2ht *new_ht; /* The new hash table */ + fts2HashElem *elem, *next_elem; /* For looping over existing elements */ + int (*xHash)(const void*,int); /* The hash function */ + + assert( (new_size & (new_size-1))==0 ); + new_ht = (struct _fts2ht *)pH->xMalloc( new_size*sizeof(struct _fts2ht) ); + if( new_ht==0 ) return; + if( pH->ht ) pH->xFree(pH->ht); + pH->ht = new_ht; + pH->htsize = new_size; + xHash = hashFunction(pH->keyClass); + for(elem=pH->first, pH->first=0; elem; elem = next_elem){ + int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); + next_elem = elem->next; + insertElement(pH, &new_ht[h], elem); + } +} + +/* This function (for internal use only) locates an element in an +** hash table that matches the given key. The hash for this key has +** already been computed and is passed as the 4th parameter. +*/ +static fts2HashElem *findElementGivenHash( + const fts2Hash *pH, /* The pH to be searched */ + const void *pKey, /* The key we are searching for */ + int nKey, + int h /* The hash for this key. */ +){ + fts2HashElem *elem; /* Used to loop thru the element list */ + int count; /* Number of elements left to test */ + int (*xCompare)(const void*,int,const void*,int); /* comparison function */ + + if( pH->ht ){ + struct _fts2ht *pEntry = &pH->ht[h]; + elem = pEntry->chain; + count = pEntry->count; + xCompare = compareFunction(pH->keyClass); + while( count-- && elem ){ + if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ + return elem; + } + elem = elem->next; + } + } + return 0; +} + +/* Remove a single entry from the hash table given a pointer to that +** element and a hash on the element's key. +*/ +static void removeElementGivenHash( + fts2Hash *pH, /* The pH containing "elem" */ + fts2HashElem* elem, /* The element to be removed from the pH */ + int h /* Hash value for the element */ +){ + struct _fts2ht *pEntry; + if( elem->prev ){ + elem->prev->next = elem->next; + }else{ + pH->first = elem->next; + } + if( elem->next ){ + elem->next->prev = elem->prev; + } + pEntry = &pH->ht[h]; + if( pEntry->chain==elem ){ + pEntry->chain = elem->next; + } + pEntry->count--; + if( pEntry->count<=0 ){ + pEntry->chain = 0; + } + if( pH->copyKey && elem->pKey ){ + pH->xFree(elem->pKey); + } + pH->xFree( elem ); + pH->count--; + if( pH->count<=0 ){ + assert( pH->first==0 ); + assert( pH->count==0 ); + fts2HashClear(pH); + } +} + +/* Attempt to locate an element of the hash table pH with a key +** that matches pKey,nKey. Return the data for this element if it is +** found, or NULL if there is no match. +*/ +void *sqlite3Fts2HashFind(const fts2Hash *pH, const void *pKey, int nKey){ + int h; /* A hash on key */ + fts2HashElem *elem; /* The element that matches key */ + int (*xHash)(const void*,int); /* The hash function */ + + if( pH==0 || pH->ht==0 ) return 0; + xHash = hashFunction(pH->keyClass); + assert( xHash!=0 ); + h = (*xHash)(pKey,nKey); + assert( (pH->htsize & (pH->htsize-1))==0 ); + elem = findElementGivenHash(pH,pKey,nKey, h & (pH->htsize-1)); + return elem ? elem->data : 0; +} + +/* Insert an element into the hash table pH. The key is pKey,nKey +** and the data is "data". +** +** If no element exists with a matching key, then a new +** element is created. A copy of the key is made if the copyKey +** flag is set. NULL is returned. +** +** If another element already exists with the same key, then the +** new data replaces the old data and the old data is returned. +** The key is not copied in this instance. If a malloc fails, then +** the new data is returned and the hash table is unchanged. +** +** If the "data" parameter to this function is NULL, then the +** element corresponding to "key" is removed from the hash table. +*/ +void *sqlite3Fts2HashInsert( + fts2Hash *pH, /* The hash table to insert into */ + const void *pKey, /* The key */ + int nKey, /* Number of bytes in the key */ + void *data /* The data */ +){ + int hraw; /* Raw hash value of the key */ + int h; /* the hash of the key modulo hash table size */ + fts2HashElem *elem; /* Used to loop thru the element list */ + fts2HashElem *new_elem; /* New element added to the pH */ + int (*xHash)(const void*,int); /* The hash function */ + + assert( pH!=0 ); + xHash = hashFunction(pH->keyClass); + assert( xHash!=0 ); + hraw = (*xHash)(pKey, nKey); + assert( (pH->htsize & (pH->htsize-1))==0 ); + h = hraw & (pH->htsize-1); + elem = findElementGivenHash(pH,pKey,nKey,h); + if( elem ){ + void *old_data = elem->data; + if( data==0 ){ + removeElementGivenHash(pH,elem,h); + }else{ + elem->data = data; + } + return old_data; + } + if( data==0 ) return 0; + new_elem = (fts2HashElem*)pH->xMalloc( sizeof(fts2HashElem) ); + if( new_elem==0 ) return data; + if( pH->copyKey && pKey!=0 ){ + new_elem->pKey = pH->xMalloc( nKey ); + if( new_elem->pKey==0 ){ + pH->xFree(new_elem); + return data; + } + memcpy((void*)new_elem->pKey, pKey, nKey); + }else{ + new_elem->pKey = (void*)pKey; + } + new_elem->nKey = nKey; + pH->count++; + if( pH->htsize==0 ){ + rehash(pH,8); + if( pH->htsize==0 ){ + pH->count = 0; + pH->xFree(new_elem); + return data; + } + } + if( pH->count > pH->htsize ){ + rehash(pH,pH->htsize*2); + } + assert( pH->htsize>0 ); + assert( (pH->htsize & (pH->htsize-1))==0 ); + h = hraw & (pH->htsize-1); + insertElement(pH, &pH->ht[h], new_elem); + new_elem->data = data; + return 0; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2_hash.h b/ext/fts2/fts2_hash.h new file mode 100644 index 0000000000..97f35291cb --- /dev/null +++ b/ext/fts2/fts2_hash.h @@ -0,0 +1,112 @@ +/* +** 2001 September 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the header file for the generic hash-table implemenation +** used in SQLite. We've modified it slightly to serve as a standalone +** hash table implementation for the full-text indexing module. +** +*/ +#ifndef _FTS2_HASH_H_ +#define _FTS2_HASH_H_ + +/* Forward declarations of structures. */ +typedef struct fts2Hash fts2Hash; +typedef struct fts2HashElem fts2HashElem; + +/* A complete hash table is an instance of the following structure. +** The internals of this structure are intended to be opaque -- client +** code should not attempt to access or modify the fields of this structure +** directly. Change this structure only by using the routines below. +** However, many of the "procedures" and "functions" for modifying and +** accessing this structure are really macros, so we can't really make +** this structure opaque. +*/ +struct fts2Hash { + char keyClass; /* HASH_INT, _POINTER, _STRING, _BINARY */ + char copyKey; /* True if copy of key made on insert */ + int count; /* Number of entries in this table */ + fts2HashElem *first; /* The first element of the array */ + void *(*xMalloc)(int); /* malloc() function to use */ + void (*xFree)(void *); /* free() function to use */ + int htsize; /* Number of buckets in the hash table */ + struct _fts2ht { /* the hash table */ + int count; /* Number of entries with this hash */ + fts2HashElem *chain; /* Pointer to first entry with this hash */ + } *ht; +}; + +/* Each element in the hash table is an instance of the following +** structure. All elements are stored on a single doubly-linked list. +** +** Again, this structure is intended to be opaque, but it can't really +** be opaque because it is used by macros. +*/ +struct fts2HashElem { + fts2HashElem *next, *prev; /* Next and previous elements in the table */ + void *data; /* Data associated with this element */ + void *pKey; int nKey; /* Key associated with this element */ +}; + +/* +** There are 2 different modes of operation for a hash table: +** +** FTS2_HASH_STRING pKey points to a string that is nKey bytes long +** (including the null-terminator, if any). Case +** is respected in comparisons. +** +** FTS2_HASH_BINARY pKey points to binary data nKey bytes long. +** memcmp() is used to compare keys. +** +** A copy of the key is made if the copyKey parameter to fts2HashInit is 1. +*/ +#define FTS2_HASH_STRING 1 +#define FTS2_HASH_BINARY 2 + +/* +** Access routines. To delete, insert a NULL pointer. +*/ +void sqlite3Fts2HashInit(fts2Hash*, int keytype, int copyKey); +void *sqlite3Fts2HashInsert(fts2Hash*, const void *pKey, int nKey, void *pData); +void *sqlite3Fts2HashFind(const fts2Hash*, const void *pKey, int nKey); +void sqlite3Fts2HashClear(fts2Hash*); + +/* +** Shorthand for the functions above +*/ +#define fts2HashInit sqlite3Fts2HashInit +#define fts2HashInsert sqlite3Fts2HashInsert +#define fts2HashFind sqlite3Fts2HashFind +#define fts2HashClear sqlite3Fts2HashClear + +/* +** Macros for looping over all elements of a hash table. The idiom is +** like this: +** +** fts2Hash h; +** fts2HashElem *p; +** ... +** for(p=fts2HashFirst(&h); p; p=fts2HashNext(p)){ +** SomeStructure *pData = fts2HashData(p); +** // do something with pData +** } +*/ +#define fts2HashFirst(H) ((H)->first) +#define fts2HashNext(E) ((E)->next) +#define fts2HashData(E) ((E)->data) +#define fts2HashKey(E) ((E)->pKey) +#define fts2HashKeysize(E) ((E)->nKey) + +/* +** Number of entries in a hash table +*/ +#define fts2HashCount(H) ((H)->count) + +#endif /* _FTS2_HASH_H_ */ diff --git a/ext/fts2/fts2_porter.c b/ext/fts2/fts2_porter.c new file mode 100644 index 0000000000..2372b966bb --- /dev/null +++ b/ext/fts2/fts2_porter.c @@ -0,0 +1,645 @@ +/* +** 2006 September 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Implementation of the full-text-search tokenizer that implements +** a Porter stemmer. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS2 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS2 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). +*/ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) + + +#include +#if !defined(__APPLE__) +#include +#else +#include +#endif +#include +#include +#include + +#include "fts2_tokenizer.h" + +/* +** Class derived from sqlite3_tokenizer +*/ +typedef struct porter_tokenizer { + sqlite3_tokenizer base; /* Base class */ +} porter_tokenizer; + +/* +** Class derived from sqlit3_tokenizer_cursor +*/ +typedef struct porter_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + const char *zInput; /* input we are tokenizing */ + int nInput; /* size of the input */ + int iOffset; /* current position in zInput */ + int iToken; /* index of next token to be returned */ + char *zToken; /* storage for current token */ + int nAllocated; /* space allocated to zToken buffer */ +} porter_tokenizer_cursor; + + +/* Forward declaration */ +static const sqlite3_tokenizer_module porterTokenizerModule; + + +/* +** Create a new tokenizer instance. +*/ +static int porterCreate( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + porter_tokenizer *t; + int i; + +for(i=0; ibase; + return SQLITE_OK; +} + +/* +** Destroy a tokenizer +*/ +static int porterDestroy(sqlite3_tokenizer *pTokenizer){ + free(pTokenizer); + return SQLITE_OK; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is zInput[0..nInput-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int porterOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *zInput, int nInput, /* String to be tokenized */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + porter_tokenizer_cursor *c; + + c = (porter_tokenizer_cursor *) malloc(sizeof(porter_tokenizer_cursor)); + c->zInput = zInput; + if( zInput==0 ){ + c->nInput = 0; + }else if( nInput<0 ){ + c->nInput = (int)strlen(zInput); + }else{ + c->nInput = nInput; + } + c->iOffset = 0; /* start tokenizing at the beginning */ + c->iToken = 0; + c->zToken = NULL; /* no space allocated, yet. */ + c->nAllocated = 0; + + *ppCursor = &c->base; + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to +** porterOpen() above. +*/ +static int porterClose(sqlite3_tokenizer_cursor *pCursor){ + porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; + free(c->zToken); + free(c); + return SQLITE_OK; +} +/* +** Vowel or consonant +*/ +static const char cType[] = { + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 2, 1 +}; + +/* +** isConsonant() and isVowel() determine if their first character in +** the string they point to is a consonant or a vowel, according +** to Porter ruls. +** +** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. +** 'Y' is a consonant unless it follows another consonant, +** in which case it is a vowel. +** +** In these routine, the letters are in reverse order. So the 'y' rule +** is that 'y' is a consonant unless it is followed by another +** consonent. +*/ +static int isVowel(const char*); +static int isConsonant(const char *z){ + int j; + char x = *z; + if( x==0 ) return 0; + assert( x>='a' && x<='z' ); + j = cType[x-'a']; + if( j<2 ) return j; + return z[1]==0 || isVowel(z + 1); +} +static int isVowel(const char *z){ + int j; + char x = *z; + if( x==0 ) return 0; + assert( x>='a' && x<='z' ); + j = cType[x-'a']; + if( j<2 ) return 1-j; + return isConsonant(z + 1); +} + +/* +** Let any sequence of one or more vowels be represented by V and let +** C be sequence of one or more consonants. Then every word can be +** represented as: +** +** [C] (VC){m} [V] +** +** In prose: A word is an optional consonant followed by zero or +** vowel-consonant pairs followed by an optional vowel. "m" is the +** number of vowel consonant pairs. This routine computes the value +** of m for the first i bytes of a word. +** +** Return true if the m-value for z is 1 or more. In other words, +** return true if z contains at least one vowel that is followed +** by a consonant. +** +** In this routine z[] is in reverse order. So we are really looking +** for an instance of of a consonant followed by a vowel. +*/ +static int m_gt_0(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + return *z!=0; +} + +/* Like mgt0 above except we are looking for a value of m which is +** exactly 1 +*/ +static int m_eq_1(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + if( *z==0 ) return 0; + while( isVowel(z) ){ z++; } + if( *z==0 ) return 1; + while( isConsonant(z) ){ z++; } + return *z==0; +} + +/* Like mgt0 above except we are looking for a value of m>1 instead +** or m>0 +*/ +static int m_gt_1(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + if( *z==0 ) return 0; + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + return *z!=0; +} + +/* +** Return TRUE if there is a vowel anywhere within z[0..n-1] +*/ +static int hasVowel(const char *z){ + while( isConsonant(z) ){ z++; } + return *z!=0; +} + +/* +** Return TRUE if the word ends in a double consonant. +** +** The text is reversed here. So we are really looking at +** the first two characters of z[]. +*/ +static int doubleConsonant(const char *z){ + return isConsonant(z) && z[0]==z[1] && isConsonant(z+1); +} + +/* +** Return TRUE if the word ends with three letters which +** are consonant-vowel-consonent and where the final consonant +** is not 'w', 'x', or 'y'. +** +** The word is reversed here. So we are really checking the +** first three letters and the first one cannot be in [wxy]. +*/ +static int star_oh(const char *z){ + return + z[0]!=0 && isConsonant(z) && + z[0]!='w' && z[0]!='x' && z[0]!='y' && + z[1]!=0 && isVowel(z+1) && + z[2]!=0 && isConsonant(z+2); +} + +/* +** If the word ends with zFrom and xCond() is true for the stem +** of the word that preceeds the zFrom ending, then change the +** ending to zTo. +** +** The input word *pz and zFrom are both in reverse order. zTo +** is in normal order. +** +** Return TRUE if zFrom matches. Return FALSE if zFrom does not +** match. Not that TRUE is returned even if xCond() fails and +** no substitution occurs. +*/ +static int stem( + char **pz, /* The word being stemmed (Reversed) */ + const char *zFrom, /* If the ending matches this... (Reversed) */ + const char *zTo, /* ... change the ending to this (not reversed) */ + int (*xCond)(const char*) /* Condition that must be true */ +){ + char *z = *pz; + while( *zFrom && *zFrom==*z ){ z++; zFrom++; } + if( *zFrom!=0 ) return 0; + if( xCond && !xCond(z) ) return 1; + while( *zTo ){ + *(--z) = *(zTo++); + } + *pz = z; + return 1; +} + +/* +** This is the fallback stemmer used when the porter stemmer is +** inappropriate. The input word is copied into the output with +** US-ASCII case folding. If the input word is too long (more +** than 20 bytes if it contains no digits or more than 6 bytes if +** it contains digits) then word is truncated to 20 or 6 bytes +** by taking 10 or 3 bytes from the beginning and end. +*/ +static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ + int i, mx, j; + int hasDigit = 0; + for(i=0; i='A' && c<='Z' ){ + zOut[i] = c - 'A' + 'a'; + }else{ + if( c>='0' && c<='9' ) hasDigit = 1; + zOut[i] = c; + } + } + mx = hasDigit ? 3 : 10; + if( nIn>mx*2 ){ + for(j=mx, i=nIn-mx; i=sizeof(zReverse)-7 ){ + /* The word is too big or too small for the porter stemmer. + ** Fallback to the copy stemmer */ + copy_stemmer(zIn, nIn, zOut, pnOut); + return; + } + for(i=0, j=sizeof(zReverse)-6; i='A' && c<='Z' ){ + zReverse[j] = c + 'a' - 'A'; + }else if( c>='a' && c<='z' ){ + zReverse[j] = c; + }else{ + /* The use of a character not in [a-zA-Z] means that we fallback + ** to the copy stemmer */ + copy_stemmer(zIn, nIn, zOut, pnOut); + return; + } + } + memset(&zReverse[sizeof(zReverse)-5], 0, 5); + z = &zReverse[j+1]; + + + /* Step 1a */ + if( z[0]=='s' ){ + if( + !stem(&z, "sess", "ss", 0) && + !stem(&z, "sei", "i", 0) && + !stem(&z, "ss", "ss", 0) + ){ + z++; + } + } + + /* Step 1b */ + z2 = z; + if( stem(&z, "dee", "ee", m_gt_0) ){ + /* Do nothing. The work was all in the test */ + }else if( + (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) + && z!=z2 + ){ + if( stem(&z, "ta", "ate", 0) || + stem(&z, "lb", "ble", 0) || + stem(&z, "zi", "ize", 0) ){ + /* Do nothing. The work was all in the test */ + }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ + z++; + }else if( m_eq_1(z) && star_oh(z) ){ + *(--z) = 'e'; + } + } + + /* Step 1c */ + if( z[0]=='y' && hasVowel(z+1) ){ + z[0] = 'i'; + } + + /* Step 2 */ + switch( z[1] ){ + case 'a': + stem(&z, "lanoita", "ate", m_gt_0) || + stem(&z, "lanoit", "tion", m_gt_0); + break; + case 'c': + stem(&z, "icne", "ence", m_gt_0) || + stem(&z, "icna", "ance", m_gt_0); + break; + case 'e': + stem(&z, "rezi", "ize", m_gt_0); + break; + case 'g': + stem(&z, "igol", "log", m_gt_0); + break; + case 'l': + stem(&z, "ilb", "ble", m_gt_0) || + stem(&z, "illa", "al", m_gt_0) || + stem(&z, "iltne", "ent", m_gt_0) || + stem(&z, "ile", "e", m_gt_0) || + stem(&z, "ilsuo", "ous", m_gt_0); + break; + case 'o': + stem(&z, "noitazi", "ize", m_gt_0) || + stem(&z, "noita", "ate", m_gt_0) || + stem(&z, "rota", "ate", m_gt_0); + break; + case 's': + stem(&z, "msila", "al", m_gt_0) || + stem(&z, "ssenevi", "ive", m_gt_0) || + stem(&z, "ssenluf", "ful", m_gt_0) || + stem(&z, "ssensuo", "ous", m_gt_0); + break; + case 't': + stem(&z, "itila", "al", m_gt_0) || + stem(&z, "itivi", "ive", m_gt_0) || + stem(&z, "itilib", "ble", m_gt_0); + break; + } + + /* Step 3 */ + switch( z[0] ){ + case 'e': + stem(&z, "etaci", "ic", m_gt_0) || + stem(&z, "evita", "", m_gt_0) || + stem(&z, "ezila", "al", m_gt_0); + break; + case 'i': + stem(&z, "itici", "ic", m_gt_0); + break; + case 'l': + stem(&z, "laci", "ic", m_gt_0) || + stem(&z, "luf", "", m_gt_0); + break; + case 's': + stem(&z, "ssen", "", m_gt_0); + break; + } + + /* Step 4 */ + switch( z[1] ){ + case 'a': + if( z[0]=='l' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'c': + if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ + z += 4; + } + break; + case 'e': + if( z[0]=='r' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'i': + if( z[0]=='c' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'l': + if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ + z += 4; + } + break; + case 'n': + if( z[0]=='t' ){ + if( z[2]=='a' ){ + if( m_gt_1(z+3) ){ + z += 3; + } + }else if( z[2]=='e' ){ + stem(&z, "tneme", "", m_gt_1) || + stem(&z, "tnem", "", m_gt_1) || + stem(&z, "tne", "", m_gt_1); + } + } + break; + case 'o': + if( z[0]=='u' ){ + if( m_gt_1(z+2) ){ + z += 2; + } + }else if( z[3]=='s' || z[3]=='t' ){ + stem(&z, "noi", "", m_gt_1); + } + break; + case 's': + if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ + z += 3; + } + break; + case 't': + stem(&z, "eta", "", m_gt_1) || + stem(&z, "iti", "", m_gt_1); + break; + case 'u': + if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ + z += 3; + } + break; + case 'v': + case 'z': + if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ + z += 3; + } + break; + } + + /* Step 5a */ + if( z[0]=='e' ){ + if( m_gt_1(z+1) ){ + z++; + }else if( m_eq_1(z+1) && !star_oh(z+1) ){ + z++; + } + } + + /* Step 5b */ + if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ + z++; + } + + /* z[] is now the stemmed word in reverse order. Flip it back + ** around into forward order and return. + */ + *pnOut = i = strlen(z); + zOut[i] = 0; + while( *z ){ + zOut[--i] = *(z++); + } +} + +/* +** Characters that can be part of a token. We assume any character +** whose value is greater than 0x80 (any UTF character) can be +** part of a token. In other words, delimiters all must have +** values of 0x7f or lower. +*/ +const char isIdChar[] = { +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ +}; +#define idChar(C) (((ch=C)&0x80)!=0 || (ch>0x2f && isIdChar[ch-0x30])) +#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !isIdChar[ch-0x30])) + +/* +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to porterOpen(). +*/ +static int porterNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ + const char **pzToken, /* OUT: *pzToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; + const char *z = c->zInput; + + while( c->iOffsetnInput ){ + int iStartOffset, ch; + + /* Scan past delimiter characters */ + while( c->iOffsetnInput && isDelim(z[c->iOffset]) ){ + c->iOffset++; + } + + /* Count non-delimiter characters. */ + iStartOffset = c->iOffset; + while( c->iOffsetnInput && !isDelim(z[c->iOffset]) ){ + c->iOffset++; + } + + if( c->iOffset>iStartOffset ){ + int n = c->iOffset-iStartOffset; + if( n>c->nAllocated ){ + c->nAllocated = n+20; + c->zToken = realloc(c->zToken, c->nAllocated); + } + porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); + *pzToken = c->zToken; + *piStartOffset = iStartOffset; + *piEndOffset = c->iOffset; + *piPosition = c->iToken++; + return SQLITE_OK; + } + } + return SQLITE_DONE; +} + +/* +** The set of routines that implement the porter-stemmer tokenizer +*/ +static const sqlite3_tokenizer_module porterTokenizerModule = { + 0, + porterCreate, + porterDestroy, + porterOpen, + porterClose, + porterNext, +}; + +/* +** Allocate a new porter tokenizer. Return a pointer to the new +** tokenizer in *ppModule +*/ +void sqlite3Fts2PorterTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &porterTokenizerModule; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/ext/fts2/fts2_tokenizer.h b/ext/fts2/fts2_tokenizer.h new file mode 100644 index 0000000000..35f8238b20 --- /dev/null +++ b/ext/fts2/fts2_tokenizer.h @@ -0,0 +1,90 @@ +/* +** 2006 July 10 +** +** The author disclaims copyright to this source code. +** +************************************************************************* +** Defines the interface to tokenizers used by fulltext-search. There +** are three basic components: +** +** sqlite3_tokenizer_module is a singleton defining the tokenizer +** interface functions. This is essentially the class structure for +** tokenizers. +** +** sqlite3_tokenizer is used to define a particular tokenizer, perhaps +** including customization information defined at creation time. +** +** sqlite3_tokenizer_cursor is generated by a tokenizer to generate +** tokens from a particular input. +*/ +#ifndef _FTS2_TOKENIZER_H_ +#define _FTS2_TOKENIZER_H_ + +/* TODO(shess) Only used for SQLITE_OK and SQLITE_DONE at this time. +** If tokenizers are to be allowed to call sqlite3_*() functions, then +** we will need a way to register the API consistently. +*/ +#include "sqlite3.h" + +/* +** Structures used by the tokenizer interface. +*/ +typedef struct sqlite3_tokenizer sqlite3_tokenizer; +typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; +typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; + +struct sqlite3_tokenizer_module { + int iVersion; /* currently 0 */ + + /* + ** Create and destroy a tokenizer. argc/argv are passed down from + ** the fulltext virtual table creation to allow customization. + */ + int (*xCreate)(int argc, const char *const*argv, + sqlite3_tokenizer **ppTokenizer); + int (*xDestroy)(sqlite3_tokenizer *pTokenizer); + + /* + ** Tokenize a particular input. Call xOpen() to prepare to + ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then + ** xClose() to free any internal state. The pInput passed to + ** xOpen() must exist until the cursor is closed. The ppToken + ** result from xNext() is only valid until the next call to xNext() + ** or until xClose() is called. + */ + /* TODO(shess) current implementation requires pInput to be + ** nul-terminated. This should either be fixed, or pInput/nBytes + ** should be converted to zInput. + */ + int (*xOpen)(sqlite3_tokenizer *pTokenizer, + const char *pInput, int nBytes, + sqlite3_tokenizer_cursor **ppCursor); + int (*xClose)(sqlite3_tokenizer_cursor *pCursor); + int (*xNext)(sqlite3_tokenizer_cursor *pCursor, + const char **ppToken, int *pnBytes, + int *piStartOffset, int *piEndOffset, int *piPosition); +}; + +struct sqlite3_tokenizer { + const sqlite3_tokenizer_module *pModule; /* The module for this tokenizer */ + /* Tokenizer implementations will typically add additional fields */ +}; + +struct sqlite3_tokenizer_cursor { + sqlite3_tokenizer *pTokenizer; /* Tokenizer for this cursor. */ + /* Tokenizer implementations will typically add additional fields */ +}; + +/* +** Get the module for a tokenizer which generates tokens based on a +** set of non-token characters. The default is to break tokens at any +** non-alnum character, though the set of delimiters can also be +** specified by the first argv argument to xCreate(). +*/ +/* TODO(shess) This doesn't belong here. Need some sort of +** registration process. +*/ +void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); +void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); + +#endif /* _FTS2_TOKENIZER_H_ */ diff --git a/ext/fts2/fts2_tokenizer1.c b/ext/fts2/fts2_tokenizer1.c new file mode 100644 index 0000000000..07619076e4 --- /dev/null +++ b/ext/fts2/fts2_tokenizer1.c @@ -0,0 +1,220 @@ +/* +** The author disclaims copyright to this source code. +** +************************************************************************* +** Implementation of the "simple" full-text-search tokenizer. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS2 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS2 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS2 is defined). +*/ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) + + +#include +#if !defined(__APPLE__) +#include +#else +#include +#endif +#include +#include +#include + +#include "fts2_tokenizer.h" + +typedef struct simple_tokenizer { + sqlite3_tokenizer base; + char delim[128]; /* flag ASCII delimiters */ +} simple_tokenizer; + +typedef struct simple_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + const char *pInput; /* input we are tokenizing */ + int nBytes; /* size of the input */ + int iOffset; /* current position in pInput */ + int iToken; /* index of next token to be returned */ + char *pToken; /* storage for current token */ + int nTokenAllocated; /* space allocated to zToken buffer */ +} simple_tokenizer_cursor; + + +/* Forward declaration */ +static const sqlite3_tokenizer_module simpleTokenizerModule; + +static int isDelim(simple_tokenizer *t, unsigned char c){ + return c<0x80 && t->delim[c]; +} + +/* +** Create a new tokenizer instance. +*/ +static int simpleCreate( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + simple_tokenizer *t; + + t = (simple_tokenizer *) calloc(sizeof(simple_tokenizer), 1); + /* TODO(shess) Delimiters need to remain the same from run to run, + ** else we need to reindex. One solution would be a meta-table to + ** track such information in the database, then we'd only want this + ** information on the initial create. + */ + if( argc>1 ){ + int i, n = strlen(argv[1]); + for(i=0; i=0x80 ){ + free(t); + return SQLITE_ERROR; + } + t->delim[ch] = 1; + } + } else { + /* Mark non-alphanumeric ASCII characters as delimiters */ + int i; + for(i=1; i<0x80; i++){ + t->delim[i] = !isalnum(i); + } + } + + *ppTokenizer = &t->base; + return SQLITE_OK; +} + +/* +** Destroy a tokenizer +*/ +static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ + free(pTokenizer); + return SQLITE_OK; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int simpleOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *pInput, int nBytes, /* String to be tokenized */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + simple_tokenizer_cursor *c; + + c = (simple_tokenizer_cursor *) malloc(sizeof(simple_tokenizer_cursor)); + c->pInput = pInput; + if( pInput==0 ){ + c->nBytes = 0; + }else if( nBytes<0 ){ + c->nBytes = (int)strlen(pInput); + }else{ + c->nBytes = nBytes; + } + c->iOffset = 0; /* start tokenizing at the beginning */ + c->iToken = 0; + c->pToken = NULL; /* no space allocated, yet. */ + c->nTokenAllocated = 0; + + *ppCursor = &c->base; + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to +** simpleOpen() above. +*/ +static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ + simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; + free(c->pToken); + free(c); + return SQLITE_OK; +} + +/* +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to simpleOpen(). +*/ +static int simpleNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; + simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; + unsigned char *p = (unsigned char *)c->pInput; + + while( c->iOffsetnBytes ){ + int iStartOffset; + + /* Scan past delimiter characters */ + while( c->iOffsetnBytes && isDelim(t, p[c->iOffset]) ){ + c->iOffset++; + } + + /* Count non-delimiter characters. */ + iStartOffset = c->iOffset; + while( c->iOffsetnBytes && !isDelim(t, p[c->iOffset]) ){ + c->iOffset++; + } + + if( c->iOffset>iStartOffset ){ + int i, n = c->iOffset-iStartOffset; + if( n>c->nTokenAllocated ){ + c->nTokenAllocated = n+20; + c->pToken = realloc(c->pToken, c->nTokenAllocated); + } + for(i=0; ipToken[i] = ch<0x80 ? tolower(ch) : ch; + } + *ppToken = c->pToken; + *pnBytes = n; + *piStartOffset = iStartOffset; + *piEndOffset = c->iOffset; + *piPosition = c->iToken++; + + return SQLITE_OK; + } + } + return SQLITE_DONE; +} + +/* +** The set of routines that implement the simple tokenizer +*/ +static const sqlite3_tokenizer_module simpleTokenizerModule = { + 0, + simpleCreate, + simpleDestroy, + simpleOpen, + simpleClose, + simpleNext, +}; + +/* +** Allocate a new simple tokenizer. Return a pointer to the new +** tokenizer in *ppModule +*/ +void sqlite3Fts2SimpleTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &simpleTokenizerModule; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */ diff --git a/manifest b/manifest index e7ecd344c6..e5c91e8b82 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C VACUUM\snow\suses\sa\stemporary\sfile\sin\sthe\sofficial\sTEMP\sfolder\sinstead\sof\na\sfile\sin\sthe\ssame\sdirectory\sas\sthe\soriginal\sdatabase.\s(CVS\s3470) -D 2006-10-10T13:07:36 +C Copy\sfts1/\sto\sfts2/,\schanging\sreference\sfrom\sfts1\sto\sfts2.\s\sFor\sfuture\nreference,\sthe\ssource\sversions\scopied\swere:\n\nREADME.txt\sr1.1\nfts1.c\sr1.37\nfts1.h\sr1.2\nfts1_hash.c\sr1.1\nfts1_hash.h\sr1.1\nfts1_porter.c\sr1.1\nfts1_tokenizer.h\sr1.4\nfts1_tokenizer1.c\sr1.6\s(CVS\s3471) +D 2006-10-10T17:37:14 F Makefile.in 4379c909d46b38b8c5db3533084601621d4f14b2 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -32,6 +32,14 @@ F ext/fts1/fulltext.c d935e600d87bc86b7d64f55c7520ea41d6034c5c F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9 +F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d +F ext/fts2/fts2.c 1052f03493701fd11abdc904efd3190915091bfe +F ext/fts2/fts2.h bbdab26d34f91974d5b9ade8b7836c140a7c4ce1 +F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b +F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e +F ext/fts2/fts2_porter.c 5602fce9579831a070356f8d1956705baf3f1279 +F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd +F ext/fts2/fts2_tokenizer1.c b26f0d2fae967fbe5722f82d08506d35f10ac4e7 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 F ltmain.sh f6b283068efa69f06eb8aa1fe4bddfdbdeb35826 F main.mk 22acd3d279ec40ba6959af643ba25975fcfbdcc7 @@ -402,7 +410,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 -P 0658bb9e3f57e6aff4745590821a0590abc815f6 -R 63ed7158b28b067d212c207cfe64406b -U drh -Z 3684897c97a0c2b8fc8841fe81c42ad8 +P b743429dd54e2dcae213ec1993e9e916a9ba678d +R 3877f9d9a9169301026353cdbbc79a36 +U shess +Z b8a5c53e502283405abefca3dadf33fe diff --git a/manifest.uuid b/manifest.uuid index 3c38baab37..108f8b98a1 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b743429dd54e2dcae213ec1993e9e916a9ba678d \ No newline at end of file +d0d1e7cdcc1dd085f1e359ce35c441699d517b02 \ No newline at end of file -- 2.47.2