);
};
+typedef struct fts5_tokenizer_v2 fts5_tokenizer_v2;
+struct fts5_tokenizer_v2 {
+ int iVersion; /* Currently always 2. fts5CreateTokenizer_v2() rejects larger values */
+ /* Create / delete a tokenizer instance. The first argument passed to xCreate() is the user-data pointer registered with the tokenizer. */
+ int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
+ void (*xDelete)(Fts5Tokenizer*);
+ int (*xTokenize)(Fts5Tokenizer*,
+ void *pCtx, /* Context pointer handed back as 1st argument to xToken() */
+ int flags, /* Mask of FTS5_TOKENIZE_* flags */
+ const char *pText, int nText, /* Text to tokenize and its size in bytes */
+ int (*xToken)(
+ void *pCtx, /* Copy of 2nd argument to xTokenize() */
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
+ const char *pToken, /* Pointer to buffer containing token */
+ int nToken, /* Size of token in bytes */
+ int iStart, /* Byte offset of token within input text */
+ int iEnd /* Byte offset of end of token within input text */
+ )
+ );
+ /* Optional - may be NULL, in which case FTS5 never invokes it. Configures
+ ** the locale (pLocale/nLocale) before text is tokenized. FTS5 invokes it
+ ** with (NULL, 0) to restore the default locale afterwards. */
+ int (*xSetLocale)(Fts5Tokenizer*, const char *pLocale, int nLocale);
+};
+
/* Flags that may be passed as the third argument to xTokenize() */
#define FTS5_TOKENIZE_QUERY 0x0001
#define FTS5_TOKENIZE_PREFIX 0x0002
*/
typedef struct fts5_api fts5_api;
struct fts5_api {
- int iVersion; /* Currently always set to 2 */
+ int iVersion; /* Currently 3, was once 2 */
/* Create a new tokenizer */
int (*xCreateTokenizer)(
fts5_extension_function xFunction,
void (*xDestroy)(void*)
);
+
+ /* APIs below this point are only available if iVersion>=3 */
+
+ /* Create a new tokenizer */
+ int (*xCreateTokenizer_v2)(
+ fts5_api *pApi,
+ const char *zName,
+ void *pUserData,
+ fts5_tokenizer_v2 *pTokenizer,
+ void (*xDestroy)(void*)
+ );
+
+ /* Find an existing tokenizer */
+ int (*xFindTokenizer_v2)(
+ fts5_api *pApi,
+ const char *zName,
+ void **ppUserData,
+ fts5_tokenizer_v2 **ppTokenizer
+ );
};
/*
struct Fts5TokenizerConfig {
Fts5Tokenizer *pTok;
- fts5_tokenizer *pTokApi;
+ fts5_tokenizer_v2 *pTokApi;
const char **azArg;
int nArg;
int ePattern; /* FTS_PATTERN_XXX constant */
char *zContentRowid; /* "content_rowid=" option value */
int bColumnsize; /* "columnsize=" option value (dflt==1) */
int bTokendata; /* "tokendata=" option value (dflt==0) */
+ int bLocale; /* "locale=" option value (dflt==0) */
int eDetail; /* FTS5_DETAIL_XXX value */
char *zContentExprlist;
Fts5TokenizerConfig t;
int sqlite3Fts5ConfigParseRank(const char*, char**, char**);
+void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...);
+
/*
** End of interface to code in fts5_config.c.
**************************************************************************/
int sqlite3Fts5FlushToDisk(Fts5Table*);
+int sqlite3Fts5ExtractText(
+ Fts5Config *pConfig,
+ int bContent, /* Loaded from content table */
+ sqlite3_value *pVal, /* Value to extract text from */
+ int *pbResetTokenizer, /* OUT: True if xSetLocale(NULL) required */
+ const char **ppText, /* OUT: Pointer to text buffer */
+ int *pnText /* OUT: Size of (*ppText) in bytes */
+);
+
+void sqlite3Fts5ClearLocale(Fts5Config *pConfig);
+
/*
** End of interface to code in fts5.c.
**************************************************************************/
return rc;
}
+ if( sqlite3_strnicmp("locale", zCmd, nCmd)==0 ){
+ if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){
+ *pzErr = sqlite3_mprintf("malformed locale=... directive");
+ rc = SQLITE_ERROR;
+ }else{
+ pConfig->bLocale = (zArg[0]=='1');
+ }
+ return rc;
+ }
+
if( sqlite3_strnicmp("detail", zCmd, nCmd)==0 ){
const Fts5Enum aDetail[] = {
{ "none", FTS5_DETAIL_NONE },
sqlite3_free(zTwo);
}
+ /* If this is not an FTS5_CONTENT_NORMAL table, set bLocale */
+ if( pRet->eContent!=FTS5_CONTENT_NORMAL ){
+ pRet->bLocale = 1;
+ }
+
/* We only allow contentless_delete=1 if the table is indeed contentless. */
if( rc==SQLITE_OK
&& pRet->bContentlessDelete
}
return rc;
}
+
+/*
+** Format an error message printf() style using zFmt and the trailing
+** arguments. If pConfig->pzErrmsg is not NULL, the newly allocated message
+** is stored in (*pConfig->pzErrmsg). Otherwise the message is freed
+** immediately and nothing is reported.
+*/
+void sqlite3Fts5ConfigErrmsg(Fts5Config *pConfig, const char *zFmt, ...){
+  char *zErr;                     /* Formatted error message */
+  va_list args;                   /* ... printf arguments */
+
+  va_start(args, zFmt);
+  zErr = sqlite3_vmprintf(zFmt, args);
+  va_end(args);
+
+  if( pConfig->pzErrmsg==0 ){
+    /* Nowhere to store the message - discard it. */
+    sqlite3_free(zErr);
+  }else{
+    *pConfig->pzErrmsg = zErr;
+  }
+}
+
+
struct Fts5TokenizerModule {
char *zName; /* Name of tokenizer */
void *pUserData; /* User pointer passed to xCreate() */
- fts5_tokenizer x; /* Tokenizer functions */
+ fts5_tokenizer_v2 x; /* Tokenizer functions */
void (*xDestroy)(void*); /* Destructor function */
Fts5TokenizerModule *pNext; /* Next registered tokenizer module */
};
#define BitFlagAllTest(x,y) (((x) & (y))==(y))
#define BitFlagTest(x,y) (((x) & (y))!=0)
+#define FTS5_LOCALE_SUBTYPE ((unsigned int)'L')
+
/*
** Macros to Set(), Clear() and Test() cursor flags.
}
+/*
+** Pass the locale (zLocale/nLocale) to the xSetLocale() method of the
+** tokenizer used by pConfig and return its result. If the tokenizer has
+** no xSetLocale() method, this is a no-op that returns SQLITE_OK.
+*/
+static int fts5SetLocale(
+  Fts5Config *pConfig,
+  const char *zLocale,
+  int nLocale
+){
+  Fts5TokenizerConfig *pTokCfg = &pConfig->t;
+
+  if( pTokCfg->pTokApi->xSetLocale==0 ){
+    return SQLITE_OK;
+  }
+  return pTokCfg->pTokApi->xSetLocale(pTokCfg->pTok, zLocale, nLocale);
+}
+
+/*
+** Reset the tokenizer locale by passing a NULL, zero-length locale to
+** fts5SetLocale(). Any error returned by the tokenizer is discarded.
+*/
+void sqlite3Fts5ClearLocale(Fts5Config *pConfig){
+  (void)fts5SetLocale(pConfig, (const char*)0, 0);
+}
+
+/*
+** Extract the text to tokenize from value pVal.
+**
+** pVal is decoded as a locale blob - LOCALE, a single 0x00 byte, then
+** TEXT - if it is a blob and either (a) it carries the
+** FTS5_LOCALE_SUBTYPE subtype, or (b) bContent is true and the table is
+** an FTS5_CONTENT_NORMAL table created with locale=1. In that case the
+** tokenizer locale is set to LOCALE, (*ppText)/(*pnText) are set to
+** point at TEXT within the blob and (*pbResetTokenizer) is set to 1.
+** SQLITE_ERROR is returned, and no output is written, if the blob
+** contains no 0x00 byte.
+**
+** Otherwise (*ppText)/(*pnText) are set to the text form of pVal and
+** (*pbResetTokenizer) to 0.
+**
+** The return value is SQLITE_OK, or the error code returned when setting
+** the tokenizer locale.
+*/
+int sqlite3Fts5ExtractText(
+  Fts5Config *pConfig,
+  int bContent,                   /* Loaded from content table */
+  sqlite3_value *pVal,            /* Value to extract text from */
+  int *pbResetTokenizer,          /* OUT: True if xSetLocale(NULL) required */
+  const char **ppText,            /* OUT: Pointer to text buffer */
+  int *pnText                     /* OUT: Size of (*ppText) in bytes */
+){
+  int rc = SQLITE_OK;
+  int bLocaleBlob = 0;            /* True if pVal is "LOCALE 0x00 TEXT" */
+
+  if( sqlite3_value_type(pVal)==SQLITE_BLOB ){
+    bLocaleBlob = (
+        sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE
+     || (bContent && pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL)
+    );
+  }
+
+  if( bLocaleBlob==0 ){
+    /* Plain value. Use its text representation as is. */
+    *ppText = (const char*)sqlite3_value_text(pVal);
+    *pnText = sqlite3_value_bytes(pVal);
+    *pbResetTokenizer = 0;
+  }else{
+    const u8 *aBlob = sqlite3_value_blob(pVal);
+    int nBlob = sqlite3_value_bytes(pVal);
+    int iNul = 0;                 /* Offset of the 0x00 separator */
+
+    while( iNul<nBlob && aBlob[iNul]!=0x00 ) iNul++;
+    if( iNul>=nBlob ) return SQLITE_ERROR;
+
+    rc = fts5SetLocale(pConfig, (const char*)aBlob, iNul);
+
+    *ppText = (const char*)&aBlob[iNul+1];
+    *pnText = nBlob-iNul-1;
+    *pbResetTokenizer = 1;
+  }
+
+  return rc;
+}
+
+/*
+** Argument pVal is the text of a full-text search expression. It may or
+** may not have been wrapped by fts5_locale(). This function extracts
+** the text of the expression and sets output variable (*pzText) to
+** point to it.
+**
+** If pVal was an fts5_locale() value, then the tokenizer has been
+** configured to use the required locale and (*pzText) is a nul-terminated
+** copy of the expression text. In this case (*pbFreeAndReset) is set to
+** true, and the caller is required to (a) reset the tokenizer locale, for
+** example by calling sqlite3Fts5ClearLocale(), and (b) call sqlite3_free()
+** to free (*pzText).
+**
+** Otherwise, (*pbFreeAndReset) is set to false and (*pzText) is the buffer
+** obtained from sqlite3_value_text(), which the caller must not free.
+**
+** If an error occurs, an SQLite error code is returned, (*pbFreeAndReset)
+** is left set to false and the tokenizer locale does not require resetting
+** by the caller.
+*/
+static int fts5ExtractExprText(
+  Fts5FullTable *pTab,
+  sqlite3_value *pVal,
+  char **pzText,
+  int *pbFreeAndReset
+){
+  Fts5Config *pConfig = pTab->p.pConfig;
+  const char *pExpr = 0;          /* Expression text within pVal */
+  int nExpr = 0;                  /* Size of pExpr in bytes */
+  int bLocale = 0;                /* True if a locale was configured */
+  int rc;
+
+  *pbFreeAndReset = 0;
+  rc = sqlite3Fts5ExtractText(pConfig, 0, pVal, &bLocale, &pExpr, &nExpr);
+  if( rc!=SQLITE_OK ) return rc;
+
+  if( bLocale==0 ){
+    *pzText = (char*)pExpr;
+    return rc;
+  }
+
+  /* The text inside an fts5_locale() blob is not nul-terminated. Take a
+  ** nul-terminated copy. If that fails, undo the locale change here. */
+  *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nExpr, pExpr);
+  if( rc==SQLITE_OK ){
+    *pbFreeAndReset = 1;
+  }else{
+    sqlite3Fts5ClearLocale(pConfig);
+  }
+
+  return rc;
+}
+
+
/*
** This is the xFilter interface for the virtual table. See
** the virtual table xFilter method documentation for additional
pRank = apVal[i];
break;
case 'M': {
- const char *zText = (const char*)sqlite3_value_text(apVal[i]);
+ char *zText = 0;
+ int bFreeAndReset = 0;
+
+ rc = fts5ExtractExprText(pTab, apVal[i], &zText, &bFreeAndReset);
+ if( rc!=SQLITE_OK ) goto filter_out;
if( zText==0 ) zText = "";
+
iCol = 0;
do{
iCol = iCol*10 + (idxStr[iIdxStr]-'0');
** indicates that the MATCH expression is not a full text query,
** but a request for an internal parameter. */
rc = fts5SpecialMatch(pTab, pCsr, &zText[1]);
- goto filter_out;
}else{
char **pzErr = &pTab->p.base.zErrMsg;
rc = sqlite3Fts5ExprNew(pConfig, 0, iCol, zText, &pExpr, pzErr);
rc = sqlite3Fts5ExprAnd(&pCsr->pExpr, pExpr);
pExpr = 0;
}
- if( rc!=SQLITE_OK ) goto filter_out;
}
+ {
+   /* Note whether this was a special "*..." query BEFORE releasing zText.
+   ** When bFreeAndReset is true zText is heap memory owned by this
+   ** block, and reading zText[0] after sqlite3_free() would be a
+   ** use-after-free. */
+   int bSpecial = (zText[0]=='*');
+
+   if( bFreeAndReset ){
+     sqlite3_free(zText);
+     sqlite3Fts5ClearLocale(pConfig);
+   }
+
+   if( bSpecial || rc!=SQLITE_OK ) goto filter_out;
+ }
+
break;
}
case 'L':
return rc;
}
+/*
+** Set the result of context pCtx based on column value pVal.
+**
+** If pVal is a blob that either has the FTS5_LOCALE_SUBTYPE subtype or
+** belongs to an FTS5_CONTENT_NORMAL table with locale=1, it is unpacked:
+**
+**   * if the blob begins with four 0x00 bytes, the result is a blob
+**     consisting of the remaining bytes,
+**
+**   * otherwise, the result is the text that follows the first 0x00
+**     byte. If there is no 0x00 byte, no result is set.
+**
+** Any other value is returned unmodified.
+*/
+static void fts5ExtractValueFromColumn(
+  sqlite3_context *pCtx,
+  Fts5Config *pConfig,
+  sqlite3_value *pVal
+){
+  const u8 *aBlob;
+  int nBlob;
+  int iNul;
+
+  if( sqlite3_value_type(pVal)!=SQLITE_BLOB
+   || ( sqlite3_value_subtype(pVal)!=FTS5_LOCALE_SUBTYPE
+     && (pConfig->bLocale==0 || pConfig->eContent!=FTS5_CONTENT_NORMAL) )
+  ){
+    /* Not a packed value. Pass it straight through. */
+    sqlite3_result_value(pCtx, pVal);
+    return;
+  }
+
+  aBlob = sqlite3_value_blob(pVal);
+  nBlob = sqlite3_value_bytes(pVal);
+
+  if( nBlob>=4 && memcmp(aBlob, "\0\0\0\0", 4)==0 ){
+    sqlite3_result_blob(pCtx, &aBlob[4], nBlob-4, SQLITE_TRANSIENT);
+    return;
+  }
+
+  /* Skip over the locale name to the 0x00 separator. */
+  iNul = 0;
+  while( iNul<nBlob && aBlob[iNul] ) iNul++;
+  if( iNul<nBlob ){
+    const char *pText = (const char*)&aBlob[iNul+1];
+    sqlite3_result_text(pCtx, pText, nBlob-iNul-1, SQLITE_TRANSIENT);
+  }
+}
+
/*
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
pConfig->pzErrmsg = &pTab->p.base.zErrMsg;
rc = fts5SeekCursor(pCsr, 1);
if( rc==SQLITE_OK ){
- sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
+ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1);
+ fts5ExtractValueFromColumn(pCtx, pConfig, pVal);
}
pConfig->pzErrmsg = 0;
}else if( pConfig->bContentlessDelete && sqlite3_vtab_nochange(pCtx) ){
/*
** Register a new tokenizer. This is the implementation of the
-** fts5_api.xCreateTokenizer() method.
+** fts5_api.xCreateTokenizer_v2() method.
*/
-static int fts5CreateTokenizer(
+static int fts5CreateTokenizer_v2(
fts5_api *pApi, /* Global context (one per db handle) */
const char *zName, /* Name of new function */
void *pUserData, /* User data for aux. function */
- fts5_tokenizer *pTokenizer, /* Tokenizer implementation */
+ fts5_tokenizer_v2 *pTokenizer, /* Tokenizer implementation */
void(*xDestroy)(void*) /* Destructor for pUserData */
){
Fts5Global *pGlobal = (Fts5Global*)pApi;
- Fts5TokenizerModule *pNew;
- sqlite3_int64 nName; /* Size of zName and its \0 terminator */
- sqlite3_int64 nByte; /* Bytes of space to allocate */
int rc = SQLITE_OK;
- nName = strlen(zName) + 1;
- nByte = sizeof(Fts5TokenizerModule) + nName;
- pNew = (Fts5TokenizerModule*)sqlite3_malloc64(nByte);
- if( pNew ){
- memset(pNew, 0, (size_t)nByte);
- pNew->zName = (char*)&pNew[1];
- memcpy(pNew->zName, zName, nName);
- pNew->pUserData = pUserData;
- pNew->x = *pTokenizer;
- pNew->xDestroy = xDestroy;
- pNew->pNext = pGlobal->pTok;
- pGlobal->pTok = pNew;
- if( pNew->pNext==0 ){
- pGlobal->pDfltTok = pNew;
- }
+ if( pTokenizer->iVersion>2 ){
+ rc = SQLITE_ERROR;
}else{
- rc = SQLITE_NOMEM;
+ Fts5TokenizerModule *pNew;
+ sqlite3_int64 nName; /* Size of zName and its \0 terminator */
+ sqlite3_int64 nByte; /* Bytes of space to allocate */
+
+ nName = strlen(zName) + 1;
+ nByte = sizeof(Fts5TokenizerModule) + nName;
+ pNew = (Fts5TokenizerModule*)sqlite3Fts5MallocZero(&rc, nByte);
+ if( pNew ){
+ pNew->zName = (char*)&pNew[1];
+ memcpy(pNew->zName, zName, nName);
+ pNew->pUserData = pUserData;
+ pNew->x = *pTokenizer;
+ pNew->xDestroy = xDestroy;
+ pNew->pNext = pGlobal->pTok;
+ pGlobal->pTok = pNew;
+ if( pNew->pNext==0 ){
+ pGlobal->pDfltTok = pNew;
+ }
+ }
}
return rc;
}
+/*
+** The fts5_api.xCreateTokenizer() method. The legacy fts5_tokenizer
+** methods are copied into a version 2 fts5_tokenizer_v2 structure that
+** has no xSetLocale() method, which is then registered by way of
+** fts5CreateTokenizer_v2().
+*/
+static int fts5CreateTokenizer(
+  fts5_api *pApi,                 /* Global context (one per db handle) */
+  const char *zName,              /* Name of new function */
+  void *pUserData,                /* User data for aux. function */
+  fts5_tokenizer *pTokenizer,     /* Tokenizer implementation */
+  void(*xDestroy)(void*)          /* Destructor for pUserData */
+){
+  fts5_tokenizer_v2 sV2;          /* v2 wrapper around pTokenizer */
+
+  memset(&sV2, 0, sizeof(sV2));
+  sV2.iVersion = 2;
+  sV2.xCreate = pTokenizer->xCreate;
+  sV2.xDelete = pTokenizer->xDelete;
+  sV2.xTokenize = pTokenizer->xTokenize;
+
+  return fts5CreateTokenizer_v2(pApi, zName, pUserData, &sV2, xDestroy);
+}
+
static Fts5TokenizerModule *fts5LocateTokenizer(
Fts5Global *pGlobal,
const char *zName
/*
** Find a tokenizer. This is the implementation of the
-** fts5_api.xFindTokenizer() method.
+** fts5_api.xFindTokenizer_v2() method.
*/
-static int fts5FindTokenizer(
+static int fts5FindTokenizer_v2(
fts5_api *pApi, /* Global context (one per db handle) */
- const char *zName, /* Name of new function */
+ const char *zName, /* Name of tokenizer */
void **ppUserData,
- fts5_tokenizer *pTokenizer /* Populate this object */
+ fts5_tokenizer_v2 **ppTokenizer /* Populate this object */
){
int rc = SQLITE_OK;
Fts5TokenizerModule *pMod;
pMod = fts5LocateTokenizer((Fts5Global*)pApi, zName);
if( pMod ){
- *pTokenizer = pMod->x;
+ *ppTokenizer = &pMod->x;
*ppUserData = pMod->pUserData;
}else{
- memset(pTokenizer, 0, sizeof(fts5_tokenizer));
+ *ppTokenizer = 0;
+ *ppUserData = 0;
rc = SQLITE_ERROR;
}
return rc;
}
+/*
+** Find a tokenizer. This is the implementation of the
+** fts5_api.xFindTokenizer() method.
+**
+** It is a wrapper around fts5FindTokenizer_v2(). If tokenizer zName is
+** found, its xCreate, xDelete and xTokenize methods are copied into
+** (*pTokenizer), (*ppUserData) is set to its user-data pointer and
+** SQLITE_OK is returned.
+**
+** If no such tokenizer exists, SQLITE_ERROR is returned, (*ppUserData) is
+** set to NULL and (*pTokenizer) is zeroed.
+*/
+static int fts5FindTokenizer(
+  fts5_api *pApi,                 /* Global context (one per db handle) */
+  const char *zName,              /* Name of tokenizer */
+  void **ppUserData,
+  fts5_tokenizer *pTokenizer      /* Populate this object */
+){
+  fts5_tokenizer_v2 *pV2 = 0;
+  int rc = SQLITE_OK;
+
+  rc = fts5FindTokenizer_v2(pApi, zName, ppUserData, &pV2);
+  if( rc==SQLITE_OK ){
+    pTokenizer->xCreate = pV2->xCreate;
+    pTokenizer->xDelete = pV2->xDelete;
+    pTokenizer->xTokenize = pV2->xTokenize;
+  }else{
+    /* The legacy method has always zeroed the output structure on failure.
+    ** Keep doing so, so that existing callers never observe uninitialized
+    ** function pointers. */
+    memset(pTokenizer, 0, sizeof(*pTokenizer));
+  }
+
+  return rc;
+}
+
int fts5GetTokenizer(
Fts5Global *pGlobal,
const char **azArg,
sqlite3_result_text(pCtx, "--FTS5-SOURCE-ID--", -1, SQLITE_TRANSIENT);
}
+/*
+** Implementation of fts5_locale() function.
+**
+** The function takes two arguments, LOCALE and TEXT. The result is a blob
+** consisting of the bytes of LOCALE, a single 0x00 byte, and then the
+** bytes of TEXT. The blob is tagged with subtype FTS5_LOCALE_SUBTYPE.
+*/
+static void fts5LocaleFunc(
+  sqlite3_context *pCtx,          /* Function call context */
+  int nArg,                       /* Number of args */
+  sqlite3_value **apArg           /* Function arguments */
+){
+  const char *zLocale;            /* First argument - the locale */
+  const char *zText;              /* Second argument - the text */
+  int nLocale;                    /* Size of zLocale in bytes */
+  int nText;                      /* Size of zText in bytes */
+  u8 *aOut;                       /* Buffer for the result blob */
+  int nOut;                       /* Size of aOut in bytes */
+
+  assert( nArg==2 );
+  UNUSED_PARAM(nArg);
+
+  zLocale = (const char*)sqlite3_value_text(apArg[0]);
+  nLocale = sqlite3_value_bytes(apArg[0]);
+  zText = (const char*)sqlite3_value_text(apArg[1]);
+  nText = sqlite3_value_bytes(apArg[1]);
+
+  nOut = nLocale + 1 + nText;
+  aOut = (u8*)sqlite3_malloc(nOut);
+  if( aOut ){
+    if( zLocale ) memcpy(aOut, zLocale, nLocale);
+    aOut[nLocale] = 0x00;
+    if( zText ) memcpy(&aOut[nLocale+1], zText, nText);
+
+    /* Ownership of aOut passes to SQLite, which frees it using
+    ** sqlite3_free(). */
+    sqlite3_result_blob(pCtx, aOut, nOut, sqlite3_free);
+    sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE);
+  }else{
+    sqlite3_result_error_nomem(pCtx);
+  }
+}
+
/*
** Return true if zName is the extension on one of the shadow tables used
** by this module.
void *p = (void*)pGlobal;
memset(pGlobal, 0, sizeof(Fts5Global));
pGlobal->db = db;
- pGlobal->api.iVersion = 2;
+ pGlobal->api.iVersion = 3;
pGlobal->api.xCreateFunction = fts5CreateAux;
pGlobal->api.xCreateTokenizer = fts5CreateTokenizer;
pGlobal->api.xFindTokenizer = fts5FindTokenizer;
+ pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2;
+ pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2;
rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy);
if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db);
if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db);
p, fts5SourceIdFunc, 0, 0
);
}
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(
+ db, "fts5_locale", 2,
+ SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_RESULT_SUBTYPE,
+ p, fts5LocaleFunc, 0, 0
+ );
+ }
}
/* If SQLITE_FTS5_ENABLE_TEST_MI is defined, assume that the file
ctx.iCol = -1;
for(iCol=1; rc==SQLITE_OK && iCol<=pConfig->nCol; iCol++){
if( pConfig->abUnindexed[iCol-1]==0 ){
- const char *zText;
- int nText;
+ sqlite3_value *pVal = 0;
+ const char *pText = 0;
+ int nText = 0;
+ int bReset = 0;
+
assert( pSeek==0 || apVal==0 );
assert( pSeek!=0 || apVal!=0 );
if( pSeek ){
- zText = (const char*)sqlite3_column_text(pSeek, iCol);
- nText = sqlite3_column_bytes(pSeek, iCol);
- }else if( ALWAYS(apVal) ){
- zText = (const char*)sqlite3_value_text(apVal[iCol-1]);
- nText = sqlite3_value_bytes(apVal[iCol-1]);
+ pVal = sqlite3_column_value(pSeek, iCol);
}else{
- continue;
+ pVal = apVal[iCol-1];
}
- ctx.szCol = 0;
- rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
- zText, nText, (void*)&ctx, fts5StorageInsertCallback
- );
- p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
- if( p->aTotalSize[iCol-1]<0 && rc==SQLITE_OK ){
- rc = FTS5_CORRUPT;
+
+ rc = sqlite3Fts5ExtractText(pConfig,pSeek!=0,pVal,&bReset,&pText,&nText);
+ if( rc==SQLITE_OK ){
+ ctx.szCol = 0;
+ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
+ pText, nText, (void*)&ctx, fts5StorageInsertCallback
+ );
+ p->aTotalSize[iCol-1] -= (i64)ctx.szCol;
+ if( p->aTotalSize[iCol-1]<0 && rc==SQLITE_OK ){
+ rc = FTS5_CORRUPT;
+ }
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
}
}
}
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
- const char *zText = (const char*)sqlite3_column_text(pScan, ctx.iCol+1);
- int nText = sqlite3_column_bytes(pScan, ctx.iCol+1);
- rc = sqlite3Fts5Tokenize(pConfig,
- FTS5_TOKENIZE_DOCUMENT,
- zText, nText,
- (void*)&ctx,
- fts5StorageInsertCallback
+ int bReset = 0;
+ int nText = 0;
+ const char *pText = 0;
+ rc = sqlite3Fts5ExtractText(pConfig, 1,
+ sqlite3_column_value(pScan, ctx.iCol+1), &bReset, &pText, &nText
);
+
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5Tokenize(pConfig,
+ FTS5_TOKENIZE_DOCUMENT,
+ pText, nText,
+ (void*)&ctx,
+ fts5StorageInsertCallback
+ );
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ }
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
- const char *zText = (const char*)sqlite3_value_text(apVal[ctx.iCol+2]);
- int nText = sqlite3_value_bytes(apVal[ctx.iCol+2]);
- rc = sqlite3Fts5Tokenize(pConfig,
- FTS5_TOKENIZE_DOCUMENT,
- zText, nText,
- (void*)&ctx,
- fts5StorageInsertCallback
+ int bReset = 0;
+ int nText = 0;
+ const char *pText = 0;
+ rc = sqlite3Fts5ExtractText(
+ pConfig, 0, apVal[ctx.iCol+2], &bReset, &pText, &nText
);
+ if( rc==SQLITE_OK ){
+ if( bReset && pConfig->bLocale==0 ){
+ rc = SQLITE_ERROR;
+ sqlite3Fts5ConfigErrmsg(pConfig,
+ "fts5_locale() may not be used without locale=1"
+ );
+ }else{
+ rc = sqlite3Fts5Tokenize(pConfig,
+ FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx,
+ fts5StorageInsertCallback
+ );
+ }
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ }
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
p->aTotalSize[ctx.iCol] += (i64)ctx.szCol;
rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
}
if( rc==SQLITE_OK ){
- const char *zText = (const char*)sqlite3_column_text(pScan, i+1);
- int nText = sqlite3_column_bytes(pScan, i+1);
- rc = sqlite3Fts5Tokenize(pConfig,
- FTS5_TOKENIZE_DOCUMENT,
- zText, nText,
- (void*)&ctx,
- fts5StorageIntegrityCallback
+ const char *pText = 0;
+ int nText = 0;
+ int bReset = 0;
+
+ rc = sqlite3Fts5ExtractText(pConfig, 1,
+ sqlite3_column_value(pScan, i+1), &bReset, &pText, &nText
);
+
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts5Tokenize(pConfig,
+ FTS5_TOKENIZE_DOCUMENT,
+ pText, nText,
+ (void*)&ctx,
+ fts5StorageIntegrityCallback
+ );
+ if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ }
}
if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
rc = FTS5_CORRUPT;
struct F5tTokenizerContext {
void *pCtx;
int (*xToken)(void*, int, const char*, int, int, int);
+ F5tTokenizerInstance *pInst;
};
struct F5tTokenizerModule {
F5tTokenizerContext *pContext;
};
+/*
+** zLocale:
+** Buffer zLocale contains the current locale, as configured by the most
+** recent call to xSetLocale(). A NULL (default) locale is represented as
+** a 0 byte string - "\0".
+**
+** This can be retrieved by a Tcl tokenize script using [sqlite3_fts5_locale].
+*/
struct F5tTokenizerInstance {
Tcl_Interp *interp;
Tcl_Obj *pScript;
F5tTokenizerContext *pContext;
+ char zLocale[128];
};
+/*
+** The xSetLocale() method of the Tcl test tokenizer. Store a
+** nul-terminated copy of locale (pLocale/nLocale) in pInst->zLocale.
+**
+** SQLITE_ERROR is returned, and the stored locale left unchanged, if the
+** locale does not fit in the buffer together with its nul-terminator. A
+** NULL or zero-length locale resets the buffer to the empty string.
+*/
+static int f5tTokenizerSetLocale(
+  Fts5Tokenizer *pTokenizer,
+  const char *pLocale,
+  int nLocale
+){
+  F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)pTokenizer;
+  if( nLocale<0 || (size_t)nLocale>=sizeof(pInst->zLocale) ){
+    return SQLITE_ERROR;
+  }
+
+  memset(pInst->zLocale, 0, sizeof(pInst->zLocale));
+
+  /* FTS5 clears the locale by passing (NULL, 0). Passing a NULL pointer
+  ** to memcpy() is undefined behaviour even when the size is zero, so
+  ** only copy when there is something to copy. */
+  if( pLocale && nLocale>0 ){
+    memcpy(pInst->zLocale, pLocale, (size_t)nLocale);
+  }
+
+  return SQLITE_OK;
+}
+
static int f5tTokenizerCreate(
void *pCtx,
const char **azArg,
int (*xToken)(void*, int, const char*, int, int, int)
){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
+ F5tTokenizerInstance *pOldInst = 0;
void *pOldCtx;
int (*xOldToken)(void*, int, const char*, int, int, int);
Tcl_Obj *pEval;
pOldCtx = pInst->pContext->pCtx;
xOldToken = pInst->pContext->xToken;
+ pOldInst = pInst->pContext->pInst;
pInst->pContext->pCtx = pCtx;
pInst->pContext->xToken = xToken;
+ pInst->pContext->pInst = pInst;
assert(
flags==FTS5_TOKENIZE_DOCUMENT
pInst->pContext->pCtx = pOldCtx;
pInst->pContext->xToken = xOldToken;
+ pInst->pContext->pInst = pOldInst;
return rc;
}
+/*
+**      sqlite3_fts5_locale
+**
+** Return the locale string stored in the tokenizer instance currently
+** recorded in the tokenizer context. It is an error to pass any
+** arguments, or to invoke this command while no xToken callback is
+** registered in the context.
+*/
+static int SQLITE_TCLAPI f5tTokenizerLocale(
+  void * clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  F5tTokenizerContext *pContext = (F5tTokenizerContext*)clientData;
+  int rc = TCL_ERROR;
+
+  if( objc!=1 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "");
+  }else if( pContext->xToken==0 ){
+    Tcl_AppendResult(interp,
+        "sqlite3_fts5_locale may only be used by tokenizer callback", 0
+    );
+  }else{
+    Tcl_SetObjResult(interp, Tcl_NewStringObj(pContext->pInst->zLocale, -1));
+    rc = TCL_OK;
+  }
+
+  return rc;
+}
+
/*
** sqlite3_fts5_token ?-colocated? TEXT START END
*/
fts5_api *pApi;
char *zName;
Tcl_Obj *pScript;
- fts5_tokenizer t;
F5tTokenizerModule *pMod;
int rc;
+ int bV2 = 0; /* True to use _v2 API */
+
+ if( objc==5 ){
+ const char *zArg = Tcl_GetString(objv[1]);
+ if( 0==strcmp(zArg, "-v2") ){
+ objv++;
+ objc--;
+ bV2 = 1;
+ }
+ }
if( objc!=4 ){
- Tcl_WrongNumArgs(interp, 1, objv, "DB NAME SCRIPT");
+ Tcl_WrongNumArgs(interp, 1, objv, "?-v2? DB NAME SCRIPT");
return TCL_ERROR;
}
if( f5tDbAndApi(interp, objv[1], &db, &pApi) ){
zName = Tcl_GetString(objv[2]);
pScript = objv[3];
- t.xCreate = f5tTokenizerCreate;
- t.xTokenize = f5tTokenizerTokenize;
- t.xDelete = f5tTokenizerDelete;
-
pMod = (F5tTokenizerModule*)ckalloc(sizeof(F5tTokenizerModule));
pMod->interp = interp;
pMod->pScript = pScript;
pMod->pContext = pContext;
Tcl_IncrRefCount(pScript);
- rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer);
+
+ if( bV2==0 ){
+ fts5_tokenizer t;
+ t.xCreate = f5tTokenizerCreate;
+ t.xTokenize = f5tTokenizerTokenize;
+ t.xDelete = f5tTokenizerDelete;
+ rc = pApi->xCreateTokenizer(pApi, zName, (void*)pMod, &t, f5tDelTokenizer);
+ }else{
+ fts5_tokenizer_v2 t2;
+ memset(&t2, 0, sizeof(t2));
+ t2.iVersion = 2;
+ t2.xCreate = f5tTokenizerCreate;
+ t2.xTokenize = f5tTokenizerTokenize;
+ t2.xDelete = f5tTokenizerDelete;
+ t2.xSetLocale = f5tTokenizerSetLocale;
+ rc = pApi->xCreateTokenizer_v2(pApi, zName,(void*)pMod,&t2,f5tDelTokenizer);
+ }
+
if( rc!=SQLITE_OK ){
- Tcl_AppendResult(interp, "error in fts5_api.xCreateTokenizer()", 0);
+ Tcl_AppendResult(interp, (
+ bV2 ? "error in fts5_api.xCreateTokenizer_v2()"
+ : "error in fts5_api.xCreateTokenizer()"
+ ), 0);
return TCL_ERROR;
}
} aCmd[] = {
{ "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 },
{ "sqlite3_fts5_token", f5tTokenizerReturn, 1 },
+ { "sqlite3_fts5_locale", f5tTokenizerLocale, 1 },
{ "sqlite3_fts5_tokenize", f5tTokenize, 0 },
{ "sqlite3_fts5_create_function", f5tCreateFunction, 0 },
{ "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 },
--- /dev/null
+# 2014 Dec 20
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focusing on the fts5_locale() function and the locale= option.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5locale
+
+# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
+ifcapable !fts5 {
+ finish_test
+ return
+}
+
+# Return the form of $token that the test tokenizer emits for locale
+# $locale. For locale "reverse" the characters of the token are reversed.
+# For any other locale the token is returned unchanged.
+proc transform_token {locale token} {
+  if {$locale ne "reverse"} {
+    return $token
+  }
+
+  set reversed ""
+  foreach ch [split $token ""] {
+    set reversed "$ch$reversed"
+  }
+  return $reversed
+}
+
+proc tcl_create {args} { return "tcl_tokenize" }
+proc tcl_tokenize {tflags text} {
+ foreach {w iStart iEnd} [fts5_tokenize_split $text] {
+ set w [transform_token [sqlite3_fts5_locale] $w]
+ sqlite3_fts5_token $w $iStart $iEnd
+ }
+}
+
+#-------------------------------------------------------------------------
+# Check that queries can have a locale attached to them.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 1.0 {
+ CREATE VIRTUAL TABLE t1 USING fts5(a, tokenize=tcl);
+ INSERT INTO t1 VALUES('abc');
+ INSERT INTO t1 VALUES('cba');
+} {}
+
+do_execsql_test 1.1 {
+ SELECT rowid, a FROM t1( fts5_locale('en_US', 'abc') );
+} {1 abc}
+
+do_execsql_test 1.2 {
+ SELECT rowid, a FROM t1( fts5_locale('reverse', 'abc') );
+} {2 cba}
+
+#-------------------------------------------------------------------------
+# Test that the locale= option exists and seems to accept values. And
+# that fts5_locale() values may only be inserted into an internal-content
+# table if the locale=1 option was specified.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 2.1 {
+ CREATE VIRTUAL TABLE b1 USING fts5(x, y, locale=1, tokenize=tcl);
+ CREATE VIRTUAL TABLE b2 USING fts5(x, y, locale=0, tokenize=tcl);
+
+ CREATE VIRTUAL TABLE ttt USING fts5vocab('b1', instance);
+}
+
+do_catchsql_test 2.2 {
+ CREATE VIRTUAL TABLE b3 USING fts5(x, y, locale=2);
+} {1 {malformed locale=... directive}}
+
+do_catchsql_test 2.3 {
+ INSERT INTO b1(b1, rank) VALUES('locale', 0);
+} {1 {SQL logic error}}
+
+do_execsql_test 2.4 {
+ INSERT INTO b1 VALUES('abc', 'one two three');
+ INSERT INTO b1 VALUES('def', fts5_locale('reverse', 'four five six'));
+}
+
+do_execsql_test 2.5 {
+ INSERT INTO b2 VALUES('abc', 'one two three');
+}
+
+do_catchsql_test 2.6 {
+ INSERT INTO b2 VALUES('def', fts5_locale('reverse', 'four five six'));
+} {1 {fts5_locale() may not be used without locale=1}}
+
+do_execsql_test 2.7 { SELECT rowid FROM b1('one') } {1}
+do_execsql_test 2.8 { SELECT rowid FROM b1('four') } {}
+do_execsql_test 2.9 { SELECT rowid FROM b1('ruof') } 2
+do_execsql_test 2.10 { SELECT rowid FROM b1(fts5_locale('reverse', 'five'))} 2
+
+do_execsql_test 2.11 {
+ SELECT x, quote(y) FROM b1
+} {
+ abc {'one two three'}
+ def {'four five six'}
+}
+
+do_execsql_test 2.12 { SELECT quote(y) FROM b1('ruof') } {
+ {'four five six'}
+}
+
+do_execsql_test 2.13 {
+ INSERT INTO b1(b1) VALUES('integrity-check');
+}
+do_execsql_test 2.14 {
+ INSERT INTO b1(b1) VALUES('rebuild');
+}
+do_execsql_test 2.15 {
+ INSERT INTO b1(b1) VALUES('integrity-check');
+}
+
+do_execsql_test 2.16 {
+ DELETE FROM b1 WHERE rowid=2
+}
+do_execsql_test 2.17 {
+ INSERT INTO b1(b1) VALUES('integrity-check');
+}
+
+#-------------------------------------------------------------------------
+# Test the 'delete' command with contentless tables.
+#
+reset_db
+sqlite3_fts5_create_tokenizer -v2 db tcl tcl_create
+
+do_execsql_test 3.1 {
+ CREATE VIRTUAL TABLE c1 USING fts5(x, content=, tokenize=tcl);
+ CREATE VIRTUAL TABLE c2 USING fts5vocab('c1', instance);
+
+ INSERT INTO c1 VALUES('hello world');
+ INSERT INTO c1 VALUES( fts5_locale('reverse', 'one two three') );
+}
+
+do_execsql_test 3.2 {
+ SELECT DISTINCT term FROM c2 ORDER BY 1
+} {
+ eerht eno hello owt world
+}
+
+do_execsql_test 3.3 {
+ INSERT INTO c1(c1, rowid, x)
+ VALUES('delete', 2, fts5_locale('reverse', 'one two three') );
+}
+
+do_execsql_test 3.4 {
+ SELECT DISTINCT term FROM c2 ORDER BY 1
+} {
+ hello world
+}
+
+
+
+# execsql_pp { SELECT * FROM ttt }
+
+finish_test
+
+
-C Add\sthe\spercentile\sextension\sto\sthe\sCLI\sby\sdefault.
-D 2024-07-24T13:53:51.649
+C Add\sthe\sfts5_locale()\sfunction,\sand\sbegin\sadding\sthe\srelated\sfunctionality\sto\sfts5.
+D 2024-07-26T20:50:33.303
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724
F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6dbd6348ef0cfc324a7
F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb
F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e
-F ext/fts5/fts5.h 6b49ce6eb2e395e7fd84557b21d32f5de8041f2fada4c617e481e99427e24b6e
-F ext/fts5/fts5Int.h 41fb3a2dd40e818cc96c6f4176dbdf2aaa8f57043cfc9a8f2676e7e6a72ad764
+F ext/fts5/fts5.h 38a9553791828b3cf677b9347735fc531d54015ce4f5229d5cf1e2a5c1d3955a
+F ext/fts5/fts5Int.h b4a5ed934cb3da55737c4d75cb5f26a39b17470fca67c06c7fe6878992998c99
F ext/fts5/fts5_aux.c 4584e88878e54828bf7d4d0d83deedd232ec60628b7731be02bad6adb62304b1
F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09
-F ext/fts5/fts5_config.c 68cb87a49215f8e7028000b681df4057c430a4a6afbd676463886da94c9e1c37
+F ext/fts5/fts5_config.c 0c96490fbad746b3780174f38b2ee5e3d719f2f81ee6b58ca828772871e0f680
F ext/fts5/fts5_expr.c c7336d5f9ecc0e2b014d700be2bec0ea383b0e82c494a7c5c4ac622327c2bfad
F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1
F ext/fts5/fts5_index.c eb9a0dda3bc6ef969a6be8d2746af56856e67251810ddba08622b45be8477abe
-F ext/fts5/fts5_main.c 77fefb37e7931095a5ff271a28fbe4f73ec46d5492ef1f35d405d98e137ad8ed
-F ext/fts5/fts5_storage.c 1d7e08d4331da2f3f7e78e70eef2ed6a013d91ba16175c651adbc5ad672235aa
-F ext/fts5/fts5_tcl.c fdf7e2bb9a9186cfcaf2d2ce11d338309342b7a7593c2812bc54455db53da5d2
+F ext/fts5/fts5_main.c 5b6f85aae5f25ee4e8762f26eb8c998c9c53443bb56483ebf712aca591bcb41e
+F ext/fts5/fts5_storage.c 1d7b358af3d4a7a4c5a7258a847229ca54c1b26d4f1b9e971ea5f2539631c3d4
+F ext/fts5/fts5_tcl.c a1c307785bb505735a8d914fff7d08881e64ba28c40c406b218c591010d1bc9e
F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee
F ext/fts5/fts5_test_tok.c 3cb0a9b508b30d17ef025ccddd26ae3dc8ddffbe76c057616e59a9aa85d36f3b
F ext/fts5/fts5_tokenize.c fa5493075101540270f572038fc1723d44fcc97bfbf237c8530013b8a27860be
F ext/fts5/test/fts5lastrowid.test f36298a1fb9f988bde060a274a7ce638faa9c38a31400f8d2d27ea9373e0c4a1
F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc2782680740513c4d1fc114b43d4ad
F ext/fts5/test/fts5limits.test 8ab67cf5d311c124b6ceb0062d0297767176df4572d955fce79fa43004dff01c
+F ext/fts5/test/fts5locale.test 92c6ae79df0aa57b379c50e400151f4a9a36d292819beefc31019c749249844a
F ext/fts5/test/fts5matchinfo.test 877520582feb86bbfd95ab780099bcba4526f18ac75ee34979144cf86ba3a5a3
F ext/fts5/test/fts5merge.test 2654df0bcdb2d117c2d38b6aeb0168061be01c643f9e9194b36c43a2970e8082
F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
-P 095c22e62248f8ef50cd8531171827f50a7bdd4fc1128bf0e616a3eb2dce980e
-R 76e7f4761efc98a28727d2a1442d8c13
-U drh
-Z 956d236c4ff2c748d10dde2ade86c2ae
+P bcc31846964102385d5a21eb5e85d7db153b155e76b4e2847c9453d3d0e1af04
+R f9f51b6d625a93fd57b76dfee17ab828
+T *branch * fts5-locale
+T *sym-fts5-locale *
+T -sym-trunk *
+U dan
+Z 2b42cc25153434047e86f4740a226c4e
# Remove this line to create a well-formed Fossil manifest.
-bcc31846964102385d5a21eb5e85d7db153b155e76b4e2847c9453d3d0e1af04
+8839ef7cfb49239e7f1c4812a53a93a672827c88d6921408b1d5062b352c87cc