From: dan Date: Mon, 13 May 2024 20:06:08 +0000 (+0000) Subject: Allow existing fts5 tables to be dropped even if the associated tokenizer is not... X-Git-Tag: version-3.47.0~393^2^3~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=12b205c637804c43cd9c991d8583085a523698b4;p=thirdparty%2Fsqlite.git Allow existing fts5 tables to be dropped even if the associated tokenizer is not available. FossilOrigin-Name: 69ef47eeee8b53684c321393be34f03600694fbc86377f8720ff80307846aff6 --- diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 9beb26e056..f1a02c1841 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -142,6 +142,15 @@ struct Fts5Colset { */ typedef struct Fts5Config Fts5Config; +typedef struct Fts5TokenizerConfig Fts5TokenizerConfig; + +struct Fts5TokenizerConfig { + Fts5Tokenizer *pTok; + fts5_tokenizer *pTokApi; + const char **azArg; + int nArg; + int ePattern; /* FTS_PATTERN_XXX constant */ +}; /* ** An instance of the following structure encodes all information that can @@ -184,6 +193,7 @@ typedef struct Fts5Config Fts5Config; */ struct Fts5Config { sqlite3 *db; /* Database handle */ + Fts5Global *pGlobal; /* Global fts5 object for handle db */ char *zDb; /* Database holding FTS index (e.g. "main") */ char *zName; /* Name of FTS index */ int nCol; /* Number of columns */ @@ -199,10 +209,8 @@ struct Fts5Config { int bTokendata; /* "tokendata=" option value (dflt==0) */ int eDetail; /* FTS5_DETAIL_XXX value */ char *zContentExprlist; - Fts5Tokenizer *pTok; - fts5_tokenizer *pTokApi; + Fts5TokenizerConfig t; int bLock; /* True when table is preparing statement */ - int ePattern; /* FTS_PATTERN_XXX constant */ /* Values loaded from the %_config table */ int iVersion; /* fts5 file format 'version' */ @@ -597,13 +605,7 @@ struct Fts5Table { Fts5Index *pIndex; /* Full-text index */ }; -int sqlite3Fts5GetTokenizer( - Fts5Global*, - const char **azArg, - int nArg, - Fts5Config*, - char **pzErr -); +int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig); Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64); diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index d2e8309cd2..32f52a6cab 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -298,12 +298,11 @@ static int fts5ConfigParseSpecial( if( sqlite3_strnicmp("tokenize", zCmd, nCmd)==0 ){ const char *p = (const char*)zArg; sqlite3_int64 nArg = strlen(zArg) + 1; - char **azArg = sqlite3Fts5MallocZero(&rc, sizeof(char*) * nArg); - char *pDel = sqlite3Fts5MallocZero(&rc, nArg * 2); - char *pSpace = pDel; + char **azArg = sqlite3Fts5MallocZero(&rc, (sizeof(char*) + 2) * nArg); - if( azArg && pSpace ){ - if( pConfig->pTok ){ + if( azArg ){ + char *pSpace = (char*)&azArg[nArg]; + if( pConfig->t.azArg ){ *pzErr = sqlite3_mprintf("multiple tokenize=... directives"); rc = SQLITE_ERROR; }else{ @@ -326,16 +325,14 @@ static int fts5ConfigParseSpecial( *pzErr = sqlite3_mprintf("parse error in tokenize directive"); rc = SQLITE_ERROR; }else{ - rc = sqlite3Fts5GetTokenizer(pGlobal, - (const char**)azArg, (int)nArg, pConfig, - pzErr - ); + pConfig->t.azArg = (const char**)azArg; + pConfig->t.nArg = nArg; + azArg = 0; } } } - sqlite3_free(azArg); - sqlite3_free(pDel); + return rc; } @@ -412,16 +409,6 @@ static int fts5ConfigParseSpecial( return SQLITE_ERROR; } -/* -** Allocate an instance of the default tokenizer ("simple") at -** Fts5Config.pTokenizer. Return SQLITE_OK if successful, or an SQLite error -** code if an error occurs. -*/ -static int fts5ConfigDefaultTokenizer(Fts5Global *pGlobal, Fts5Config *pConfig){ - assert( pConfig->pTok==0 && pConfig->pTokApi==0 ); - return sqlite3Fts5GetTokenizer(pGlobal, 0, 0, pConfig, 0); -} - /* ** Gobble up the first bareword or quoted word from the input buffer zIn. ** Return a pointer to the character immediately following the last in @@ -554,6 +541,7 @@ int sqlite3Fts5ConfigParse( *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); if( pRet==0 ) return SQLITE_NOMEM; memset(pRet, 0, sizeof(Fts5Config)); + pRet->pGlobal = pGlobal; pRet->db = db; pRet->iCookie = -1; @@ -640,13 +628,6 @@ int sqlite3Fts5ConfigParse( rc = SQLITE_ERROR; } - /* If a tokenizer= option was successfully parsed, the tokenizer has - ** already been allocated. Otherwise, allocate an instance of the default - ** tokenizer (unicode61) now. */ - if( rc==SQLITE_OK && pRet->pTok==0 ){ - rc = fts5ConfigDefaultTokenizer(pGlobal, pRet); - } - /* If no zContent option was specified, fill in the default values. */ if( rc==SQLITE_OK && pRet->zContent==0 ){ const char *zTail = 0; @@ -688,9 +669,10 @@ int sqlite3Fts5ConfigParse( void sqlite3Fts5ConfigFree(Fts5Config *pConfig){ if( pConfig ){ int i; - if( pConfig->pTok ){ - pConfig->pTokApi->xDelete(pConfig->pTok); + if( pConfig->t.pTok ){ + pConfig->t.pTokApi->xDelete(pConfig->t.pTok); } + sqlite3_free(pConfig->t.azArg); sqlite3_free(pConfig->zDb); sqlite3_free(pConfig->zName); for(i=0; inCol; i++){ @@ -765,10 +747,18 @@ int sqlite3Fts5Tokenize( void *pCtx, /* Context passed to xToken() */ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */ ){ - if( pText==0 ) return SQLITE_OK; - return pConfig->pTokApi->xTokenize( - pConfig->pTok, pCtx, flags, pText, nText, xToken - ); + int rc = SQLITE_OK; + if( pText ){ + if( pConfig->t.pTok==0 ){ + rc = sqlite3Fts5LoadTokenizer(pConfig); + } + if( rc==SQLITE_OK ){ + rc = pConfig->t.pTokApi->xTokenize( + pConfig->t.pTok, pCtx, flags, pText, nText, xToken + ); + } + } + return rc; } /* diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index f609f7f34a..0155a27f4c 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -377,8 +377,12 @@ static int fts5InitVtab( assert( (rc==SQLITE_OK && *pzErr==0) || pConfig==0 ); } if( rc==SQLITE_OK ){ + pConfig->pzErrmsg = pzErr; pTab->p.pConfig = pConfig; pTab->pGlobal = pGlobal; + if( bCreate ){ + rc = sqlite3Fts5LoadTokenizer(pConfig); + } } /* Open the index sub-system */ @@ -400,11 +404,8 @@ static int fts5InitVtab( /* Load the initial configuration */ if( rc==SQLITE_OK ){ - assert( pConfig->pzErrmsg==0 ); - pConfig->pzErrmsg = pzErr; rc = sqlite3Fts5IndexLoadConfig(pTab->p.pIndex); sqlite3Fts5IndexRollback(pTab->p.pIndex); - pConfig->pzErrmsg = 0; } if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_NORMAL ){ @@ -414,6 +415,7 @@ static int fts5InitVtab( rc = sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS); } + if( pConfig ) pConfig->pzErrmsg = 0; if( rc!=SQLITE_OK ){ fts5FreeVtab(pTab); pTab = 0; @@ -481,10 +483,10 @@ static int fts5UsePatternMatch( ){ assert( FTS5_PATTERN_GLOB==SQLITE_INDEX_CONSTRAINT_GLOB ); assert( FTS5_PATTERN_LIKE==SQLITE_INDEX_CONSTRAINT_LIKE ); - if( pConfig->ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){ + if( pConfig->t.ePattern==FTS5_PATTERN_GLOB && p->op==FTS5_PATTERN_GLOB ){ return 1; } - if( pConfig->ePattern==FTS5_PATTERN_LIKE + if( pConfig->t.ePattern==FTS5_PATTERN_LIKE && (p->op==FTS5_PATTERN_LIKE || p->op==FTS5_PATTERN_GLOB) ){ return 1; @@ -584,6 +586,14 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ return SQLITE_ERROR; } + if( pConfig->t.pTok==0 ){ + int rc; + pConfig->pzErrmsg = &pVTab->zErrMsg; + rc = sqlite3Fts5LoadTokenizer(pConfig); + pConfig->pzErrmsg = 0; + if( rc!=SQLITE_OK ) return rc; + } + idxStr = (char*)sqlite3_malloc(pInfo->nConstraint * 8 + 1); if( idxStr==0 ) return SQLITE_NOMEM; pInfo->idxStr = idxStr; @@ -626,6 +636,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ idxStr += strlen(&idxStr[iIdxStr]); pInfo->aConstraintUsage[i].argvIndex = ++iCons; assert( idxStr[iIdxStr]=='\0' ); + bSeenMatch = 1; }else if( bSeenEq==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ && iCol<0 ){ idxStr[iIdxStr++] = '='; bSeenEq = 1; @@ -2860,7 +2871,7 @@ static int fts5FindTokenizer( return rc; } -int sqlite3Fts5GetTokenizer( +int fts5GetTokenizer( Fts5Global *pGlobal, const char **azArg, int nArg, @@ -2877,26 +2888,37 @@ int sqlite3Fts5GetTokenizer( *pzErr = sqlite3_mprintf("no such tokenizer: %s", azArg[0]); }else{ rc = pMod->x.xCreate( - pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->pTok + pMod->pUserData, (azArg?&azArg[1]:0), (nArg?nArg-1:0), &pConfig->t.pTok ); - pConfig->pTokApi = &pMod->x; + pConfig->t.pTokApi = &pMod->x; if( rc!=SQLITE_OK ){ if( pzErr ) *pzErr = sqlite3_mprintf("error in tokenizer constructor"); }else{ - pConfig->ePattern = sqlite3Fts5TokenizerPattern( - pMod->x.xCreate, pConfig->pTok + pConfig->t.ePattern = sqlite3Fts5TokenizerPattern( + pMod->x.xCreate, pConfig->t.pTok ); } } if( rc!=SQLITE_OK ){ - pConfig->pTokApi = 0; - pConfig->pTok = 0; + pConfig->t.pTokApi = 0; + pConfig->t.pTok = 0; } return rc; } +/* +** Attempt to instantiate the tokenizer. +*/ +int sqlite3Fts5LoadTokenizer(Fts5Config *pConfig){ + return fts5GetTokenizer( + pConfig->pGlobal, pConfig->t.azArg, pConfig->t.nArg, + pConfig, pConfig->pzErrmsg + ); +} + + static void fts5ModuleDestroy(void *pCtx){ Fts5TokenizerModule *pTok, *pNextTok; Fts5Auxiliary *pAux, *pNextAux; diff --git a/ext/fts5/test/fts5tokenizer.test b/ext/fts5/test/fts5tokenizer.test index 27370657ed..f469a98888 100644 --- a/ext/fts5/test/fts5tokenizer.test +++ b/ext/fts5/test/fts5tokenizer.test @@ -300,5 +300,48 @@ set ::flags [list] do_execsql_test 9.5.1 { SELECT * FROM t1('"abc xyz*"'); } {} do_test 9.5.2 { set ::flags } {query} +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 10.1 { + CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize=unicode61); + PRAGMA writable_schema = 1; + UPDATE sqlite_schema + SET sql = 'CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize="unicode61 error");' + WHERE name = 'x1'; +} + +db close +sqlite3 db test.db + +do_catchsql_test 10.2 { + SELECT * FROM x1 +} {1 {error in tokenizer constructor}} + +do_catchsql_test 10.3 { + INSERT INTO x1 VALUES('abc'); +} {1 {error in tokenizer constructor}} + +do_execsql_test 10.4 { + PRAGMA writable_schema = 1; + UPDATE sqlite_schema + SET sql = 'CREATE VIRTUAL TABLE x1 USING fts5(x, tokenize="nosuch error");' + WHERE name = 'x1'; +} + +db close +sqlite3 db test.db + +do_catchsql_test 10.5 { + SELECT * FROM x1 +} {1 {no such tokenizer: nosuch}} +do_catchsql_test 10.6 { + INSERT INTO x1 VALUES('abc'); +} {1 {no such tokenizer: nosuch}} + +do_execsql_test 10.7 { + DROP TABLE x1; + SELECT * FROM sqlite_schema; +} finish_test diff --git a/ext/fts5/test/fts5trigram2.test b/ext/fts5/test/fts5trigram2.test index f5beae5b28..00f6d43952 100644 --- a/ext/fts5/test/fts5trigram2.test +++ b/ext/fts5/test/fts5trigram2.test @@ -101,9 +101,20 @@ do_execsql_test 4.0 { CREATE VIRTUAL TABLE t4 USING fts5(z, tokenize=trigram); } {} -breakpoint do_execsql_test 4.1 { INSERT INTO t4 VALUES('ABCD'); + INSERT INTO t4 VALUES('DEFG'); } {} +db close +sqlite3 db test.db + +do_eqp_test 4.1 { + SELECT rowid FROM t4 WHERE z LIKE '%abc%' +} {VIRTUAL TABLE INDEX 0:L0} + +do_execsql_test 4.2 { + SELECT rowid FROM t4 WHERE z LIKE '%abc%' +} {1} + finish_test diff --git a/manifest b/manifest index 1c2904f809..3c967f0f90 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\stests\sto\sbestindexC.test.\sNo\schanges\sto\scode. -D 2024-05-11T16:44:56.528 +C Allow\sexisting\sfts5\stables\sto\sbe\sdropped\seven\sif\sthe\sassociated\stokenizer\sis\snot\savailable. +D 2024-05-13T20:06:08.273 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -93,14 +93,14 @@ F ext/fts3/unicode/mkunicode.tcl d5aebf022fa4577ee8cdf27468f0d847879993959101f6d F ext/fts3/unicode/parseunicode.tcl a981bd6466d12dd17967515801c3ff23f74a281be1a03cf1e6f52a6959fc77eb F ext/fts5/extract_api_docs.tcl bc3a0ca78be7d3df08e7602c00ca48021ebae40682d75eb001bfdf6e54ffb44e F ext/fts5/fts5.h 8856e11a5f0269cd346754cea0765efe8089635b80cad3222e8bfdb08cd5348a -F ext/fts5/fts5Int.h defa43c0932265138ee910ca416e6baccf8b774e0f3d610e74be1ab2880e9834 +F ext/fts5/fts5Int.h 965ded9eedfa4bbe6b39048d4b0cce804a13ec954f11a088d1205a48703ce03f F ext/fts5/fts5_aux.c 4584e88878e54828bf7d4d0d83deedd232ec60628b7731be02bad6adb62304b1 F ext/fts5/fts5_buffer.c 0eec58bff585f1a44ea9147eae5da2447292080ea435957f7488c70673cb6f09 -F ext/fts5/fts5_config.c 8072a207034b51ae9b7694121d1b5715c794e94b275e088f70ae532378ca5cdf +F ext/fts5/fts5_config.c 74093394dc26750becc302922e5fe308a8597a70e6e83e66c892242952468163 F ext/fts5/fts5_expr.c e91156ebdcc08d837f4f324168f69f3c0d7fdef0e521fd561efb48ef3297b696 F ext/fts5/fts5_hash.c adda4272be401566a6e0ba1acbe70ee5cb97fce944bc2e04dc707152a0ec91b1 F ext/fts5/fts5_index.c ee0f4d50bc0c58a7c5ef7d645e7e38e1e59315b8ea9d722ae00c5f949ee65379 -F ext/fts5/fts5_main.c d68bd9533d5a638b7f6fae61c3cb0a15257dcdcccedaf3d0b3c9f55940c85048 +F ext/fts5/fts5_main.c 942fea921d67f75f745d02bf383e5b949bf96a8b78bcfffca40f06439d252fae F ext/fts5/fts5_storage.c f9e31b0d155e9b2c92d5d3a09ad7a56b937fbf1c7f962e10f4ca6281349f3934 F ext/fts5/fts5_tcl.c fdf7e2bb9a9186cfcaf2d2ce11d338309342b7a7593c2812bc54455db53da5d2 F ext/fts5/fts5_test_mi.c 08c11ec968148d4cb4119d96d819f8c1f329812c568bac3684f5464be177d3ee @@ -227,10 +227,10 @@ F ext/fts5/test/fts5synonym.test 1651815b8008de170e8e600dcacc17521d765482ea8f074 F ext/fts5/test/fts5synonym2.test e2f6ff68c4fbe12a866a3a87510f553d9dac99bcb74c10b56487c4c0a562fcf5 F ext/fts5/test/fts5tok1.test 1f7817499f5971450d8c4a652114b3d833393c8134e32422d0af27884ffe9cef F ext/fts5/test/fts5tok2.test dcacb32d4a2a3f0dd3215d4a3987f78ae4be21a2 -F ext/fts5/test/fts5tokenizer.test ac3c9112b263a639fb0508ae73a3ee886bf4866d2153771a8e8a20c721305a43 +F ext/fts5/test/fts5tokenizer.test e5ff1fa93ae267592ad2280ae545133ac24d19cf44116c736e7ece9c02285b5b F ext/fts5/test/fts5tokenizer2.test cb5428c7cfb3b6a74b7adfcde65506e329112003e8dffa7501d01c2d18d02569 F ext/fts5/test/fts5trigram.test 6c4e37864f3e7d90673db5563d9736d7e40080ab94d10ebdffa94c1b77941da0 -F ext/fts5/test/fts5trigram2.test 9fe4207f8a4241747aff1005258b564958588d21bfd240d6cd4c2e955d31c156 +F ext/fts5/test/fts5trigram2.test c91f0a94f7e1ff859682228646abeab4c0eba2effc46af2cbc8f0f48b05a0566 F ext/fts5/test/fts5ubsan.test 783d5a8d13ebfa169e634940228db54540780e3ba7a87ad1e4510e61440bf64b F ext/fts5/test/fts5umlaut.test a42fe2fe6387c40c49ab27ccbd070e1ae38e07f38d05926482cc0bccac9ad602 F ext/fts5/test/fts5unicode.test 17056f4efe6b0a5d4f41fdf7a7dc9af2873004562eaa899d40633b93dc95f5a9 @@ -2190,8 +2190,11 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c6fd70b3c23fa00eaac9286d4a67e5c8ac76f926c11c220250c34032647bedc1 -R da518a0d510ea768080aa36a2357bedd +P 58d938c0e03c3c8d8796c537f89e69734ba6263d60ba37e345259cb6fdffbea5 +R 8cd6e7131b6ecbfb841a3c6de5597a39 +T *branch * fts5-delay-tokenizer +T *sym-fts5-delay-tokenizer * +T -sym-trunk * U dan -Z 00211d5eb53202476ca10f2c5f60b56e +Z f97ec1a719d45c2730606c10145cb9ed # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 1e5e1fe061..a91811beb8 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -58d938c0e03c3c8d8796c537f89e69734ba6263d60ba37e345259cb6fdffbea5 \ No newline at end of file +69ef47eeee8b53684c321393be34f03600694fbc86377f8720ff80307846aff6 \ No newline at end of file