From: dan Date: Tue, 3 Oct 2023 19:06:52 +0000 (+0000) Subject: Fixes for fts5 expression parser module to allow embedded 0x00 bytes in tokens. X-Git-Tag: version-3.45.0~114^2~26 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1846de49a46bbe1487679d5fdc4283acd38ba307;p=thirdparty%2Fsqlite.git Fixes for fts5 expression parser module to allow embedded 0x00 bytes in tokens. FossilOrigin-Name: 342c8d0783f449817d3f565ff6b9f010a6c690beeea32f1861640810490a8b5f --- diff --git a/ext/fts5/fts5_expr.c b/ext/fts5/fts5_expr.c index f5101ba065..745a5d9fa6 100644 --- a/ext/fts5/fts5_expr.c +++ b/ext/fts5/fts5_expr.c @@ -100,7 +100,8 @@ struct Fts5ExprNode { struct Fts5ExprTerm { u8 bPrefix; /* True for a prefix term */ u8 bFirst; /* True if token must be first in column */ - char *zTerm; /* nul-terminated term */ + char *pTerm; /* Term data */ + int nTerm; /* Size of term in bytes */ Fts5IndexIter *pIter; /* Iterator for this term */ Fts5ExprTerm *pSynonym; /* Pointer to first in list of synonyms */ }; @@ -967,7 +968,7 @@ static int fts5ExprNearInitAll( p->pIter = 0; } rc = sqlite3Fts5IndexQuery( - pExpr->pIndex, p->zTerm, (int)strlen(p->zTerm), + pExpr->pIndex, p->pTerm, p->nTerm, (pTerm->bPrefix ? FTS5INDEX_QUERY_PREFIX : 0) | (pExpr->bDesc ? FTS5INDEX_QUERY_DESC : 0), pNear->pColset, @@ -1604,7 +1605,7 @@ static void fts5ExprPhraseFree(Fts5ExprPhrase *pPhrase){ Fts5ExprTerm *pSyn; Fts5ExprTerm *pNext; Fts5ExprTerm *pTerm = &pPhrase->aTerm[i]; - sqlite3_free(pTerm->zTerm); + sqlite3_free(pTerm->pTerm); sqlite3Fts5IterClose(pTerm->pIter); for(pSyn=pTerm->pSynonym; pSyn; pSyn=pNext){ pNext = pSyn->pSynonym; @@ -1735,8 +1736,9 @@ static int fts5ParseTokenize( rc = SQLITE_NOMEM; }else{ memset(pSyn, 0, (size_t)nByte); - pSyn->zTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); - memcpy(pSyn->zTerm, pToken, nToken); + pSyn->pTerm = ((char*)pSyn) + sizeof(Fts5ExprTerm) + sizeof(Fts5Buffer); + pSyn->nTerm = nToken; + memcpy(pSyn->pTerm, pToken, nToken); pSyn->pSynonym = pPhrase->aTerm[pPhrase->nTerm-1].pSynonym; pPhrase->aTerm[pPhrase->nTerm-1].pSynonym = pSyn; } @@ -1761,7 +1763,8 @@ static int fts5ParseTokenize( if( rc==SQLITE_OK ){ pTerm = &pPhrase->aTerm[pPhrase->nTerm++]; memset(pTerm, 0, sizeof(Fts5ExprTerm)); - pTerm->zTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + pTerm->pTerm = sqlite3Fts5Strndup(&rc, pToken, nToken); + pTerm->nTerm = nToken; } } @@ -1913,9 +1916,7 @@ int sqlite3Fts5ExprClonePhrase( int tflags = 0; Fts5ExprTerm *p; for(p=&pOrig->aTerm[i]; p && rc==SQLITE_OK; p=p->pSynonym){ - const char *zTerm = p->zTerm; - rc = fts5ParseTokenize((void*)&sCtx, tflags, zTerm, (int)strlen(zTerm), - 0, 0); + rc = fts5ParseTokenize((void*)&sCtx, tflags, p->pTerm, p->nTerm, 0, 0); tflags = FTS5_TOKEN_COLOCATED; } if( rc==SQLITE_OK ){ @@ -2296,11 +2297,13 @@ static Fts5ExprNode *fts5ParsePhraseToAnd( if( parseGrowPhraseArray(pParse) ){ fts5ExprPhraseFree(pPhrase); }else{ + Fts5ExprTerm *p = &pNear->apPhrase[0]->aTerm[ii]; pParse->apPhrase[pParse->nPhrase++] = pPhrase; pPhrase->nTerm = 1; - pPhrase->aTerm[0].zTerm = sqlite3Fts5Strndup( - &pParse->rc, pNear->apPhrase[0]->aTerm[ii].zTerm, -1 + pPhrase->aTerm[0].pTerm = sqlite3Fts5Strndup( + &pParse->rc, p->pTerm, p->nTerm ); + pPhrase->aTerm[0].nTerm = p->nTerm; pRet->apChild[ii] = sqlite3Fts5ParseNode(pParse, FTS5_STRING, 0, 0, sqlite3Fts5ParseNearset(pParse, 0, pPhrase) ); @@ -2485,16 +2488,17 @@ static char *fts5ExprTermPrint(Fts5ExprTerm *pTerm){ /* Determine the maximum amount of space required. */ for(p=pTerm; p; p=p->pSynonym){ - nByte += (int)strlen(pTerm->zTerm) * 2 + 3 + 2; + nByte += pTerm->nTerm * 2 + 3 + 2; } zQuoted = sqlite3_malloc64(nByte); if( zQuoted ){ int i = 0; for(p=pTerm; p; p=p->pSynonym){ - char *zIn = p->zTerm; + char *zIn = p->pTerm; + char *zEnd = &zIn[p->nTerm]; zQuoted[i++] = '"'; - while( *zIn ){ + while( zInnTerm; iTerm++){ - char *zTerm = pPhrase->aTerm[iTerm].zTerm; - zRet = fts5PrintfAppend(zRet, "%s%s", iTerm==0?"":" ", zTerm); + Fts5ExprTerm *p = &pPhrase->aTerm[iTerm]; + zRet = fts5PrintfAppend(zRet, "%s%.*s", iTerm==0?"":" ", + p->nTerm, p->pTerm + ); if( pPhrase->aTerm[iTerm].bPrefix ){ zRet = fts5PrintfAppend(zRet, "*"); } @@ -2994,9 +3000,8 @@ static int fts5ExprPopulatePoslistsCb( Fts5ExprTerm *pTerm; if( p->aPopulator[i].bOk==0 ) continue; for(pTerm=&pExpr->apExprPhrase[i]->aTerm[0]; pTerm; pTerm=pTerm->pSynonym){ - int nTerm = (int)strlen(pTerm->zTerm); - if( (nTerm==nToken || (nTermbPrefix)) - && memcmp(pTerm->zTerm, pToken, nTerm)==0 + if( (pTerm->nTerm==nToken || (pTerm->nTermbPrefix)) + && memcmp(pTerm->pTerm, pToken, pTerm->nTerm)==0 ){ int rc = sqlite3Fts5PoslistWriterAppend( &pExpr->apExprPhrase[i]->poslist, &p->aPopulator[i].writer, p->iOff diff --git a/ext/fts5/test/fts5origintext.test b/ext/fts5/test/fts5origintext.test index 791b850c76..155d74c025 100644 --- a/ext/fts5/test/fts5origintext.test +++ b/ext/fts5/test/fts5origintext.test @@ -45,6 +45,10 @@ do_execsql_test 1.3 { world } +do_execsql_test 1.4 { + SELECT rowid FROM ft('Hello'); +} {1} + #------------------------------------------------------------------------- reset_db diff --git a/manifest b/manifest index 431d902181..e0c37bf1e2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\sfts5_decode()\sto\sallow\sfor\sembedded\s0x00\sbytes\sin\stokens. -D 2023-10-03T17:07:54.562 +C Fixes\sfor\sfts5\sexpression\sparser\smodule\sto\sallow\sembedded\s0x00\sbytes\sin\stokens. +D 2023-10-03T19:06:52.966 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -92,7 +92,7 @@ F ext/fts5/fts5Int.h 66a38b285e2b860baa29745d8eff27f5b0809268e7820498494d9acfacc F ext/fts5/fts5_aux.c 572d5ec92ba7301df2fea3258576332f2f4d2dfd66d8263afd157d9deceac480 F ext/fts5/fts5_buffer.c 3001fbabb585d6de52947b44b455235072b741038391f830d6b729225eeaf6a5 F ext/fts5/fts5_config.c 054359543566cbff1ba65a188330660a5457299513ac71c53b3a07d934c7b081 -F ext/fts5/fts5_expr.c bd3b81ce669c4104e34ffe66570af1999a317b142c15fccb112de9fb0caa57a6 +F ext/fts5/fts5_expr.c cc215d39714b428523d2f2ef42b713c83095a28a67bc7f6f2dc4ac036a29f460 F ext/fts5/fts5_hash.c 76765856397eff56f526b0640b23a1677d737d35e07bc00e4b4b2e0fc5fda60d F ext/fts5/fts5_index.c e472083d371f420d52ec80445b9d2a99b16b23548205cb4064ddcd41bd79f63e F ext/fts5/fts5_main.c 799ec88d2309055f6406bddb0bd6ed80148c5da5eb14594c3c5309a6e944d489 @@ -187,7 +187,7 @@ F ext/fts5/test/fts5onepass.test f9b7d9b2c334900c6542a869760290e2ab5382af8fbd618 F ext/fts5/test/fts5optimize.test 36a752d24c818792032e4ff502936fc9cc5ef938721696396fdc79214b2717f1 F ext/fts5/test/fts5optimize2.test 93e742c36b487d8874621360af5b1ce4d39b04fb9e71ce9bc34015c5fc811785 F ext/fts5/test/fts5optimize3.test bf9c91bb927d0fb2b9a06318a217a0419183ac5913842e062c7e0b98ea5d0fca -F ext/fts5/test/fts5origintext.test 9a6edc85ccc4afb10e71d54d98d8170f850272e55b120520f367afbb12526674 +F ext/fts5/test/fts5origintext.test 3e1ac3230f65a0d644e9bf0738bebb09b4db9d9f123e1307d8630e42269b4afb F ext/fts5/test/fts5phrase.test 13e5d8e9083077b3d9c74315b3c92ec723cc6eb37c8155e0bfe1bba00559f07b F ext/fts5/test/fts5plan.test b65cfcca9ddd6fdaa118c61e17aeec8e8433bc5b6bb307abd116514f79c49c5a F ext/fts5/test/fts5porter.test 8d08010c28527db66bc3feebd2b8767504aaeb9b101a986342fa7833d49d0d15 @@ -2123,8 +2123,8 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P c027c092c4af53bd6ae3cc6e2b4439167d9eeb0f9de549b6a2c2a72a67ee886c -R 9e81ed5ff713a928831c6c73df8f7a54 +P e051120067fd87f57b498e505e3960cf4d14e8e33bad940618cc0823253254f7 +R 5cce41f02eae121cb66e72942ef56113 U dan -Z 0a0daf2566a3400fafbefb55947df637 +Z 80b8e2664e768ed2ac03913fcf0180ea # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.uuid b/manifest.uuid index 53a545a52f..4798938837 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e051120067fd87f57b498e505e3960cf4d14e8e33bad940618cc0823253254f7 \ No newline at end of file +342c8d0783f449817d3f565ff6b9f010a6c690beeea32f1861640810490a8b5f \ No newline at end of file