From: dan Date: Tue, 24 Dec 2019 14:27:03 +0000 (+0000) Subject: Fix a spurious report of corruption that could be made by the fts5 integrity-check... X-Git-Tag: version-3.31.0~159 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c26e78d275134213fa35956b969efa9d5834a664;p=thirdparty%2Fsqlite.git Fix a spurious report of corruption that could be made by the fts5 integrity-check in SQLITE_DEBUG builds if the fts5 index contains malformed utf text. FossilOrigin-Name: a11b393dc2c882cf0b3c47c3405bf43ca1d6459605bd39cccce4d32da653a72d --- diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 015696f7b6..427594aff7 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -5723,6 +5723,37 @@ static int fts5QueryCksum( return rc; } +/* +** Check if buffer z[], size n bytes, contains as series of valid utf-8 +** encoded codepoints. If so, return 0. Otherwise, if the buffer does not +** contain valid utf-8, return non-zero. +*/ +static int fts5TestUtf8(const char *z, int n){ + assert( n>0 ); + int i = 0; + while( i<n ){ + if( (z[i] & 0x80)==0x00 ){ + i++; + }else + if( (z[i] & 0xE0)==0xC0 ){ + if( i+1>=n || (z[i+1] & 0xC0)!=0x80 ) return 1; + i += 2; + }else + if( (z[i] & 0xF0)==0xE0 ){ + if( i+2>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1; + i += 3; + }else + if( (z[i] & 0xF8)==0xF0 ){ + if( i+3>=n || (z[i+1] & 0xC0)!=0x80 || (z[i+2] & 0xC0)!=0x80 ) return 1; + if( (z[i+2] & 0xC0)!=0x80 ) return 1; + i += 3; + }else{ + return 1; + } + } + + return 0; +} /* ** This function is also purely an internal test. It does not contribute to @@ -5763,8 +5794,14 @@ static void fts5TestTerm( ** This check may only be performed if the hash table is empty. This ** is because the hash table only supports a single scan query at ** a time, and the multi-iter loop from which this function is called - ** is already performing such a scan. */ - if( p->nPendingData==0 ){ + ** is already performing such a scan. + ** + ** Also only do this if buffer zTerm contains nTerm bytes of valid + ** utf-8. 
Otherwise, the last part of the buffer contents might contain + ** a non-utf-8 sequence that happens to be a prefix of a valid utf-8 + ** character stored in the main fts index, which will cause the + ** test to fail. */ + if( p->nPendingData==0 && 0==fts5TestUtf8(zTerm, nTerm) ){ if( iIdx>0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; ck2 = 0; diff --git a/ext/fts5/test/fts5misc.test b/ext/fts5/test/fts5misc.test index e2f4b2d09c..d117205bb3 100644 --- a/ext/fts5/test/fts5misc.test +++ b/ext/fts5/test/fts5misc.test @@ -250,6 +250,21 @@ do_execsql_test 9.2 { -4764623217061966105 8324454597464624651 } +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 10.0 { + CREATE VIRTUAL TABLE vt1 USING fts5(c1, c2, prefix = 1, tokenize = "ascii"); + INSERT INTO vt1 VALUES (x'e4', '䔬'); +} + +do_execsql_test 10.1 { + SELECT quote(CAST(c1 AS blob)), quote(CAST(c2 AS blob)) FROM vt1 +} {X'E4' X'E494AC'} + +do_execsql_test 10.2 { + INSERT INTO vt1(vt1) VALUES('integrity-check'); +} finish_test diff --git a/manifest b/manifest index 689b8742db..5f1dc5644d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Convert\san\sALWAYS()\sinto\san\sassert()\swith\san\sextra\serror\sterm.\nDbsqlfuzz\sfind,\swith\stest\scase\sin\sTH3. -D 2019-12-24T13:41:33.515 +C Fix\sa\sspurious\sreport\sof\scorruption\sthat\scould\sbe\smade\sby\sthe\sfts5\sintegrity-check\sin\sSQLITE_DEBUG\sbuilds\sif\sthe\sfts5\sindex\scontains\smalformed\sutf\stext. 
+D 2019-12-24T14:27:03.948 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -116,7 +116,7 @@ F ext/fts5/fts5_buffer.c 5a5fe0159752c0fb0a5a93c722e9db2662822709490769d482b76a6 F ext/fts5/fts5_config.c b447948f35ad3354e8fe5e242e0a7e7b5b941555400b9404259944e3aa570037 F ext/fts5/fts5_expr.c 2be456484786333d559dc2987a00f2750981fab91d52db8452a8046278c5f22e F ext/fts5/fts5_hash.c 1cc0095646f5f3b46721aa112fb4f9bf29ae175cb5338f89dcec66ed97acfe75 -F ext/fts5/fts5_index.c 99b77ae1f503978ca76985bcfff7345c822aed8bbaa8edb3747f804f614685b5 +F ext/fts5/fts5_index.c d0b7e5e79c136c6e27c96c8e8b5db7db8ec750edda427008afbec07b813178d4 F ext/fts5/fts5_main.c 9db1f173d299466aeff89bd949fb1eb0a181265726fb56f11e07ea292dcc9a73 F ext/fts5/fts5_storage.c 3ecda8edadc1f62a355d6789776be0da609f8658c50d72e422674093ab7e1528 F ext/fts5/fts5_tcl.c 39bcbae507f594aad778172fa914cad0f585bf92fd3b078c686e249282db0d95 @@ -189,7 +189,7 @@ F ext/fts5/test/fts5leftjoin.test c0b4cafb9661379e576dc4405c0891d8fcc27826807405 F ext/fts5/test/fts5matchinfo.test 50d86da66ec5b27603dcd90ba0227f5d9deb10351cbc52974a88e24f6fc9b076 F ext/fts5/test/fts5merge.test e92a8db28b45931e7a9c7b1bbd36101692759d00274df74d83fd29d25d53b3a6 F ext/fts5/test/fts5merge2.test 3ebad1a59d6ad3fb66eff6523a09e95dc6367cbefb3cd73196801dea0425c8e2 -F ext/fts5/test/fts5misc.test a5b53328b5b79275915de8f67ae85905eb2133d8dbcc808411f67c094b1bd347 +F ext/fts5/test/fts5misc.test b294b1d7ad814da30e473905a8165de1bfe137822d243f2ab8cbf20ecc37bd1e F ext/fts5/test/fts5multi.test a15bc91cdb717492e6e1b66fec1c356cb57386b980c7ba5af1915f97fe878581 F ext/fts5/test/fts5multiclient.test 5ff811c028d6108045ffef737f1e9f05028af2458e456c0937c1d1b8dea56d45 F ext/fts5/test/fts5near.test 211477940142d733ac04fad97cb24095513ab2507073a99c2765c3ddd2ef58bd @@ -1852,7 
+1852,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 401c9d30e06191d938503aae024bc453d960fa64dc812ed86c661f94533247fd -R 7bfc5184e8e7362443b5d6dff6be172e -U drh -Z 7327fcfaeb195a694e2119a9482b57cd +P b473ad35c5ce355853e1805a5c0658bda1500775f22f59c6b6759ae990e65aca +R 88e0f620d2e9b56c3b0053f9411f1c0c +U dan +Z 0716b408ff1a9ce855db0a1c4ccdfffe diff --git a/manifest.uuid b/manifest.uuid index eedb68f236..83c2c41554 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b473ad35c5ce355853e1805a5c0658bda1500775f22f59c6b6759ae990e65aca \ No newline at end of file +a11b393dc2c882cf0b3c47c3405bf43ca1d6459605bd39cccce4d32da653a72d \ No newline at end of file