From: mistachkin Date: Sat, 12 Oct 2013 00:56:21 +0000 (+0000) Subject: Fix Unicode character encoding issues on Windows. X-Git-Tag: version-3.8.1~22 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=549bc3db1fc56b7b6751f86fc70017029091ae99;p=thirdparty%2Fsqlite.git Fix Unicode character encoding issues on Windows. FossilOrigin-Name: c9310c9a2bad11f1d033a57b33ea7aed43a8238d --- diff --git a/manifest b/manifest index 1579b56e79..888c047bd8 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Identify\srequirements\stext\sin\sthe\sSQLITE_CONFIG_\sdocumentation.\s\sFix\sa\stypo\n(a\sduplicated\sword)\sin\spart\sof\sthat\sdocumentation.\s\sAdd\ssome\srequirements\nmarks\sfor\sDETACH\sto\sthe\stest\sscripts.\s\sNo\scode\schanges. -D 2013-10-11T23:37:57.890 +C Fix\sUnicode\scharacter\sencoding\sissues\son\sWindows. +D 2013-10-12T00:56:21.621 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in e2d28ec95bd17ab4f3b6ee40b7102e9d7a0857b9 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -564,7 +564,7 @@ F test/fts4merge2.test 5faa558d1b672f82b847d2a337465fa745e46891 F test/fts4merge3.test aab02a09f50fe6baaddc2e159c3eabc116d45fc7 F test/fts4merge4.test c19c85ca1faa7b6d536832b49c12e1867235f584 F test/fts4noti.test aed33ba44808852dcb24bf70fa132e7bf530f057 -F test/fts4unicode.test 20195bca1e3a4301924c2c8b46257d64127f17df +F test/fts4unicode.test e28ba1a14181e709dcdf47455f207adf14c7cfe0 F test/full.test 6b3c8fb43c6beab6b95438c1675374b95fab245d F test/func.test c7e80a44eebac8604397eb2ad83d0d5d9d541237 F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f @@ -1123,7 +1123,7 @@ F tool/vdbe-compress.tcl f12c884766bd14277f4fcedcae07078011717381 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh d1a6de74685f360ab718efda6265994b99bbea01 F tool/win/sqlite.vsix 030f3eeaf2cb811a3692ab9c14d021a75ce41fff -P 7c24d22ffa1e12f3d24cad06b5ff7cc34219b2bb -R d7d2fd1a5883ae409ba0ba13e9010231 -U drh -Z a2b10bb03f0ef138962d7a6660a6c09c +P 1be0a3adaba2914c65c46fbebc4906ae4e70f899 +R ca24bf11e504ff21d9315944c9b4b854 +U mistachkin +Z ae94ccc0d8018ab65fe4f23e4fae4aa0 diff --git a/manifest.uuid b/manifest.uuid index 8c5019e9e4..d6f3901595 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -1be0a3adaba2914c65c46fbebc4906ae4e70f899 \ No newline at end of file +c9310c9a2bad11f1d033a57b33ea7aed43a8238d \ No newline at end of file diff --git a/test/fts4unicode.test b/test/fts4unicode.test index b8b98a327a..8edc2877da 100644 --- a/test/fts4unicode.test +++ b/test/fts4unicode.test @@ -44,31 +44,36 @@ proc do_unicode_token_test3 {tn args} { } do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D} -do_unicode_token_test 1.1 {Ä Ö Ü} {0 ä Ä 1 ö Ö 2 ü Ü} -do_unicode_token_test 1.2 {xÄx xÖx xÜx} {0 xäx xÄx 1 xöx xÖx 2 xüx xÜx} + +do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \ + "0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC" + +do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \ + "0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx" # 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s. do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF" -do_unicode_token_test 1.4 "\u1E9E" "0 ß \u1E9E" -do_unicode_token_test 1.5 "\u1E9E" "0 \uDF \u1E9E" +do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E" -do_unicode_token_test 1.6 "The quick brown fox" { +do_unicode_token_test 1.5 "The quick brown fox" { 0 the The 1 quick quick 2 brown brown 3 fox fox } -do_unicode_token_test 1.7 "The\u00bfquick\u224ebrown\u2263fox" { +do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" { 0 the The 1 quick quick 2 brown brown 3 fox fox } -do_unicode_token_test2 1.8 {a B c D} {0 a a 1 b B 2 c c 3 d D} -do_unicode_token_test2 1.9 {Ä Ö Ü} {0 a Ä 1 o Ö 2 u Ü} -do_unicode_token_test2 1.10 {xÄx xÖx xÜx} {0 xax xÄx 1 xox xÖx 2 xux xÜx} +do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D} +do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC" + +do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \ + "0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx" # Check that diacritics are removed if remove_diacritics=1 is specified. # And that they do not break tokens. -do_unicode_token_test2 1.11 "xx\u0301xx" "0 xxxx xx\u301xx" +do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx" # Title-case mappings work -do_unicode_token_test 1.12 "\u01c5" "0 \u01c6 \u01c5" +do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5" #------------------------------------------------------------------------- #