From: danielk1977
Date: Mon, 7 May 2007 11:53:13 +0000 (+0000)
Subject: Add interface to configure SQLite to use ICU collation functions. (CVS 3936)
X-Git-Tag: version-3.4.0~145
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2559136971217b8a9bfe53b9a308989c6e7bfe56;p=thirdparty%2Fsqlite.git

Add interface to configure SQLite to use ICU collation functions. (CVS 3936)

FossilOrigin-Name: b29a81b4fbb926fa09186340342848b9fe589033
---

diff --git a/ext/icu/icu.c b/ext/icu/icu.c
index 2dddc0489d..e58e834caa 100644
--- a/ext/icu/icu.c
+++ b/ext/icu/icu.c
@@ -21,6 +21,7 @@
 #include <unicode/utypes.h>
 #include <unicode/uregex.h>
 #include <unicode/ustring.h>
+#include <unicode/ucol.h>
 
 #include <assert.h>
 #include "sqlite3.h"
@@ -51,6 +52,24 @@ static void xFree(void *p){
 ** http://unicode.org/reports/tr21/tr21-5.html#Caseless_Matching
 */
 
+/*
+** This function is called when an ICU function called from within
+** the implementation of an SQL scalar function returns an error.
+**
+** It sets the result of pCtx to the error message
+** "ICU error: <zName>(): <u_errorName(e)>", truncated to fit zBuf.
+*/
+static void icuFunctionError(
+  sqlite3_context *pCtx,       /* SQLite scalar function context */
+  const char *zName,           /* Name of ICU function that failed */
+  UErrorCode e                 /* Error code returned by ICU function */
+){
+  char zBuf[128];
+  sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e));
+  zBuf[127] = '\0';            /* defensive terminator for a truncated message */
+  sqlite3_result_error(pCtx, zBuf, -1);
+}
+
 /*
 ** Function to delete compiled regexp objects.  Registered as
 ** a destructor function with sqlite3_set_auxdata().
@@ -104,7 +123,7 @@ static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
       sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
     }else{
       assert(!pExpr);
-      sqlite3_result_error(p, "Error compiling regular expression", -1);
+      icuFunctionError(p, "uregex_open", status);
       return;
     }
   }
@@ -112,14 +131,14 @@ static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){
   /* Configure the text that the regular expression operates on. */
   uregex_setText(pExpr, zString, -1, &status);
   if( !U_SUCCESS(status) ){
-    sqlite3_result_error(p, "Error configuring regular expression", -1);
+    icuFunctionError(p, "uregex_setText", status);
     return;
   }
 
   /* Attempt the match */
   res = uregex_matches(pExpr, 0, &status);
   if( !U_SUCCESS(status) ){
-    sqlite3_result_error(p, "Error matching regular expression", -1);
+    icuFunctionError(p, "uregex_matches", status);
     return;
   }
 
@@ -190,13 +209,94 @@ static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){
   }
 
   if( !U_SUCCESS(status) ){
-    sqlite3_result_error(p, "Error converting case", -1);
+    icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
     return;
   }
 
   sqlite3_result_text16(p, zOutput, -1, xFree);
 }
 
+/*
+** Collation sequence destructor (passed to sqlite3_create_collation_x()).
+** pCtx is the UCollator opened with ucol_open(); it is closed here.
+*/
+static void icuCollationDel(void *pCtx){
+  UCollator *p = (UCollator *)pCtx;
+  ucol_close(p);
+}
+
+/*
+** Collation sequence comparison function. pCtx is the UCollator from
+** ucol_open(); the string sizes are halved to get UChar counts.
+*/
+static int icuCollationColl(
+  void *pCtx,            /* UCollator passed at registration */
+  int nLeft,             /* Size of zLeft; halved to get the UChar count */
+  const void *zLeft,     /* Left-hand string, read as UChar (UTF-16) */
+  int nRight,            /* Size of zRight; halved to get the UChar count */
+  const void *zRight     /* Right-hand string, read as UChar (UTF-16) */
+){
+  UCollationResult res;
+  UCollator *p = (UCollator *)pCtx;
+  res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2);
+  switch( res ){
+    case UCOL_LESS:    return -1;
+    case UCOL_GREATER: return +1;
+    case UCOL_EQUAL:   return 0;
+  }
+  assert(!"Bad return value from ucol_strcoll()");
+  return 0;              /* only reached when assert() is compiled out */
+}
+
+/*
+** Implementation of the scalar function icu_load_collation().
+**
+** This scalar function is used to add ICU collation based collation
+** types to an SQLite database connection. It is intended to be called
+** as follows:
+**
+**     SELECT icu_load_collation(<locale>, <collation-name>);
+**
+** Where <locale> is a string containing an ICU locale identifier (i.e.
+** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
+** collation sequence to create.
+*/
+static void icuLoadCollation(
+  sqlite3_context *p,
+  int nArg,
+  sqlite3_value **apArg
+){
+  sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
+  UErrorCode status = U_ZERO_ERROR;
+  const char *zLocale;      /* Locale identifier - (eg. "ja_JP") */
+  const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
+  UCollator *pUCollator;    /* ICU library collation object */
+  int rc;                   /* Return code from sqlite3_create_collation_x() */
+
+  assert(nArg==2);
+  zLocale = (const char *)sqlite3_value_text(apArg[0]);
+  zName = (const char *)sqlite3_value_text(apArg[1]);
+
+  if( !zLocale || !zName ){
+    return;                 /* NULL argument: nothing to register */
+  }
+
+  pUCollator = ucol_open(zLocale, &status);
+  if( !U_SUCCESS(status) ){
+    icuFunctionError(p, "ucol_open", status);
+    return;
+  }
+  assert(pUCollator);
+
+  rc = sqlite3_create_collation_x(db, zName, SQLITE_UTF16, (void *)pUCollator,
+      icuCollationColl, icuCollationDel
+  );
+  if( rc!=SQLITE_OK ){
+    ucol_close(pUCollator); /* registration failed: free the collator here */
+    sqlite3_result_error(p, "Error registering collation function", -1);
+  }
+}
+
 /*
 ** Register the ICU extension functions with database db.
*/ @@ -219,6 +319,8 @@ int sqlite3IcuInit(sqlite3 *db){ {"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16}, {"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16}, {"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16}, + + {"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation}, }; int rc = SQLITE_OK; diff --git a/manifest b/manifest index 7bbff81533..b34d0fafa0 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\ssqlite3_snprintf()\sso\sthat\sit\sdoes\snot\swrite\sa\szero-terminator\sif\nthe\sbuffer\ssize\sargument\sis\sless\sthan\s1.\s\sTicket\s#2341.\s\sAdded\sdocumentation\nabout\sthe\ssqlite3_snprintf()\sfunction.\s(CVS\s3935) -D 2007-05-07T11:24:30 +C Add\sinterface\sto\sconfigure\sSQLite\sto\suse\sICU\scollation\sfunctions.\s(CVS\s3936) +D 2007-05-07T11:53:14 F Makefile.in ea8888bdcf53313d26576fcabcb6d0a10ecd35cd F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -43,7 +43,7 @@ F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977 -F ext/icu/icu.c a30999ba467749ed6232d02cc8c4b5a0e62cd727 +F ext/icu/icu.c 509ac3d8afea8af6835edb9d96a52a80dd56c152 F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387 F main.mk 09c19ae05ac9e5654d5fd866a980b21ad9df8f30 @@ -246,6 +246,7 @@ F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51 F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638 F test/func.test 6727c7729472ae52b5acd86e802f89aa350ba50f F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a +F test/icu.test e6bfae7f625c88fd14df6f540fe835bdfc1e4329 F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d F test/incrblob.test 7f82ae497364612aa17a37f77f12e01e2bee9f20 F test/incrblob_err.test 
9dae0762ba4d73b516d176d091c6b2b16f625953
@@ -481,7 +482,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
 F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
-P ff49d48f2f025898a0f4ace1fc227e1d367ea89f
-R d693f630962da031deefd4769c7a8268
-U drh
-Z ff527a494f455c458a9191e7c78f4220
+P f3ae4ac5fe0bfa2f91e76a6def86c444e51fe80b
+R 20cd23ff512d65479e7ed637ec43cb14
+U danielk1977
+Z 04087bd460c94ac099a97176e3a307b1
diff --git a/manifest.uuid b/manifest.uuid
index ed94877353..4d788566b8 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-f3ae4ac5fe0bfa2f91e76a6def86c444e51fe80b
\ No newline at end of file
+b29a81b4fbb926fa09186340342848b9fe589033
\ No newline at end of file
diff --git a/test/icu.test b/test/icu.test
new file mode 100644
index 0000000000..2a247c6c5c
--- /dev/null
+++ b/test/icu.test
@@ -0,0 +1,118 @@
+# 2007 May 1
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# $Id: icu.test,v 1.1 2007/05/07 11:53:14 danielk1977 Exp $
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+ifcapable !icu {
+  finish_test
+  return
+}
+
+# Create a one-row table to work with. test_expr applies $settings with an
+# UPDATE, evaluates $expr against that row, then rolls the change back.
+execsql {CREATE TABLE test1(i1 int, i2 int, r1 real, r2 real, t1 text, t2 text)}
+execsql {INSERT INTO test1 VALUES(1,2,1.1,2.2,'hello','world')}
+proc test_expr {name settings expr result} {
+  do_test $name [format {
+    db one {
+      BEGIN;
+      UPDATE test1 SET %s;
+      SELECT %s FROM test1;
+      ROLLBACK;
+    }
+  } $settings $expr] $result
+}
+
+# Tests of the REGEXP operator.
+#
+test_expr icu-1.1 {i1='hello'} {i1 REGEXP 'hello'} 1
+test_expr icu-1.2 {i1='hello'} {i1 REGEXP '.ello'} 1
+test_expr icu-1.3 {i1='hello'} {i1 REGEXP '.ell'} 0
+test_expr icu-1.4 {i1='hello'} {i1 REGEXP '.ell.*'} 1
+test_expr icu-1.5 {i1=NULL} {i1 REGEXP '.ell.*'} {}
+
+# Some non-ASCII characters with defined case mappings.
+#
+set ::EGRAVE "\xC8"
+set ::egrave "\xE8"
+
+set ::OGRAVE "\xD2"
+set ::ograve "\xF2"
+
+# The German sharp s (U+00DF), which looks a bit like a B. Its
+# upper-case version is "SS" (two characters).
+#
+set ::szlig "\xDF"
+
+# Tests of the upper()/lower() functions. Each test has a unique name.
+#
+test_expr icu-2.1 {i1='HellO WorlD'} {upper(i1)} {HELLO WORLD}
+test_expr icu-2.2 {i1='HellO WorlD'} {lower(i1)} {hello world}
+test_expr icu-2.3 {i1=$::egrave} {lower(i1)} $::egrave
+test_expr icu-2.4 {i1=$::egrave} {upper(i1)} $::EGRAVE
+test_expr icu-2.5 {i1=$::ograve} {lower(i1)} $::ograve
+test_expr icu-2.6 {i1=$::ograve} {upper(i1)} $::OGRAVE
+test_expr icu-2.7 {i1=$::EGRAVE} {lower(i1)} $::egrave
+test_expr icu-2.8 {i1=$::EGRAVE} {upper(i1)} $::EGRAVE
+test_expr icu-2.9 {i1=$::OGRAVE} {lower(i1)} $::ograve
+test_expr icu-2.10 {i1=$::OGRAVE} {upper(i1)} $::OGRAVE
+
+test_expr icu-2.11 {i1=$::szlig} {upper(i1)} "SS"
+test_expr icu-2.12 {i1='SS'} {lower(i1)} "ss"
+
+# In Turkish (locale="tr_TR"), the lower case version of I
+# is "small dotless i" (code point 0x131 (decimal 305)).
+#
+set ::small_dotless_i "\u0131"
+test_expr icu-3.1 {i1='I'} {lower(i1)} "i"
+test_expr icu-3.2 {i1='I'} {lower(i1, 'tr_tr')} $::small_dotless_i
+test_expr icu-3.3 {i1='I'} {lower(i1, 'en_AU')} "i"
+
+#--------------------------------------------------------------------
+# Test the collation sequence function.
+#
+do_test icu-4.1 {
+  execsql {
+    CREATE TABLE fruit(name);
+    INSERT INTO fruit VALUES('plum');
+    INSERT INTO fruit VALUES('cherry');
+    INSERT INTO fruit VALUES('apricot');
+    INSERT INTO fruit VALUES('peach');
+    INSERT INTO fruit VALUES('chokecherry');
+    INSERT INTO fruit VALUES('yamot');
+  }
+} {}
+do_test icu-4.2 {
+  execsql {
+    SELECT icu_load_collation('en_US', 'AmericanEnglish');
+    SELECT icu_load_collation('lt_LT', 'Lithuanian');
+  }
+  execsql {
+    SELECT name FROM fruit ORDER BY name COLLATE AmericanEnglish ASC;
+  }
+} {apricot cherry chokecherry peach plum yamot}
+
+
+# Test collation using Lithuanian rules. In the Lithuanian
+# alphabet, "y" comes right after "i".
+#
+do_test icu-4.3 {
+  execsql {
+    SELECT name FROM fruit ORDER BY name COLLATE Lithuanian ASC;
+  }
+} {apricot cherry chokecherry yamot peach plum}
+
+finish_test
+