#include <unicode/utypes.h>
#include <unicode/uregex.h>
#include <unicode/ustring.h>
+#include <unicode/ucol.h>
#include <assert.h>
#include "sqlite3.h"
** http://unicode.org/reports/tr21/tr21-5.html#Caseless_Matching
*/
+/*
+** Report a failed ICU call as the result of the current SQL function.
+**
+** pCtx is the SQLite function context that receives the error, zName
+** is the name of the ICU routine that failed and e is the UErrorCode
+** it produced. The message stored in pCtx has the form
+**
+**     ICU error: <zName>(): <name returned by u_errorName(e)>
+**
+** and is truncated to fit the local buffer if necessary.
+*/
+static void icuFunctionError(
+  sqlite3_context *pCtx,       /* SQLite scalar function context */
+  const char *zName,           /* Name of ICU function that failed */
+  UErrorCode e                 /* Error code returned by ICU function */
+){
+  char zMsg[128];                       /* Formatted error message */
+  const char *zCode = u_errorName(e);   /* Symbolic name of the ICU error */
+
+  sqlite3_snprintf((int)sizeof(zMsg), zMsg,
+      "ICU error: %s(): %s", zName, zCode
+  );
+  zMsg[sizeof(zMsg)-1] = '\0';          /* Force termination of the buffer */
+  sqlite3_result_error(pCtx, zMsg, -1);
+}
+
/*
** Function to delete compiled regexp objects. Registered as
** a destructor function with sqlite3_set_auxdata().
sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete);
}else{
assert(!pExpr);
- sqlite3_result_error(p, "Error compiling regular expression", -1);
+ icuFunctionError(p, "uregex_open", status);
return;
}
}
/* Configure the text that the regular expression operates on. */
uregex_setText(pExpr, zString, -1, &status);
if( !U_SUCCESS(status) ){
- sqlite3_result_error(p, "Error configuring regular expression", -1);
+ icuFunctionError(p, "uregex_setText", status);
return;
}
/* Attempt the match */
res = uregex_matches(pExpr, 0, &status);
if( !U_SUCCESS(status) ){
- sqlite3_result_error(p, "Error matching regular expression", -1);
+ icuFunctionError(p, "uregex_matches", status);
return;
}
}
if( !U_SUCCESS(status) ){
- sqlite3_result_error(p, "Error converting case", -1);
+ icuFunctionError(p, "u_strToLower()/u_strToUpper", status);
return;
}
sqlite3_result_text16(p, zOutput, -1, xFree);
}
+/*
+** Destructor for an ICU-backed collation sequence. pCtx is the
+** UCollator handle that was obtained from ucol_open(); it is released
+** by passing it to ucol_close().
+*/
+static void icuCollationDel(void *pCtx){
+  ucol_close((UCollator *)pCtx);
+}
+
+/*
+** Collation sequence comparison function. The pCtx argument points to
+** a UCollator structure previously allocated using ucol_open().
+**
+** zLeft/nLeft and zRight/nRight describe the two strings to compare.
+** The collation is registered with SQLITE_UTF16, so the sizes are
+** presumably byte counts (TODO confirm against the SQLite collation
+** API); they are halved to obtain the number of UChar units handed to
+** ucol_strcoll().
+**
+** Returns -1, 0 or +1 when the left string sorts before, equal to or
+** after the right string according to the collator.
+*/
+static int icuCollationColl(
+  void *pCtx,
+  int nLeft,
+  const void *zLeft,
+  int nRight,
+  const void *zRight
+){
+  UCollator *p = (UCollator *)pCtx;
+  UCollationResult res;
+
+  /* Keep the const qualifier on the input buffers: ucol_strcoll() only
+  ** reads them, so there is no reason to cast it away. */
+  res = ucol_strcoll(p,
+      (const UChar *)zLeft, nLeft/2,
+      (const UChar *)zRight, nRight/2
+  );
+  switch( res ){
+    case UCOL_LESS:    return -1;
+    case UCOL_GREATER: return +1;
+    case UCOL_EQUAL:   return 0;
+  }
+
+  /* Not reached unless ucol_strcoll() returns an unknown value. */
+  assert(!"Bad return value from ucol_strcoll()");
+  return 0;
+}
+
+/*
+** Implementation of the scalar function icu_load_collation().
+**
+** This scalar function is used to add ICU collation based collation
+** types to an SQLite database connection. It is intended to be called
+** as follows:
+**
+**     SELECT icu_load_collation(<locale>, <collation-name>);
+**
+** Where <locale> is a string containing an ICU locale identifier (e.g.
+** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the
+** collation sequence to create.
+**
+** If either argument is NULL the function returns without setting a
+** result or an error. If ucol_open() fails, or if the collation cannot
+** be registered, an error is stored in the function context.
+*/
+static void icuLoadCollation(
+  sqlite3_context *p,
+  int nArg,
+  sqlite3_value **apArg
+){
+  /* The database handle is supplied as the function's user-data. */
+  sqlite3 *db = (sqlite3 *)sqlite3_user_data(p);
+  UErrorCode status = U_ZERO_ERROR;
+  const char *zLocale;      /* Locale identifier - (eg. "ja_JP") */
+  const char *zName;        /* SQL Collation sequence name (eg. "japanese") */
+  UCollator *pUCollator;    /* ICU library collation object */
+  int rc;                   /* Return code from sqlite3_create_collation_x() */
+
+  assert(nArg==2);
+  (void)nArg;               /* Only referenced by the assert() above */
+  zLocale = (const char *)sqlite3_value_text(apArg[0]);
+  zName = (const char *)sqlite3_value_text(apArg[1]);
+
+  if( !zLocale || !zName ){
+    return;
+  }
+
+  pUCollator = ucol_open(zLocale, &status);
+  if( !U_SUCCESS(status) ){
+    icuFunctionError(p, "ucol_open", status);
+    return;
+  }
+
+  rc = sqlite3_create_collation_x(db, zName, SQLITE_UTF16, (void *)pUCollator,
+      icuCollationColl, icuCollationDel
+  );
+  if( rc!=SQLITE_OK ){
+    /* NOTE(review): this assumes the destructor is not invoked when
+    ** registration fails, so the collator must be closed here - confirm
+    ** against the sqlite3_create_collation_x() contract. */
+    ucol_close(pUCollator);
+    sqlite3_result_error(p, "Error registering collation function", -1);
+  }
+}
+
/*
** Register the ICU extension functions with database db.
*/
{"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16},
{"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16},
{"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16},
+
+ {"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation},
};
int rc = SQLITE_OK;
-C Change\ssqlite3_snprintf()\sso\sthat\sit\sdoes\snot\swrite\sa\szero-terminator\sif\nthe\sbuffer\ssize\sargument\sis\sless\sthan\s1.\s\sTicket\s#2341.\s\sAdded\sdocumentation\nabout\sthe\ssqlite3_snprintf()\sfunction.\s(CVS\s3935)
-D 2007-05-07T11:24:30
+C Add\sinterface\sto\sconfigure\sSQLite\sto\suse\sICU\scollation\sfunctions.\s(CVS\s3936)
+D 2007-05-07T11:53:14
F Makefile.in ea8888bdcf53313d26576fcabcb6d0a10ecd35cd
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd
F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af
F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977
-F ext/icu/icu.c a30999ba467749ed6232d02cc8c4b5a0e62cd727
+F ext/icu/icu.c 509ac3d8afea8af6835edb9d96a52a80dd56c152
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
F main.mk 09c19ae05ac9e5654d5fd866a980b21ad9df8f30
F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638
F test/func.test 6727c7729472ae52b5acd86e802f89aa350ba50f
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
+F test/icu.test e6bfae7f625c88fd14df6f540fe835bdfc1e4329
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
F test/incrblob.test 7f82ae497364612aa17a37f77f12e01e2bee9f20
F test/incrblob_err.test 9dae0762ba4d73b516d176d091c6b2b16f625953
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
-P ff49d48f2f025898a0f4ace1fc227e1d367ea89f
-R d693f630962da031deefd4769c7a8268
-U drh
-Z ff527a494f455c458a9191e7c78f4220
+P f3ae4ac5fe0bfa2f91e76a6def86c444e51fe80b
+R 20cd23ff512d65479e7ed637ec43cb14
+U danielk1977
+Z 04087bd460c94ac099a97176e3a307b1
--- /dev/null
+# 2007 May 1
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# $Id: icu.test,v 1.1 2007/05/07 11:53:14 danielk1977 Exp $
+#
+
+# Load the shared test harness from the directory containing this script.
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# Skip the whole file when the build does not have the "icu" capability.
+ifcapable !icu {
+ finish_test
+ return
+}
+
+# Create a table to work with and give it a single row. The tests
+# below overwrite columns of that row inside a transaction.
+#
+execsql {CREATE TABLE test1(i1 int, i2 int, r1 real, r2 real, t1 text, t2 text)}
+execsql {INSERT INTO test1 VALUES(1,2,1.1,2.2,'hello','world')}
+# Run one expression test named $name: inside a transaction, apply
+# "UPDATE test1 SET $settings", evaluate "SELECT $expr FROM test1" and
+# compare the value obtained with $result. The transaction is rolled
+# back afterwards so table test1 is left unmodified.
+#
+proc test_expr {name settings expr result} {
+ set script [format {
+ db one {
+ BEGIN;
+ UPDATE test1 SET %s;
+ SELECT %s FROM test1;
+ ROLLBACK;
+ }
+ } $settings $expr]
+ do_test $name $script $result
+}
+
+# Tests of the REGEXP operator. The pattern has to match the entire
+# string: '.ell' does not match 'hello' (icu-1.3) whereas '.ell.*'
+# does (icu-1.4). A NULL left-hand operand gives an empty result
+# (icu-1.5).
+#
+test_expr icu-1.1 {i1='hello'} {i1 REGEXP 'hello'} 1
+test_expr icu-1.2 {i1='hello'} {i1 REGEXP '.ello'} 1
+test_expr icu-1.3 {i1='hello'} {i1 REGEXP '.ell'} 0
+test_expr icu-1.4 {i1='hello'} {i1 REGEXP '.ell.*'} 1
+test_expr icu-1.5 {i1=NULL} {i1 REGEXP '.ell.*'} {}
+
+# Some non-ASCII characters with defined case mappings: E/e with grave
+# accent (U+00C8/U+00E8) and O/o with grave accent (U+00D2/U+00F2).
+#
+set ::EGRAVE "\xC8"
+set ::egrave "\xE8"
+
+set ::OGRAVE "\xD2"
+set ::ograve "\xF2"
+
+# The German sharp s (U+00DF), which looks a bit like a "B". Its
+# upper-case version is "SS" (two characters).
+#
+set ::szlig "\xDF"
+
+# Tests of the upper()/lower() functions. Every test has a unique
+# name so that a failure report identifies exactly one case.
+#
+# Plain ASCII input.
+test_expr icu-2.1 {i1='HellO WorlD'} {upper(i1)} {HELLO WORLD}
+test_expr icu-2.2 {i1='HellO WorlD'} {lower(i1)} {hello world}
+
+# Lower-case accented input.
+test_expr icu-2.3 {i1=$::egrave} {lower(i1)} $::egrave
+test_expr icu-2.4 {i1=$::egrave} {upper(i1)} $::EGRAVE
+test_expr icu-2.5 {i1=$::ograve} {lower(i1)} $::ograve
+test_expr icu-2.6 {i1=$::ograve} {upper(i1)} $::OGRAVE
+
+# Upper-case accented input.
+test_expr icu-2.7 {i1=$::EGRAVE} {lower(i1)} $::egrave
+test_expr icu-2.8 {i1=$::EGRAVE} {upper(i1)} $::EGRAVE
+test_expr icu-2.9 {i1=$::OGRAVE} {lower(i1)} $::ograve
+test_expr icu-2.10 {i1=$::OGRAVE} {upper(i1)} $::OGRAVE
+
+# A case mapping that changes the number of characters.
+test_expr icu-2.11 {i1=$::szlig} {upper(i1)} "SS"
+test_expr icu-2.12 {i1='SS'} {lower(i1)} "ss"
+
+# In Turkish (locale="tr_TR"), the lower-case version of I
+# is "small dotless i" (code point 0x131 (decimal 305)).
+# Without a locale argument (icu-3.1), or with a non-Turkish locale
+# such as 'en_AU' (icu-3.3), the expected result is the ordinary "i".
+#
+set ::small_dotless_i "\u0131"
+test_expr icu-3.1 {i1='I'} {lower(i1)} "i"
+test_expr icu-3.2 {i1='I'} {lower(i1, 'tr_tr')} $::small_dotless_i
+test_expr icu-3.3 {i1='I'} {lower(i1, 'en_AU')} "i"
+
+#--------------------------------------------------------------------
+# Test the collation sequence function.
+#
+# icu-4.1 populates a table of names that sort differently under the
+# two collations registered in icu-4.2.
+#
+do_test icu-4.1 {
+ execsql {
+ CREATE TABLE fruit(name);
+ INSERT INTO fruit VALUES('plum');
+ INSERT INTO fruit VALUES('cherry');
+ INSERT INTO fruit VALUES('apricot');
+ INSERT INTO fruit VALUES('peach');
+ INSERT INTO fruit VALUES('chokecherry');
+ INSERT INTO fruit VALUES('yamot');
+ }
+} {}
+# Register the "AmericanEnglish" (en_US) and "Lithuanian" (lt_LT)
+# collations via icu_load_collation(), then check the en_US ordering.
+#
+do_test icu-4.2 {
+ execsql {
+ SELECT icu_load_collation('en_US', 'AmericanEnglish');
+ SELECT icu_load_collation('lt_LT', 'Lithuanian');
+ }
+ execsql {
+ SELECT name FROM fruit ORDER BY name COLLATE AmericanEnglish ASC;
+ }
+} {apricot cherry chokecherry peach plum yamot}
+
+
+# Test collation using Lithuanian rules. In the Lithuanian
+# alphabet, "y" comes right after "i", so "yamot" is expected to
+# sort before "peach".
+#
+do_test icu-4.3 {
+ execsql {
+ SELECT name FROM fruit ORDER BY name COLLATE Lithuanian ASC;
+ }
+} {apricot cherry chokecherry yamot peach plum}
+
+finish_test
+