** for handling unicode data) and SQLite. The integration uses
** ICU to provide the following to SQLite:
**
+** * An implementation of the SQL regexp() function (and hence REGEXP
+** operator) using the ICU uregex_XX() APIs.
+**
** * Implementations of the SQL scalar upper() and lower()
-** functions for case mapping,
+** functions for case mapping.
**
** * Collation sequences
**
-** * Implementation of the SQL regexp() function (and hence REGEXP
-** operator) using the ICU uregex_XX() APIs.
-**
** * LIKE
*/
}
/*
-** LIKE operator.
+** Compare two UTF-8 strings for equality where the first string is
+** a "LIKE" expression. Return true (1) if they are the same and
+** false (0) if they are different.
+**
+** Within the pattern, an unescaped "%" matches any run of zero or more
+** characters and an unescaped "_" matches exactly one character. A
+** character that follows an unescaped uEsc is always handled as an
+** ordinary character. Ordinary characters are compared after both
+** sides have been passed through u_foldCase(), so the comparison is
+** caseless.
+**
+** Each "%" is resolved by recursing on the remainder of the pattern at
+** successive character positions in the string.
+**
+** NOTE(review): the U8_NEXT_UNSAFE and U8_FWD_1_UNSAFE macros used
+** here do not validate their input. Confirm that both strings are
+** guaranteed to be well-formed, NUL-terminated UTF-8; otherwise a
+** truncated multi-byte sequence could advance an index past the
+** terminator.
+*/
+static int icuLikeCompare(
+ const uint8_t *zPattern, /* The UTF-8 LIKE pattern (NUL-terminated) */
+ const uint8_t *zString, /* The UTF-8 string to compare against (NUL-terminated) */
+ const UChar32 uEsc /* The escape character (icuLikeFunc passes 0 when there is none) */
+){
+ static const int MATCH_ONE = (UChar32)'_';
+ static const int MATCH_ALL = (UChar32)'%';
+
+ int iPattern = 0; /* Current byte index in zPattern */
+ int iString = 0; /* Current byte index in zString */
+
+ int prevEscape = 0; /* True if the previous character was uEsc */
+
+ while( zPattern[iPattern]!=0 ){
+
+ /* Read (and consume) the next character from the input pattern.
+ ** iPattern is advanced past every byte of the decoded character.
+ */
+ UChar32 uPattern;
+ U8_NEXT_UNSAFE(zPattern, iPattern, uPattern);
+ assert(uPattern!=0);
+
+ /* There are now 4 possibilities:
+ **
+ ** 1. uPattern is an unescaped match-all character "%",
+ ** 2. uPattern is an unescaped match-one character "_",
+ ** 3. uPattern is an unescaped escape character, or
+ ** 4. uPattern is to be handled as an ordinary character
+ **
+ ** Cases 1-3 all require prevEscape to be clear, so any character
+ ** that follows an escape falls through to case 4.
+ */
+ if( !prevEscape && uPattern==MATCH_ALL ){
+ /* Case 1. This branch always returns; it never falls back into
+ ** the enclosing loop.
+ */
+ uint8_t c;
+
+ /* Skip any MATCH_ALL or MATCH_ONE characters that follow a
+ ** MATCH_ALL. For each MATCH_ONE, skip one character in the
+ ** test string.
+ **
+ ** The raw pattern byte is compared directly because both wildcards
+ ** are single-byte ASCII characters, which is also why iPattern
+ ** advances by exactly one byte per iteration.
+ */
+ while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){
+ if( c==MATCH_ONE ){
+ if( zString[iString]==0 ) return 0;
+ U8_FWD_1_UNSAFE(zString, iString);
+ }
+ iPattern++;
+ }
+
+ if( zPattern[iPattern]==0 ) return 1;
+
+ while( zString[iString] ){
+ if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){
+ return 1;
+ }
+ U8_FWD_1_UNSAFE(zString, iString);
+ }
+ return 0;
+
+ }else if( !prevEscape && uPattern==MATCH_ONE ){
+ /* Case 2. Consume one whole character of the string; fail if the
+ ** string is already exhausted.
+ */
+ if( zString[iString]==0 ) return 0;
+ U8_FWD_1_UNSAFE(zString, iString);
+
+ }else if( !prevEscape && uPattern==uEsc){
+ /* Case 3. Nothing is consumed from the string; just remember that
+ ** the next pattern character is escaped.
+ */
+ prevEscape = 1;
+
+ }else{
+ /* Case 4. Decode one character from the string and compare it with
+ ** the pattern character after case-folding both.
+ */
+ UChar32 uString;
+ U8_NEXT_UNSAFE(zString, iString, uString);
+ uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT);
+ uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT);
+ if( uString!=uPattern ){
+ return 0;
+ }
+ prevEscape = 0;
+ }
+ }
+
+ return zString[iString]==0;
+}
+
+/*
+** Implementation of the like() SQL function. This function implements
+** the built-in LIKE operator. The first argument to the function is the
+** pattern and the second argument is the string. So, the SQL statement:
**
-** http://unicode.org/reports/tr21/tr21-5.html#Caseless_Matching
+** A LIKE B
+**
+** is implemented as like(B, A). If there is an escape character E,
+**
+** A LIKE B ESCAPE E
+**
+** is mapped to like(B, A, E).
+**
+** The result is the integer returned by icuLikeCompare(). No result is
+** set if sqlite3_value_text() yields a null pointer for the pattern or
+** for the string, or if an ESCAPE argument is supplied and its text is
+** a null pointer. An ESCAPE argument that is not exactly one character
+** long raises an SQL error instead.
*/
+static void icuLikeFunc(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ const unsigned char *zA = sqlite3_value_text(argv[0]);
+ const unsigned char *zB = sqlite3_value_text(argv[1]);
+ UChar32 uEsc = 0;
+
+ if( argc==3 ){
+ /* The escape character string must consist of a single UTF-8 character.
+ ** Otherwise, return an error.
+ **
+ ** U8_NEXT() decodes one code point from zE into uEsc and advances i
+ ** past it; the string is accepted only if i then equals the byte
+ ** length nE.
+ **
+ ** NOTE(review): sqlite3_value_bytes() is called before
+ ** sqlite3_value_text() here, whereas the SQLite documentation
+ ** recommends fetching the text first and the byte count second.
+ ** Confirm that the current order is intentional.
+ */
+ int nE= sqlite3_value_bytes(argv[2]);
+ const unsigned char *zE = sqlite3_value_text(argv[2]);
+ int i = 0;
+ if( zE==0 ) return;
+ U8_NEXT(zE, i, nE, uEsc);
+ if( i!=nE){
+ sqlite3_result_error(context,
+ "ESCAPE expression must be a single character", -1);
+ return;
+ }
+ }
+
+ if( zA && zB ){
+ sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc));
+ }
+}
/*
** This function is called when an ICU function called from within
}
zInput = sqlite3_value_text16(apArg[0]);
+ if( !zInput ){
+ return;
+ }
nInput = sqlite3_value_bytes16(apArg[0]);
nOutput = nInput * 2 + 2;
case UCOL_GREATER: return +1;
case UCOL_EQUAL: return 0;
}
- assert(!"Bad return value from ucol_strcoll()");
+ assert(!"Unexpected return value from ucol_strcoll()");
return 0;
}
}
assert(p);
- rc = sqlite3_create_collation_x(db, zName, SQLITE_UTF16, (void *)pUCollator,
+ rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator,
icuCollationColl, icuCollationDel
);
if( rc!=SQLITE_OK ){
void *pContext; /* sqlite3_user_data() context */
void (*xFunc)(sqlite3_context*,int,sqlite3_value**);
} scalars[] = {
- {"regexp", 2, SQLITE_ANY, 0, icuRegexpFunc},
+ {"regexp",-1, SQLITE_ANY, 0, icuRegexpFunc},
{"lower", 1, SQLITE_UTF16, 0, icuCaseFunc16},
{"lower", 2, SQLITE_UTF16, 0, icuCaseFunc16},
{"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16},
{"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16},
+ {"like", 2, SQLITE_UTF8, 0, icuLikeFunc},
+ {"like", 3, SQLITE_UTF8, 0, icuLikeFunc},
+
{"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation},
};
-C Change\sthe\sname\sof\screate_collation_x()\sto\screate_collation_v2().\sAlso\sadd\ssome\stests\sfor\sit.\s(CVS\s3938)
-D 2007-05-07T14:58:53
+C Add\sa\sversion\sof\sthe\sLIKE\soperator\sto\sthe\sicu\sextension.\sRequires\soptimisation.\s(CVS\s3939)
+D 2007-05-07T16:58:02
F Makefile.in ab0f3cb6b34aa8ccec0bb57e6696fd4bd6b34a8f
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd
F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af
F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977
-F ext/icu/icu.c 509ac3d8afea8af6835edb9d96a52a80dd56c152
+F ext/icu/icu.c 6b47f5bbaf32bce03112282ecca1f54bec969e42
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
F main.mk 09c19ae05ac9e5654d5fd866a980b21ad9df8f30
F test/fts2l.test 4c53c89ce3919003765ff4fd8d98ecf724d97dd3
F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638
-F test/func.test 6727c7729472ae52b5acd86e802f89aa350ba50f
+F test/func.test 5e32fe07bf4113ce2923df28af78c76702f6cd92
F test/hook.test 7e7645fd9a033f79cce8fdff151e32715e7ec50a
F test/icu.test e6bfae7f625c88fd14df6f540fe835bdfc1e4329
F test/in.test 369cb2aa1eab02296b4ec470732fe8c131260b1d
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
-P db51f59a7bb7530f919858e2c51057839f6c9f4d
-R 27185812931e7f9d41fbfd4819a22118
+P ddc4e4797ff902692c4f0d86ec5f4e94cc7f0741
+R c3baf3c645e55d9b878c7a05ae7a30bb
U danielk1977
-Z ae7686d308deffcdeb3aacd0475a723f
+Z a53ea51c5cda49495951728b7ccf7458