*/
}]
puts ""
- puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)"
- puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
+ if {$::generate_fts5_code} {
+ puts "#if defined(SQLITE_ENABLE_FTS5)"
+ } else {
+ puts "#if defined(SQLITE_ENABLE_FTS4_UNICODE61)"
+ puts "#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)"
+ }
puts ""
puts "#include <assert.h>"
puts ""
# our liking.
#
proc usage {} {
+ # Write the command-line synopsis to stderr, then terminate with status 1.
- puts -nonewline stderr "Usage: $::argv0 ?-test? "
+ puts -nonewline stderr "Usage: $::argv0 ?-test? ?-fts5? "
puts stderr "<CaseFolding.txt file> <UnicodeData.txt file>"
exit 1
}
-if {[llength $argv]!=2 && [llength $argv]!=3} usage
-if {[llength $argv]==3 && [lindex $argv 0]!="-test"} usage
+if {[llength $argv]<2} usage
set unicodedata.txt [lindex $argv end]
set casefolding.txt [lindex $argv end-1]
-set generate_test_code [expr {[llength $argv]==3}]
+
+# Option defaults: no test harness, and FTS3/4-style "sqlite3Fts" function
+# names. The loop below examines every argument except the final two (the
+# two input file names) and treats each one as a switch:
+#
+#   -test   also emit the test routines and main()
+#   -fts5   use the "sqlite3Fts5" prefix and the FTS5 #if guard
+#
+# Any other switch prints the usage message and exits.
+set generate_test_code 0
+set generate_fts5_code 0
+set function_prefix "sqlite3Fts"
+for {set i 0} {$i < [llength $argv]-2} {incr i} {
+ switch -- [lindex $argv $i] {
+ -test {
+ set generate_test_code 1
+ }
+ -fts5 {
+ set function_prefix sqlite3Fts5
+ set generate_fts5_code 1
+ }
+ default {
+ usage
+ }
+ }
+}
print_fileheader
# Print the isalnum() function to stdout.
#
set lRange [an_load_separator_ranges]
-print_isalnum sqlite3FtsUnicodeIsalnum $lRange
+print_isalnum ${function_prefix}UnicodeIsalnum $lRange
# Leave a gap between the two generated C functions.
#
print_rd $mappings
puts ""
puts ""
-print_isdiacritic sqlite3FtsUnicodeIsdiacritic $mappings
+print_isdiacritic ${function_prefix}UnicodeIsdiacritic $mappings
puts ""
puts ""
# Print the fold() function to stdout.
#
-print_fold sqlite3FtsUnicodeFold
+print_fold ${function_prefix}UnicodeFold
# Print the test routines and main() function to stdout, if -test
# was specified.
#
if {$::generate_test_code} {
- print_test_isalnum sqlite3FtsUnicodeIsalnum $lRange
- print_fold_test sqlite3FtsUnicodeFold $mappings
+ print_test_isalnum ${function_prefix}UnicodeIsalnum $lRange
+ print_fold_test ${function_prefix}UnicodeFold $mappings
print_test_main
}
-puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
-puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */"
+if {$generate_fts5_code} {
+ puts "#endif /* defined(SQLITE_ENABLE_FTS5) */"
+} else {
+ puts "#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */"
+ puts "#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */"
+}
return TCL_OK;
}
+/* Context object passed (as void*) to xTokenizeCb2(). */
+typedef struct F5tTokenizeCtx F5tTokenizeCtx;
+struct F5tTokenizeCtx {
+ Tcl_Obj *pRet; /* Tcl list that xTokenizeCb2() appends results to */
+ int bSubst; /* True: emit {iPos token input-substring} per token */
+ const char *zInput; /* Tokenized text; sliced by [iStart,iEnd) if bSubst */
+};
+
+/*
+** Token callback used by the Tcl tokenize test command. pCtx points to an
+** F5tTokenizeCtx. Each call appends one token's details to the pRet list
+** of that context, in one of two layouts selected by bSubst. Always
+** returns SQLITE_OK.
+*/
static int xTokenizeCb2(
void *pCtx,
const char *zToken, int nToken,
int iStart, int iEnd, int iPos
){
- Tcl_Obj *pRet = (Tcl_Obj*)pCtx;
- Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
- Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iStart));
- Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iEnd));
- Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
+ F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
+ if( p->bSubst ){
+ /* Layout: position, token text, then bytes iStart..iEnd-1 of the input */
+ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos));
+ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
+ Tcl_ListObjAppendElement(
+ 0, p->pRet, Tcl_NewStringObj(&p->zInput[iStart], iEnd-iStart)
+ );
+ }else{
+ /* Layout: token text, start offset, end offset, position */
+ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewStringObj(zToken, nToken));
+ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iStart));
+ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iEnd));
+ Tcl_ListObjAppendElement(0, p->pRet, Tcl_NewIntObj(iPos));
+ }
return SQLITE_OK;
}
int objc,
Tcl_Obj *CONST objv[]
){
- char *zName;
char *zText;
int nText;
sqlite3 *db = 0;
void *pUserdata;
int rc;
- if( objc!=4 ){
- Tcl_WrongNumArgs(interp, 1, objv, "DB NAME TEXT");
+ int nArg;
+ const char **azArg;
+ F5tTokenizeCtx ctx;
+
+ if( objc!=4 && objc!=5 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "?-subst? DB NAME TEXT");
return TCL_ERROR;
}
- if( f5tDbAndApi(interp, objv[1], &db, &pApi) ) return TCL_ERROR;
- zName = Tcl_GetString(objv[2]);
- zText = Tcl_GetStringFromObj(objv[3], &nText);
+ if( objc==5 ){
+ char *zOpt = Tcl_GetString(objv[1]);
+ if( strcmp("-subst", zOpt) ){
+ Tcl_AppendResult(interp, "unrecognized option: ", zOpt, 0);
+ return TCL_ERROR;
+ }
+ }
+ if( f5tDbAndApi(interp, objv[objc-3], &db, &pApi) ) return TCL_ERROR;
+ if( Tcl_SplitList(interp, Tcl_GetString(objv[objc-2]), &nArg, &azArg) ){
+ return TCL_ERROR;
+ }
+ if( nArg==0 ){
+ Tcl_AppendResult(interp, "no such tokenizer: ", 0);
+ Tcl_Free((void*)azArg);
+ return TCL_ERROR;
+ }
+ zText = Tcl_GetStringFromObj(objv[objc-1], &nText);
- rc = pApi->xFindTokenizer(pApi, zName, &pUserdata, &tokenizer);
+ rc = pApi->xFindTokenizer(pApi, azArg[0], &pUserdata, &tokenizer);
if( rc!=SQLITE_OK ){
- Tcl_AppendResult(interp, "no such tokenizer: ", zName, 0);
+ Tcl_AppendResult(interp, "no such tokenizer: ", azArg[0], 0);
return TCL_ERROR;
}
- rc = tokenizer.xCreate(pUserdata, 0, 0, &pTok);
+ rc = tokenizer.xCreate(pUserdata, &azArg[1], nArg-1, &pTok);
if( rc!=SQLITE_OK ){
Tcl_AppendResult(interp, "error in tokenizer.xCreate()", 0);
return TCL_ERROR;
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
- rc = tokenizer.xTokenize(pTok, pRet, zText, nText, xTokenizeCb2);
+ ctx.bSubst = (objc==5);
+ ctx.pRet = pRet;
+ ctx.zInput = zText;
+ rc = tokenizer.xTokenize(pTok, (void*)&ctx, zText, nText, xTokenizeCb2);
tokenizer.xDelete(pTok);
if( rc!=SQLITE_OK ){
Tcl_AppendResult(interp, "error in tokenizer.xTokenize()", 0);
}
+ Tcl_Free((void*)azArg);
Tcl_SetObjResult(interp, pRet);
Tcl_DecrRefCount(pRet);
return TCL_OK;
#include <string.h>
#include <assert.h>
+/**************************************************************************
+** Start of simple tokenizer implementation.
+*/
/*
** Create a "simple" tokenizer.
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
){
- int rc;
+ int rc = SQLITE_OK;
int ie;
int is = 0;
int iPos = 0;
int nFold = sizeof(aFold);
char *pFold = aFold;
- do {
+ while( is<nText && rc==SQLITE_OK ){
int nByte;
/* Skip any leading divider characters. */
rc = xToken(pCtx, pFold, nByte, is, ie, iPos);
iPos++;
is = ie+1;
- }while( is<nText && rc==SQLITE_OK );
+ }
if( pFold!=aFold ) sqlite3_free(pFold);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
return rc;
}
+/**************************************************************************
+** Start of unicode61 tokenizer implementation.
+*/
+
+/*
+** Functions in fts5_unicode2.c.
+*/
+int sqlite3Fts5UnicodeIsalnum(int c);
+int sqlite3Fts5UnicodeIsdiacritic(int c);
+int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic);
+
+
+/*
+** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
+** from the sqlite3 source file utf.c. If this file is compiled as part
+** of the amalgamation, they are not required.
+*/
+#ifndef SQLITE_AMALGAMATION
+
+/*
+** Lookup table used by READ_UTF8. It has 64 entries and is indexed by
+** (lead byte - 0xC0). Each entry is the initial accumulator value for a
+** multi-byte character that begins with that lead byte: the first 32
+** entries run 0x00..0x1f, the next 16 run 0x00..0x0f, the next 8 run
+** 0x00..0x07, and the final 8 are 0,1,2,3,0,1,0,0.
+*/
+static const unsigned char sqlite3Utf8Trans1[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+/*
+** READ_UTF8(zIn, zTerm, c): read one character starting at zIn into c and
+** advance zIn past it. zTerm marks the end of the input; continuation
+** bytes are never read at or beyond it.
+**
+** A first byte below 0xC0 is stored in c unchanged. Otherwise c is seeded
+** from sqlite3Utf8Trans1[] and every following byte of the form 10xxxxxx
+** contributes six more bits. After decoding, c is replaced by 0xFFFD if
+** it is below 0x80, satisfies (c&0xFFFFF800)==0xD800, or satisfies
+** (c&0xFFFFFFFE)==0xFFFE.
+**
+** The expansion is several statements with no enclosing braces and
+** mentions zIn and c more than once, so use it only as a complete
+** statement and only with plain variables as arguments.
+*/
+#define READ_UTF8(zIn, zTerm, c) \
+ c = *(zIn++); \
+ if( c>=0xc0 ){ \
+ c = sqlite3Utf8Trans1[c-0xc0]; \
+ while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
+ c = (c<<6) + (0x3f & *(zIn++)); \
+ } \
+ if( c<0x80 \
+ || (c&0xFFFFF800)==0xD800 \
+ || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
+ }
+
+/*
+** WRITE_UTF8(zOut, c): store codepoint c at zOut as 1 byte (c<0x80),
+** 2 bytes (c<0x800), 3 bytes (c<0x10000) or 4 bytes (anything else), and
+** advance zOut past the bytes written. The caller must make sure at
+** least 4 bytes are available at zOut. Both arguments are mentioned
+** several times in the expansion, so pass plain variables only.
+*/
+#define WRITE_UTF8(zOut, c) { \
+ if( c<0x00080 ){ \
+ *zOut++ = (unsigned char)(c&0xFF); \
+ } \
+ else if( c<0x00800 ){ \
+ *zOut++ = 0xC0 + (unsigned char)((c>>6)&0x1F); \
+ *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
+ } \
+ else if( c<0x10000 ){ \
+ *zOut++ = 0xE0 + (unsigned char)((c>>12)&0x0F); \
+ *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
+ }else{ \
+ *zOut++ = 0xF0 + (unsigned char)((c>>18) & 0x07); \
+ *zOut++ = 0x80 + (unsigned char)((c>>12) & 0x3F); \
+ *zOut++ = 0x80 + (unsigned char)((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (unsigned char)(c & 0x3F); \
+ } \
+}
+
+#endif /* ifndef SQLITE_AMALGAMATION */
+
+/*
+** State of one unicode61 tokenizer instance. aiException[] lists the
+** codepoints whose token/separator classification is the opposite of
+** what sqlite3Fts5UnicodeIsalnum() reports.
+*/
+typedef struct Unicode61Tokenizer Unicode61Tokenizer;
+struct Unicode61Tokenizer {
+ int bRemoveDiacritic; /* True if remove_diacritics=1 is set */
+ int nException; /* Number of entries in aiException[] */
+ int *aiException; /* Exception codepoints, in ascending order */
+};
+
+/*
+** Add the characters of nul-terminated UTF-8 string z to the exception
+** list of tokenizer p. bTokenChars is 1 when z came from a 'tokenchars'
+** option and 0 when it came from 'separators'.
+**
+** A codepoint is only recorded if its default classification differs
+** from the requested one and it is not a diacritic. New entries are
+** inserted so that p->aiException[] stays in ascending order.
+**
+** Returns SQLITE_OK, or SQLITE_NOMEM if the array cannot be enlarged (in
+** which case the tokenizer is left unmodified).
+*/
+static int fts5UnicodeAddExceptions(
+  Unicode61Tokenizer *p,          /* Tokenizer object */
+  const char *z,                  /* Characters to treat as exceptions */
+  int bTokenChars                 /* 1 for 'tokenchars', 0 for 'separators' */
+){
+  int nByte = strlen(z);          /* Size of z in bytes */
+  int *aList;                     /* Enlarged exception array */
+  int nList;                      /* Number of valid entries in aList[] */
+  const unsigned char *zIn;       /* Read cursor within z */
+  const unsigned char *zEnd;      /* One past the last byte of z */
+
+  if( nByte<=0 ) return SQLITE_OK;
+
+  /* Each character occupies at least one byte, so nByte extra slots are
+  ** always sufficient. */
+  aList = (int*)sqlite3_realloc(
+      p->aiException, (nByte+p->nException)*sizeof(int)
+  );
+  if( aList==0 ) return SQLITE_NOMEM;
+
+  nList = p->nException;
+  zIn = (const unsigned char*)z;
+  zEnd = (const unsigned char*)&z[nByte];
+  while( zIn<zEnd ){
+    int iCode;
+    int bToken;
+    int iSlot;
+
+    READ_UTF8(zIn, zEnd, iCode);
+    bToken = sqlite3Fts5UnicodeIsalnum(iCode);
+    assert( (bToken==0 || bToken==1) );
+    assert( (bTokenChars==0 || bTokenChars==1) );
+
+    /* Already classified as requested - nothing to record. */
+    if( bToken==bTokenChars ) continue;
+    if( sqlite3Fts5UnicodeIsdiacritic(iCode)!=0 ) continue;
+
+    /* Locate the first entry larger than iCode and insert in front of it. */
+    iSlot = 0;
+    while( iSlot<nList && aList[iSlot]<=iCode ) iSlot++;
+    memmove(&aList[iSlot+1], &aList[iSlot], (nList-iSlot)*sizeof(int));
+    aList[iSlot] = iCode;
+    nList++;
+  }
+
+  p->aiException = aList;
+  p->nException = nList;
+  return SQLITE_OK;
+}
+
+/*
+** Binary-search the sorted p->aiException[] array for iCode. Return 1 if
+** it is present and 0 if it is not (including when the array is empty).
+*/
+static int fts5UnicodeIsException(Unicode61Tokenizer *p, int iCode){
+  const int *aList = p->aiException;
+  int iFirst = 0;
+  int iLast = p->nException - 1;
+
+  while( iFirst<=iLast ){
+    int iMid = (iFirst + iLast) / 2;
+    int iVal = aList[iMid];
+    if( iVal==iCode ) return 1;
+    if( iVal<iCode ){
+      iFirst = iMid+1;
+    }else{
+      iLast = iMid-1;
+    }
+  }
+
+  return 0;
+}
+
+/*
+** Create a "unicode61" tokenizer.
+**
+** azArg[] holds nArg strings interpreted as name/value pairs. The names
+** recognized (case-insensitively) are:
+**
+**   remove_diacritics   value must be exactly "0" or "1" (default is 1)
+**   tokenchars          characters to treat as token characters
+**   separators          characters to treat as separators
+**
+** On success SQLITE_OK is returned and *ppOut is set to the new object.
+** SQLITE_ERROR is returned if nArg is odd, a name is not recognized or a
+** remove_diacritics value is malformed; SQLITE_NOMEM if an allocation
+** fails. On any failure everything allocated here is released and *ppOut
+** is set to NULL, so the caller has nothing to clean up.
+*/
+static int fts5UnicodeCreate(
+  void *pCtx,
+  const char **azArg, int nArg,
+  Fts5Tokenizer **ppOut
+){
+  int rc = SQLITE_OK;             /* Return code */
+  Unicode61Tokenizer *p = 0;      /* New tokenizer object */
+
+  if( nArg%2 ){
+    rc = SQLITE_ERROR;
+  }else{
+    p = (Unicode61Tokenizer*)sqlite3_malloc(sizeof(Unicode61Tokenizer));
+    if( p==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      int i;
+      memset(p, 0, sizeof(Unicode61Tokenizer));
+      p->bRemoveDiacritic = 1;
+      for(i=0; rc==SQLITE_OK && i<nArg; i+=2){
+        const char *zArg = azArg[i+1];
+        if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){
+          if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1] ){
+            rc = SQLITE_ERROR;
+          }else{
+            p->bRemoveDiacritic = (zArg[0]=='1');
+          }
+        }else
+        if( 0==sqlite3_stricmp(azArg[i], "tokenchars") ){
+          rc = fts5UnicodeAddExceptions(p, zArg, 1);
+        }else
+        if( 0==sqlite3_stricmp(azArg[i], "separators") ){
+          rc = fts5UnicodeAddExceptions(p, zArg, 0);
+        }else{
+          rc = SQLITE_ERROR;
+        }
+      }
+
+      /* Do not hand a half-configured object back with an error code:
+      ** the caller cannot be expected to delete it. */
+      if( rc!=SQLITE_OK ){
+        sqlite3_free(p->aiException);
+        sqlite3_free(p);
+        p = 0;
+      }
+    }
+  }
+
+  *ppOut = (Fts5Tokenizer*)p;
+  return rc;
+}
+
+/*
+** Delete a "unicode61" tokenizer: release the exception array and then
+** the object itself. Passing NULL is a harmless no-op.
+*/
+static void fts5UnicodeDelete(Fts5Tokenizer *pTok){
+  if( pTok ){
+    Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTok;
+    sqlite3_free(p->aiException);
+    sqlite3_free(p);
+  }
+}
+
+/*
+** Return 1 if tokenizer p treats codepoint iCode as a token character and
+** 0 if it treats it as a separator. This is the default classification
+** from sqlite3Fts5UnicodeIsalnum(), inverted when iCode appears in the
+** exception list of p.
+*/
+static int fts5UnicodeIsAlnum(Unicode61Tokenizer *p, int iCode){
+  int bDefault;                   /* Classification without exceptions */
+  int bInvert;                    /* True if iCode is a listed exception */
+
+  assert( (sqlite3Fts5UnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
+  bDefault = sqlite3Fts5UnicodeIsalnum(iCode);
+  bInvert = fts5UnicodeIsException(p, iCode);
+  return bDefault ^ bInvert;
+}
+
+/*
+** Tokenize some text using a unicode61 tokenizer.
+**
+** pText/nText is the UTF-8 input. For every token found, xToken() is
+** invoked with pCtx, the folded token text and its size in bytes, the byte
+** offsets within pText of the first byte of the token and of the byte
+** following it, and the token position (0 for the first token, then 1,
+** 2, ...).
+**
+** A token is a maximal run of characters for which fts5UnicodeIsAlnum()
+** is true. Once a token has started, diacritic characters are also
+** accepted as part of it.
+**
+** Tokenization stops as soon as xToken() returns anything other than
+** SQLITE_OK. SQLITE_DONE from the callback is reported as SQLITE_OK, the
+** same way the "simple" tokenizer does; any other value is returned
+** unchanged. SQLITE_NOMEM is returned if the output buffer cannot grow.
+*/
+static int fts5UnicodeTokenize(
+  Fts5Tokenizer *pTokenizer,
+  void *pCtx,
+  const char *pText, int nText,
+  int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
+){
+  Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
+  const unsigned char *zInput = (const unsigned char*)pText;
+  const unsigned char *zTerm = &zInput[nText];
+  const unsigned char *z = zInput;
+  /* zStart has to keep its value from one loop iteration to the next, so
+  ** it must be declared out here rather than inside the loop body. */
+  const unsigned char *zStart = zInput;  /* First byte of current token */
+  int rc = SQLITE_OK;
+  int nBuf = 0;                   /* Allocated size of zBuf in bytes */
+  unsigned char *zBuf = 0;        /* Buffer holding the folded token */
+  unsigned char *zOut = 0;        /* Write cursor within zBuf */
+  int iPos = 0;                   /* Position of the next token */
+
+  while( rc==SQLITE_OK && z<zTerm ){
+    int iCode;
+    int bAlnum;
+    const unsigned char *zCode;   /* Start of the character being read */
+
+    /* An empty buffer means no token is in progress, so a token that
+    ** starts with this character starts at z. */
+    if( zOut==zBuf ) zStart = z;
+    zCode = z;
+    READ_UTF8(z, zTerm, iCode);
+    bAlnum = fts5UnicodeIsAlnum(p, iCode);
+    if( bAlnum==0 && zOut>zBuf ){
+      bAlnum = sqlite3Fts5UnicodeIsdiacritic(iCode);
+    }
+
+    if( bAlnum ){
+      int iOut;
+
+      /* Grow the output buffer if required */
+      while( (zOut-zBuf)+4>=nBuf ){
+        unsigned char *zNew;
+        nBuf = (nBuf ? nBuf*2 : 128);
+        zNew = sqlite3_realloc(zBuf, nBuf);
+        if( zNew==0 ){
+          rc = SQLITE_NOMEM;
+          goto tokenize_finished;
+        }else{
+          zOut = &zNew[zOut-zBuf];
+          zBuf = zNew;
+        }
+      }
+
+      /* Write the new character to it */
+      iOut = sqlite3Fts5UnicodeFold(iCode, p->bRemoveDiacritic);
+      if( iOut ) WRITE_UTF8(zOut, iOut);
+    }
+
+    /* Emit the buffered token when a separator or the end of the input
+    ** is reached. */
+    if( zOut>zBuf && (bAlnum==0 || z>=zTerm) ){
+      int ie = (bAlnum ? z : zCode) - zInput;
+      rc = xToken(pCtx, (const char*)zBuf, zOut-zBuf, zStart-zInput, ie, iPos);
+      zOut = zBuf;
+      iPos++;
+    }
+  }
+
+ tokenize_finished:
+  sqlite3_free(zBuf);
+  if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+  return rc;
+}
+
/**************************************************************************
** Start of porter2 stemmer implementation.
*/
const char *zName;
fts5_tokenizer x;
} aBuiltin[] = {
- { "porter", { fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize } },
- { "simple", { fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize } }
+ { "porter", {fts5PorterCreate, fts5PorterDelete, fts5PorterTokenize }},
+ { "unicode61", {fts5UnicodeCreate, fts5UnicodeDelete, fts5UnicodeTokenize}},
+ { "simple", {fts5SimpleCreate, fts5SimpleDelete, fts5SimpleTokenize }}
};
int rc = SQLITE_OK; /* Return code */
--- /dev/null
+/*
+** 2012 May 25
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+/*
+** DO NOT EDIT THIS MACHINE GENERATED FILE.
+*/
+
+#if defined(SQLITE_ENABLE_FTS5)
+
+#include <assert.h>
+
+/*
+** Return true if the argument corresponds to a unicode codepoint
+** classified as either a letter or a number. Otherwise false.
+**
+** The return value is always exactly 0 or 1.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+int sqlite3Fts5UnicodeIsalnum(int c){
+  /* Each unsigned integer in the following array corresponds to a contiguous
+  ** range of unicode codepoints that are not either letters or numbers (i.e.
+  ** codepoints for which this function should return 0).
+  **
+  ** The most significant 22 bits in each 32-bit value contain the first
+  ** codepoint in the range. The least significant 10 bits are used to store
+  ** the size of the range (always at least 1). In other words, the value
+  ** ((C<<10) + N) represents a range of N codepoints starting with codepoint
+  ** C. It is not possible to represent a range larger than 1023 codepoints
+  ** using this format.
+  */
+  static const unsigned int aEntry[] = {
+    0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
+    0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
+    0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
+    0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
+    0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
+    0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
+    0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
+    0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
+    0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
+    0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
+    0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
+    0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
+    0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
+    0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
+    0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
+    0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
+    0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
+    0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
+    0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
+    0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
+    0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
+    0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
+    0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
+    0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
+    0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
+    0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
+    0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
+    0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
+    0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
+    0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
+    0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
+    0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
+    0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
+    0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
+    0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
+    0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
+    0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
+    0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
+    0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
+    0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
+    0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
+    0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
+    0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
+    0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
+    0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
+    0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
+    0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
+    0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
+    0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
+    0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
+    0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
+    0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
+    0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
+    0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
+    0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
+    0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
+    0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
+    0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
+    0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
+    0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
+    0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
+    0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802,
+    0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013,
+    0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06,
+    0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003,
+    0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01,
+    0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403,
+    0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009,
+    0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003,
+    0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003,
+    0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E,
+    0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046,
+    0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401,
+    0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401,
+    0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F,
+    0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C,
+    0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002,
+    0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025,
+    0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6,
+    0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46,
+    0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060,
+    0x380400F0,
+  };
+  /* One bit per ASCII codepoint; a set bit means "not a letter or number". */
+  static const unsigned int aAscii[4] = {
+    0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
+  };
+
+  if( c<128 ){
+    /* The shift is done on an unsigned value so that bit 31 is well defined */
+    return ( (aAscii[c >> 5] & ((unsigned int)1 << (c & 0x001F)))==0 );
+  }else if( c<(1<<22) ){
+    /* Setting the low 10 bits makes the key compare greater than or equal
+    ** to every entry whose first codepoint is c or less. */
+    unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
+    int iRes = 0;
+    int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+    int iLo = 0;
+    while( iHi>=iLo ){
+      int iTest = (iHi + iLo) / 2;
+      if( key >= aEntry[iTest] ){
+        iRes = iTest;
+        iLo = iTest+1;
+      }else{
+        iHi = iTest-1;
+      }
+    }
+    assert( aEntry[0]<key );
+    assert( key>=aEntry[iRes] );
+    /* aEntry[iRes] is the last range starting at or before c. c is a letter
+    ** or number if it lies beyond the end of that range. */
+    return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
+  }
+  return 1;
+}
+
+
+/*
+** If the argument is a codepoint corresponding to a lowercase letter
+** in the ASCII range with a diacritic added, return the codepoint
+** of the ASCII letter only. For example, if passed 235 - "LATIN
+** SMALL LETTER E WITH DIAERESIS" - return 101 ("LATIN SMALL LETTER
+** E"). The results of passing a codepoint that corresponds to an
+** uppercase letter are undefined.
+**
+** Any codepoint not covered by the table is returned unchanged.
+*/
+static int remove_diacritic(int c){
+  /* Each aDia[] entry packs a range as ((first codepoint)<<3 | extent): the
+  ** range covers codepoints first..first+extent inclusive. aChar[i] is the
+  ** replacement for every codepoint in range i. Both tables are read-only,
+  ** so they are static const rather than being rebuilt on every call. */
+  static const unsigned short aDia[] = {
+        0,  1797,  1848,  1859,  1891,  1928,  1940,  1995,
+     2024,  2040,  2060,  2110,  2168,  2206,  2264,  2286,
+     2344,  2383,  2472,  2488,  2516,  2596,  2668,  2732,
+     2782,  2842,  2894,  2954,  2984,  3000,  3028,  3336,
+     3456,  3696,  3712,  3728,  3744,  3896,  3912,  3928,
+     3968,  4008,  4040,  4106,  4138,  4170,  4202,  4234,
+     4266,  4296,  4312,  4344,  4408,  4424,  4472,  4504,
+     6148,  6198,  6264,  6280,  6360,  6429,  6505,  6529,
+    61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
+    61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
+    62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
+    62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
+    62924, 63050, 63082, 63274, 63390,
+  };
+  static const char aChar[] = {
+    '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c',
+    'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r',
+    's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o',
+    'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r',
+    'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0',
+    '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h',
+    'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't',
+    'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a',
+    'e', 'i', 'o', 'u', 'y',
+  };
+
+  /* Setting the low 3 bits makes the key compare greater than or equal to
+  ** every entry whose first codepoint is c or less. */
+  unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
+  int iRes = 0;
+  int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
+  int iLo = 0;
+  while( iHi>=iLo ){
+    int iTest = (iHi + iLo) / 2;
+    if( key >= aDia[iTest] ){
+      iRes = iTest;
+      iLo = iTest+1;
+    }else{
+      iHi = iTest-1;
+    }
+  }
+  assert( key>=aDia[iRes] );
+  /* aDia[iRes] is the last range starting at or before c. If c lies beyond
+  ** its end, c has no mapping and is returned as is. */
+  return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
+}
+
+
+/*
+** Return true if the argument interpreted as a unicode codepoint
+** is a diacritical modifier character.
+**
+** Only codepoints 768..817 are considered. mask0 has one bit for each of
+** codepoints 768..799 and mask1 one bit for each of 800..817. The return
+** value is zero for "no" and some non-zero value (not necessarily 1) for
+** "yes".
+*/
+int sqlite3Fts5UnicodeIsdiacritic(int c){
+  unsigned int mask0 = 0x08029FDF;
+  unsigned int mask1 = 0x000361F8;
+  if( c<768 || c>817 ) return 0;
+  /* Shift an unsigned 1: shifting a signed int into bit 31 (c==799) is
+  ** undefined behaviour. */
+  return (c < 768+32) ?
+      (mask0 & ((unsigned int)1 << (c-768))) :
+      (mask1 & ((unsigned int)1 << (c-768-32)));
+}
+
+
+/*
+** Interpret the argument as a unicode codepoint. If the codepoint
+** is an upper case character that has a lower case equivalent,
+** return the codepoint corresponding to the lower case version.
+** Otherwise, return a copy of the argument.
+**
+** If bRemoveDiacritic is non-zero and c is in the range 128..65535, the
+** folded value is additionally passed through remove_diacritic() before
+** being returned.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+int sqlite3Fts5UnicodeFold(int c, int bRemoveDiacritic){
+ /* Each entry in the following array defines a rule for folding a range
+ ** of codepoints to lower case. The rule applies to a range of nRange
+ ** codepoints starting at codepoint iCode.
+ **
+ ** If the least significant bit in flags is clear, then the rule applies
+ ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
+ ** need to be folded). Or, if it is set, then the rule only applies to
+ ** every second codepoint in the range, starting with codepoint C.
+ **
+ ** The 7 most significant bits in flags are an index into the aiOff[]
+ ** array. If a specific codepoint C does require folding, then its lower
+ ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
+ **
+ ** The contents of this array are generated by parsing the CaseFolding.txt
+ ** file distributed as part of the "Unicode Character Database". See
+ ** http://www.unicode.org for details.
+ */
+ static const struct TableEntry {
+ unsigned short iCode;
+ unsigned char flags;
+ unsigned char nRange;
+ } aEntry[] = {
+ {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
+ {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
+ {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
+ {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
+ {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
+ {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
+ {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
+ {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
+ {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
+ {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
+ {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
+ {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
+ {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
+ {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
+ {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
+ {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
+ {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
+ {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
+ {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
+ {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
+ {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
+ {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
+ {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
+ {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
+ {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
+ {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
+ {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
+ {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
+ {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
+ {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
+ {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
+ {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
+ {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
+ {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
+ {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
+ {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
+ {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
+ {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
+ {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
+ {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
+ {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
+ {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
+ {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
+ {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
+ {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
+ {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
+ {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
+ {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
+ {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
+ {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
+ {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
+ {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
+ {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
+ {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
+ {65313, 14, 26},
+ };
+ static const unsigned short aiOff[] = {
+ 1, 2, 8, 15, 16, 26, 28, 32,
+ 37, 38, 40, 48, 63, 64, 69, 71,
+ 79, 80, 116, 202, 203, 205, 206, 207,
+ 209, 210, 211, 213, 214, 217, 218, 219,
+ 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
+ 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
+ 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
+ 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
+ 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
+ 65514, 65521, 65527, 65528, 65529,
+ };
+
+ int ret = c;
+
+ assert( c>=0 );
+ assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
+
+ if( c<128 ){
+ /* ASCII: only 'A'..'Z' change. remove_diacritic() is not applied. */
+ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
+ }else if( c<65536 ){
+ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+ int iLo = 0;
+ int iRes = -1;
+
+ /* Binary search for the last rule with iCode<=c. iRes remains -1 if
+ ** there is no such rule. */
+ while( iHi>=iLo ){
+ int iTest = (iHi + iLo) / 2;
+ int cmp = (c - aEntry[iTest].iCode);
+ if( cmp>=0 ){
+ iRes = iTest;
+ iLo = iTest+1;
+ }else{
+ iHi = iTest-1;
+ }
+ }
+ assert( iRes<0 || c>=aEntry[iRes].iCode );
+
+ if( iRes>=0 ){
+ const struct TableEntry *p = &aEntry[iRes];
+ /* The rule applies if c is inside the range and, for an "every second
+ ** codepoint" rule (low bit of flags set), c has the same parity as
+ ** iCode. */
+ if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
+ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
+ assert( ret>0 );
+ }
+ }
+
+ if( bRemoveDiacritic ) ret = remove_diacritic(ret);
+ }
+
+ /* Codepoints 66560..66599 are folded by adding 40; no table is needed. */
+ else if( c>=66560 && c<66600 ){
+ ret = c + 40;
+ }
+
+ return ret;
+}
+#endif /* defined(SQLITE_ENABLE_FTS5) */
--- /dev/null
+# 2014 Dec 20
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# Tests focusing on the fts5 tokenizers
+#
+
+if {![info exists testdir]} {
+ set testdir [file join [file dirname [info script]] .. .. .. test]
+}
+source $testdir/tester.tcl
+set testprefix fts5unicode
+
+# Usage: tokenize_test TESTNAME TOKENIZER INPUT OUTPUT
+#
+# Run a do_test case named TESTNAME in the caller's scope. The test script
+# tokenizes INPUT with [sqlite3_fts5_tokenize db TOKENIZER INPUT], keeps the
+# first element of every group of four returned values (the token text) and
+# compares the resulting list with OUTPUT.
+#
+proc tokenize_test {tn tokenizer input output} {
+ uplevel [list do_test $tn [subst -nocommands {
+ set ret {}
+ foreach {z s e p} [sqlite3_fts5_tokenize db {$tokenizer} {$input}] {
+ lappend ret [set z]
+ }
+ set ret
+ }] [list {*}$output]]
+}
+
+# Run the same four basic cases against both the "simple" and "unicode61"
+# tokenizers. The expected result is the list of lower-cased tokens.
+foreach {tn t} {1 simple 2 unicode61} {
+ tokenize_test 1.$tn.0 $t {A B C D} {a b c d}
+ tokenize_test 1.$tn.1 $t {May you share freely,} {may you share freely}
+ tokenize_test 1.$tn.2 $t {..May...you.shAre.freely} {may you share freely}
+ tokenize_test 1.$tn.3 $t {} {}
+}
+
+finish_test
+
--- /dev/null
+# 2012 May 25
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#*************************************************************************
+#
+# The tests in this file focus on testing the "unicode61" FTS5 tokenizer.
+#
+# This is a modified copy of FTS4 test file "fts4_unicode.test".
+#
+
+if {![info exists testdir]} {
+ set testdir [file join [file dirname [info script]] .. .. .. test]
+}
+source $testdir/tester.tcl
+set testprefix fts5unicode2
+
+# Usage: do_unicode_token_test TESTNAME INPUT RESULT
+#
+# Tokenize INPUT with "unicode61 remove_diacritics 0" using the -subst form
+# of sqlite3_fts5_tokenize and check that the returned list equals RESULT.
+#
+proc do_unicode_token_test {tn input res} {
+ uplevel [list do_test $tn [list \
+ sqlite3_fts5_tokenize -subst db "unicode61 remove_diacritics 0" $input
+ ] [list {*}$res]]
+}
+
+# Usage: do_unicode_token_test2 TESTNAME INPUT RESULT
+#
+# As do_unicode_token_test, but with the tokenizer given as plain
+# "unicode61", i.e. with no options specified.
+#
+proc do_unicode_token_test2 {tn input res} {
+ uplevel [list do_test $tn [list \
+ sqlite3_fts5_tokenize -subst db "unicode61" $input
+ ] [list {*}$res]]
+}
+
+# Usage: do_unicode_token_test3 TESTNAME ?OPTION ...? INPUT RESULT
+#
+# The final two arguments are the text to tokenize and the expected result.
+# Any arguments before them are appended to "unicode61" to form the
+# tokenizer specification passed to sqlite3_fts5_tokenize -subst.
+#
+proc do_unicode_token_test3 {tn args} {
+ set tokenizer [concat unicode61 {*}[lrange $args 0 end-2]]
+ set input [lindex $args end-1]
+ set res [lindex $args end]
+ uplevel [list do_test $tn [list \
+ sqlite3_fts5_tokenize -subst db $tokenizer $input
+ ] [list {*}$res]]
+}
+
+do_unicode_token_test 1.0 {a B c D} {0 a a 1 b B 2 c c 3 d D}
+
+do_unicode_token_test 1.1 "\uC4 \uD6 \uDC" \
+ "0 \uE4 \uC4 1 \uF6 \uD6 2 \uFC \uDC"
+
+do_unicode_token_test 1.2 "x\uC4x x\uD6x x\uDCx" \
+ "0 x\uE4x x\uC4x 1 x\uF6x x\uD6x 2 x\uFCx x\uDCx"
+
+# 0x00DF is a small "sharp s". 0x1E9E is a capital sharp s.
+do_unicode_token_test 1.3 "\uDF" "0 \uDF \uDF"
+do_unicode_token_test 1.4 "\u1E9E" "0 \uDF \u1E9E"
+
+do_unicode_token_test 1.5 "The quick brown fox" {
+ 0 the The 1 quick quick 2 brown brown 3 fox fox
+}
+do_unicode_token_test 1.6 "The\u00bfquick\u224ebrown\u2263fox" {
+ 0 the The 1 quick quick 2 brown brown 3 fox fox
+}
+
+do_unicode_token_test2 1.7 {a B c D} {0 a a 1 b B 2 c c 3 d D}
+do_unicode_token_test2 1.8 "\uC4 \uD6 \uDC" "0 a \uC4 1 o \uD6 2 u \uDC"
+
+do_unicode_token_test2 1.9 "x\uC4x x\uD6x x\uDCx" \
+ "0 xax x\uC4x 1 xox x\uD6x 2 xux x\uDCx"
+
+# Check that diacritics are removed when no remove_diacritics option is
+# passed (do_unicode_token_test2 uses plain "unicode61"), and that they
+# do not break tokens.
+do_unicode_token_test2 1.10 "xx\u0301xx" "0 xxxx xx\u301xx"
+
+# Title-case mappings work
+do_unicode_token_test 1.11 "\u01c5" "0 \u01c6 \u01c5"
+
+#-------------------------------------------------------------------------
+#
+set docs [list {
+ Enhance the INSERT syntax to allow multiple rows to be inserted via the
+ VALUES clause.
+} {
+ Enhance the CREATE VIRTUAL TABLE command to support the IF NOT EXISTS clause.
+} {
+ Added the sqlite3_stricmp() interface as a counterpart to sqlite3_strnicmp().
+} {
+ Added the sqlite3_db_readonly() interface.
+} {
+ Added the SQLITE_FCNTL_PRAGMA file control, giving VFS implementations the
+ ability to add new PRAGMA statements or to override built-in PRAGMAs.
+} {
+ Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
+ the same row that contains the maximum x value.
+} {
+ Added support for the FTS4 languageid option.
+} {
+ Documented support for the FTS4 content option. This feature has actually
+ been in the code since version 3.7.9 but is only now considered to be
+ officially supported.
+} {
+ Pending statements no longer block ROLLBACK. Instead, the pending statement
+ will return SQLITE_ABORT upon next access after the ROLLBACK.
+} {
+ Improvements to the handling of CSV inputs in the command-line shell
+} {
+ Fix a bug introduced in version 3.7.10 that might cause a LEFT JOIN to be
+ incorrectly converted into an INNER JOIN if the WHERE clause indexable terms
+ connected by OR.
+}]
+
+# map(X) holds the upper and lower case "WITH DIAERESIS" variants of the
+# ASCII letter X, in that order.
+set map(a) [list "\u00C4" "\u00E4"] ; # LATIN LETTER A WITH DIAERESIS
+set map(e) [list "\u00CB" "\u00EB"] ; # LATIN LETTER E WITH DIAERESIS
+set map(i) [list "\u00CF" "\u00EF"] ; # LATIN LETTER I WITH DIAERESIS
+set map(o) [list "\u00D6" "\u00F6"] ; # LATIN LETTER O WITH DIAERESIS
+set map(u) [list "\u00DC" "\u00FC"] ; # LATIN LETTER U WITH DIAERESIS
+set map(y) [list "\u0178" "\u00FF"] ; # LATIN LETTER Y WITH DIAERESIS
+set map(h) [list "\u1E26" "\u1E27"] ; # LATIN LETTER H WITH DIAERESIS
+set map(w) [list "\u1E84" "\u1E85"] ; # LATIN LETTER W WITH DIAERESIS
+set map(x) [list "\u1E8C" "\u1E8D"] ; # LATIN LETTER X WITH DIAERESIS
+# Flatten the array into $mappings, a key/value list in the form used by
+# [string map]: upper case ASCII letter -> upper case variant, lower case
+# ASCII letter -> lower case variant.
+foreach k [array names map] {
+ lappend mappings [string toupper $k] [lindex $map($k) 0]
+ lappend mappings $k [lindex $map($k) 1]
+}
+# Return a copy of $doc in which each run of whitespace is collapsed to
+# a single space, leading and trailing whitespace is trimmed, and the
+# substitutions listed in global list $mappings have been applied with
+# [string map].
+proc mapdoc {doc} {
+  global mappings
+  regsub -all {[[:space:]]+} $doc " " collapsed
+  return [string map $mappings [string trim $collapsed]]
+}
+
+do_test 2.0 {
+ execsql { CREATE VIRTUAL TABLE t2 USING fts5(tokenize=unicode61, x); }
+ foreach doc $docs {
+ set d [mapdoc $doc]
+ execsql { INSERT INTO t2 VALUES($d) }
+ }
+} {}
+
+do_test 2.1 {
+ set q [mapdoc "row"]
+ execsql { SELECT * FROM t2 WHERE t2 MATCH $q }
+} [list [mapdoc {
+ Queries of the form: "SELECT max(x), y FROM table" returns the value of y on
+ the same row that contains the maximum x value.
+}]]
+
+foreach {tn query snippet} {
+ 2 "row" {
+ ...returns the value of y on the same [row] that contains
+ the maximum x value.
+ }
+ 3 "ROW" {
+ ...returns the value of y on the same [row] that contains
+ the maximum x value.
+ }
+ 4 "rollback" {
+ ...[ROLLBACK]. Instead, the pending statement
+ will return SQLITE_ABORT upon next access after the [ROLLBACK].
+ }
+ 5 "rOllback" {
+ ...[ROLLBACK]. Instead, the pending statement
+ will return SQLITE_ABORT upon next access after the [ROLLBACK].
+ }
+ 6 "lang*" {
+ Added support for the FTS4 [languageid] option.
+ }
+} {
+ do_test 2.$tn {
+ set q [mapdoc $query]
+ execsql {
+ SELECT snippet(t2, -1, '[', ']', '...', 15) FROM t2 WHERE t2 MATCH $q
+ }
+ } [list [mapdoc $snippet]]
+}
+
+#-------------------------------------------------------------------------
+# Make sure the unicode61 tokenizer does not crash if it is passed a
+# NULL pointer.
+reset_db
+do_execsql_test 3.1 {
+ CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x, y);
+ INSERT INTO t1 VALUES(NULL, 'a b c');
+}
+
+do_execsql_test 3.2 {
+ SELECT snippet(t1, -1, '[', ']', '...', 15) FROM t1 WHERE t1 MATCH 'b'
+} {{a [b] c}}
+
+do_execsql_test 3.3 {
+ BEGIN;
+ DELETE FROM t1;
+ INSERT INTO t1 VALUES('b b b b b b b b b b b', 'b b b b b b b b b b b b b');
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 SELECT * FROM t1;
+ INSERT INTO t1 VALUES('a b c', NULL);
+ INSERT INTO t1 VALUES('a x c', NULL);
+ COMMIT;
+}
+
+do_execsql_test 3.4 {
+ SELECT * FROM t1 WHERE t1 MATCH 'a b';
+} {{a b c} {}}
+
+#-------------------------------------------------------------------------
+#
+reset_db
+
+# Insert text containing U+FFFE and U+D800 (a surrogate codepoint), both
+# in the middle of a word and at the very start of the value. The only
+# expectation is that the inserts complete and the script returns {}.
+do_test 4.1 {
+ set a "abc\uFFFEdef"
+ set b "abc\uD800def"
+ set c "\uFFFEdef"
+ set d "\uD800def"
+ execsql {
+ CREATE VIRTUAL TABLE t1 USING fts5(tokenize=unicode61, x);
+ INSERT INTO t1 VALUES($a);
+ INSERT INTO t1 VALUES($b);
+ INSERT INTO t1 VALUES($c);
+ INSERT INTO t1 VALUES($d);
+ }
+} {}
+
+# Insert byte strings made of 'a' (0x61), byte 0xF7 followed by three to
+# six 0xBF bytes, then 'b' (0x62).
+# NOTE(review): presumably meant to exercise out-of-range or over-long
+# UTF-8 sequences inside a token - confirm.
+do_test 4.2 {
+ set a [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0x62}]
+ set b [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0x62}]
+ set c [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
+ set d [binary format c* {0x61 0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF 0x62}]
+ execsql {
+ INSERT INTO t1 VALUES($a);
+ INSERT INTO t1 VALUES($b);
+ INSERT INTO t1 VALUES($c);
+ INSERT INTO t1 VALUES($d);
+ }
+} {}
+
+# As 4.2, but the values consist solely of the 0xF7 0xBF... sequences,
+# with no surrounding ASCII letters.
+do_test 4.3 {
+ set a [binary format c* {0xF7 0xBF 0xBF 0xBF}]
+ set b [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF}]
+ set c [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF}]
+ set d [binary format c* {0xF7 0xBF 0xBF 0xBF 0xBF 0xBF 0xBF}]
+ execsql {
+ INSERT INTO t1 VALUES($a);
+ INSERT INTO t1 VALUES($b);
+ INSERT INTO t1 VALUES($c);
+ INSERT INTO t1 VALUES($d);
+ }
+} {}
+
+
+#-------------------------------------------------------------------------
+
+# With an empty "tokenchars" list the underscore is not a token
+# character, so each identifier is split into its component words.
+do_unicode_token_test3 5.1 {tokenchars {}} {
+  sqlite3_reset sqlite3_column_int
+} {
+  0 sqlite3 sqlite3
+  1 reset reset
+  2 sqlite3 sqlite3
+  3 column column
+  4 int int
+}
+
+do_unicode_token_test3 5.2 {tokenchars _} {
+ sqlite3_reset sqlite3_column_int
+} {
+ 0 sqlite3_reset sqlite3_reset
+ 1 sqlite3_column_int sqlite3_column_int
+}
+
+do_unicode_token_test3 5.3 {separators xyz} {
+ Laotianxhorseyrunszfast
+} {
+ 0 laotian Laotian
+ 1 horse horse
+ 2 runs runs
+ 3 fast fast
+}
+
+do_unicode_token_test3 5.4 {tokenchars xyz} {
+ Laotianxhorseyrunszfast
+} {
+ 0 laotianxhorseyrunszfast Laotianxhorseyrunszfast
+}
+
+do_unicode_token_test3 5.5 {tokenchars _} {separators zyx} {
+ sqlite3_resetxsqlite3_column_intyhonda_phantom
+} {
+ 0 sqlite3_reset sqlite3_reset
+ 1 sqlite3_column_int sqlite3_column_int
+ 2 honda_phantom honda_phantom
+}
+
+do_unicode_token_test3 5.6 "separators \u05D1" "abc\u05D1def" {
+ 0 abc abc 1 def def
+}
+
+do_unicode_token_test3 5.7 \
+ "tokenchars \u2444\u2445" \
+ "separators \u05D0\u05D1\u05D2" \
+ "\u2444fre\u2445sh\u05D0water\u05D2fish.\u2445timer" \
+ [list \
+ 0 \u2444fre\u2445sh \u2444fre\u2445sh \
+ 1 water water \
+ 2 fish fish \
+ 3 \u2445timer \u2445timer \
+ ]
+
+# Check that it is not possible to add a standalone diacritic codepoint
+# to either separators or tokenchars.
+do_unicode_token_test3 5.8 "separators \u0301" \
+ "hello\u0301world \u0301helloworld" \
+ "0 helloworld hello\u0301world 1 helloworld helloworld"
+
+do_unicode_token_test3 5.9 "tokenchars \u0301" \
+ "hello\u0301world \u0301helloworld" \
+ "0 helloworld hello\u0301world 1 helloworld helloworld"
+
+do_unicode_token_test3 5.10 "separators \u0301" \
+ "remove_diacritics 0" \
+ "hello\u0301world \u0301helloworld" \
+ "0 hello\u0301world hello\u0301world 1 helloworld helloworld"
+
+do_unicode_token_test3 5.11 "tokenchars \u0301" \
+ "remove_diacritics 0" \
+ "hello\u0301world \u0301helloworld" \
+ "0 hello\u0301world hello\u0301world 1 helloworld helloworld"
+
+
+#-------------------------------------------------------------------------
+
+# Tokenize $txt with tokenizer $tokenizer and return a list holding the
+# second element of each consecutive group of three values produced by
+# sqlite3_fts5_tokenize.
+proc do_tokenize {tokenizer txt} {
+  set tokens {}
+  foreach {first tok third} [sqlite3_fts5_tokenize -subst db $tokenizer $txt] {
+    lappend tokens $tok
+  }
+  return $tokens
+}
+
+# Argument $lCp must be a list of codepoints (integers) that
+# correspond to whitespace characters. This command creates a string
+# $W from the codepoints, then tokenizes "${W}hello${W}world${W}"
+# using tokenizer $tokenizer. The test passes if the tokenizer successfully
+# extracts the two 5 character tokens.
+#
+proc do_isspace_test {tn tokenizer lCp} {
+  # Build the separator string, one character per codepoint in $lCp.
+  set ws ""
+  foreach cp $lCp {
+    append ws [format %c $cp]
+  }
+
+  # Surround and separate the two words with the separator string.
+  set txt "${ws}hello${ws}world${ws}"
+  set script [list do_tokenize $tokenizer $txt]
+  uplevel [list do_test $tn $script {hello world}]
+}
+
+set tokenizers [list unicode61]
+ifcapable icu { lappend tokenizers icu }
+
+# Some tests to check that the tokenizers can both identify white-space
+# codepoints. All codepoints tested below are of type "Zs" in the
+# UnicodeData.txt file.
+foreach T $tokenizers {
+ do_isspace_test 6.$T.1 $T 32
+ do_isspace_test 6.$T.2 $T 160
+ do_isspace_test 6.$T.3 $T 5760
+ do_isspace_test 6.$T.4 $T 6158
+ do_isspace_test 6.$T.5 $T 8192
+ do_isspace_test 6.$T.6 $T 8193
+ do_isspace_test 6.$T.7 $T 8194
+ do_isspace_test 6.$T.8 $T 8195
+ do_isspace_test 6.$T.9 $T 8196
+ do_isspace_test 6.$T.10 $T 8197
+ do_isspace_test 6.$T.11 $T 8198
+ do_isspace_test 6.$T.12 $T 8199
+ do_isspace_test 6.$T.13 $T 8200
+ do_isspace_test 6.$T.14 $T 8201
+ do_isspace_test 6.$T.15 $T 8202
+ do_isspace_test 6.$T.16 $T 8239
+ do_isspace_test 6.$T.17 $T 8287
+ do_isspace_test 6.$T.18 $T 12288
+
+ do_isspace_test 6.$T.19 $T {32 160 5760 6158}
+ do_isspace_test 6.$T.20 $T {8192 8193 8194 8195}
+ do_isspace_test 6.$T.21 $T {8196 8197 8198 8199}
+ do_isspace_test 6.$T.22 $T {8200 8201 8202 8239}
+ do_isspace_test 6.$T.23 $T {8287 12288}
+}
+
+#-------------------------------------------------------------------------
+# Test that the private use ranges are treated as alphanumeric.
+#
+foreach {tn1 c} {
+ 1 \ue000 2 \ue001 3 \uf000 4 \uf8fe 5 \uf8ff
+} {
+ foreach {tn2 config res} {
+ 1 "" "0 hello*world hello*world"
+ 2 "separators *" "0 hello hello 1 world world"
+ } {
+ set config [string map [list * $c] $config]
+ set input [string map [list * $c] "hello*world"]
+ set output [string map [list * $c] $res]
+ do_unicode_token_test3 7.$tn1.$tn2 {*}$config $input $output
+ }
+}
+
+#-------------------------------------------------------------------------
+# Cursory test of remove_diacritics=1 (tests 8.1.*) and
+# remove_diacritics=0 (tests 8.2.*).
+#
+# 00C4;LATIN CAPITAL LETTER A WITH DIAERESIS
+# 00D6;LATIN CAPITAL LETTER O WITH DIAERESIS
+# 00E4;LATIN SMALL LETTER A WITH DIAERESIS
+# 00F6;LATIN SMALL LETTER O WITH DIAERESIS
+#
+do_execsql_test 8.1.1 "
+ CREATE VIRTUAL TABLE t3 USING fts5(
+ content, tokenize='unicode61 remove_diacritics 1'
+ );
+ INSERT INTO t3 VALUES('o');
+ INSERT INTO t3 VALUES('a');
+ INSERT INTO t3 VALUES('O');
+ INSERT INTO t3 VALUES('A');
+ INSERT INTO t3 VALUES('\xD6');
+ INSERT INTO t3 VALUES('\xC4');
+ INSERT INTO t3 VALUES('\xF6');
+ INSERT INTO t3 VALUES('\xE4');
+"
+do_execsql_test 8.1.2 {
+ SELECT rowid FROM t3 WHERE t3 MATCH 'o' ORDER BY rowid ASC;
+} {1 3 5 7}
+do_execsql_test 8.1.3 {
+ SELECT rowid FROM t3 WHERE t3 MATCH 'a' ORDER BY rowid ASC;
+} {2 4 6 8}
+do_execsql_test 8.2.1 {
+ CREATE VIRTUAL TABLE t4 USING fts5(
+ content, tokenize='unicode61 remove_diacritics 0'
+ );
+ INSERT INTO t4 SELECT * FROM t3 ORDER BY rowid ASC;
+}
+do_execsql_test 8.2.2 {
+ SELECT rowid FROM t4 WHERE t4 MATCH 'o' ORDER BY rowid ASC;
+} {1 3}
+do_execsql_test 8.2.3 {
+ SELECT rowid FROM t4 WHERE t4 MATCH 'a' ORDER BY rowid ASC;
+} {2 4}
+
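+#-------------------------------------------------------------------------
+# The tests inside the following "if 0" block (9.*, 10.* and 11.*) are
+# disabled. They still use the fts4, fts4aux and fts3tokenize modules
+# rather than fts5.
+#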
+if 0 {
+foreach {tn sql} {
+ 1 {
+ CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 [tokenchars= .]);
+ CREATE VIRTUAL TABLE t6 USING fts4(
+ tokenize=unicode61 [tokenchars=="] "tokenchars=[]");
+ CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 [separators=x\xC4]);
+ }
+ 2 {
+ CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 "tokenchars= .");
+ CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 "tokenchars=[=""]");
+ CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 "separators=x\xC4");
+ }
+ 3 {
+ CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 'tokenchars= .');
+ CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 'tokenchars=="[]');
+ CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 'separators=x\xC4');
+ }
+ 4 {
+ CREATE VIRTUAL TABLE t5 USING fts4(tokenize=unicode61 `tokenchars= .`);
+ CREATE VIRTUAL TABLE t6 USING fts4(tokenize=unicode61 `tokenchars=[="]`);
+ CREATE VIRTUAL TABLE t7 USING fts4(tokenize=unicode61 `separators=x\xC4`);
+ }
+} {
+ do_execsql_test 9.$tn.0 {
+ DROP TABLE IF EXISTS t5;
+ DROP TABLE IF EXISTS t5aux;
+ DROP TABLE IF EXISTS t6;
+ DROP TABLE IF EXISTS t6aux;
+ DROP TABLE IF EXISTS t7;
+ DROP TABLE IF EXISTS t7aux;
+ }
+ do_execsql_test 9.$tn.1 $sql
+
+ do_execsql_test 9.$tn.2 {
+ CREATE VIRTUAL TABLE t5aux USING fts4aux(t5);
+ INSERT INTO t5 VALUES('one two three/four.five.six');
+ SELECT * FROM t5aux;
+ } {
+ four.five.six * 1 1 four.five.six 0 1 1
+ {one two three} * 1 1 {one two three} 0 1 1
+ }
+
+ do_execsql_test 9.$tn.3 {
+ CREATE VIRTUAL TABLE t6aux USING fts4aux(t6);
+ INSERT INTO t6 VALUES('alpha=beta"gamma/delta[epsilon]zeta');
+ SELECT * FROM t6aux;
+ } {
+ {alpha=beta"gamma} * 1 1 {alpha=beta"gamma} 0 1 1
+ {delta[epsilon]zeta} * 1 1 {delta[epsilon]zeta} 0 1 1
+ }
+
+ do_execsql_test 9.$tn.4 {
+ CREATE VIRTUAL TABLE t7aux USING fts4aux(t7);
+ INSERT INTO t7 VALUES('alephxbeth\xC4gimel');
+ SELECT * FROM t7aux;
+ } {
+ aleph * 1 1 aleph 0 1 1
+ beth * 1 1 beth 0 1 1
+ gimel * 1 1 gimel 0 1 1
+ }
+}
+
+# Check that multiple options are handled correctly.
+#
+do_execsql_test 10.1 {
+ DROP TABLE IF EXISTS t1;
+ CREATE VIRTUAL TABLE t1 USING fts4(tokenize=unicode61
+ "tokenchars=xyz" "tokenchars=.=" "separators=.=" "separators=xy"
+ "separators=a" "separators=a" "tokenchars=a" "tokenchars=a"
+ );
+
+ INSERT INTO t1 VALUES('oneatwoxthreeyfour');
+ INSERT INTO t1 VALUES('a.single=word');
+ CREATE VIRTUAL TABLE t1aux USING fts4aux(t1);
+ SELECT * FROM t1aux;
+} {
+ .single=word * 1 1 .single=word 0 1 1
+ four * 1 1 four 0 1 1
+ one * 1 1 one 0 1 1
+ three * 1 1 three 0 1 1
+ two * 1 1 two 0 1 1
+}
+
+# Test that case folding happens after tokenization, not before.
+#
+do_execsql_test 10.2 {
+ DROP TABLE IF EXISTS t2;
+ CREATE VIRTUAL TABLE t2 USING fts4(tokenize=unicode61 "separators=aB");
+ INSERT INTO t2 VALUES('oneatwoBthree');
+ INSERT INTO t2 VALUES('onebtwoAthree');
+ CREATE VIRTUAL TABLE t2aux USING fts4aux(t2);
+ SELECT * FROM t2aux;
+} {
+ one * 1 1 one 0 1 1
+ onebtwoathree * 1 1 onebtwoathree 0 1 1
+ three * 1 1 three 0 1 1
+ two * 1 1 two 0 1 1
+}
+
+# Test that the tokenchars and separators options work with the
+# fts3tokenize table.
+#
+do_execsql_test 11.1 {
+ CREATE VIRTUAL TABLE ft1 USING fts3tokenize(
+ "unicode61", "tokenchars=@.", "separators=1234567890"
+ );
+ SELECT token FROM ft1 WHERE input = 'berlin@street123sydney.road';
+} {
+ berlin@street sydney.road
+}
+
+}
+
+finish_test
LIBOBJ += fts5_index.o
LIBOBJ += fts5_storage.o
LIBOBJ += fts5_tokenize.o
+LIBOBJ += fts5_unicode2.o
LIBOBJ += fts5parse.o
fts5_tokenize.o: $(TOP)/ext/fts5/fts5_tokenize.c $(HDR) $(EXTHDR)
$(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_tokenize.c
+fts5_unicode2.o: $(TOP)/ext/fts5/fts5_unicode2.c $(HDR) $(EXTHDR)
+ $(TCCX) -DSQLITE_CORE -c $(TOP)/ext/fts5/fts5_unicode2.c
+
fts5parse.c: $(TOP)/ext/fts5/fts5parse.y lemon
cp $(TOP)/ext/fts5/fts5parse.y .
rm -f fts5parse.h
-C Move\sall\sfts5\stest\sfiles\sto\snew\sdirectory\s"ext/fts5/test".
-D 2014-12-29T15:59:36.706
+C Add\sa\sversion\sof\sthe\sunicode61\stokenizer\sto\sfts5.
+D 2015-01-01T16:46:10.851
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in b03432313a3aad96c706f8164fb9f5307eaf19f5
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F ext/fts3/tool/fts3view.c 6cfc5b67a5f0e09c0d698f9fd012c784bfaa9197
F ext/fts3/unicode/CaseFolding.txt 8c678ca52ecc95e16bc7afc2dbf6fc9ffa05db8c
F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
-F ext/fts3/unicode/mkunicode.tcl dc6f268eb526710e2c6e496c372471d773d0c368
+F ext/fts3/unicode/mkunicode.tcl 2fa92b916b17ee0fc94129d36969972d463bc016
F ext/fts5/extract_api_docs.tcl 6320db4a1d0722a4e2069e661381ad75e9889786
F ext/fts5/fts5.c 37e124e24e5860f9842e5f3ee22129a786c0fd74
F ext/fts5/fts5.h 4f9d2c477c0ee1907164642471329a82cb6b203b
F ext/fts5/fts5_hash.c 63fa8379c5f2ac107d47c2b7d9ac04c95ef8a279
F ext/fts5/fts5_index.c 4a8e8535b4303400ddb5f6fb08152da0d88ebf6f
F ext/fts5/fts5_storage.c 13794781977c9a624eb8bd7b9509de241e405853
-F ext/fts5/fts5_tcl.c ce11e46589986b957b89809aabd3936d898d501b
-F ext/fts5/fts5_tokenize.c 5d6e785345b0d87d174fcc0653bfacd0d9fd7f2e
+F ext/fts5/fts5_tcl.c 664e710e2bbeed505cb91848772ca7538623a67f
+F ext/fts5/fts5_tokenize.c 5a0ad46408d09bcda2bf0addb5af42fdb75ebabb
+F ext/fts5/fts5_unicode2.c 9c7dd640d1f014bf5c3ee029759adfbb4d7e95a9
F ext/fts5/fts5parse.y 777da8e5819f75c217982c79c29d014c293acac9
-F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf w test/fts5aa.test
-F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3 w test/fts5ab.test
-F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b w test/fts5ac.test
-F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324 w test/fts5ad.test
-F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175 w test/fts5ae.test
-F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd w test/fts5af.test
-F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997 w test/fts5ag.test
-F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d w test/fts5ah.test
-F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f w test/fts5ai.test
-F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3 w test/fts5aj.test
-F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f w test/fts5ak.test
-F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1 w test/fts5al.test
-F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05 w ext/fts5/fts5auxdata.test
-F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8 w test/fts5ea.test
-F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7 w test/fts5fault1.test
-F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562 w ext/fts5/fts5porter.test
-F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca w ext/fts5/fts5tokenizer.test
+F ext/fts5/test/fts5aa.test 01fff9cf4e75c33871dd121d6adae33b609542cf
+F ext/fts5/test/fts5ab.test 7a58a954cae2ae50cef3ee525c57bc8eb3eb50b3
+F ext/fts5/test/fts5ac.test d3de838f48d2ac8c26386832f6d93a3a3dbb5d4b
+F ext/fts5/test/fts5ad.test a8311d6ce46964fa1686937793dd81d284317324
+F ext/fts5/test/fts5ae.test e576e646013489ce458a5b276caa787035efb175
+F ext/fts5/test/fts5af.test 7e4c679bc6337ddcde6a3c9b9d81c81d2f7e77bd
+F ext/fts5/test/fts5ag.test c79ee7707d120b79869fa2ac1538639b9fa1b997
+F ext/fts5/test/fts5ah.test e510c741e9833d6335c87bef2e7f93fecfcc7c1d
+F ext/fts5/test/fts5ai.test 6a22f43776e1612591392721b535ca28d2c1a19f
+F ext/fts5/test/fts5aj.test 1a64ab4144f54bd12a520683950bf8460dd74fb3
+F ext/fts5/test/fts5ak.test df2669fb76684f03d03918dfb2cf692012251b1f
+F ext/fts5/test/fts5al.test c055f1d682f931b8ea6c6e6251d90925f2aa55a1
+F ext/fts5/test/fts5auxdata.test fec4c9113176d351e567eab65fe9917e5ea0ab05
+F ext/fts5/test/fts5ea.test 0ef2c89e14c6360ad3905fae44409420d6b5a5c8
+F ext/fts5/test/fts5fault1.test b95ed600b88bbbce5390f9097a5a5b7b01b3b9f7
+F ext/fts5/test/fts5porter.test d8f7591b733bcc1f02ca0dd313bc891a4b289562
+F ext/fts5/test/fts5tokenizer.test a1f3128e0d42c93632122c76cbe0d07a901591ca
+F ext/fts5/test/fts5unicode.test b9c7bb982e0ee242a0774e636e1888ca32947a83
+F ext/fts5/test/fts5unicode2.test 7b0d64bbb7bfb7b5080e032e068404b42432ee02
F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43
F ext/icu/icu.c d415ccf984defeb9df2c0e1afcfaa2f6dc05eacb
F ext/icu/sqliteicu.h 728867a802baa5a96de7495e9689a8e01715ef37
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
-F main.mk 863a6f5cdcc3a47a9dcbedc9af37d3c0d4172935
+F main.mk 602303f3596d10237f25da030ee1d96065e2e5a8
F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea
F mkopcodeh.awk c6b3fa301db6ef7ac916b14c60868aeaec1337b5
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P b33fe0dd89f3180c209fa1f9e75d0a7acab12b8e
-R c65f16b94aeceea9cda28cb8f092d4a9
+P 7f148edb30103c5f4fee20cd08e38537f9615bf2
+R d01caf1e8e04bd7c1b6e26fb465c90b6
U dan
-Z 822a98c34fd542b912bf890d737a0e9f
+Z 5c3f4d7bf4502327dfa6eb630b5a26ec
-7f148edb30103c5f4fee20cd08e38537f9615bf2
\ No newline at end of file
+d09f7800cf14f73ea86d037107ef80295b2c173a
\ No newline at end of file