From: drh Date: Thu, 16 Feb 2006 18:16:36 +0000 (+0000) Subject: Or the SQLITE_UTF16_ALIGNED with the encoding field in X-Git-Tag: version-3.6.10~3064 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7d9bd4e19e6508bc0e4cdee2cb8f6adbf3422ccd;p=thirdparty%2Fsqlite.git Or the SQLITE_UTF16_ALIGNED with the encoding field in sqlite3_create_collation and UTF16 strings will always be aligned on an even byte boundary when passed into the comparison function. (CVS 3103) FossilOrigin-Name: 7a1701e8c562087d364dff28cd7cad7ca4cdb5ae --- diff --git a/manifest b/manifest index f8c80598cb..335e08dc86 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\smore\stypos\sin\sthe\sfile\sformat\sdocument.\s(CVS\s3102) -D 2006-02-16T00:32:37 +C Or\sthe\sSQLITE_UTF16_ALIGNED\swith\sthe\sencoding\sfield\sin\nsqlite3_create_collation\sand\sUTF16\sstrings\swill\salways\sbe\saligned\non\san\seven\sbyte\sboundary\swhen\spassed\sinto\sthe\scomparison\sfunction.\s(CVS\s3103) +D 2006-02-16T18:16:37 F Makefile.in 5d8dff443383918b700e495de42ec65bc1c8865b F Makefile.linux-gcc 74ba0eadf88748a9ce3fd03d2a3ede2e6715baec F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -48,7 +48,7 @@ F src/hash.c 449f3d6620193aa557f5d86cbc5cc6b87702b185 F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564 F src/insert.c 67b3dc11831c58d8703eb502355ad3704ee18f66 F src/legacy.c 86b669707b3cefd570e34154e2f6457547d1df4f -F src/main.c 9a42464c44a6532003391486e802e65e88789cfc +F src/main.c f520c65fb1478e1db1c20387c60a4e5765b0d7aa F src/md5.c c5fdfa5c2593eaee2e32a5ce6c6927c986eaf217 F src/os.c 59f05de8c5777c34876607114a2fbe55ae578235 F src/os.h 93035a0e3b9dd05cdd0aaef32ea28ca28e02fe78 @@ -69,11 +69,11 @@ F src/random.c d40f8d356cecbd351ccfab6eaedd7ec1b54f5261 F src/select.c 7d069e875d0eec05129c7e8b9c99422d7c9c6321 F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96 F src/shell.c 738f55ed75fb36731e764bfdb40756ac43b90b08 -F src/sqlite.h.in bc78a247fd9f294b30a4c03894f93fcb1e166410 -F src/sqliteInt.h 
531b714c17cb8c3da3eef51355fa37dacb2eaafd +F src/sqlite.h.in 0bf6f03f9a14dde5f3a4f87471840803acaa4497 +F src/sqliteInt.h b067d282b2073c20ef143295a0fe1a2a1b204698 F src/table.c f64ec4fbfe333f8df925bc6ba494f55e05b0e75e F src/tclsqlite.c d9c26374b52cd47233ae0620d0a858a59b601f89 -F src/test1.c ca8cb34747c53479e0748c11d1a10cc07d582bb8 +F src/test1.c 9d299609a0ce35157fe15cdf8b4c663db5d40386 F src/test2.c ca74a1d8aeb7d9606e8f6b762c5daf85c1a3f92b F src/test3.c 86e99724ee898b119ed575ef9f98618afe7e5e5d F src/test4.c ff4e9406b3d2809966d8f0e82468ac5508be9f56 @@ -95,7 +95,7 @@ F src/vdbeInt.h eb3f86ab08ef11635bc78eb88c3ff13f923c233b F src/vdbeapi.c 7dc662e7c905ce666bb506dced932e0307115cbf F src/vdbeaux.c 95f4ed0bc8ed45f16823d84504310495b5dc587d F src/vdbefifo.c 9efb94c8c3f4c979ebd0028219483f88e57584f5 -F src/vdbemem.c 2034e93b32c14bda6e306bb54e3a8e930b963027 +F src/vdbemem.c 51a810d5a23cd7e93ac631bb4e4e32136d9fbeb7 F src/where.c c7d71d5e55c9c4c1e948089280fb0dec7c7d1ef6 F tclinstaller.tcl 046e3624671962dc50f0481d7c25b38ef803eb42 F test/aggerror.test b854de90f530ae37be68fbfe6de40e111358cbb2 @@ -274,6 +274,7 @@ F test/types3.test ea0ddf793ad5cd17c3b029dd8f48473059f873b6 F test/unique.test 0253c4227a5dc533e312202ce21ecfad18058d18 F test/update.test 7669ca789d62c258b678e8aa7a22a57eac10f2cf F test/utf16.test f9c13f4e2b48c42d0bfc96647d82fdf7bc11fc55 +F test/utf16align.test 7360e84472095518c56746f76b1f9d4dce99fb4d F test/vacuum.test 37f998b841cb335397c26d9bbc3457182af2565f F test/vacuum2.test 5aea8c88a65cb29f7d175296e7c819c6158d838c F test/varint.test ab7b110089a08b9926ed7390e7e97bdefeb74102 @@ -353,7 +354,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl a99cf5f6d8bd4d5537584a2b342f0fb9fa601d8b F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 -P fac0d202e1d9fcc33dc69006a369034ee003e183 -R a8c099a939bc283313912ffbefa2b486 +P d7495be8060fe9e8fa2d2f81e215833d7085888b +R 
032dc5ef4ca76c28ee2e5685cd84b988 U drh -Z 64309bca2b9a9ab73935cca1b1290146 +Z fd3c7b804d30b45bb57ec3ade082098a diff --git a/manifest.uuid b/manifest.uuid index 4048da643e..8f9b543335 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -d7495be8060fe9e8fa2d2f81e215833d7085888b \ No newline at end of file +7a1701e8c562087d364dff28cd7cad7ca4cdb5ae \ No newline at end of file diff --git a/src/main.c b/src/main.c index 42e3200fac..91c8278395 100644 --- a/src/main.c +++ b/src/main.c @@ -14,7 +14,7 @@ ** other files are for internal use by SQLite and should not be ** accessed by users of the library. ** -** $Id: main.c,v 1.334 2006/02/09 13:43:29 danielk1977 Exp $ +** $Id: main.c,v 1.335 2006/02/16 18:16:37 drh Exp $ */ #include "sqliteInt.h" #include "os.h" @@ -743,6 +743,7 @@ static int createCollation( int(*xCompare)(void*,int,const void*,int,const void*) ){ CollSeq *pColl; + int enc2; if( sqlite3SafetyCheck(db) ){ return SQLITE_MISUSE; @@ -752,15 +753,13 @@ static int createCollation( ** to one of SQLITE_UTF16LE or SQLITE_UTF16BE using the ** SQLITE_UTF16NATIVE macro. SQLITE_UTF16 is not used internally. */ - if( enc==SQLITE_UTF16 ){ - enc = SQLITE_UTF16NATIVE; + enc2 = enc & ~SQLITE_UTF16_ALIGNED; + if( enc2==SQLITE_UTF16 ){ + enc2 = SQLITE_UTF16NATIVE; } - if( enc!=SQLITE_UTF8 && enc!=SQLITE_UTF16LE && enc!=SQLITE_UTF16BE ){ - sqlite3Error(db, SQLITE_ERROR, - "Param 3 to sqlite3_create_collation() must be one of " - "SQLITE_UTF8, SQLITE_UTF16, SQLITE_UTF16LE or SQLITE_UTF16BE" - ); + if( (enc2&~3)!=0 ){ + sqlite3Error(db, SQLITE_ERROR, "unknown encoding"); return SQLITE_ERROR; } @@ -768,7 +767,7 @@ static int createCollation( ** sequence. If so, and there are active VMs, return busy. If there ** are no active VMs, invalidate any pre-compiled statements. 
*/ - pColl = sqlite3FindCollSeq(db, (u8)enc, zName, strlen(zName), 0); + pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, strlen(zName), 0); if( pColl && pColl->xCmp ){ if( db->activeVdbeCnt ){ sqlite3Error(db, SQLITE_BUSY, @@ -778,11 +777,11 @@ static int createCollation( sqlite3ExpirePreparedStatements(db); } - pColl = sqlite3FindCollSeq(db, (u8)enc, zName, strlen(zName), 1); + pColl = sqlite3FindCollSeq(db, (u8)enc2, zName, strlen(zName), 1); if( pColl ){ pColl->xCmp = xCompare; pColl->pUser = pCtx; - pColl->enc = enc; + pColl->enc = enc2 | (enc & SQLITE_UTF16_ALIGNED); } sqlite3Error(db, SQLITE_OK, 0); return SQLITE_OK; @@ -1230,4 +1229,3 @@ error_out: return sqlite3ApiExit(db, rc); } #endif - diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 23f1925560..14d7d87101 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -12,7 +12,7 @@ ** This header file defines the interface that the SQLite library ** presents to client programs. ** -** @(#) $Id: sqlite.h.in,v 1.162 2006/02/10 03:06:10 danielk1977 Exp $ +** @(#) $Id: sqlite.h.in,v 1.163 2006/02/16 18:16:37 drh Exp $ */ #ifndef _SQLITE3_H_ #define _SQLITE3_H_ @@ -1114,11 +1114,12 @@ void sqlite3_result_value(sqlite3_context*, sqlite3_value*); ** These are the allowed values for the eTextRep argument to ** sqlite3_create_collation and sqlite3_create_function. 
*/ -#define SQLITE_UTF8 1 -#define SQLITE_UTF16LE 2 -#define SQLITE_UTF16BE 3 -#define SQLITE_UTF16 4 /* Use native byte order */ -#define SQLITE_ANY 5 /* sqlite3_create_function only */ +#define SQLITE_UTF8 1 +#define SQLITE_UTF16LE 2 +#define SQLITE_UTF16BE 3 +#define SQLITE_UTF16 4 /* Use native byte order */ +#define SQLITE_ANY 5 /* sqlite3_create_function only */ +#define SQLITE_UTF16_ALIGNED 8 /* sqlite3_create_collation only */ /* ** These two functions are used to add new collation sequences to the diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 19fb27d4ef..e1b5614496 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -11,7 +11,7 @@ ************************************************************************* ** Internal interface definitions for SQLite. ** -** @(#) $Id: sqliteInt.h,v 1.483 2006/02/15 21:19:01 drh Exp $ +** @(#) $Id: sqliteInt.h,v 1.484 2006/02/16 18:16:37 drh Exp $ */ #ifndef _SQLITEINT_H_ #define _SQLITEINT_H_ @@ -251,12 +251,6 @@ struct BusyHandler { #include "btree.h" #include "pager.h" -/* -** This macro casts a pointer to an integer. Useful for doing -** pointer arithmetic. -*/ -#define Addr(X) ((uptr)X) - #ifdef SQLITE_MEMDEBUG /* ** The following global variables are used for testing and debugging diff --git a/src/test1.c b/src/test1.c index bdc0eb833b..82a42ad70f 100644 --- a/src/test1.c +++ b/src/test1.c @@ -13,7 +13,7 @@ ** is not included in the SQLite library. It is used for automated ** testing of the SQLite library. ** -** $Id: test1.c,v 1.206 2006/02/10 03:06:10 danielk1977 Exp $ +** $Id: test1.c,v 1.207 2006/02/16 18:16:37 drh Exp $ */ #include "sqliteInt.h" #include "tcl.h" @@ -1529,7 +1529,56 @@ bad_args: Tcl_WrongNumArgs(interp, 1, objv, "DB"); return TCL_ERROR; } -#endif /* SQLITE_OMIT_UTF16 */ + +/* +** tclcmd: add_alignment_test_collations DB +** +** Add two new collating sequences to the database DB +** +** utf16_aligned +** utf16_unaligned +** +** Both collating sequences use the same sort order as BINARY. 
+** The only difference is that the utf16_aligned collating +** sequence is declared with the SQLITE_UTF16_ALIGNED flag. +** Both collating functions increment the unaligned utf16 counter +** whenever they see a string that begins on an odd byte boundary. +*/ +static int unaligned_string_counter = 0; +static int alignmentCollFunc( + void *NotUsed, + int nKey1, const void *pKey1, + int nKey2, const void *pKey2 +){ + int rc, n; + n = nKey1<nKey2 ? nKey1 : nKey2; + if( nKey1>0 && 1==(1&(int)pKey1) ) unaligned_string_counter++; + if( nKey2>0 && 1==(1&(int)pKey2) ) unaligned_string_counter++; + rc = memcmp(pKey1, pKey2, n); + if( rc==0 ){ + rc = nKey1 - nKey2; + } + return rc; +} +static int add_alignment_test_collations( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite3 *db; + if( objc>=2 ){ + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR; + sqlite3_create_collation(db, "utf16_unaligned", + SQLITE_UTF16, + 0, alignmentCollFunc); + sqlite3_create_collation(db, "utf16_aligned", + SQLITE_UTF16 | SQLITE_UTF16_ALIGNED, + 0, alignmentCollFunc); + } + return SQLITE_OK; +} +#endif /* !defined(SQLITE_OMIT_UTF16) */ /* ** Usage: add_test_function @@ -3582,6 +3631,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ { "sqlite3_column_text16", test_stmt_utf16, sqlite3_column_text16 }, { "sqlite3_column_decltype16", test_stmt_utf16, sqlite3_column_decltype16}, { "sqlite3_column_name16", test_stmt_utf16, sqlite3_column_name16 }, + { "add_alignment_test_collations", add_alignment_test_collations, 0 }, #ifdef SQLITE_ENABLE_COLUMN_METADATA {"sqlite3_column_database_name16", test_stmt_utf16, sqlite3_column_database_name16}, @@ -3667,6 +3717,8 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ (char*)&sqlite3_os_trace, TCL_LINK_INT); Tcl_LinkVar(interp, "sqlite3_tsd_count", (char*)&sqlite3_tsd_count, TCL_LINK_INT); + Tcl_LinkVar(interp, "unaligned_string_counter", + (char*)&unaligned_string_counter, TCL_LINK_INT); #if OS_UNIX && defined(SQLITE_TEST) && 
defined(THREADSAFE) && THREADSAFE Tcl_LinkVar(interp, "threadsOverrideEachOthersLocks", (char*)&threadsOverrideEachOthersLocks, TCL_LINK_INT); diff --git a/src/vdbemem.c b/src/vdbemem.c index 64416083d1..59d7626a88 100644 --- a/src/vdbemem.c +++ b/src/vdbemem.c @@ -42,6 +42,7 @@ int sqlite3VdbeChangeEncoding(Mem *pMem, int desiredEnc){ return SQLITE_ERROR; #else + /* MemTranslate() may return SQLITE_OK or SQLITE_NOMEM. If NOMEM is returned, ** then the encoding of the value may not have changed. */ @@ -596,19 +597,25 @@ int sqlite3MemCompare(const Mem *pMem1, const Mem *pMem2, const CollSeq *pColl){ if( pColl ){ if( pMem1->enc==pColl->enc ){ + /* The strings are already in the correct encoding. Call the + ** comparison function directly */ return pColl->xCmp(pColl->pUser,pMem1->n,pMem1->z,pMem2->n,pMem2->z); }else{ u8 origEnc = pMem1->enc; - rc = pColl->xCmp( - pColl->pUser, - sqlite3ValueBytes((sqlite3_value*)pMem1, pColl->enc), - sqlite3ValueText((sqlite3_value*)pMem1, pColl->enc), - sqlite3ValueBytes((sqlite3_value*)pMem2, pColl->enc), - sqlite3ValueText((sqlite3_value*)pMem2, pColl->enc) - ); - sqlite3ValueBytes((sqlite3_value*)pMem1, origEnc); + const void *v1, *v2; + int n1, n2; + /* Convert the strings into the encoding that the comparison + ** function expects */ + v1 = sqlite3ValueText((sqlite3_value*)pMem1, pColl->enc); + n1 = v1==0 ? 0 : pMem1->n; + assert( n1==sqlite3ValueBytes((sqlite3_value*)pMem1, pColl->enc) ); + v2 = sqlite3ValueText((sqlite3_value*)pMem2, pColl->enc); + n2 = v2==0 ? 
0 : pMem2->n; + assert( n2==sqlite3ValueBytes((sqlite3_value*)pMem2, pColl->enc) ); + /* Do the comparison */ + rc = pColl->xCmp(pColl->pUser, n1, v1, n2, v2); + /* Convert the strings back into the database encoding */ sqlite3ValueText((sqlite3_value*)pMem1, origEnc); - sqlite3ValueBytes((sqlite3_value*)pMem2, origEnc); sqlite3ValueText((sqlite3_value*)pMem2, origEnc); return rc; } @@ -752,10 +759,14 @@ void sqlite3VdbeMemSanity(Mem *pMem, u8 db_enc){ ** except the data returned is in the encoding specified by the second ** parameter, which must be one of SQLITE_UTF16BE, SQLITE_UTF16LE or ** SQLITE_UTF8. +** +** (2006-02-16:) The enc value can be or-ed with SQLITE_UTF16_ALIGNED. +** If that is the case, then the result must be aligned on an even byte +** boundary. */ const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){ if( !pVal ) return 0; - assert( enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE || enc==SQLITE_UTF8); + assert( (enc&3)==(enc&~SQLITE_UTF16_ALIGNED) ); if( pVal->flags&MEM_Null ){ return 0; @@ -763,12 +774,23 @@ const void *sqlite3ValueText(sqlite3_value* pVal, u8 enc){ assert( (MEM_Blob>>3) == MEM_Str ); pVal->flags |= (pVal->flags & MEM_Blob)>>3; if( pVal->flags&MEM_Str ){ - sqlite3VdbeChangeEncoding(pVal, enc); + sqlite3VdbeChangeEncoding(pVal, enc & ~SQLITE_UTF16_ALIGNED); + if( (enc & SQLITE_UTF16_ALIGNED)!=0 && 1==(1&(int)pVal->z) ){ + assert( (pVal->flags & (MEM_Ephem|MEM_Static))!=0 ); + if( sqlite3VdbeMemMakeWriteable(pVal)!=SQLITE_OK ){ + return 0; + } + } }else if( !(pVal->flags&MEM_Blob) ){ sqlite3VdbeMemStringify(pVal, enc); + assert( 0==(1&(int)pVal->z) ); + } + assert(pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) || sqlite3MallocFailed() ); + if( pVal->enc==(enc & ~SQLITE_UTF16_ALIGNED) ){ + return pVal->z; + }else{ + return 0; } - assert(pVal->enc==enc || sqlite3MallocFailed() ); - return (const void *)(pVal->enc==enc ? 
(pVal->z) : 0); } /* diff --git a/test/utf16align.test b/test/utf16align.test new file mode 100644 index 0000000000..fb41b77422 --- /dev/null +++ b/test/utf16align.test @@ -0,0 +1,84 @@ +# 2006 February 16 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# This file contains code to verify that the SQLITE_UTF16_ALIGNED +# flag passed into the sqlite3_create_collation() function insures +# that all strings passed to that function are aligned on an even +# byte boundary. +# +# $Id: utf16align.test,v 1.1 2006/02/16 18:16:38 drh Exp $ + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +# Skip this entire test if we do not support UTF16 +# +ifcapable !utf16 { + finish_test + return +} + +# Create a database with a UTF16 encoding. Put in lots of string +# data of varying lengths. 
+# +do_test utf16align-1.0 { + set unaligned_string_counter 0 + add_alignment_test_collations [sqlite3_connection_pointer db] + execsql { + PRAGMA encoding=UTF16; + CREATE TABLE t1( + id INTEGER PRIMARY KEY, + spacer TEXT, + a TEXT COLLATE utf16_aligned, + b TEXT COLLATE utf16_unaligned + ); + INSERT INTO t1(a) VALUES("abc"); + INSERT INTO t1(a) VALUES("defghi"); + INSERT INTO t1(a) VALUES("jklmnopqrstuv"); + INSERT INTO t1(a) VALUES("wxyz0123456789-"); + UPDATE t1 SET b=a||'-'||a; + INSERT INTO t1(a,b) SELECT a||b, b||a FROM t1; + INSERT INTO t1(a,b) SELECT a||b, b||a FROM t1; + INSERT INTO t1(a,b) SELECT a||b, b||a FROM t1; + INSERT INTO t1(a,b) VALUES('one','two'); + INSERT INTO t1(a,b) SELECT a, b FROM t1; + UPDATE t1 SET spacer = CASE WHEN rowid&1 THEN 'x' ELSE 'xx' END; + SELECT count(*) FROM t1; + } +} 66 +do_test utf16align-1.1 { + set unaligned_string_counter +} 0 + +# Creating an index that uses the unaligned collation. We should see +# some unaligned strings passed to the collating function. +# +do_test utf16align-1.2 { + execsql { + CREATE INDEX t1i1 ON t1(spacer, b); + } + # puts $unaligned_string_counter + expr {$unaligned_string_counter>0} +} 1 + +# Create another index that uses the aligned collation. This time +# there should be no unaligned accesses +# +do_test utf16align-1.3 { + set unaligned_string_counter 0 + execsql { + CREATE INDEX t1i2 ON t1(spacer, a); + } + expr {$unaligned_string_counter>0} +} 0 +integrity_check utf16align-1.4 + +finish_test