-C Fix\sa\sbad\ssizeof\sin\svdbe.c.\s\sTicket\s#2522.\s(CVS\s4174)
-D 2007-07-22T19:10:21
+C Rework\sthe\sUTF8\sreader\slogic\sin\sorder\sto\savoid\sthe\suse\sof\smalloc().\nTicket\s#2523.\s(CVS\s4175)
+D 2007-07-23T19:12:42
F Makefile.in 0c0e53720f658c7a551046442dd7afba0b72bfbe
F Makefile.linux-gcc 65241babba6faf1152bf86574477baab19190499
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29
F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b
F src/expr.c de9f55b1baed00199466028ad96967208d487798
-F src/func.c 6b45261aa2c514f642201b90493af68469c04af6
+F src/func.c dcba54fc18d2b2fd02f8b7c3dc13e27d100a4d8e
F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5
F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564
F src/insert.c 89d184422d85db0418e0f66032ccea3657078ecd
F src/shell.c e7534cce78398bc1cac4a643e931fc6221c2897e
F src/sqlite.h.in 8164526b1658a6dad472953ea91239849f913d45
F src/sqlite3ext.h a27bedc222df5e5f0f458ac99726d0483b953a91
-F src/sqliteInt.h 81183ae71162818bf60478e738ff68604128bb06
+F src/sqliteInt.h 358f3a29b98e1efdd840a928dec8f60a51e6a33e
F src/sqliteLimit.h f14609c27636ebc217c9603ade26dbdd7d0f6afa
F src/table.c a8de75bcedf84d4060d804264b067ab3b1a3561d
F src/tclsqlite.c 0d3370e01cd3b313ed29ed6b0ba00423b4329de0
F src/tokenize.c 0f0955ef7b8ab99ba2d3099faa89b80ccba3733a
F src/trigger.c 420192efe3e6f03addf7897c60c3c8bf913d3493
F src/update.c 6b10becb6235ea314ed245fbfbf8b38755e3166e
-F src/utf.c 01b2aba02b10d12903e9e1ff897215c9faf6b662
+F src/utf.c c152f99ddccc5e0214a9817aa07ab1b208b43f14
F src/util.c 9e81d417fc60bd2fe156f8f2317aa4845bc6cc90
F src/vacuum.c 8bd895d29e7074e78d4e80f948e35ddc9cf2beef
F src/vdbe.c a58fe70f11078deb16f6825cc99f099d2fad4a7b
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
-P 1924ba5207bdc8d503c17cd9460c1a9f9c357635
-R 6a3d5d19ad9da4a9718db45f3a6f4e18
+P 77ebc3feb089c28155cf20873fb4eabd26fa50c1
+R 4c6f94c5ade866798dc608d64060285b
U drh
-Z f3b0c8bff800cc59d8eb156576c3d0e8
+Z 9a4a3510d0a6e206d28b34d524cb6b1e
-77ebc3feb089c28155cf20873fb4eabd26fa50c1
\ No newline at end of file
+9a059cb6bced5cdc950f7816602ac92d89a899be
\ No newline at end of file
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
-** $Id: func.c,v 1.161 2007/06/22 15:21:16 danielk1977 Exp $
+** $Id: func.c,v 1.162 2007/07/23 19:12:42 drh Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
#include "vdbeInt.h"
#include "os.h"
+
/*
** Return the collating function associated with a function.
*/
** is case sensitive causing 'a' LIKE 'A' to be false */
static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 };
-/*
-** Read a single UTF-8 character and return its value.
-*/
-u32 sqlite3ReadUtf8(const unsigned char *z){
- u32 c;
- SQLITE_READ_UTF8(z, c);
- return c;
-}
-
/*
** Compare two UTF-8 strings for equality where the first string can
** potentially be a "glob" expression. Return true (1) if they
const struct compareInfo *pInfo, /* Information about how to do the compare */
const int esc /* The escape character */
){
- register int c;
+ int c, c2;
int invert;
int seen;
- int c2;
u8 matchOne = pInfo->matchOne;
u8 matchAll = pInfo->matchAll;
u8 matchSet = pInfo->matchSet;
u8 noCase = pInfo->noCase;
int prevEscape = 0; /* True if the previous character was 'escape' */
- while( (c = *zPattern)!=0 ){
+ while( (c = sqlite3Utf8Read(zPattern,0,&zPattern))!=0 ){
if( !prevEscape && c==matchAll ){
- while( (c=zPattern[1]) == matchAll || c == matchOne ){
- if( c==matchOne ){
- if( *zString==0 ) return 0;
- SQLITE_SKIP_UTF8(zString);
+ while( (c=sqlite3Utf8Read(zPattern,0,&zPattern)) == matchAll
+ || c == matchOne ){
+ if( c==matchOne && sqlite3Utf8Read(zString, 0, &zString)==0 ){
+ return 0;
}
- zPattern++;
}
- if( c && esc && sqlite3ReadUtf8(&zPattern[1])==esc ){
- u8 const *zTemp = &zPattern[1];
- SQLITE_SKIP_UTF8(zTemp);
- c = *zTemp;
- }
- if( c==0 ) return 1;
- if( c==matchSet ){
- assert( esc==0 ); /* This is GLOB, not LIKE */
- while( *zString && patternCompare(&zPattern[1],zString,pInfo,esc)==0 ){
+ if( c==0 ){
+ return 1;
+ }else if( c==esc ){
+ c = sqlite3Utf8Read(zPattern, 0, &zPattern);
+ if( c==0 ){
+ return 0;
+ }
+ }else if( c==matchSet ){
+ assert( esc==0 ); /* This is GLOB, not LIKE */
+ assert( matchSet<0x80 ); /* '[' is a single-byte character */
+ while( *zString && patternCompare(&zPattern[-1],zString,pInfo,esc)==0 ){
SQLITE_SKIP_UTF8(zString);
}
return *zString!=0;
- }else{
- while( (c2 = *zString)!=0 ){
- if( noCase ){
- c2 = sqlite3UpperToLower[c2];
- c = sqlite3UpperToLower[c];
- while( c2 != 0 && c2 != c ){ c2 = sqlite3UpperToLower[*++zString]; }
- }else{
- while( c2 != 0 && c2 != c ){ c2 = *++zString; }
+ }
+ while( (c2 = sqlite3Utf8Read(zString,0,&zString))!=0 ){
+ if( noCase ){
+ c2 = c2<0x80 ? sqlite3UpperToLower[c2] : c2;
+ c = c<0x80 ? sqlite3UpperToLower[c] : c;
+ while( c2 != 0 && c2 != c ){
+ c2 = sqlite3Utf8Read(zString, 0, &zString);
+ if( c2<0x80 ) c2 = sqlite3UpperToLower[c2];
+ }
+ }else{
+ while( c2 != 0 && c2 != c ){
+ c2 = sqlite3Utf8Read(zString, 0, &zString);
}
- if( c2==0 ) return 0;
- if( patternCompare(&zPattern[1],zString,pInfo,esc) ) return 1;
- SQLITE_SKIP_UTF8(zString);
}
- return 0;
+ if( c2==0 ) return 0;
+ if( patternCompare(zPattern,zString,pInfo,esc) ) return 1;
}
+ return 0;
}else if( !prevEscape && c==matchOne ){
- if( *zString==0 ) return 0;
- SQLITE_SKIP_UTF8(zString);
- zPattern++;
+ if( sqlite3Utf8Read(zString, 0, &zString)==0 ){
+ return 0;
+ }
}else if( c==matchSet ){
int prior_c = 0;
assert( esc==0 ); /* This only occurs for GLOB, not LIKE */
seen = 0;
invert = 0;
- c = sqlite3ReadUtf8(zString);
+ c = sqlite3Utf8Read(zString, 0, &zString);
if( c==0 ) return 0;
- c2 = *++zPattern;
- if( c2=='^' ){ invert = 1; c2 = *++zPattern; }
+ c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
+ if( c2=='^' ){
+ invert = 1;
+ c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
+ }
if( c2==']' ){
if( c==']' ) seen = 1;
- c2 = *++zPattern;
+ c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
}
- while( (c2 = sqlite3ReadUtf8(zPattern))!=0 && c2!=']' ){
- if( c2=='-' && zPattern[1]!=']' && zPattern[1]!=0 && prior_c>0 ){
- zPattern++;
- c2 = sqlite3ReadUtf8(zPattern);
+ while( c2 && c2!=']' ){
+ if( c2=='-' && zPattern[0]!=']' && zPattern[0]!=0 && prior_c>0 ){
+ c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
if( c>=prior_c && c<=c2 ) seen = 1;
prior_c = 0;
- }else if( c==c2 ){
- seen = 1;
- prior_c = c2;
}else{
+ if( c==c2 ){
+ seen = 1;
+ }
prior_c = c2;
}
- SQLITE_SKIP_UTF8(zPattern);
+ c2 = sqlite3Utf8Read(zPattern, 0, &zPattern);
+ }
+ if( c2==0 || (seen ^ invert)==0 ){
+ return 0;
}
- if( c2==0 || (seen ^ invert)==0 ) return 0;
- SQLITE_SKIP_UTF8(zString);
- zPattern++;
- }else if( esc && !prevEscape && sqlite3ReadUtf8(zPattern)==esc){
+ }else if( esc==c && !prevEscape ){
prevEscape = 1;
- SQLITE_SKIP_UTF8(zPattern);
}else{
+ c2 = sqlite3Utf8Read(zString, 0, &zString);
if( noCase ){
- if( sqlite3UpperToLower[c] != sqlite3UpperToLower[*zString] ) return 0;
- }else{
- if( c != *zString ) return 0;
+ c = c<0x80 ? sqlite3UpperToLower[c] : c;
+ c2 = c2<0x80 ? sqlite3UpperToLower[c2] : c2;
+ }
+ if( c!=c2 ){
+ return 0;
}
- zPattern++;
- zString++;
prevEscape = 0;
}
}
"ESCAPE expression must be a single character", -1);
return;
}
- escape = sqlite3ReadUtf8(zEsc);
+ escape = sqlite3Utf8Read(zEsc, 0, &zEsc);
}
if( zA && zB ){
struct compareInfo *pInfo = sqlite3_user_data(context);
*************************************************************************
** Internal interface definitions for SQLite.
**
-** @(#) $Id: sqliteInt.h,v 1.578 2007/06/26 10:38:55 danielk1977 Exp $
+** @(#) $Id: sqliteInt.h,v 1.579 2007/07/23 19:12:42 drh Exp $
*/
#ifndef _SQLITEINT_H_
#define _SQLITEINT_H_
extern int sqlite3_always_code_trigger_setup;
/*
-** A lookup table used by the SQLITE_READ_UTF8 macro. The definition
-** is in utf.c.
+** Assuming zIn points to the first byte of a UTF-8 character,
+** advance zIn to point to the first byte of the next UTF-8 character.
*/
-extern const unsigned char sqlite3UtfTrans1[];
-
-/*
-** Macros for reading UTF8 characters.
-**
-** SQLITE_READ_UTF8(x,c) reads a single UTF8 value out of x and writes
-** that value into c. The type of x must be unsigned char*. The type
-** of c must be unsigned int.
-**
-** SQLITE_SKIP_UTF8(x) advances x forward by one character. The type of
-** x must be unsigned char*.
-**
-** Notes On Invalid UTF-8:
-**
-** * These macros never allow a 7-bit character (0x00 through 0x7f) to
-** be encoded as a multi-byte character. Any multi-byte character that
-** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
-**
-** * These macros never allow a UTF16 surrogate value to be encoded.
-** If a multi-byte character attempts to encode a value between
-** 0xd800 and 0xe000 then it is rendered as 0xfffd.
-**
-** * Bytes in the range of 0x80 through 0xbf which occur as the first
-** byte of a character are interpreted as single-byte characters
-** and rendered as themselves even though they are technically
-** invalid characters.
-**
-** * These routines accept an infinite number of different UTF8 encodings
-** for unicode values 0x80 and greater. They do not change over-length
-** encodings to 0xfffd as some systems recommend.
-**
-*/
-#define SQLITE_READ_UTF8(zIn, c) { \
- c = *(zIn++); \
- if( c>=0xc0 ){ \
- c = sqlite3UtfTrans1[c-0xc0]; \
- while( (*zIn & 0xc0)==0x80 ){ \
- c = (c<<6) + (0x3f & *(zIn++)); \
- } \
- if( c<0x80 \
- || (c&0xFFFFF800)==0xD800 \
- || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
- } \
-}
#define SQLITE_SKIP_UTF8(zIn) { \
if( (*(zIn++))>=0xc0 ){ \
while( (*zIn & 0xc0)==0x80 ){ zIn++; } \
} \
}
-
-
-
/*
** The SQLITE_CORRUPT_BKPT macro can be either a constant (for production
** builds) or a function call (for debugging). If it is a function call,
int sqlite3FitsIn64Bits(const char *);
int sqlite3Utf16ByteLen(const void *pData, int nChar);
int sqlite3Utf8CharLen(const char *pData, int nByte);
-u32 sqlite3ReadUtf8(const unsigned char *);
+int sqlite3Utf8Read(const u8*, const u8*, const u8**);
int sqlite3PutVarint(unsigned char *, u64);
int sqlite3GetVarint(const unsigned char *, u64 *);
int sqlite3GetVarint32(const unsigned char *, u32 *);
** This file contains routines used to translate between UTF-8,
** UTF-16, UTF-16BE, and UTF-16LE.
**
-** $Id: utf.c,v 1.51 2007/05/23 16:23:09 danielk1977 Exp $
+** $Id: utf.c,v 1.52 2007/07/23 19:12:42 drh Exp $
**
** Notes on UTF-8:
**
0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
};
+
#define WRITE_UTF8(zOut, c) { \
if( c<0x00080 ){ \
*zOut++ = (c&0xFF); \
} \
}
+/*
+** Translate a single UTF-8 character. Return the unicode value.
+**
+** During translation, assume that the byte that zTerm points
+** to is a 0x00. Decoding never consumes the byte at zTerm as a
+** continuation byte. Callers that have no terminator to impose
+** pass 0 for zTerm.
+**
+** Write a pointer to the next unread byte back into *pzNext.
+**
+** If the first byte is less than 0xc0 it is returned unchanged and
+** exactly one byte is consumed.
+**
+** Notes On Invalid UTF-8:
+**
+** * This routine never allows a 7-bit character (0x00 through 0x7f) to
+** be encoded as a multi-byte character. Any multi-byte character that
+** attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
+**
+** * This routine never allows a UTF16 surrogate value to be encoded.
+** If a multi-byte character attempts to encode a value between
+** 0xd800 and 0xe000 then it is rendered as 0xfffd.
+**
+** * Bytes in the range of 0x80 through 0xbf which occur as the first
+** byte of a character are interpreted as single-byte characters
+** and rendered as themselves even though they are technically
+** invalid characters.
+**
+** * This routine accepts an infinite number of different UTF8 encodings
+** for unicode values 0x80 and greater. It does not change over-length
+** encodings to 0xfffd as some systems recommend.
+*/
+int sqlite3Utf8Read(
+ const unsigned char *z, /* First byte of UTF-8 character */
+ const unsigned char *zTerm, /* Pretend this byte is 0x00 */
+ const unsigned char **pzNext /* Write first byte past UTF-8 char here */
+){
+ int c = *(z++); /* Fetch the first byte and step past it */
+ if( c>=0xc0 ){ /* First byte of a multi-byte sequence */
+ c = sqlite3UtfTrans1[c-0xc0]; /* Table lookup; presumably the payload bits of the lead byte - confirm in utf.c */
+ while( z!=zTerm && (*z & 0xc0)==0x80 ){ /* Consume 10xxxxxx bytes, never the one at zTerm */
+ c = (c<<6) + (0x3f & *(z++)); /* Append the low six bits of the continuation byte */
+ }
+ if( c<0x80 /* Multi-byte encoding of a 7-bit value */
+ || (c&0xFFFFF800)==0xD800 /* 0xD800 through 0xDFFF */
+ || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } /* 0xFFFE or 0xFFFF; all three cases become 0xFFFD */
+ }
+ *pzNext = z; /* Report the first byte not consumed */
+ return c;
+}
+
+
+
/*
** If the TRANSLATE_TRACE macro is defined, the value of each Mem is
** printed on stderr on the way into and out of sqlite3VdbeMemTranslate().
z = zOut;
if( pMem->enc==SQLITE_UTF8 ){
- unsigned int iExtra = 0xD800;
-
- if( 0==(pMem->flags&MEM_Term) && zTerm>zIn && (zTerm[-1]&0x80) ){
- /* This UTF8 string is not nul-terminated, and the last byte is
- ** not a character in the ascii range (codpoints 0..127). This
- ** means the SQLITE_READ_UTF8() macro might read past the end
- ** of the allocated buffer.
- **
- ** There are four possibilities:
- **
- ** 1. The last byte is the first byte of a non-ASCII character,
- **
- ** 2. The final N bytes of the input string are continuation bytes
- ** and immediately preceding them is the first byte of a
- ** non-ASCII character.
- **
- ** 3. The final N bytes of the input string are continuation bytes
- ** and immediately preceding them is a byte that encodes a
- ** character in the ASCII range.
- **
- ** 4. The entire string consists of continuation characters.
- **
- ** Cases (3) and (4) require no special handling. The SQLITE_READ_UTF8()
- ** macro will not overread the buffer in these cases.
- */
- unsigned char *zExtra = &zTerm[-1];
- while( zExtra>zIn && (zExtra[0]&0xC0)==0x80 ){
- zExtra--;
- }
-
- if( (zExtra[0]&0xC0)==0xC0 ){
- /* Make a copy of the last character encoding in the input string.
- ** Then make sure it is nul-terminated and use SQLITE_READ_UTF8()
- ** to decode the codepoint. Store the codepoint in variable iExtra,
- ** it will be appended to the output string later.
- */
- unsigned char *zFree = 0;
- unsigned char zBuf[16];
- int nExtra = (pMem->n+zIn-zExtra);
- zTerm = zExtra;
- if( nExtra>15 ){
- zExtra = sqliteMallocRaw(nExtra+1);
- if( !zExtra ){
- return SQLITE_NOMEM;
- }
- zFree = zExtra;
- }else{
- zExtra = zBuf;
- }
- memcpy(zExtra, zTerm, nExtra);
- zExtra[nExtra] = '\0';
- SQLITE_READ_UTF8(zExtra, iExtra);
- sqliteFree(zFree);
- }
- }
-
if( desiredEnc==SQLITE_UTF16LE ){
/* UTF-8 -> UTF-16 Little-endian */
while( zIn<zTerm ){
- SQLITE_READ_UTF8(zIn, c);
+ c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
WRITE_UTF16LE(z, c);
}
- if( iExtra!=0xD800 ){
- WRITE_UTF16LE(z, iExtra);
- }
}else{
assert( desiredEnc==SQLITE_UTF16BE );
/* UTF-8 -> UTF-16 Big-endian */
while( zIn<zTerm ){
- SQLITE_READ_UTF8(zIn, c);
+ c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
WRITE_UTF16BE(z, c);
}
- if( iExtra!=0xD800 ){
- WRITE_UTF16BE(z, iExtra);
- }
}
pMem->n = z - zOut;
*z++ = 0;
int sqlite3Utf8To8(unsigned char *zIn){
unsigned char *zOut = zIn;
unsigned char *zStart = zIn;
- int c;
+ unsigned char *zTerm;
+ u32 c;
- while(1){
- SQLITE_READ_UTF8(zIn, c);
- if( c==0 ) break;
+ while( zIn[0] ){
+ c = sqlite3Utf8Read(zIn, zTerm, (const u8**)&zIn);
if( c!=0xfffd ){
WRITE_UTF8(zOut, c);
}
unsigned int i, t;
unsigned char zBuf[20];
unsigned char *z;
+ unsigned char *zTerm;
int n;
unsigned int c;
WRITE_UTF8(z, i);
n = z-zBuf;
z[0] = 0;
+ zTerm = z;
z = zBuf;
- SQLITE_READ_UTF8(z, c);
+ c = sqlite3Utf8Read(z, zTerm, (const u8**)&z);
t = i;
if( i>=0xD800 && i<=0xDFFF ) t = 0xFFFD;
if( (i&0xFFFFFFFE)==0xFFFE ) t = 0xFFFD;