-C Fix\sa\stest\scase\sthat\sstarted\sfailing\sdue\sto\sa\schange\sin\sthe\shelp\stext\nfor\sthe\s".prompt"\scommand\sof\sthe\sCLI.
-D 2026-04-17T19:12:27.087
+C Performance\simprovement\sin\sthe\ssubstr()\sand\slength()\sSQL\sfunctions.\nCaution:\sThe\snew\scode\smay\sgive\sdifferent\sresults\sfor\smalformed\sUTF8.\nI\sdo\snot\sconsider\sthat\sto\sbe\sa\sproblem\sby\sthe\sGI/GO\sprinciple.
+D 2026-04-18T12:31:53.096
F .fossil-settings/binary-glob 61195414528fb3ea9693577e1980230d78a1f8b0a54c78cf1b9b24d0a409ed6a x
F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
F src/expr.c 68400681c5f6e41231d2c85abf6bb432aeeb2e36c4abdf90eb7b78551a5ce0f3
F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007
F src/fkey.c 931f74cec1dc8038a0217ef340c91ce147dd1bbed08dc40c47ee0ec6edfffb08
-F src/func.c 5d3bff9431e46cc552b189335c39cd23592202f81aae5b786e5c9424a2d2e771
+F src/func.c e6a68dccc9c3c2f638bbf5d29ee84cadd48ded246fbd6c4cc56ddc957e99305f
F src/global.c a19e4b1ca1335f560e9560e590fc13081e21f670643367f99cb9e8f9dc7d615b
F src/hash.c 03c8c0f4be9e8bcb6de65aa26d34a61d48a9430747084a69f9469fbb00ea52ca
F src/hash.h 46b92795a95bfefb210f52f0c316e9d7cdbcdd7e7fcfb0d8be796d3a5767cddf
F src/sqlite.h.in 39d2e09114d2bdb7afd998f4a469c8f8cd065f8093835a7d0422f260fc78fb4f
F src/sqlite3.rc 015537e6ac1eec6c7050e17b616c2ffe6f70fca241835a84a4f0d5937383c479
F src/sqlite3ext.h 9788c301f95370fa30e808861f1d2e6f022a816ddbe2a4f67486784c1b31db2e
-F src/sqliteInt.h bc1cbc0c23dba35b324ae85a7dbb5fb182321bbd30857fb21f3d0cba049001a5
+F src/sqliteInt.h 1e9df4f7f0a754cebbc5e1494ff74b54bf510046b800db1d5382393972f53499
F src/sqliteLimit.h c70656b67ab5b96741a8f1c812bdd80c81f2b1c1e443d0cc3ea8c33bb1f1a092
F src/status.c 7565d63a79aa2f326339a24a0461a60096d0bd2bce711fefb50b5c89335f3592
F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1
F test/backup5.test ee5da6d7fe5082f5b9b0bbfa31d016f52412a2e4
F test/backup_ioerr.test 4c3c7147cee85b024ecf6e150e090c32fdbb5135
F test/backup_malloc.test 0c9abdf74c51e7bedb66d504cd684f28d4bd4027
-F test/badutf.test d5360fc31f643d37a973ab0d8b4fb85799c3169f
+F test/badutf.test cff75b714866a4ffa0cdda252eb8fe8765483f5872c0076223c92d52b4fffd1b
F test/badutf2.test f310fd3b24a491b6b77bccdf14923b85d6ebcce751068c180d93a6b8ff854399
F test/basexx1.test 4ae6ddbd92a7ebcabb5d844664c3e755d29fb69c8ddcf0c8d59bbe4e07c23919
F test/bc_common.tcl c70b896d1d4ce72f769d2c7c1fc15b2cb07559eb2093f2736c8ca51664b29ff5
F tool/warnings.sh a554d13f6e5cf3760f041b87939e3d616ec6961859c3245e8ef701d1eafc2ca2
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
F tool/winmain.c 00c8fb88e365c9017db14c73d3c78af62194d9644feaf60e220ab0f411f3604c
-P 1979aa0902a43f20d4e396c5f9b9a49aaf0094d8520bf53ce058bb379a7720ab
-R 4253f22afa737c63df05010efc0ec8c6
+P bfe9df1bf4660ce6bdfd11a4f06d32694c93750c7bc0a6432459d9f1089eace4
+R de2467f25321d3500e82ce952370fe34
U drh
-Z 15807cd58a18a0d52d82d86842b402cf
+Z 95b69ad4189c911c5e0fd7a3c7f262ca
# Remove this line to create a well-formed Fossil manifest.
-bfe9df1bf4660ce6bdfd11a4f06d32694c93750c7bc0a6432459d9f1089eace4
+6124d27a33f4562f40777c2c6318d61709f7b481f23f9ade45064d8ad0700752
case SQLITE_TEXT: {
const unsigned char *z = sqlite3_value_text(argv[0]);
const unsigned char *z0;
- unsigned char c;
if( z==0 ) return;
z0 = z;
- while( (c = *z)!=0 ){
- z++;
- if( c>=0xc0 ){
- while( (*z & 0xc0)==0x80 ){ z++; z0++; }
+ while( 1 /*exit-by-break*/ ){
+ /* vvvvvv---- See tag-20260418-01 */
+ if( (u8)(z[0]-1)<(0x80-1) ){
+ z++;
+ }else if( z[0]==0 ){
+ break;
+ }else{
+ z++;
+ while( (z[0]&0xc0)==0x80 ){ z++; z0++; }
}
}
sqlite3_result_int(context, (int)(z-z0));
}
assert( p1>=0 && p2>=0 );
if( p0type!=SQLITE_BLOB ){
- while( *z && p1 ){
- SQLITE_SKIP_UTF8(z);
- p1--;
+ for( ; p1>0; p1--){
+ /* vvvvvv---- See tag-20260418-01 */
+ if( (u8)(z[0]-1)<(0x80-1) ){
+ z++;
+ }else if( z[0]==0 ){
+ break;
+ }else{
+ do{ z++; }while( (z[0]&0xc0)==0x80 );
+ }
}
- for(z2=z; *z2 && p2; p2--){
- SQLITE_SKIP_UTF8(z2);
+ for(z2=z; p2>0; p2--){
+ /* vvvvvv---- See tag-20260418-01 */
+ if( (u8)(z2[0]-1)<(0x80-1) ){
+ z2++;
+ }else if( z2[0]==0 ){
+ break;
+ }else{
+ do{ z2++; }while( (z2[0]&0xc0)==0x80 );
+ }
}
sqlite3_result_text64(context, (char*)z, z2-z, SQLITE_TRANSIENT,
SQLITE_UTF8);
/*
** Assuming zIn points to the first byte of a UTF-8 character,
** advance zIn to point to the first byte of the next UTF-8 character.
+**
+** # Dividing malformed UTF-8 into characters (tag-20260418-01)
+**
+** If a text input is malformed UTF-8, SQLite does not make any guarantees
+** about how the bytes are divided up into characters. The system promises
+** to not overflow an array or cause other memory errors when presented
+** with malformed UTF-8. And it promises to preserve the specific
+** sequence of bytes as long as no conversion occurs. But beyond that,
+** there are no guarantees. Results can vary from one version to the
+** next.
+**
+** The SQLITE_SKIP_UTF8 macro below is one technique for dividing UTF-8
+** into characters. The length() and substr() SQL functions use a
+** different technique when searching across multiple characters, a
+** technique that exchanges a subtraction for comparison of z and results
+** in faster machine code on some compilers and architectures. The code
+** in substr() to skip over p1 characters goes something like this:
+**
+** for( ; p1>0; p1--){
+** // vvvv--- tag-20260418-01
+** if( (u8)(z[0]-1)<(0x80-1) ){
+** z++;
+** }else if( z[0]==0 ){
+** break;
+** }else{
+** do{ z++; }while( (z[0]&0xc0)==0x80 );
+** }
+** }
+**
+** In valid UTF-8, multibyte characters always begin with a byte with the
+** two most significant bits set and that is followed by one or more bytes
+** for which the two most significant bits are 10. In other words:
+**
+** First byte: (BYTE & 0xc0)==0xc0
+** Following bytes: (BYTE & 0xc0)==0x80
+**
+** What to do if the input byte sequence contains a "following byte" that
+** is not preceded by a "first byte"? How many characters are in the
+** byte sequence: 0x61 0x81 0x82 0x7a? 3 or 4 or something else?
+**
+** If you use the macro below, the answer will be 4. If you use the code
+** snippet demonstrated at tag-20260418-01, then the answer is 3. If you
+** use a variant of tag-20260418-01 where the constant of comparison is
+** 0xc0-1 instead of 0x80-1 then the answer is again 4. The key point is
+** that because the input is malformed UTF-8, there is no "correct" answer.
+** SQLite is free to use either value.
+**
+** It turns out that GCC 13.3.0 is able to generate faster code (at least
+** on x86-64) if the constant at tag-20260418-01 is (0x80-1). If you make
+** that constant (0xc0-1) instead, gcc 13.3.0 generates code that runs slower.
+** So the (0x80-1) constant is used for substr() and length().
*/
#define SQLITE_SKIP_UTF8(zIn) { \
if( (*(zIn++))>=0xc0 ){ \
} {0 {x 3}}
do_test badutf-3.3 {
sqlite3_exec db {SELECT length('%7f%80%81') AS x}
-} {0 {x 3}}
+} {0 {x 2}}
do_test badutf-3.4 {
sqlite3_exec db {SELECT length('%61%c0') AS x}
} {0 {x 2}}
} {0 {x 1}}
do_test badutf-3.7 {
sqlite3_exec db {SELECT length('%80%80%80%80%80%80%80%80%80%80') AS x}
-} {0 {x 10}}
+} {0 {x 1}}
do_test badutf-3.8 {
sqlite3_exec db {SELECT length('%80%80%80%80%80%f0%80%80%80%80') AS x}
-} {0 {x 6}}
+} {0 {x 2}}
do_test badutf-3.9 {
sqlite3_exec db {SELECT length('%80%80%80%80%80%f0%80%80%80%ff') AS x}
-} {0 {x 7}}
+} {0 {x 3}}
do_test badutf-4.1 {
sqlite3_exec db {SELECT hex(trim('%80%80%80%f0%80%80%80%ff','%80%ff')) AS x}