From: drh <> Date: Sat, 24 Apr 2021 12:24:08 +0000 (+0000) Subject: Treat byte-order marks (BOMs) at the start of a token as whitespace. X-Git-Tag: version-3.36.0~151 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=ba9ebc2d127528d5f2c9ed97f2c986ec358f8f38;p=thirdparty%2Fsqlite.git Treat byte-order marks (BOMs) at the start of a token as whitespace. This enhancement is inspired by [forum:/forumpost/ed8f696a20|forum post ed8f696a20]. FossilOrigin-Name: 3d55c21c167631f42d155aadec544e629bd078de9992aa5a74694d08bc52052b --- diff --git a/manifest b/manifest index 4502b7f8e4..dda2d26af2 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\stestcase\saltertab-25.1\sdue\sto\scheck-in\s[c7909e8e0d0577c6]\sdisallowing\nALTER\sTABLE\son\seponymous\svirtual\stables. -D 2021-04-24T12:20:10.489 +C Treat\sbyte-order\smarks\s(BOMs)\sat\sthe\sstart\sof\sa\stoken\sas\swhitespace.\nThis\senhancement\sis\sinspired\sby\n[forum:/forumpost/ed8f696a20|forum\spost\sed8f696a20]. +D 2021-04-24T12:24:08.900 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md df5091916dbb40e6e9686186587125e1b2ff51f022cc334e886c19a0e9982724 @@ -607,7 +607,7 @@ F src/test_windirent.h 90dfbe95442c9762357fe128dc7ae3dc199d006de93eb33ba3972e0a9 F src/test_window.c cdae419fdcea5bad6dcd9368c685abdad6deb59e9fc8b84b153de513d394ba3f F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/threads.c 4ae07fa022a3dc7c5beb373cf744a85d3c5c6c3c -F src/tokenize.c 0b9c82fa628b5adce93e2bcaf935a24d43eb83344fb51551f7835526d0693fc4 +F src/tokenize.c bae853ad129d1129c063de8630a3e99e306283bc40146f359b1bb91be2c08f1e F src/treeview.c e483aeedf6f207000db1f90eb6abd816350493314c30e8749d319bdb9ab3b08c F src/trigger.c f8493674f5c8f103c1a2cd0616af9dca85c7058450f9fe47cacd15cf5d512d52 F src/update.c b3abdaf4a314bbed238da69a6ca54c0f21262119389b412ee5778fffe62dd3cc @@ -1914,7 +1914,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P e7b4ffecc610c494ebd506977402ec48cc799780f96c6293c0ccf27697160aa1 -R cf484e06e363477fe958c24db002cbc0 +P 32255e39cbde65492d88177464cee9e10cb20cf3105208416be131e2c89b63e1 +R 1d0e571ef291122bdf317bcd3d07015b U drh -Z bae3a8673bc5b7dab6af1040deb31bfe +Z 9d760c4ee05493d61d6146a0ceb36fa0 diff --git a/manifest.uuid b/manifest.uuid index 03fe5a0afb..5733a0c591 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -32255e39cbde65492d88177464cee9e10cb20cf3105208416be131e2c89b63e1 \ No newline at end of file +3d55c21c167631f42d155aadec544e629bd078de9992aa5a74694d08bc52052b \ No newline at end of file diff --git a/src/tokenize.c b/src/tokenize.c index 5e01de2b90..5d250e6f6a 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -56,6 +56,7 @@ #define CC_ID 27 /* unicode characters usable in IDs */ #define CC_ILLEGAL 28 /* Illegal character */ #define CC_NUL 29 /* 0x00 */ +#define CC_BOM 30 /* First byte of UTF8 BOM: 0xEF 0xBB 0xBF */ static const unsigned char aiClass[] = { #ifdef SQLITE_ASCII @@ -68,14 +69,14 @@ static const unsigned char aiClass[] = { /* 5x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 9, 28, 28, 28, 2, /* 6x */ 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 7x */ 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 28, 10, 28, 25, 28, -/* 8x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* 9x */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* Ax */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* Bx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* Cx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* Dx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* Ex */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -/* Fx */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +/* 8x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* 9x */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Ax */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Bx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Cx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Dx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +/* Ex */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 30, +/* Fx */ 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27 #endif #ifdef SQLITE_EBCDIC /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xa xb xc xd xe xf */ @@ -535,6 +536,14 @@ int sqlite3GetToken(const unsigned char *z, int *tokenType){ i = 1; break; } + case CC_BOM: { + if( z[1]==0xbb && z[2]==0xbf ){ + *tokenType = TK_SPACE; + return 3; + } + i = 1; + break; + } case CC_NUL: { *tokenType = TK_ILLEGAL; return 0;