From 2090a0ed48ee80b2bba745a59b6fab9b90b93c8b Mon Sep 17 00:00:00 2001 From: drh Date: Thu, 7 Oct 2004 19:03:01 +0000 Subject: [PATCH] Optimizations to the tokenizer. (CVS 2011) FossilOrigin-Name: e5540ce047e0215904005bc9df4ff0d1d0a3c1d1 --- manifest | 15 ++- manifest.uuid | 2 +- src/tokenize.c | 240 +++++++++++++-------------------- tool/mkkeywordhash.c | 312 +++++++++++++++++++++++++++++++++++++++++++ www/index.tcl | 7 +- 5 files changed, 416 insertions(+), 160 deletions(-) create mode 100644 tool/mkkeywordhash.c diff --git a/manifest b/manifest index b253cc1776..4b51af2309 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Additional\sparser\soptimizations.\s(CVS\s2010) -D 2004-10-07T03:06:29 +C Optimizations\sto\sthe\stokenizer.\s(CVS\s2011) +D 2004-10-07T19:03:01 F Makefile.in 52c1cc106cad9148d4b7cb387b458e82dc86b339 F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd @@ -70,7 +70,7 @@ F src/test2.c b11fa244fff02190707dd0879987c37c75e61fc8 F src/test3.c 5b5b0f3d11b097399c1054fff73d8f3711092301 F src/test4.c 7c6b9fc33dd1f3f93c7f1ee6e5e6d016afa6c1df F src/test5.c b001fa7f1b9e2dc5c2331de62fc641b5ab2bd7a1 -F src/tokenize.c 449843c85c8118b9718f3935f4b5d18586c8cee2 +F src/tokenize.c a235e9a38ccf175f7badccdec68f58d79cb5abe5 F src/trigger.c b51a120d53e8b85359be11bf5e50854e5725fe3d F src/update.c 174c3b593b8f4928e510a51ec309e8ce69d2371c F src/utf.c f4f83acd73389090e32d6589d307fc55d794c7ed @@ -201,6 +201,7 @@ F tool/lempar.c 1e61d2b6cb9d8affa264a13336bc0c088498caa4 F tool/memleak.awk b744b6109566206c746d826f6ecdba34662216bc F tool/memleak2.awk 9cc20c8e8f3c675efac71ea0721ee6874a1566e8 F tool/memleak3.tcl 336eb50b0849dbf99b1d5462d9c37291b01b2b43 +F tool/mkkeywordhash.c 0cfab6691def993d5b2124398d25129b12b6b5fd F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e x F tool/opcodeDoc.awk b3a2a3d5d3075b8bd90b7afe24283efdd586659c F tool/report1.txt 9eae07f26a8fc53889b45fc833a66a33daa22816 @@ -235,7 +236,7 @@ 
F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c F www/faq.tcl 8cf9f59fd93868c9954223a99db244c9975fa43b F www/fileformat.tcl f71a06a0d533c7df408539c64113b4adeaf29764 F www/formatchng.tcl d1dfecedfb25e122ab513a1e0948b15cb4f0be46 -F www/index.tcl 3bf50fdac2f5df49cf4f6f76a7f312b5fd4725b8 +F www/index.tcl ae29ea8a5bdde5a8031a9ac0935ac16dd921b4ef F www/lang.tcl dde78c1415ee2d19fc30360808ca6f0e2ea71c30 F www/lockingv3.tcl afcd22f0f063989cff2f4d57bbc38d719b4c6e75 F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c @@ -251,7 +252,7 @@ F www/tclsqlite.tcl 560ecd6a916b320e59f2917317398f3d59b7cc25 F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9 F www/version3.tcl 092a01f5ef430d2c4acc0ae558d74c4bb89638a0 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4 -P 1376a0bb8d864de755c614b2ecce4342155fd09b -R 7c281d56a2bfc9c05afabc9a79a1f3b5 +P 618dee121e41d8e5c9b4d5c167475b11ddcda2d7 +R c6f2c8afd2a59eadba90462f2b06f8bc U drh -Z fe725e970bb1b0deb3b69c8bf4e69648 +Z bc3b2c6d615a585d4fad51811788aa10 diff --git a/manifest.uuid b/manifest.uuid index ae194f45ea..873688573d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -618dee121e41d8e5c9b4d5c167475b11ddcda2d7 \ No newline at end of file +e5540ce047e0215904005bc9df4ff0d1d0a3c1d1 \ No newline at end of file diff --git a/src/tokenize.c b/src/tokenize.c index 6b3a5f713f..c333699b6e 100644 --- a/src/tokenize.c +++ b/src/tokenize.c @@ -15,173 +15,115 @@ ** individual tokens and sends those tokens one-by-one over to the ** parser for analysis. ** -** $Id: tokenize.c,v 1.90 2004/10/05 02:41:43 drh Exp $ +** $Id: tokenize.c,v 1.91 2004/10/07 19:03:01 drh Exp $ */ #include "sqliteInt.h" #include "os.h" #include #include -/* -** All the keywords of the SQL language are stored as in a hash -** table composed of instances of the following structure. 
-*/ -typedef struct Keyword Keyword; -struct Keyword { - char *zName; /* The keyword name */ - u8 tokenType; /* Token value for this keyword */ - u8 len; /* Length of this keyword */ - u8 iNext; /* Index in aKeywordTable[] of next with same hash */ -}; - -/* -** These are the keywords -*/ -static Keyword aKeywordTable[] = { - { "ABORT", TK_ABORT, }, - { "AFTER", TK_AFTER, }, - { "ALL", TK_ALL, }, - { "AND", TK_AND, }, - { "AS", TK_AS, }, - { "ASC", TK_ASC, }, - { "ATTACH", TK_ATTACH, }, - { "BEFORE", TK_BEFORE, }, - { "BEGIN", TK_BEGIN, }, - { "BETWEEN", TK_BETWEEN, }, - { "BY", TK_BY, }, - { "CASCADE", TK_CASCADE, }, - { "CASE", TK_CASE, }, - { "CHECK", TK_CHECK, }, - { "COLLATE", TK_COLLATE, }, - { "COMMIT", TK_COMMIT, }, - { "CONFLICT", TK_CONFLICT, }, - { "CONSTRAINT", TK_CONSTRAINT, }, - { "CREATE", TK_CREATE, }, - { "CROSS", TK_JOIN_KW, }, - { "DATABASE", TK_DATABASE, }, - { "DEFAULT", TK_DEFAULT, }, - { "DEFERRED", TK_DEFERRED, }, - { "DEFERRABLE", TK_DEFERRABLE, }, - { "DELETE", TK_DELETE, }, - { "DESC", TK_DESC, }, - { "DETACH", TK_DETACH, }, - { "DISTINCT", TK_DISTINCT, }, - { "DROP", TK_DROP, }, - { "END", TK_END, }, - { "EACH", TK_EACH, }, - { "ELSE", TK_ELSE, }, - { "EXCEPT", TK_EXCEPT, }, - { "EXCLUSIVE", TK_EXCLUSIVE, }, - { "EXPLAIN", TK_EXPLAIN, }, - { "FAIL", TK_FAIL, }, - { "FOR", TK_FOR, }, - { "FOREIGN", TK_FOREIGN, }, - { "FROM", TK_FROM, }, - { "FULL", TK_JOIN_KW, }, - { "GLOB", TK_GLOB, }, - { "GROUP", TK_GROUP, }, - { "HAVING", TK_HAVING, }, - { "IGNORE", TK_IGNORE, }, - { "IMMEDIATE", TK_IMMEDIATE, }, - { "IN", TK_IN, }, - { "INDEX", TK_INDEX, }, - { "INITIALLY", TK_INITIALLY, }, - { "INNER", TK_JOIN_KW, }, - { "INSERT", TK_INSERT, }, - { "INSTEAD", TK_INSTEAD, }, - { "INTERSECT", TK_INTERSECT, }, - { "INTO", TK_INTO, }, - { "IS", TK_IS, }, - { "ISNULL", TK_ISNULL, }, - { "JOIN", TK_JOIN, }, - { "KEY", TK_KEY, }, - { "LEFT", TK_JOIN_KW, }, - { "LIKE", TK_LIKE, }, - { "LIMIT", TK_LIMIT, }, - { "MATCH", TK_MATCH, }, - { "NATURAL", 
TK_JOIN_KW, }, - { "NOT", TK_NOT, }, - { "NOTNULL", TK_NOTNULL, }, - { "NULL", TK_NULL, }, - { "OF", TK_OF, }, - { "OFFSET", TK_OFFSET, }, - { "ON", TK_ON, }, - { "OR", TK_OR, }, - { "ORDER", TK_ORDER, }, - { "OUTER", TK_JOIN_KW, }, - { "PRAGMA", TK_PRAGMA, }, - { "PRIMARY", TK_PRIMARY, }, - { "RAISE", TK_RAISE, }, - { "REFERENCES", TK_REFERENCES, }, - { "REPLACE", TK_REPLACE, }, - { "RESTRICT", TK_RESTRICT, }, - { "RIGHT", TK_JOIN_KW, }, - { "ROLLBACK", TK_ROLLBACK, }, - { "ROW", TK_ROW, }, - { "SELECT", TK_SELECT, }, - { "SET", TK_SET, }, - { "STATEMENT", TK_STATEMENT, }, - { "TABLE", TK_TABLE, }, - { "TEMP", TK_TEMP, }, - { "TEMPORARY", TK_TEMP, }, - { "THEN", TK_THEN, }, - { "TRANSACTION", TK_TRANSACTION, }, - { "TRIGGER", TK_TRIGGER, }, - { "UNION", TK_UNION, }, - { "UNIQUE", TK_UNIQUE, }, - { "UPDATE", TK_UPDATE, }, - { "USING", TK_USING, }, - { "VACUUM", TK_VACUUM, }, - { "VALUES", TK_VALUES, }, - { "VIEW", TK_VIEW, }, - { "WHEN", TK_WHEN, }, - { "WHERE", TK_WHERE, }, -}; - -/* -** This is the hash table -*/ -#define KEY_HASH_SIZE 101 -static u8 aiHashTable[KEY_HASH_SIZE]; - - /* ** This function looks up an identifier to determine if it is a ** keyword. If it is a keyword, the token code of that keyword is ** returned. If the input is not a keyword, TK_ID is returned. +** +** The implementation of this routine was generated by a program, +** mkkeywordhash.c, located in the tool subdirectory of the distribution. +** The output of the mkkeywordhash.c program was manually cut and pasted +** into this file. When the set of keywords for SQLite changes, you +** must modify the mkkeywordhash.c program (to add or remove keywords from +** the data tables) then rerun that program to regenerate this function. 
*/ int sqlite3KeywordCode(const char *z, int n){ + static const char zText[519] = + "ABORTAFTERALLANDASCATTACHBEFOREBEGINBETWEENBYCASCADECASECHECK" + "COLLATECOMMITCONFLICTCONSTRAINTCREATECROSSDATABASEDEFAULTDEFERRABLE" + "DEFERREDDELETEDESCDETACHDISTINCTDROPEACHELSEENDEXCEPTEXCLUSIVE" + "EXPLAINFAILFOREIGNFROMFULLGLOBGROUPHAVINGIGNOREIMMEDIATEINDEX" + "INITIALLYINNERINSERTINSTEADINTERSECTINTOISNULLJOINKEYLEFTLIKE" + "LIMITMATCHNATURALNOTNULLNULLOFFSETONORDEROUTERPRAGMAPRIMARYRAISE" + "REFERENCESREPLACERESTRICTRIGHTROLLBACKROWSELECTSETSTATEMENTTABLE" + "TEMPORARYTHENTRANSACTIONTRIGGERUNIONUNIQUEUPDATEUSINGVACUUMVALUES" + "VIEWWHENWHERE"; + static const unsigned char aHash[154] = { + 0, 75, 82, 0, 0, 97, 80, 0, 83, 0, 0, 0, 0, + 0, 0, 6, 0, 95, 4, 0, 0, 0, 0, 0, 0, 0, + 0, 96, 86, 8, 0, 26, 13, 7, 19, 15, 0, 0, 32, + 25, 0, 21, 31, 41, 0, 0, 0, 34, 27, 0, 0, 30, + 0, 0, 0, 9, 0, 10, 0, 0, 0, 0, 51, 0, 44, + 43, 0, 45, 40, 0, 29, 39, 35, 0, 0, 20, 0, 59, + 0, 16, 0, 17, 0, 18, 0, 55, 42, 72, 0, 33, 0, + 0, 61, 66, 56, 0, 0, 0, 0, 0, 0, 0, 54, 0, + 0, 0, 0, 0, 74, 50, 76, 64, 52, 0, 0, 0, 0, + 68, 84, 0, 47, 0, 58, 60, 92, 0, 0, 48, 0, 93, + 0, 63, 71, 98, 0, 0, 0, 0, 0, 67, 0, 0, 0, + 0, 87, 0, 0, 0, 0, 0, 90, 88, 0, 94, + }; + static const unsigned char aNext[98] = { + 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 12, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, + 0, 0, 0, 14, 3, 24, 0, 0, 0, 1, 22, 0, 0, + 36, 23, 28, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, + 0, 49, 37, 0, 0, 0, 38, 0, 53, 0, 57, 62, 0, + 0, 0, 0, 0, 0, 70, 46, 0, 65, 0, 0, 0, 0, + 69, 73, 0, 77, 0, 0, 0, 0, 0, 0, 81, 85, 0, + 91, 79, 78, 0, 0, 89, 0, + }; + static const unsigned char aLen[98] = { + 5, 5, 3, 3, 2, 3, 6, 6, 5, 7, 2, 7, 4, + 5, 7, 6, 8, 10, 6, 5, 8, 7, 10, 8, 6, 4, + 6, 8, 4, 4, 4, 3, 6, 9, 7, 4, 3, 7, 4, + 4, 4, 5, 6, 6, 9, 2, 5, 9, 5, 6, 7, 9, + 4, 2, 6, 4, 3, 4, 4, 5, 5, 7, 3, 7, 4, + 2, 6, 2, 2, 5, 5, 6, 7, 5, 10, 7, 8, 5, + 8, 3, 6, 3, 9, 5, 4, 9, 4, 11, 7, 5, 6, + 6, 5, 6, 6, 4, 4, 5, + }; + static 
const unsigned short int aOffset[98] = { + 0, 5, 10, 13, 16, 16, 19, 25, 31, 36, 43, 45, 52, + 56, 61, 68, 74, 82, 92, 98, 103, 111, 118, 128, 136, 142, + 146, 152, 160, 164, 168, 172, 175, 181, 190, 197, 201, 201, 208, + 212, 216, 220, 225, 231, 237, 246, 246, 251, 260, 265, 271, 278, + 287, 291, 291, 297, 301, 304, 308, 312, 317, 322, 329, 329, 336, + 340, 340, 346, 348, 348, 353, 358, 364, 371, 376, 386, 393, 401, + 406, 414, 417, 423, 426, 435, 440, 440, 449, 453, 464, 471, 476, + 482, 488, 493, 499, 505, 509, 513, + }; + static const unsigned char aCode[98] = { + TK_ABORT, TK_AFTER, TK_ALL, TK_AND, TK_AS, + TK_ASC, TK_ATTACH, TK_BEFORE, TK_BEGIN, TK_BETWEEN, + TK_BY, TK_CASCADE, TK_CASE, TK_CHECK, TK_COLLATE, + TK_COMMIT, TK_CONFLICT, TK_CONSTRAINT, TK_CREATE, TK_JOIN_KW, + TK_DATABASE, TK_DEFAULT, TK_DEFERRABLE, TK_DEFERRED, TK_DELETE, + TK_DESC, TK_DETACH, TK_DISTINCT, TK_DROP, TK_EACH, + TK_ELSE, TK_END, TK_EXCEPT, TK_EXCLUSIVE, TK_EXPLAIN, + TK_FAIL, TK_FOR, TK_FOREIGN, TK_FROM, TK_JOIN_KW, + TK_GLOB, TK_GROUP, TK_HAVING, TK_IGNORE, TK_IMMEDIATE, + TK_IN, TK_INDEX, TK_INITIALLY, TK_JOIN_KW, TK_INSERT, + TK_INSTEAD, TK_INTERSECT, TK_INTO, TK_IS, TK_ISNULL, + TK_JOIN, TK_KEY, TK_JOIN_KW, TK_LIKE, TK_LIMIT, + TK_MATCH, TK_JOIN_KW, TK_NOT, TK_NOTNULL, TK_NULL, + TK_OF, TK_OFFSET, TK_ON, TK_OR, TK_ORDER, + TK_JOIN_KW, TK_PRAGMA, TK_PRIMARY, TK_RAISE, TK_REFERENCES, + TK_REPLACE, TK_RESTRICT, TK_JOIN_KW, TK_ROLLBACK, TK_ROW, + TK_SELECT, TK_SET, TK_STATEMENT, TK_TABLE, TK_TEMP, + TK_TEMP, TK_THEN, TK_TRANSACTION,TK_TRIGGER, TK_UNION, + TK_UNIQUE, TK_UPDATE, TK_USING, TK_VACUUM, TK_VALUES, + TK_VIEW, TK_WHEN, TK_WHERE, + }; int h, i; - Keyword *p; - static char needInit = 1; - if( needInit ){ - /* Initialize the keyword hash table */ - sqlite3OsEnterMutex(); - if( needInit ){ - int nk; - nk = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]); - for(i=0, p=aKeywordTable; izName; - int len = p->len = strlen(zName); - h = sqlite3HashNoCase(zName, len) % KEY_HASH_SIZE; 
- p->iNext = aiHashTable[h]; - aiHashTable[h] = i+1; - } - needInit = 0; - } - sqlite3OsLeaveMutex(); - } - h = sqlite3HashNoCase(z, n) % KEY_HASH_SIZE; - for(i=aiHashTable[h]; i; i=p->iNext){ - p = &aKeywordTable[i-1]; - if( p->len==n && sqlite3StrNICmp(p->zName, z, n)==0 ){ - return p->tokenType; + if( n<2 ) return TK_ID; + h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + + sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 + + n) % 154; + for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){ + if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){ + return aCode[i]; } } return TK_ID; } - /* ** If X is a character that can be used in an identifier and ** X&0x80==0 then isIdChar[X] will be 1. If X&0x80==0x80 then diff --git a/tool/mkkeywordhash.c b/tool/mkkeywordhash.c new file mode 100644 index 0000000000..1e51d0ebb2 --- /dev/null +++ b/tool/mkkeywordhash.c @@ -0,0 +1,312 @@ +/* +** Compile and run this standalone program in order to generate code that +** implements a function that will translate alphabetic identifiers into +** parser token codes. +*/ +#include +#include +#include + +/* +** All the keywords of the SQL language are stored as in a hash +** table composed of instances of the following structure. 
+*/ +typedef struct Keyword Keyword; +struct Keyword { + char *zName; /* The keyword name */ + char *zTokenType; /* Token value for this keyword */ + int hash; /* Hash on the keyword */ + int offset; /* Offset to start of name string */ + int len; /* Length of this keyword, not counting final \000 */ + int iNext; /* Index in aKeywordTable[] of next with same hash */ +}; + +/* +** These are the keywords +*/ +static Keyword aKeywordTable[] = { + { "ABORT", "TK_ABORT", }, + { "AFTER", "TK_AFTER", }, + { "ALL", "TK_ALL", }, + { "AND", "TK_AND", }, + { "AS", "TK_AS", }, + { "ASC", "TK_ASC", }, + { "ATTACH", "TK_ATTACH", }, + { "BEFORE", "TK_BEFORE", }, + { "BEGIN", "TK_BEGIN", }, + { "BETWEEN", "TK_BETWEEN", }, + { "BY", "TK_BY", }, + { "CASCADE", "TK_CASCADE", }, + { "CASE", "TK_CASE", }, + { "CHECK", "TK_CHECK", }, + { "COLLATE", "TK_COLLATE", }, + { "COMMIT", "TK_COMMIT", }, + { "CONFLICT", "TK_CONFLICT", }, + { "CONSTRAINT", "TK_CONSTRAINT", }, + { "CREATE", "TK_CREATE", }, + { "CROSS", "TK_JOIN_KW", }, + { "DATABASE", "TK_DATABASE", }, + { "DEFAULT", "TK_DEFAULT", }, + { "DEFERRED", "TK_DEFERRED", }, + { "DEFERRABLE", "TK_DEFERRABLE", }, + { "DELETE", "TK_DELETE", }, + { "DESC", "TK_DESC", }, + { "DETACH", "TK_DETACH", }, + { "DISTINCT", "TK_DISTINCT", }, + { "DROP", "TK_DROP", }, + { "END", "TK_END", }, + { "EACH", "TK_EACH", }, + { "ELSE", "TK_ELSE", }, + { "EXCEPT", "TK_EXCEPT", }, + { "EXCLUSIVE", "TK_EXCLUSIVE", }, + { "EXPLAIN", "TK_EXPLAIN", }, + { "FAIL", "TK_FAIL", }, + { "FOR", "TK_FOR", }, + { "FOREIGN", "TK_FOREIGN", }, + { "FROM", "TK_FROM", }, + { "FULL", "TK_JOIN_KW", }, + { "GLOB", "TK_GLOB", }, + { "GROUP", "TK_GROUP", }, + { "HAVING", "TK_HAVING", }, + { "IGNORE", "TK_IGNORE", }, + { "IMMEDIATE", "TK_IMMEDIATE", }, + { "IN", "TK_IN", }, + { "INDEX", "TK_INDEX", }, + { "INITIALLY", "TK_INITIALLY", }, + { "INNER", "TK_JOIN_KW", }, + { "INSERT", "TK_INSERT", }, + { "INSTEAD", "TK_INSTEAD", }, + { "INTERSECT", "TK_INTERSECT", }, + { "INTO", "TK_INTO", 
}, + { "IS", "TK_IS", }, + { "ISNULL", "TK_ISNULL", }, + { "JOIN", "TK_JOIN", }, + { "KEY", "TK_KEY", }, + { "LEFT", "TK_JOIN_KW", }, + { "LIKE", "TK_LIKE", }, + { "LIMIT", "TK_LIMIT", }, + { "MATCH", "TK_MATCH", }, + { "NATURAL", "TK_JOIN_KW", }, + { "NOT", "TK_NOT", }, + { "NOTNULL", "TK_NOTNULL", }, + { "NULL", "TK_NULL", }, + { "OF", "TK_OF", }, + { "OFFSET", "TK_OFFSET", }, + { "ON", "TK_ON", }, + { "OR", "TK_OR", }, + { "ORDER", "TK_ORDER", }, + { "OUTER", "TK_JOIN_KW", }, + { "PRAGMA", "TK_PRAGMA", }, + { "PRIMARY", "TK_PRIMARY", }, + { "RAISE", "TK_RAISE", }, + { "REFERENCES", "TK_REFERENCES", }, + { "REPLACE", "TK_REPLACE", }, + { "RESTRICT", "TK_RESTRICT", }, + { "RIGHT", "TK_JOIN_KW", }, + { "ROLLBACK", "TK_ROLLBACK", }, + { "ROW", "TK_ROW", }, + { "SELECT", "TK_SELECT", }, + { "SET", "TK_SET", }, + { "STATEMENT", "TK_STATEMENT", }, + { "TABLE", "TK_TABLE", }, + { "TEMP", "TK_TEMP", }, + { "TEMPORARY", "TK_TEMP", }, + { "THEN", "TK_THEN", }, + { "TRANSACTION", "TK_TRANSACTION", }, + { "TRIGGER", "TK_TRIGGER", }, + { "UNION", "TK_UNION", }, + { "UNIQUE", "TK_UNIQUE", }, + { "UPDATE", "TK_UPDATE", }, + { "USING", "TK_USING", }, + { "VACUUM", "TK_VACUUM", }, + { "VALUES", "TK_VALUES", }, + { "VIEW", "TK_VIEW", }, + { "WHEN", "TK_WHEN", }, + { "WHERE", "TK_WHERE", }, +}; + +/* Number of keywords */ +#define NKEYWORD (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])) + +/* An array to map all upper-case characters into their corresponding +** lower-case character. 
+*/ +const unsigned char sqlite3UpperToLower[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121, + 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107, + 108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, + 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161, + 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179, + 180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197, + 198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233, + 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251, + 252,253,254,255 +}; +#define UpperToLower sqlite3UpperToLower + +/* +** Comparision function for two Keyword records +*/ +static int keywordCompare(const void *a, const void *b){ + const Keyword *pA = (Keyword*)a; + const Keyword *pB = (Keyword*)b; + return strcmp(pA->zName, pB->zName); +} + +/* +** This routine does the work. The generated code is printed on standard +** output. 
+*/ +int main(int argc, char **argv){ + int i, j, h; + int bestSize, bestCount; + int count; + int nChar; + int aHash[1000]; /* 1000 is much bigger than NKEYWORD */ + + /* Make sure the table is sorted */ + qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare); + + /* Fill in the hash value, length, and offset for all entries */ + nChar = 0; + for(i=0; ilen = strlen(p->zName); + /* p->hash = sqlite3HashNoCase(p->zName, p->len); */ + p->hash = UpperToLower[p->zName[0]]*5 + + UpperToLower[p->zName[p->len-1]]*3 + p->len; + p->offset = nChar; + if( izName, aKeywordTable[i+1].zName,p->len)==0 ){ + /* This entry is a prefix of the one that follows. Do not advance + ** the offset */ + }else{ + nChar += p->len; + } + } + + /* Figure out how big to make the hash table in order to minimize the + ** number of collisions */ + bestSize = NKEYWORD; + bestCount = NKEYWORD*NKEYWORD; + for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){ + for(j=0; joffset==aKeywordTable[i+1].offset ) continue; + if( j==0 ) printf(" \""); + printf("%s", p->zName); + j += p->len; + if( j>60 ){ + printf("\"\n"); + j = 0; + } + } + printf("%s;\n", j>0 ? "\"" : " "); + + printf(" static const unsigned char aHash[%d] = {\n", bestSize); + for(i=j=0; i12 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" static const unsigned char aNext[%d] = {\n", NKEYWORD); + for(i=j=0; i12 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD); + for(i=j=0; i12 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" static const unsigned short int aOffset[%d] = {\n", NKEYWORD); + for(i=j=0; i12 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" static const unsigned char aCode[%d] = {\n", NKEYWORD); + for(i=j=0; i=5 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? 
"" : "\n"); + + printf(" int h, i;\n"); + printf(" if( n<2 ) return TK_ID;\n"); + printf(" h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + \n" + " sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +\n" + " n) %% %d;\n", bestSize); + printf(" for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n"); + printf(" if( aLen[i]==n &&" + " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n"); + printf(" return aCode[i];\n"); + printf(" }\n"); + printf(" }\n"); + printf(" return TK_ID;\n"); + printf("}\n"); + + return 0; +} diff --git a/www/index.tcl b/www/index.tcl index 8444c9b148..ab03847a3c 100644 --- a/www/index.tcl +++ b/www/index.tcl @@ -22,7 +22,8 @@ Features include:
  • A complete database is stored in a single disk file.
  • Database files can be freely shared between machines with different byte orders.
  • -
  • Supports databases up to 2 terabytes (2^41 bytes) in size.
  • +
  • Supports databases up to 2 terabytes + (2^41 bytes) in size.
  • Sizes of strings and BLOBs limited only by available memory.
  • Small code footprint: less than 30K lines of C code, less than 250KB code space (gcc on i486)
  • @@ -33,7 +34,7 @@ Features include: Bindings for many other languages available separately. -
  • Well-commented source code with over 90% test coverage.
  • +
  • Well-commented source code with over 95% test coverage.
  • Self-contained: no external dependencies.
  • Sources are in the public domain. Use for any purpose.
  • @@ -80,4 +81,4 @@ puts {

    Old news...

    } -footer {$Id: index.tcl,v 1.97 2004/09/18 18:51:09 drh Exp $} +footer {$Id: index.tcl,v 1.98 2004/10/07 19:03:02 drh Exp $} -- 2.47.3