]> git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Optimizations to the tokenizer. (CVS 2011)
author drh <drh@noemail.net>
Thu, 7 Oct 2004 19:03:01 +0000 (19:03 +0000)
committer drh <drh@noemail.net>
Thu, 7 Oct 2004 19:03:01 +0000 (19:03 +0000)
FossilOrigin-Name: e5540ce047e0215904005bc9df4ff0d1d0a3c1d1

manifest
manifest.uuid
src/tokenize.c
tool/mkkeywordhash.c [new file with mode: 0644]
www/index.tcl

index b253cc17769ddbb2877ba0d0982f0f3b23c1359b..4b51af2309af9eadef2c8a600e7ddcf4f189314f 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Additional\sparser\soptimizations.\s(CVS\s2010)
-D 2004-10-07T03:06:29
+C Optimizations\sto\sthe\stokenizer.\s(CVS\s2011)
+D 2004-10-07T19:03:01
 F Makefile.in 52c1cc106cad9148d4b7cb387b458e82dc86b339
 F Makefile.linux-gcc a9e5a0d309fa7c38e7c14d3ecf7690879d3a5457
 F README f1de682fbbd94899d50aca13d387d1b3fd3be2dd
@@ -70,7 +70,7 @@ F src/test2.c b11fa244fff02190707dd0879987c37c75e61fc8
 F src/test3.c 5b5b0f3d11b097399c1054fff73d8f3711092301
 F src/test4.c 7c6b9fc33dd1f3f93c7f1ee6e5e6d016afa6c1df
 F src/test5.c b001fa7f1b9e2dc5c2331de62fc641b5ab2bd7a1
-F src/tokenize.c 449843c85c8118b9718f3935f4b5d18586c8cee2
+F src/tokenize.c a235e9a38ccf175f7badccdec68f58d79cb5abe5
 F src/trigger.c b51a120d53e8b85359be11bf5e50854e5725fe3d
 F src/update.c 174c3b593b8f4928e510a51ec309e8ce69d2371c
 F src/utf.c f4f83acd73389090e32d6589d307fc55d794c7ed
@@ -201,6 +201,7 @@ F tool/lempar.c 1e61d2b6cb9d8affa264a13336bc0c088498caa4
 F tool/memleak.awk b744b6109566206c746d826f6ecdba34662216bc
 F tool/memleak2.awk 9cc20c8e8f3c675efac71ea0721ee6874a1566e8
 F tool/memleak3.tcl 336eb50b0849dbf99b1d5462d9c37291b01b2b43
+F tool/mkkeywordhash.c 0cfab6691def993d5b2124398d25129b12b6b5fd
 F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e x
 F tool/opcodeDoc.awk b3a2a3d5d3075b8bd90b7afe24283efdd586659c
 F tool/report1.txt 9eae07f26a8fc53889b45fc833a66a33daa22816
@@ -235,7 +236,7 @@ F www/dynload.tcl 02eb8273aa78cfa9070dd4501dca937fb22b466c
 F www/faq.tcl 8cf9f59fd93868c9954223a99db244c9975fa43b
 F www/fileformat.tcl f71a06a0d533c7df408539c64113b4adeaf29764
 F www/formatchng.tcl d1dfecedfb25e122ab513a1e0948b15cb4f0be46
-F www/index.tcl 3bf50fdac2f5df49cf4f6f76a7f312b5fd4725b8
+F www/index.tcl ae29ea8a5bdde5a8031a9ac0935ac16dd921b4ef
 F www/lang.tcl dde78c1415ee2d19fc30360808ca6f0e2ea71c30
 F www/lockingv3.tcl afcd22f0f063989cff2f4d57bbc38d719b4c6e75
 F www/mingw.tcl d96b451568c5d28545fefe0c80bee3431c73f69c
@@ -251,7 +252,7 @@ F www/tclsqlite.tcl 560ecd6a916b320e59f2917317398f3d59b7cc25
 F www/vdbe.tcl 59288db1ac5c0616296b26dce071c36cb611dfe9
 F www/version3.tcl 092a01f5ef430d2c4acc0ae558d74c4bb89638a0
 F www/whentouse.tcl a8335bce47cc2fddb07f19052cb0cb4d9129a8e4
-P 1376a0bb8d864de755c614b2ecce4342155fd09b
-R 7c281d56a2bfc9c05afabc9a79a1f3b5
+P 618dee121e41d8e5c9b4d5c167475b11ddcda2d7
+R c6f2c8afd2a59eadba90462f2b06f8bc
 U drh
-Z fe725e970bb1b0deb3b69c8bf4e69648
+Z bc3b2c6d615a585d4fad51811788aa10
index ae194f45eae1dff41555be5c0291d39dae0afdf6..873688573dae7b7c11ac79d84d32a5f3a028440d 100644 (file)
@@ -1 +1 @@
-618dee121e41d8e5c9b4d5c167475b11ddcda2d7
\ No newline at end of file
+e5540ce047e0215904005bc9df4ff0d1d0a3c1d1
\ No newline at end of file
index 6b3a5f713f4b2f04f20212a913f77e5fa56fca02..c333699b6e22710c310cf69099003cfea10495d3 100644 (file)
 ** individual tokens and sends those tokens one-by-one over to the
 ** parser for analysis.
 **
-** $Id: tokenize.c,v 1.90 2004/10/05 02:41:43 drh Exp $
+** $Id: tokenize.c,v 1.91 2004/10/07 19:03:01 drh Exp $
 */
 #include "sqliteInt.h"
 #include "os.h"
 #include <ctype.h>
 #include <stdlib.h>
 
-/*
-** All the keywords of the SQL language are stored as in a hash
-** table composed of instances of the following structure.
-*/
-typedef struct Keyword Keyword;
-struct Keyword {
-  char *zName;             /* The keyword name */
-  u8 tokenType;            /* Token value for this keyword */
-  u8 len;                  /* Length of this keyword */
-  u8 iNext;                /* Index in aKeywordTable[] of next with same hash */
-};
-
-/*
-** These are the keywords
-*/
-static Keyword aKeywordTable[] = {
-  { "ABORT",             TK_ABORT,        },
-  { "AFTER",             TK_AFTER,        },
-  { "ALL",               TK_ALL,          },
-  { "AND",               TK_AND,          },
-  { "AS",                TK_AS,           },
-  { "ASC",               TK_ASC,          },
-  { "ATTACH",            TK_ATTACH,       },
-  { "BEFORE",            TK_BEFORE,       },
-  { "BEGIN",             TK_BEGIN,        },
-  { "BETWEEN",           TK_BETWEEN,      },
-  { "BY",                TK_BY,           },
-  { "CASCADE",           TK_CASCADE,      },
-  { "CASE",              TK_CASE,         },
-  { "CHECK",             TK_CHECK,        },
-  { "COLLATE",           TK_COLLATE,      },
-  { "COMMIT",            TK_COMMIT,       },
-  { "CONFLICT",          TK_CONFLICT,     },
-  { "CONSTRAINT",        TK_CONSTRAINT,   },
-  { "CREATE",            TK_CREATE,       },
-  { "CROSS",             TK_JOIN_KW,      },
-  { "DATABASE",          TK_DATABASE,     },
-  { "DEFAULT",           TK_DEFAULT,      },
-  { "DEFERRED",          TK_DEFERRED,     },
-  { "DEFERRABLE",        TK_DEFERRABLE,   },
-  { "DELETE",            TK_DELETE,       },
-  { "DESC",              TK_DESC,         },
-  { "DETACH",            TK_DETACH,       },
-  { "DISTINCT",          TK_DISTINCT,     },
-  { "DROP",              TK_DROP,         },
-  { "END",               TK_END,          },
-  { "EACH",              TK_EACH,         },
-  { "ELSE",              TK_ELSE,         },
-  { "EXCEPT",            TK_EXCEPT,       },
-  { "EXCLUSIVE",         TK_EXCLUSIVE,    },
-  { "EXPLAIN",           TK_EXPLAIN,      },
-  { "FAIL",              TK_FAIL,         },
-  { "FOR",               TK_FOR,          },
-  { "FOREIGN",           TK_FOREIGN,      },
-  { "FROM",              TK_FROM,         },
-  { "FULL",              TK_JOIN_KW,      },
-  { "GLOB",              TK_GLOB,         },
-  { "GROUP",             TK_GROUP,        },
-  { "HAVING",            TK_HAVING,       },
-  { "IGNORE",            TK_IGNORE,       },
-  { "IMMEDIATE",         TK_IMMEDIATE,    },
-  { "IN",                TK_IN,           },
-  { "INDEX",             TK_INDEX,        },
-  { "INITIALLY",         TK_INITIALLY,    },
-  { "INNER",             TK_JOIN_KW,      },
-  { "INSERT",            TK_INSERT,       },
-  { "INSTEAD",           TK_INSTEAD,      },
-  { "INTERSECT",         TK_INTERSECT,    },
-  { "INTO",              TK_INTO,         },
-  { "IS",                TK_IS,           },
-  { "ISNULL",            TK_ISNULL,       },
-  { "JOIN",              TK_JOIN,         },
-  { "KEY",               TK_KEY,          },
-  { "LEFT",              TK_JOIN_KW,      },
-  { "LIKE",              TK_LIKE,         },
-  { "LIMIT",             TK_LIMIT,        },
-  { "MATCH",             TK_MATCH,        },
-  { "NATURAL",           TK_JOIN_KW,      },
-  { "NOT",               TK_NOT,          },
-  { "NOTNULL",           TK_NOTNULL,      },
-  { "NULL",              TK_NULL,         },
-  { "OF",                TK_OF,           },
-  { "OFFSET",            TK_OFFSET,       },
-  { "ON",                TK_ON,           },
-  { "OR",                TK_OR,           },
-  { "ORDER",             TK_ORDER,        },
-  { "OUTER",             TK_JOIN_KW,      },
-  { "PRAGMA",            TK_PRAGMA,       },
-  { "PRIMARY",           TK_PRIMARY,      },
-  { "RAISE",             TK_RAISE,        },
-  { "REFERENCES",        TK_REFERENCES,   },
-  { "REPLACE",           TK_REPLACE,      },
-  { "RESTRICT",          TK_RESTRICT,     },
-  { "RIGHT",             TK_JOIN_KW,      },
-  { "ROLLBACK",          TK_ROLLBACK,     },
-  { "ROW",               TK_ROW,          },
-  { "SELECT",            TK_SELECT,       },
-  { "SET",               TK_SET,          },
-  { "STATEMENT",         TK_STATEMENT,    },
-  { "TABLE",             TK_TABLE,        },
-  { "TEMP",              TK_TEMP,         },
-  { "TEMPORARY",         TK_TEMP,         },
-  { "THEN",              TK_THEN,         },
-  { "TRANSACTION",       TK_TRANSACTION,  },
-  { "TRIGGER",           TK_TRIGGER,      },
-  { "UNION",             TK_UNION,        },
-  { "UNIQUE",            TK_UNIQUE,       },
-  { "UPDATE",            TK_UPDATE,       },
-  { "USING",             TK_USING,        },
-  { "VACUUM",            TK_VACUUM,       },
-  { "VALUES",            TK_VALUES,       },
-  { "VIEW",              TK_VIEW,         },
-  { "WHEN",              TK_WHEN,         },
-  { "WHERE",             TK_WHERE,        },
-};
-
-/*
-** This is the hash table
-*/
-#define KEY_HASH_SIZE 101
-static u8 aiHashTable[KEY_HASH_SIZE];
-
-
 /*
 ** This function looks up an identifier to determine if it is a
 ** keyword.  If it is a keyword, the token code of that keyword is 
 ** returned.  If the input is not a keyword, TK_ID is returned.
+**
+** The implementation of this routine was generated by a program,
+** mkkeywordhash.c, located in the tool subdirectory of the distribution.
+** The output of the mkkeywordhash.c program was manually cut and pasted
+** into this file.  When the set of keywords for SQLite changes, you
+** must modify the mkkeywordhash.c program (to add or remove keywords from
+** the data tables) then rerun that program to regenerate this function.
 */
 int sqlite3KeywordCode(const char *z, int n){
+  static const char zText[519] =
+    "ABORTAFTERALLANDASCATTACHBEFOREBEGINBETWEENBYCASCADECASECHECK"
+    "COLLATECOMMITCONFLICTCONSTRAINTCREATECROSSDATABASEDEFAULTDEFERRABLE"
+    "DEFERREDDELETEDESCDETACHDISTINCTDROPEACHELSEENDEXCEPTEXCLUSIVE"
+    "EXPLAINFAILFOREIGNFROMFULLGLOBGROUPHAVINGIGNOREIMMEDIATEINDEX"
+    "INITIALLYINNERINSERTINSTEADINTERSECTINTOISNULLJOINKEYLEFTLIKE"
+    "LIMITMATCHNATURALNOTNULLNULLOFFSETONORDEROUTERPRAGMAPRIMARYRAISE"
+    "REFERENCESREPLACERESTRICTRIGHTROLLBACKROWSELECTSETSTATEMENTTABLE"
+    "TEMPORARYTHENTRANSACTIONTRIGGERUNIONUNIQUEUPDATEUSINGVACUUMVALUES"
+    "VIEWWHENWHERE";
+  static const unsigned char aHash[154] = {
+       0,  75,  82,   0,   0,  97,  80,   0,  83,   0,   0,   0,   0,
+       0,   0,   6,   0,  95,   4,   0,   0,   0,   0,   0,   0,   0,
+       0,  96,  86,   8,   0,  26,  13,   7,  19,  15,   0,   0,  32,
+      25,   0,  21,  31,  41,   0,   0,   0,  34,  27,   0,   0,  30,
+       0,   0,   0,   9,   0,  10,   0,   0,   0,   0,  51,   0,  44,
+      43,   0,  45,  40,   0,  29,  39,  35,   0,   0,  20,   0,  59,
+       0,  16,   0,  17,   0,  18,   0,  55,  42,  72,   0,  33,   0,
+       0,  61,  66,  56,   0,   0,   0,   0,   0,   0,   0,  54,   0,
+       0,   0,   0,   0,  74,  50,  76,  64,  52,   0,   0,   0,   0,
+      68,  84,   0,  47,   0,  58,  60,  92,   0,   0,  48,   0,  93,
+       0,  63,  71,  98,   0,   0,   0,   0,   0,  67,   0,   0,   0,
+       0,  87,   0,   0,   0,   0,   0,  90,  88,   0,  94,
+  };
+  static const unsigned char aNext[98] = {
+       0,   0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0,   0,
+       0,  12,   0,   0,   0,   0,   0,   0,  11,   0,   0,   0,   0,
+       0,   0,   0,  14,   3,  24,   0,   0,   0,   1,  22,   0,   0,
+      36,  23,  28,   0,   0,   0,   0,   0,   0,   0,   0,   5,   0,
+       0,  49,  37,   0,   0,   0,  38,   0,  53,   0,  57,  62,   0,
+       0,   0,   0,   0,   0,  70,  46,   0,  65,   0,   0,   0,   0,
+      69,  73,   0,  77,   0,   0,   0,   0,   0,   0,  81,  85,   0,
+      91,  79,  78,   0,   0,  89,   0,
+  };
+  static const unsigned char aLen[98] = {
+       5,   5,   3,   3,   2,   3,   6,   6,   5,   7,   2,   7,   4,
+       5,   7,   6,   8,  10,   6,   5,   8,   7,  10,   8,   6,   4,
+       6,   8,   4,   4,   4,   3,   6,   9,   7,   4,   3,   7,   4,
+       4,   4,   5,   6,   6,   9,   2,   5,   9,   5,   6,   7,   9,
+       4,   2,   6,   4,   3,   4,   4,   5,   5,   7,   3,   7,   4,
+       2,   6,   2,   2,   5,   5,   6,   7,   5,  10,   7,   8,   5,
+       8,   3,   6,   3,   9,   5,   4,   9,   4,  11,   7,   5,   6,
+       6,   5,   6,   6,   4,   4,   5,
+  };
+  static const unsigned short int aOffset[98] = {
+       0,   5,  10,  13,  16,  16,  19,  25,  31,  36,  43,  45,  52,
+      56,  61,  68,  74,  82,  92,  98, 103, 111, 118, 128, 136, 142,
+     146, 152, 160, 164, 168, 172, 175, 181, 190, 197, 201, 201, 208,
+     212, 216, 220, 225, 231, 237, 246, 246, 251, 260, 265, 271, 278,
+     287, 291, 291, 297, 301, 304, 308, 312, 317, 322, 329, 329, 336,
+     340, 340, 346, 348, 348, 353, 358, 364, 371, 376, 386, 393, 401,
+     406, 414, 417, 423, 426, 435, 440, 440, 449, 453, 464, 471, 476,
+     482, 488, 493, 499, 505, 509, 513,
+  };
+  static const unsigned char aCode[98] = {
+    TK_ABORT,      TK_AFTER,      TK_ALL,        TK_AND,        TK_AS,         
+    TK_ASC,        TK_ATTACH,     TK_BEFORE,     TK_BEGIN,      TK_BETWEEN,    
+    TK_BY,         TK_CASCADE,    TK_CASE,       TK_CHECK,      TK_COLLATE,    
+    TK_COMMIT,     TK_CONFLICT,   TK_CONSTRAINT, TK_CREATE,     TK_JOIN_KW,    
+    TK_DATABASE,   TK_DEFAULT,    TK_DEFERRABLE, TK_DEFERRED,   TK_DELETE,     
+    TK_DESC,       TK_DETACH,     TK_DISTINCT,   TK_DROP,       TK_EACH,       
+    TK_ELSE,       TK_END,        TK_EXCEPT,     TK_EXCLUSIVE,  TK_EXPLAIN,    
+    TK_FAIL,       TK_FOR,        TK_FOREIGN,    TK_FROM,       TK_JOIN_KW,    
+    TK_GLOB,       TK_GROUP,      TK_HAVING,     TK_IGNORE,     TK_IMMEDIATE,  
+    TK_IN,         TK_INDEX,      TK_INITIALLY,  TK_JOIN_KW,    TK_INSERT,     
+    TK_INSTEAD,    TK_INTERSECT,  TK_INTO,       TK_IS,         TK_ISNULL,     
+    TK_JOIN,       TK_KEY,        TK_JOIN_KW,    TK_LIKE,       TK_LIMIT,      
+    TK_MATCH,      TK_JOIN_KW,    TK_NOT,        TK_NOTNULL,    TK_NULL,       
+    TK_OF,         TK_OFFSET,     TK_ON,         TK_OR,         TK_ORDER,      
+    TK_JOIN_KW,    TK_PRAGMA,     TK_PRIMARY,    TK_RAISE,      TK_REFERENCES, 
+    TK_REPLACE,    TK_RESTRICT,   TK_JOIN_KW,    TK_ROLLBACK,   TK_ROW,        
+    TK_SELECT,     TK_SET,        TK_STATEMENT,  TK_TABLE,      TK_TEMP,       
+    TK_TEMP,       TK_THEN,       TK_TRANSACTION,TK_TRIGGER,    TK_UNION,      
+    TK_UNIQUE,     TK_UPDATE,     TK_USING,      TK_VACUUM,     TK_VALUES,     
+    TK_VIEW,       TK_WHEN,       TK_WHERE,      
+  };
   int h, i;
-  Keyword *p;
-  static char needInit = 1;
-  if( needInit ){
-    /* Initialize the keyword hash table */
-    sqlite3OsEnterMutex();
-    if( needInit ){
-      int nk;
-      nk = sizeof(aKeywordTable)/sizeof(aKeywordTable[0]);
-      for(i=0, p=aKeywordTable; i<nk; i++, p++){
-        const char *zName = p->zName;
-        int len = p->len = strlen(zName);
-        h = sqlite3HashNoCase(zName, len) % KEY_HASH_SIZE;
-        p->iNext = aiHashTable[h];
-        aiHashTable[h] = i+1;
-      }
-      needInit = 0;
-    }
-    sqlite3OsLeaveMutex();
-  }
-  h = sqlite3HashNoCase(z, n) % KEY_HASH_SIZE;
-  for(i=aiHashTable[h]; i; i=p->iNext){
-    p = &aKeywordTable[i-1];
-    if( p->len==n && sqlite3StrNICmp(p->zName, z, n)==0 ){
-      return p->tokenType;
+  if( n<2 ) return TK_ID;
+  h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + 
+      sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +
+      n) % 154;
+  for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){
+    if( aLen[i]==n && sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){
+      return aCode[i];
     }
   }
   return TK_ID;
 }
 
-
 /*
 ** If X is a character that can be used in an identifier and
 ** X&0x80==0 then isIdChar[X] will be 1.  If X&0x80==0x80 then
diff --git a/tool/mkkeywordhash.c b/tool/mkkeywordhash.c
new file mode 100644 (file)
index 0000000..1e51d0e
--- /dev/null
@@ -0,0 +1,312 @@
+/*
+** Compile and run this standalone program in order to generate code that
+** implements a function that will translate alphabetic identifiers into
+** parser token codes.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/*
+** Each keyword of the SQL language is described by one instance of the
+** following structure.  Only zName and zTokenType are supplied by the
+** initializer of aKeywordTable[]; the remaining fields are computed by
+** main().
+*/
+typedef struct Keyword Keyword;
+struct Keyword {
+  char *zName;         /* The keyword name */
+  char *zTokenType;    /* Name of the token code, as text (ex: "TK_ABORT") */
+  int hash;            /* Hash on the keyword (not yet reduced mod table size) */
+  int offset;          /* Offset to start of name in the generated zText[] */
+  int len;             /* Length of this keyword, not counting final \000 */
+  int iNext;           /* 1 + index in aKeywordTable[] of next with same hash,
+                       ** or 0 if this is the last entry on its chain */
+};
+
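+/*
+** These are the keywords.  The entries do not have to appear in sorted
+** order here because main() sorts the table by name before using it.
+*/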
+static Keyword aKeywordTable[] = {
+  { "ABORT",            "TK_ABORT",        },
+  { "AFTER",            "TK_AFTER",        },
+  { "ALL",              "TK_ALL",          },
+  { "AND",              "TK_AND",          },
+  { "AS",               "TK_AS",           },
+  { "ASC",              "TK_ASC",          },
+  { "ATTACH",           "TK_ATTACH",       },
+  { "BEFORE",           "TK_BEFORE",       },
+  { "BEGIN",            "TK_BEGIN",        },
+  { "BETWEEN",          "TK_BETWEEN",      },
+  { "BY",               "TK_BY",           },
+  { "CASCADE",          "TK_CASCADE",      },
+  { "CASE",             "TK_CASE",         },
+  { "CHECK",            "TK_CHECK",        },
+  { "COLLATE",          "TK_COLLATE",      },
+  { "COMMIT",           "TK_COMMIT",       },
+  { "CONFLICT",         "TK_CONFLICT",     },
+  { "CONSTRAINT",       "TK_CONSTRAINT",   },
+  { "CREATE",           "TK_CREATE",       },
+  { "CROSS",            "TK_JOIN_KW",      },
+  { "DATABASE",         "TK_DATABASE",     },
+  { "DEFAULT",          "TK_DEFAULT",      },
+  { "DEFERRED",         "TK_DEFERRED",     },
+  { "DEFERRABLE",       "TK_DEFERRABLE",   },
+  { "DELETE",           "TK_DELETE",       },
+  { "DESC",             "TK_DESC",         },
+  { "DETACH",           "TK_DETACH",       },
+  { "DISTINCT",         "TK_DISTINCT",     },
+  { "DROP",             "TK_DROP",         },
+  { "END",              "TK_END",          },
+  { "EACH",             "TK_EACH",         },
+  { "ELSE",             "TK_ELSE",         },
+  { "EXCEPT",           "TK_EXCEPT",       },
+  { "EXCLUSIVE",        "TK_EXCLUSIVE",    },
+  { "EXPLAIN",          "TK_EXPLAIN",      },
+  { "FAIL",             "TK_FAIL",         },
+  { "FOR",              "TK_FOR",          },
+  { "FOREIGN",          "TK_FOREIGN",      },
+  { "FROM",             "TK_FROM",         },
+  { "FULL",             "TK_JOIN_KW",      },
+  { "GLOB",             "TK_GLOB",         },
+  { "GROUP",            "TK_GROUP",        },
+  { "HAVING",           "TK_HAVING",       },
+  { "IGNORE",           "TK_IGNORE",       },
+  { "IMMEDIATE",        "TK_IMMEDIATE",    },
+  { "IN",               "TK_IN",           },
+  { "INDEX",            "TK_INDEX",        },
+  { "INITIALLY",        "TK_INITIALLY",    },
+  { "INNER",            "TK_JOIN_KW",      },
+  { "INSERT",           "TK_INSERT",       },
+  { "INSTEAD",          "TK_INSTEAD",      },
+  { "INTERSECT",        "TK_INTERSECT",    },
+  { "INTO",             "TK_INTO",         },
+  { "IS",               "TK_IS",           },
+  { "ISNULL",           "TK_ISNULL",       },
+  { "JOIN",             "TK_JOIN",         },
+  { "KEY",              "TK_KEY",          },
+  { "LEFT",             "TK_JOIN_KW",      },
+  { "LIKE",             "TK_LIKE",         },
+  { "LIMIT",            "TK_LIMIT",        },
+  { "MATCH",            "TK_MATCH",        },
+  { "NATURAL",          "TK_JOIN_KW",      },
+  { "NOT",              "TK_NOT",          },
+  { "NOTNULL",          "TK_NOTNULL",      },
+  { "NULL",             "TK_NULL",         },
+  { "OF",               "TK_OF",           },
+  { "OFFSET",           "TK_OFFSET",       },
+  { "ON",               "TK_ON",           },
+  { "OR",               "TK_OR",           },
+  { "ORDER",            "TK_ORDER",        },
+  { "OUTER",            "TK_JOIN_KW",      },
+  { "PRAGMA",           "TK_PRAGMA",       },
+  { "PRIMARY",          "TK_PRIMARY",      },
+  { "RAISE",            "TK_RAISE",        },
+  { "REFERENCES",       "TK_REFERENCES",   },
+  { "REPLACE",          "TK_REPLACE",      },
+  { "RESTRICT",         "TK_RESTRICT",     },
+  { "RIGHT",            "TK_JOIN_KW",      },
+  { "ROLLBACK",         "TK_ROLLBACK",     },
+  { "ROW",              "TK_ROW",          },
+  { "SELECT",           "TK_SELECT",       },
+  { "SET",              "TK_SET",          },
+  { "STATEMENT",        "TK_STATEMENT",    },
+  { "TABLE",            "TK_TABLE",        },
+  { "TEMP",             "TK_TEMP",         },
+  { "TEMPORARY",        "TK_TEMP",         },
+  { "THEN",             "TK_THEN",         },
+  { "TRANSACTION",      "TK_TRANSACTION",  },
+  { "TRIGGER",          "TK_TRIGGER",      },
+  { "UNION",            "TK_UNION",        },
+  { "UNIQUE",           "TK_UNIQUE",       },
+  { "UPDATE",           "TK_UPDATE",       },
+  { "USING",            "TK_USING",        },
+  { "VACUUM",           "TK_VACUUM",       },
+  { "VALUES",           "TK_VALUES",       },
+  { "VIEW",             "TK_VIEW",         },
+  { "WHEN",             "TK_WHEN",         },
+  { "WHERE",            "TK_WHERE",        },
+};
+
+/* Number of keywords.  The sizeof expression has type size_t; it is cast
+** to int because the value is compared against int loop counters and is
+** printed with the "%d" conversion, which requires an int argument.
+*/
+#define NKEYWORD ((int)(sizeof(aKeywordTable)/sizeof(aKeywordTable[0])))
+
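+/* An array to map all upper-case characters into their corresponding
+** lower-case character.  The array is indexed by byte value (0..255).
+** Only ASCII 'A' through 'Z' are changed; every other value maps to
+** itself.
+*/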
+const unsigned char sqlite3UpperToLower[] = {
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
+     18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+     36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
+     54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103,
+    104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,
+    122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107,
+    108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
+    126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+    144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,
+    162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,
+    180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,
+    198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,
+    216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,
+    234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,
+    252,253,254,255
+};
+#define UpperToLower sqlite3UpperToLower
+
+/*
+** Comparison function for two Keyword records, suitable for qsort().
+** Both arguments point to a Keyword.  The result is negative, zero, or
+** positive according to how strcmp() orders the two zName strings, so
+** the ordering is byte-wise and case-sensitive.
+*/
+static int keywordCompare(const void *a, const void *b){
+  /* A void pointer converts implicitly in C.  Leaving out the cast keeps
+  ** the const qualifier of the arguments intact. */
+  const Keyword *pA = a;
+  const Keyword *pB = b;
+  return strcmp(pA->zName, pB->zName);
+}
+
+/*
+** This routine does the work.  The generated code is printed on standard
+** output.
+**
+** The steps are:
+**
+**   1. Sort aKeywordTable[] by name so that a keyword which is a prefix
+**      of the keyword that follows it (ex: "AS" and "ASC") can share
+**      text with that keyword.
+**   2. Compute the length, hash, and text offset of every keyword.
+**   3. Try every hash table size from NKEYWORD/2 through 2*NKEYWORD and
+**      keep the first size that gives the lowest collision cost.
+**   4. Build the hash chains and print the source code of
+**      sqlite3KeywordCode().
+**
+** The argc and argv parameters are not used.  The return value is 0 on
+** success, or 1 (after a message is written to standard error) if there
+** are more keywords than the generated tables can represent.
+*/
+int main(int argc, char **argv){
+  /* Number of keywords as an int.  The value is printed with "%d" and
+  ** compared against int counters, so it must not be passed around as
+  ** the size_t that a sizeof expression yields. */
+  const int nKeyword = (int)NKEYWORD;
+  int i, j, h;
+  int bestSize, bestCount;
+  int count;
+  int nChar;
+  int aHash[1000];  /* 1000 is much bigger than NKEYWORD */
+
+  /* The generated aHash[] and aNext[] arrays are unsigned char and hold
+  ** values as large as the number of keywords.  Staying at or below 255
+  ** also guarantees that 2*nKeyword fits in the local aHash[] array. */
+  if( nKeyword>255 ){
+    fprintf(stderr, "too many keywords: %d (limit is 255)\n", nKeyword);
+    return 1;
+  }
+
+  /* Make sure the table is sorted */
+  qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare);
+
+  /* Fill in the hash value, length, and offset for all entries */
+  nChar = 0;
+  for(i=0; i<nKeyword; i++){
+    Keyword *p = &aKeywordTable[i];
+    p->len = (int)strlen(p->zName);
+    /* This must be the same function that the generated code applies to
+    ** its input (see the printf() of "h = ..." near the end of this
+    ** routine).  The characters are converted to unsigned char before
+    ** being used as an index so that a byte with the high bit set cannot
+    ** become a negative subscript where plain char is signed. */
+    p->hash = UpperToLower[(unsigned char)p->zName[0]]*5 +
+              UpperToLower[(unsigned char)p->zName[p->len-1]]*3 + p->len;
+    p->offset = nChar;
+    if( i<nKeyword-1 && strncmp(p->zName, aKeywordTable[i+1].zName,p->len)==0 ){
+      /* This entry is a prefix of the one that follows.  Do not advance
+      ** the offset */
+    }else{
+      nChar += p->len;
+    }
+  }
+
+  /* Figure out how big to make the hash table in order to minimize the
+  ** number of collisions.  Each keyword added to a bucket doubles that
+  ** bucket's cost and adds one, so a bucket holding k keywords costs
+  ** 2**k - 1 and long chains are penalized heavily. */
+  bestSize = nKeyword;
+  bestCount = nKeyword*nKeyword;
+  for(i=nKeyword/2; i<=2*nKeyword; i++){
+    for(j=0; j<i; j++) aHash[j] = 0;
+    for(j=0; j<nKeyword; j++){
+      h = aKeywordTable[j].hash % i;
+      aHash[h] *= 2;
+      aHash[h]++;
+    }
+    for(j=count=0; j<i; j++) count += aHash[j];
+    if( count<bestCount ){
+      bestCount = count;
+      bestSize = i;
+    }
+  }
+
+  /* Compute the hash.  aHash[h] holds 1 + the index of the most recently
+  ** inserted keyword in bucket h, and iNext links to the keyword that was
+  ** at the head of the bucket before it.  Zero marks an empty bucket or
+  ** the end of a chain. */
+  for(i=0; i<bestSize; i++) aHash[i] = 0;
+  for(i=0; i<nKeyword; i++){
+    h = aKeywordTable[i].hash % bestSize;
+    aKeywordTable[i].iNext = aHash[h];
+    aHash[h] = i+1;
+  }
+
+  /* Begin generating code */
+  printf("int sqlite3KeywordCode(const char *z, int n){\n");
+
+  /* The text of all keywords run together.  A keyword that shares its
+  ** offset with the next entry is a prefix of that entry and is not
+  ** printed separately.  A new source line is started after more than
+  ** 60 characters. */
+  printf("  static const char zText[%d] =\n", nChar+1);
+  for(i=j=0; i<nKeyword; i++){
+    Keyword *p = &aKeywordTable[i];
+    if( i<nKeyword-1 && p->offset==aKeywordTable[i+1].offset ) continue;
+    if( j==0 ) printf("    \"");
+    printf("%s", p->zName);
+    j += p->len;
+    if( j>60 ){
+      printf("\"\n");
+      j = 0;
+    }
+  }
+  printf("%s;\n", j>0 ? "\"" : "  ");
+
+  /* The integer tables that follow are printed 13 values per line */
+  printf("  static const unsigned char aHash[%d] = {\n", bestSize);
+  for(i=j=0; i<bestSize; i++){
+    if( j==0 ) printf("    ");
+    printf(" %3d,", aHash[i]);
+    j++;
+    if( j>12 ){
+      printf("\n");
+      j = 0;
+    }
+  }
+  printf("%s  };\n", j==0 ? "" : "\n");
+
+  printf("  static const unsigned char aNext[%d] = {\n", nKeyword);
+  for(i=j=0; i<nKeyword; i++){
+    if( j==0 ) printf("    ");
+    printf(" %3d,", aKeywordTable[i].iNext);
+    j++;
+    if( j>12 ){
+      printf("\n");
+      j = 0;
+    }
+  }
+  printf("%s  };\n", j==0 ? "" : "\n");
+
+  printf("  static const unsigned char aLen[%d] = {\n", nKeyword);
+  for(i=j=0; i<nKeyword; i++){
+    if( j==0 ) printf("    ");
+    printf(" %3d,", aKeywordTable[i].len);
+    j++;
+    if( j>12 ){
+      printf("\n");
+      j = 0;
+    }
+  }
+  printf("%s  };\n", j==0 ? "" : "\n");
+
+  printf("  static const unsigned short int aOffset[%d] = {\n", nKeyword);
+  for(i=j=0; i<nKeyword; i++){
+    if( j==0 ) printf("    ");
+    printf(" %3d,", aKeywordTable[i].offset);
+    j++;
+    if( j>12 ){
+      printf("\n");
+      j = 0;
+    }
+  }
+  printf("%s  };\n", j==0 ? "" : "\n");
+
+  /* Token codes are printed five per line, each padded to 15 columns */
+  printf("  static const unsigned char aCode[%d] = {\n", nKeyword);
+  for(i=j=0; i<nKeyword; i++){
+    char *zToken = aKeywordTable[i].zTokenType;
+    if( j==0 ) printf("    ");
+    printf("%s,%*s", zToken, (int)(14-strlen(zToken)), "");
+    j++;
+    if( j>=5 ){
+      printf("\n");
+      j = 0;
+    }
+  }
+  printf("%s  };\n", j==0 ? "" : "\n");
+
+  /* The lookup code itself.  The hash expression printed here has to
+  ** stay in step with the one used to fill in Keyword.hash above. */
+  printf("  int h, i;\n");
+  printf("  if( n<2 ) return TK_ID;\n");
+  printf("  h = (sqlite3UpperToLower[((unsigned char*)z)[0]]*5 + \n"
+         "      sqlite3UpperToLower[((unsigned char*)z)[n-1]]*3 +\n"
+         "      n) %% %d;\n", bestSize);
+  printf("  for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n");
+  printf("    if( aLen[i]==n &&"
+                   " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n");
+  printf("      return aCode[i];\n");
+  printf("    }\n");
+  printf("  }\n");
+  printf("  return TK_ID;\n");
+  printf("}\n");
+
+  return 0;
+}
index 8444c9b1487debf127d368fac982a41ca013ffec..ab03847a3c3ee1fb6db7856a3aaa8b24ab2d3186 100644 (file)
@@ -22,7 +22,8 @@ Features include:
 <li>A complete database is stored in a single disk file.</li>
 <li>Database files can be freely shared between machines with
     different byte orders.</li>
-<li>Supports databases up to 2 terabytes (2<sup>41</sup> bytes) in size.</li>
+<li>Supports databases up to 2 terabytes
+    (2<sup><small>41</small></sup> bytes) in size.</li>
 <li>Sizes of strings and BLOBs limited only by available memory.</li>
 <li>Small code footprint: less than 30K lines of C code,
     less than 250KB code space (gcc on i486)</li>
@@ -33,7 +34,7 @@ Features include:
     Bindings for many other languages 
     <a href="http://www.sqlite.org/cvstrac/wiki?p=SqliteWrappers">
     available separately.</a></li>
-<li>Well-commented source code with over 90% test coverage.</li>
+<li>Well-commented source code with over 95% test coverage.</li>
 <li>Self-contained: no external dependencies.</li>
 <li>Sources are in the <a href="copyright.html">public domain</a>.
     Use for any purpose.</li>
@@ -80,4 +81,4 @@ puts {
 <p align="right"><a href="oldnews.html">Old news...</a></p>
 </td></tr></table>
 }
-footer {$Id: index.tcl,v 1.97 2004/09/18 18:51:09 drh Exp $}
+footer {$Id: index.tcl,v 1.98 2004/10/07 19:03:02 drh Exp $}