"**\n"
"** The code in this file has been automatically generated by\n"
"**\n"
- "** $Header: /home/drh/sqlite/trans/cvs/sqlite/sqlite/tool/mkkeywordhash.c,v 1.28 2007/04/26 14:42:36 danielk1977 Exp $\n"
+ "** $Header: /home/drh/sqlite/trans/cvs/sqlite/sqlite/tool/mkkeywordhash.c,v 1.29 2007/05/04 17:07:53 drh Exp $\n"
"**\n"
"** The code in this file implements a function that determines whether\n"
"** or not a given identifier is really an SQL keyword. The same thing\n"
int offset; /* Offset to start of name string */
int len; /* Length of this keyword, not counting final \000 */
int prefix; /* Number of characters in prefix */
+ int longestSuffix; /* Longest suffix that is a prefix on another word */
int iNext; /* Index in aKeywordTable[] of next with same hash */
int substrId; /* Id to another keyword this keyword is embedded in */
int substrOffset; /* Offset into substrId for start of this keyword */
};
/* Number of keywords */
-static int NKEYWORD = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]));
+static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0]));
/* An array to map all upper-case characters into their corresponding
** lower-case character.
/*
** Comparison callback passed to qsort() for sorting aKeywordTable[].
** With the added lines of this change applied, the entry with the larger
** longestSuffix sorts first; entries whose longestSuffix values are equal
** are ordered by strcmp() on zName.  The removed line is the previous
** behavior, which ordered by zName alone.
*/
static int keywordCompare2(const void *a, const void *b){
const Keyword *pA = (Keyword*)a;
const Keyword *pB = (Keyword*)b;
- int n = strcmp(pA->zName, pB->zName);
+ int n = pB->longestSuffix - pA->longestSuffix;
+ if( n==0 ){
+ n = strcmp(pA->zName, pB->zName);
+ }
return n;
}
static int keywordCompare3(const void *a, const void *b){
*/
static Keyword *findById(int id){
int i;
- for(i=0; i<NKEYWORD; i++){
+ for(i=0; i<nKeyword; i++){
if( aKeywordTable[i].id==id ) break;
}
return &aKeywordTable[i];
int bestSize, bestCount;
int count;
int nChar;
- int aHash[1000]; /* 1000 is much bigger than NKEYWORD */
+ int totalLen = 0;
+ int aHash[1000]; /* 1000 is much bigger than nKeyword */
/* Remove entries from the list of keywords that have mask==0 */
- for(i=j=0; i<NKEYWORD; i++){
+ for(i=j=0; i<nKeyword; i++){
if( aKeywordTable[i].mask==0 ) continue;
if( j<i ){
aKeywordTable[j] = aKeywordTable[i];
}
j++;
}
- NKEYWORD = j;
+ nKeyword = j;
/* Fill in the lengths of strings and hashes for all entries. */
- for(i=0; i<NKEYWORD; i++){
+ for(i=0; i<nKeyword; i++){
Keyword *p = &aKeywordTable[i];
p->len = strlen(p->zName);
+ totalLen += p->len;
p->hash = (UpperToLower[p->zName[0]]*4) ^
(UpperToLower[p->zName[p->len-1]]*3) ^ p->len;
p->id = i+1;
}
/* Sort the table from shortest to longest keyword */
- qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare1);
+ qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1);
/* Look for short keywords embedded in longer keywords */
- for(i=NKEYWORD-2; i>=0; i--){
+ for(i=nKeyword-2; i>=0; i--){
Keyword *p = &aKeywordTable[i];
- for(j=NKEYWORD-1; j>i && p->substrId==0; j--){
+ for(j=nKeyword-1; j>i && p->substrId==0; j--){
Keyword *pOther = &aKeywordTable[j];
if( pOther->substrId ) continue;
if( pOther->len<=p->len ) continue;
}
}
- /* Sort the table into alphabetical order */
- qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare2);
+ /* Compute the longestSuffix value for every word */
+ for(i=0; i<nKeyword; i++){
+ Keyword *p = &aKeywordTable[i];
+ if( p->substrId ) continue;
+ for(j=0; j<nKeyword; j++){
+ Keyword *pOther;
+ if( j==i ) continue;
+ pOther = &aKeywordTable[j];
+ if( pOther->substrId ) continue;
+ for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){
+ if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){
+ p->longestSuffix = k;
+ }
+ }
+ }
+ }
+
+ /* Sort the table by decreasing longestSuffix, breaking ties alphabetically */
+ qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2);
/* Fill in the offset for all entries */
nChar = 0;
- for(i=0; i<NKEYWORD; i++){
+ for(i=0; i<nKeyword; i++){
Keyword *p = &aKeywordTable[i];
if( p->offset>0 || p->substrId ) continue;
p->offset = nChar;
nChar += p->len;
for(k=p->len-1; k>=1; k--){
- for(j=i+1; j<NKEYWORD; j++){
+ for(j=i+1; j<nKeyword; j++){
Keyword *pOther = &aKeywordTable[j];
if( pOther->offset>0 || pOther->substrId ) continue;
if( pOther->len<=k ) continue;
}
}
}
- for(i=0; i<NKEYWORD; i++){
+ for(i=0; i<nKeyword; i++){
Keyword *p = &aKeywordTable[i];
if( p->substrId ){
p->offset = findById(p->substrId)->offset + p->substrOffset;
}
/* Sort the table by offset */
- qsort(aKeywordTable, NKEYWORD, sizeof(aKeywordTable[0]), keywordCompare3);
+ qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3);
/* Figure out how big to make the hash table in order to minimize the
** number of collisions */
- bestSize = NKEYWORD;
- bestCount = NKEYWORD*NKEYWORD;
- for(i=NKEYWORD/2; i<=2*NKEYWORD; i++){
+ bestSize = nKeyword;
+ bestCount = nKeyword*nKeyword;
+ for(i=nKeyword/2; i<=2*nKeyword; i++){
for(j=0; j<i; j++) aHash[j] = 0;
- for(j=0; j<NKEYWORD; j++){
+ for(j=0; j<nKeyword; j++){
h = aKeywordTable[j].hash % i;
aHash[h] *= 2;
aHash[h]++;
/* Compute the hash */
for(i=0; i<bestSize; i++) aHash[i] = 0;
- for(i=0; i<NKEYWORD; i++){
+ for(i=0; i<nKeyword; i++){
h = aKeywordTable[i].hash % bestSize;
aKeywordTable[i].iNext = aHash[h];
aHash[h] = i+1;
printf("%s", zHdr);
printf("/* Hash score: %d */\n", bestCount);
printf("static int keywordCode(const char *z, int n){\n");
+ printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n",
+ totalLen + nKeyword, nChar+1 );
printf(" static const char zText[%d] =\n", nChar+1);
- for(i=j=0; i<NKEYWORD; i++){
+ for(i=j=0; i<nKeyword; i++){
Keyword *p = &aKeywordTable[i];
if( p->substrId ) continue;
if( j==0 ) printf(" \"");
}
printf("%s };\n", j==0 ? "" : "\n");
- printf(" static const unsigned char aNext[%d] = {\n", NKEYWORD);
- for(i=j=0; i<NKEYWORD; i++){
+ printf(" static const unsigned char aNext[%d] = {\n", nKeyword);
+ for(i=j=0; i<nKeyword; i++){
if( j==0 ) printf(" ");
printf(" %3d,", aKeywordTable[i].iNext);
j++;
}
printf("%s };\n", j==0 ? "" : "\n");
- printf(" static const unsigned char aLen[%d] = {\n", NKEYWORD);
- for(i=j=0; i<NKEYWORD; i++){
+ printf(" static const unsigned char aLen[%d] = {\n", nKeyword);
+ for(i=j=0; i<nKeyword; i++){
if( j==0 ) printf(" ");
printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix);
j++;
}
printf("%s };\n", j==0 ? "" : "\n");
- printf(" static const unsigned short int aOffset[%d] = {\n", NKEYWORD);
- for(i=j=0; i<NKEYWORD; i++){
+ printf(" static const unsigned short int aOffset[%d] = {\n", nKeyword);
+ for(i=j=0; i<nKeyword; i++){
if( j==0 ) printf(" ");
printf(" %3d,", aKeywordTable[i].offset);
j++;
}
printf("%s };\n", j==0 ? "" : "\n");
- printf(" static const unsigned char aCode[%d] = {\n", NKEYWORD);
- for(i=j=0; i<NKEYWORD; i++){
+ printf(" static const unsigned char aCode[%d] = {\n", nKeyword);
+ for(i=j=0; i<nKeyword; i++){
char *zToken = aKeywordTable[i].zTokenType;
if( j==0 ) printf(" ");
printf("%s,%*s", zToken, (int)(14-strlen(zToken)), "");