sqlite3 *db;
const char *zName; /* virtual table name */
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
+ int nColumn; /* Number of columns */
+ char **azColumn; /* Names of all columns */
/* Precompiled statements which we keep as long as the table is
** open.
return sql_single_step_statement(v, TERM_DELETE_STMT, &s);
}
+/*
+** Free the memory used to contain a fulltext_vtab structure.
+*/
static void fulltext_vtab_destroy(fulltext_vtab *v){
int iStmt;
v->pTokenizer->pModule->xDestroy(v->pTokenizer);
v->pTokenizer = NULL;
}
-
+
+ free(v->azColumn);
free((void *) v->zName);
free(v);
}
+/*
+** Classification codes that getToken() stores in *tokenType while
+** scanning the argument strings given to xConnect or xCreate.
+*/
+enum {
+  TOKEN_EOF    = 0,   /* End of the input string */
+  TOKEN_SPACE  = 1,   /* Any kind of whitespace */
+  TOKEN_ID     = 2,   /* An identifier */
+  TOKEN_STRING = 3,   /* A string literal */
+  TOKEN_PUNCT  = 4    /* A single punctuation character */
+};
+
+/*
+** IdChar(X) is true if X is a character that can be used in an
+** identifier.  Otherwise it is false.
+**
+** Any character with the high-order bit set is allowed in an
+** identifier.  For 7-bit characters the answer is looked up in the
+** isIdChar[] table below, which covers 0x20 through 0x7f and accepts
+** '$', the digits, the ASCII letters and '_'.
+**
+** Ticket #1066.  The SQL standard does not allow '$' in the
+** middle of identifiers.  But many SQL implementations do.
+** SQLite will allow '$' in identifiers for compatibility.
+** But the feature is undocumented.
+*/
+static const char isIdChar[] = {
+/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
+    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 2x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 3x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 4x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,  /* 5x */
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /* 6x */
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,  /* 7x */
+};
+
+/*
+** This is a function rather than a macro so that it neither needs nor
+** modifies any variable in the caller's scope.  The range test keeps
+** the table lookup in bounds for every possible int argument.
+*/
+static int IdChar(int ch){
+  if( (ch & 0x80)!=0 ) return 1;        /* high-order bit set */
+  if( ch<0x20 || ch>0x7f ) return 0;    /* outside the table */
+  return isIdChar[ch-0x20];
+}
+
+
+/*
+** Return the length in bytes of the token that begins at z[0].
+** Store the token type (one of the TOKEN_* codes) in *tokenType
+** before returning.
+**
+**   TOKEN_EOF     z[0] is the '\000' terminator.  The length is 0.
+**   TOKEN_SPACE   A run of whitespace.
+**   TOKEN_STRING  Text quoted with ' or ".  A doubled quote character
+**                 inside the string does not end it.  An unterminated
+**                 string extends to the end of the input.
+**   TOKEN_ID      Either [bracketed text], brackets included, or a
+**                 run of characters for which IdChar() is true.
+**   TOKEN_PUNCT   Any other single character.
+*/
+static int getToken(const char *z, int *tokenType){
+  int i, c;
+  switch( *z ){
+    case 0: {
+      *tokenType = TOKEN_EOF;
+      return 0;
+    }
+    case ' ': case '\t': case '\n': case '\f': case '\r': {
+      /* isspace() is undefined for negative arguments, so the (possibly
+      ** signed) char must be converted to unsigned char first. */
+      for(i=1; isspace((unsigned char)z[i]); i++){}
+      *tokenType = TOKEN_SPACE;
+      return i;
+    }
+    case '\'':
+    case '"': {
+      int delim = z[0];
+      for(i=1; (c=z[i])!=0; i++){
+        if( c==delim ){
+          if( z[i+1]==delim ){
+            i++;              /* doubled delimiter: stay inside the string */
+          }else{
+            break;            /* closing delimiter */
+          }
+        }
+      }
+      *tokenType = TOKEN_STRING;
+      /* Count the closing delimiter only if one was actually found */
+      return i + (c!=0);
+    }
+    case '[': {
+      for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
+      *tokenType = TOKEN_ID;
+      return i;
+    }
+    default: {
+      if( !IdChar(*z) ){
+        break;
+      }
+      for(i=1; IdChar(z[i]); i++){}
+      *tokenType = TOKEN_ID;
+      return i;
+    }
+  }
+  *tokenType = TOKEN_PUNCT;
+  return 1;
+}
+
+/*
+** A token extracted from a string is an instance of the following
+** structure.
+**
+** The length is a full int because getToken() reports token lengths
+** as int.  A narrower field would silently truncate a long token.
+*/
+typedef struct Token {
+  const char *z;      /* Pointer to token text.  Not '\000' terminated */
+  int n;              /* Length of the token text in bytes. */
+} Token;
+
+/*
+** Given an input string (which is really one of the argv[] parameters
+** passed into xConnect or xCreate) split the string up into tokens.
+** Return an array of pointers to '\000' terminated strings, one string
+** for each non-whitespace token.  The number of tokens is written
+** to *pnToken.
+**
+** The returned array is terminated by a single NULL pointer.
+**
+** Space to hold the returned array is obtained from a single
+** malloc and should be freed by passing the return value to free().
+** The individual strings within the token list are all a part of
+** the single memory allocation and will all be freed at once.
+**
+** If memory cannot be allocated, NULL is returned and *pnToken is
+** set to 0.
+*/
+static char **tokenizeString(const char *z, int *pnToken){
+  int nToken = 0;
+  Token *aToken;
+  int n = 1;
+  int e, i;
+  int totalSize = 0;
+  char **azToken;
+  char *zCopy;
+
+  *pnToken = 0;
+
+  /* Every token other than the final TOKEN_EOF consumes at least one
+  ** byte of input, so strlen(z)+1 entries is the worst case.  The +1
+  ** is for the TOKEN_EOF entry that the loop below always records. */
+  aToken = malloc( (strlen(z)+1) * sizeof(aToken[0]) );
+  if( aToken==0 ) return 0;
+
+  while( n>0 ){
+    n = getToken(z, &e);
+    if( e!=TOKEN_SPACE ){
+      aToken[nToken].z = z;
+      aToken[nToken].n = n;
+      nToken++;
+      totalSize += n+1;
+    }
+    z += n;
+  }
+
+  /* The pointer array (nToken slots, the last of which becomes the NULL
+  ** terminator) is followed in the same allocation by the string text. */
+  azToken = (char**)malloc( nToken*sizeof(char*) + totalSize );
+  if( azToken==0 ){
+    free(aToken);
+    return 0;
+  }
+  zCopy = (char*)&azToken[nToken];
+  nToken--;                      /* Do not report the TOKEN_EOF entry */
+  for(i=0; i<nToken; i++){
+    azToken[i] = zCopy;
+    n = aToken[i].n;
+    memcpy(zCopy, aToken[i].z, n);
+    zCopy[n] = 0;
+    zCopy += n+1;
+  }
+  azToken[nToken] = 0;
+  free(aToken);
+  *pnToken = nToken;
+  return azToken;
+}
+
+/*
+** Compact a NULL-terminated token list in place: drop the first nSkip
+** entries and every entry whose text begins with "(", "," or ")".
+** The surviving pointers are moved to the front of the array, a NULL
+** terminator is written after them, and their count is stored in
+** *pnToken.
+**
+** Only pointers inside the existing allocation are rearranged, so the
+** list is still released by a single free().
+*/
+static void removeDelimiterTokens(char **azTokens, int nSkip, int *pnToken){
+  char **pIn = &azTokens[nSkip];
+  int nKept = 0;
+  while( *pIn ){
+    char *zTok = *pIn++;
+    switch( zTok[0] ){
+      case '(':
+      case ',':
+      case ')':
+        break;                       /* delimiter: discard */
+      default:
+        azTokens[nKept++] = zTok;    /* keep, sliding toward the front */
+        break;
+    }
+  }
+  azTokens[nKept] = 0;
+  *pnToken = nKept;
+}
+
+
+
/* Current interface:
-** argv[0] - module name
-** argv[1] - database name
-** argv[2] - table name
+** argv[0] - module name
+** argv[1] - database name
+** argv[2] - table name
+** argv[3...] - arguments.
+**
+** Arguments:
+**
+** tokenizer NAME(ARG1,ARG2,...)
+** columns(C1,C2,C3,...)
+
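** Both arguments are optional. If no tokenizer argument is given the
** "simple" tokenizer is used (a sensible default). If no columns
** argument is given the table has a single column named "content".
** The ARGn values of a tokenizer argument are passed to the tokenizer.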
**/
-static int fulltextConnect(sqlite3 *db, void *pAux, int argc, char **argv,
- sqlite3_vtab **ppVTab, char **pzErr){
- int rc;
- fulltext_vtab *v;
+/*
+** Build a fulltext_vtab for the table named by argv[2] and return it
+** through *ppVTab.  argv[3] and later are parsed as module arguments:
+** "tokenizer NAME(ARGS...)" selects a tokenizer (only "simple" is
+** recognized) and "columns(C1,C2,...)" names the table columns.  If
+** either is omitted, the "simple" tokenizer and a single column named
+** "content" are used.
+**
+** Returns SQLITE_OK on success.  On any failure the partially built
+** object is released and SQLITE_ERROR is returned.  For argument
+** errors *pzErr is set to a message obtained from sqlite3_mprintf().
+*/
+static int fulltextConnect(
+  sqlite3 *db,
+  void *pAux,
+  int argc, const char *const*argv,
+  sqlite3_vtab **ppVTab,
+  char **pzErr
+){
+  int rc, i;
+  fulltext_vtab *v = 0;
   const sqlite3_tokenizer_module *m = NULL;
+  char **azToken = 0;
+  int seen_tokenizer = 0;
+  int seen_columns = 0;
   assert( argc>=3 );
   v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
+  if( v==0 ) goto connect_failed;
   memset(v, 0, sizeof(*v));
   v->db = db;
   v->zName = string_dup(argv[2]);
+  /* NOTE(review): assumes string_dup() returns NULL when it cannot
+  ** allocate - confirm against its definition. */
+  if( v->zName==0 ) goto connect_failed;
   v->pTokenizer = NULL;
- if( argc==3 ){
- sqlite3Fts1SimpleTokenizerModule(&m);
- } else {
- /* TODO(shess) For now, add new tokenizers as else if clauses. */
- if( !strcmp(argv[3], "simple") ){
- sqlite3Fts1SimpleTokenizerModule(&m);
- } else {
- *pzErr = sqlite3_mprintf("unknown tokenizer: %s", argv[3]);
- assert( "unrecognized tokenizer"==NULL );
+  /* Process arguments to the module */
+  for(i=3; i<argc; i++){
+    int nToken;
+    azToken = tokenizeString(argv[i], &nToken);
+    if( azToken==0 ) goto connect_failed;
+    removeDelimiterTokens(azToken, 0, &nToken);
+    if( nToken>=2 && strcmp(azToken[0],"tokenizer")==0 ){
+      if( seen_tokenizer ){
+        *pzErr = sqlite3_mprintf("multiple tokenizer definitions");
+        goto connect_failed;
+      }
+      seen_tokenizer = 1;
+      if( !strcmp(azToken[1], "simple") ){
+        sqlite3Fts1SimpleTokenizerModule(&m);
+      } else {
+        *pzErr = sqlite3_mprintf("unknown tokenizer: %s", azToken[1]);
+        goto connect_failed;
+      }
+      rc = m->xCreate(nToken-2, (const char *const*)&azToken[2],&v->pTokenizer);
+      /* xCreate may fail without producing a tokenizer, so record the
+      ** module only on an object that actually exists. */
+      if( v->pTokenizer ){
+        v->pTokenizer->pModule = m;
+      }
+      m = 0;
+      if( rc!=SQLITE_OK || v->pTokenizer==0 ){
+        goto connect_failed;
+      }
+    }else if( nToken>=2 && strcmp(azToken[0], "columns")==0 ){
+      if( seen_columns ){
+        *pzErr = sqlite3_mprintf("multiple column definitions");
+        goto connect_failed;
+      }
+      /* Drop the leading "columns" keyword; what remains are the names.
+      ** Ownership of the token array moves to the vtab. */
+      removeDelimiterTokens(azToken, 1, &nToken);
+      v->nColumn = nToken;
+      v->azColumn = azToken;
+      azToken = 0;
+      seen_columns = 1;
+    }else{
+      *pzErr = sqlite3_mprintf("bad argument: %s", argv[i]);
+      goto connect_failed;
     }
+    free(azToken);
+    azToken = 0;
   }
- /* TODO(shess) Since tokenization impacts the index, the parameters
- ** to the tokenizer need to be identical when a persistent virtual
- ** table is re-created. One solution would be a meta-table to track
- ** such information in the database. Then we could verify that the
- ** information is identical on subsequent creates.
+  /* Put in default values for the column names and the tokenizer if
+  ** none is specified in the arguments.
   */
- /* TODO(shess) Why isn't argv already (const char **)? */
- rc = m->xCreate(argc-3, (const char **) (argv+3), &v->pTokenizer);
- if( rc!=SQLITE_OK ) return rc;
- v->pTokenizer->pModule = m;
+  if( !seen_tokenizer ){
+    sqlite3Fts1SimpleTokenizerModule(&m);
+    rc = m->xCreate(0, 0, &v->pTokenizer);
+    if( v->pTokenizer ){
+      v->pTokenizer->pModule = m;
+    }
+    m = 0;
+    if( rc!=SQLITE_OK || v->pTokenizer==0 ){
+      goto connect_failed;
+    }
+  }
+  if( !seen_columns ){
+    v->nColumn = 1;
+    v->azColumn = malloc( sizeof(char*)*2 );
+    if( v->azColumn==0 ) goto connect_failed;
+    v->azColumn[0] = "content";
+    v->azColumn[1] = 0;
+  }
   /* TODO: verify the existence of backing tables foo_content, foo_term */
   *ppVTab = &v->base;
   TRACE(("FTS1 Connect %p\n", v));
   return SQLITE_OK;
+
+connect_failed:
+  if( v ){
+    fulltext_vtab_destroy(v);
+  }
+  free(azToken);
+  return SQLITE_ERROR;
 }
-static int fulltextCreate(sqlite3 *db, void *pAux, int argc, char **argv,
+static int fulltextCreate(sqlite3 *db, void *pAux,
+ int argc, const char *const*argv,
sqlite3_vtab **ppVTab, char **pzErr){
int rc;
assert( argc>=3 );
-C Add\spzErr\sparameters\sto\sthe\sxConnect\sand\sxCreate\smethods\sof\svirtual\stables\nin\sorder\sto\sprovide\sbetter\serror\sreporting.\s\sThis\sis\san\sinterface\schange\nfor\svirtual\stables.\s\sPrior\svirtual\stable\simplementations\swill\sneed\sto\sbe\nmodified\sand\srecompiled.\s(CVS\s3402)
-D 2006-09-10T17:31:59
+C Add\sa\srudimentary\stokenizer\sand\sparser\sto\sFTS1\sfor\sparsing\sthe\smodule\narguments\sduring\sinitialization.\s\s\sRecognized\sarguments\sinclude\sa\ntokenizer\sselector\sand\sa\slist\sof\svirtual\stable\scolumns.\s(CVS\s3403)
+D 2006-09-11T00:34:22
F Makefile.in cabd42d34340f49260bc2a7668c38eba8d4cfd99
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F ext/fts1/README.txt 20ac73b006a70bcfd80069bdaf59214b6cf1db5e
F ext/fts1/ft_hash.c 3927bd880e65329bdc6f506555b228b28924921b
F ext/fts1/ft_hash.h 1a35e654a235c2c662d3ca0dfc3138ad60b8b7d5
-F ext/fts1/fts1.c a0f9600c5d3fedaf0002247b554c0570c431bf9e
+F ext/fts1/fts1.c 022a985bafaecdd6d245ddfeba68f9d268fccd9d
F ext/fts1/fts1.h fe8e8f38dd6d2d2645b9b0d6972e80985249575f
F ext/fts1/fts1_hash.c 3196cee866edbebb1c0521e21672e6d599965114
F ext/fts1/fts1_hash.h 957d378355ed29f672cd5add012ce8b088a5e089
-F ext/fts1/fts1_tokenizer.h a90c4d022d1c5e50ca931a9b6415bc8bce12b76e
+F ext/fts1/fts1_tokenizer.h 12c0e7ad83120aff1f86ca848149f96f61da738b
F ext/fts1/fts1_tokenizer1.c 1155942be01e8b191b13ac2ea4604b301f77e73e
F ext/fts1/fulltext.c d935e600d87bc86b7d64f55c7520ea41d6034c5c
F ext/fts1/fulltext.h 08525a47852d1d62a0be81d3fc3fe2d23b094efd
F src/select.c 0d4724930a1f34c747105ed1802fa4af0d8eb519
F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96
F src/shell.c 233f7766e532a204bed465249ffc584424ed1757
-F src/sqlite.h.in c76f7a4609631606f657fbe976e3bc901d39c2d3
+F src/sqlite.h.in 19f5390cce182242b309a053aa1ee2b902bee147
F src/sqlite3ext.h 11a046b3519c4b9b7709e6d6a95c3a36366f684a
F src/sqliteInt.h 259adce944cc3b28da1fa3df9beb9ba86017a45d
F src/table.c d8817f43a6c6bf139487db161760b9e1e02da3f1
F src/test5.c 7162f8526affb771c4ed256826eee7bb9eca265f
F src/test6.c 60a02961ceb7b3edc25f5dc5c1ac2556622a76de
F src/test7.c 03fa8d787f6aebc6d1f72504d52f33013ad2c8e3
-F src/test8.c f86da05e9611275a8ea8bbd679ebe89e9dddc4f1
+F src/test8.c cdde31e45651081a88845d5e66eeed450a7e2a3e
F src/test_async.c e3deaedd4d86a56391b81808fde9e44fbd92f1d3
F src/test_autoext.c bbb70bc1c83bd273cf59908ca9b486ae5df55a59
F src/test_loadext.c 22065d601a18878e5542191001f0eaa5d77c0ed8
F src/test_md5.c 6c42bc0a3c0b54be34623ff77a0eec32b2fa96e3
-F src/test_schema.c 01a3bdd6005bffe6212468bf8e232fe31086d235
+F src/test_schema.c ced72140a3a25c148975428e170ec1850d3c3a7d
F src/test_server.c a6460daed0b92ecbc2531b6dc73717470e7a648c
-F src/test_tclvar.c be4e54ce56d612a90907e5190d8142875cdc778c
+F src/test_tclvar.c 315e77c17f128ff8c06b38c08617fd07c825a95b
F src/tokenize.c dfdff21768fbedd40e8d3ca84fc5d0d7af2b46dd
F src/trigger.c 0fc40125820409a6274834a6e04ad804d96e2793
F src/update.c 951f95ef044cf6d28557c48dc35cb0711a0b9129
F src/vdbeaux.c 9fab61427a0741c9c123e8ff16e349b1f90397be
F src/vdbefifo.c 9efb94c8c3f4c979ebd0028219483f88e57584f5
F src/vdbemem.c 26623176bf1c616aa478da958fac49502491a921
-F src/vtab.c 430513b5e2b3cfe72f960be2d1dff41ce8ac0f9d
+F src/vtab.c 4d360f2222c6c9a4b779d733fbfb8ddf61be9eb4
F src/where.c 75a89957fcb8c068bec55caa4e9d2ed5fa0b0724
F tclinstaller.tcl 046e3624671962dc50f0481d7c25b38ef803eb42
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
-P 36693a5cb72b4363010f9ab0866e1f7865f65275
-R bc62906dea603a74fb4a9c89628cc681
+P f44b8bae97b6872524580009c96d07391578c388
+R 9998640c1fac069b758db49fd22e886b
U drh
-Z c46929b76e373fecb1fd2b6b3f4e1308
+Z 2b1ea65d3e0e9f0fb73f4523c49b6c09