return (fulltext_vtab *) c->base.pVtab;
}
-static const sqlite3_module fulltextModule; /* forward declaration */
+static const sqlite3_module fts2Module; /* forward declaration */
/* Return a dynamically generated statement of the form
* insert into %_content (rowid, ...) values (?, ...)
*/
static int constructVtab(
sqlite3 *db, /* The SQLite database connection */
+ fts2Hash *pHash, /* Hash table containing tokenizers */
TableSpec *spec, /* Parsed spec information from parseSpec() */
sqlite3_vtab **ppVTab, /* Write the resulting vtab structure here */
char **pzErr /* Write any error message here */
const sqlite3_tokenizer_module *m = NULL;
char *schema;
+ char const *zTok; /* Name of tokenizer to use for this fts table */
+ int nTok; /* Length of zTok, including nul terminator */
+
v = (fulltext_vtab *) malloc(sizeof(fulltext_vtab));
if( v==0 ) return SQLITE_NOMEM;
CLEAR(v);
if( spec->azTokenizer==0 ){
return SQLITE_NOMEM;
}
- /* TODO(shess) For now, add new tokenizers as else if clauses. */
- if( spec->azTokenizer[0]==0 || startsWith(spec->azTokenizer[0], "simple") ){
- sqlite3Fts2SimpleTokenizerModule(&m);
- }else if( startsWith(spec->azTokenizer[0], "porter") ){
- sqlite3Fts2PorterTokenizerModule(&m);
- }else{
+
+ zTok = spec->azTokenizer[0];
+ if( !zTok ){
+ zTok = "simple";
+ }
+ nTok = strlen(zTok)+1;
+
+ m = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zTok, nTok);
+ if( !m ){
*pzErr = sqlite3_mprintf("unknown tokenizer: %s", spec->azTokenizer[0]);
rc = SQLITE_ERROR;
goto err;
}
+
for(n=0; spec->azTokenizer[n]; n++){}
if( n ){
rc = m->xCreate(n-1, (const char*const*)&spec->azTokenizer[1],
int rc = parseSpec(&spec, argc, argv, pzErr);
if( rc!=SQLITE_OK ) return rc;
- rc = constructVtab(db, &spec, ppVTab, pzErr);
+ rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
clearTableSpec(&spec);
return rc;
}
");");
if( rc!=SQLITE_OK ) goto out;
- rc = constructVtab(db, &spec, ppVTab, pzErr);
+ rc = constructVtab(db, (fts2Hash *)pAux, &spec, ppVTab, pzErr);
out:
/* Common exit: release the parsed table spec on every path, then report
** the status of whichever step jumped here (or of constructVtab() when
** everything before it succeeded). Returning a constant 0 here would
** report success even though rc holds an error code. */
clearTableSpec(&spec);
return rc;
}
-static const sqlite3_module fulltextModule = {
+static const sqlite3_module fts2Module = {
/* iVersion */ 0,
/* xCreate */ fulltextCreate,
/* xConnect */ fulltextConnect,
/* xFindFunction */ fulltextFindFunction,
};
+/*
+** Destructor for the tokenizer hash table handed to the fts2 module as
+** its auxiliary pointer: empty the table, then free the table object.
+*/
+static void hashDestroy(void *p){
+  sqlite3Fts2HashClear((fts2Hash *)p);
+  sqlite3_free(p);
+}
+
+/*
+** The fts2 built-in tokenizers - "simple" and "porter" - are implemented
+** in files fts2_tokenizer1.c and fts2_porter.c respectively. The optional
+** "icu" tokenizer is implemented in fts2_icu.c. The following three
+** forward declarations are for functions defined in these files and
+** used to retrieve the respective implementations.
+**
+** Calling sqlite3Fts2SimpleTokenizerModule() sets the value pointed
+** to by the argument to point at the "simple" tokenizer implementation.
+** Function ...PorterTokenizerModule() sets *ppModule to point to the
+** porter tokenizer/stemmer implementation, and ...IcuTokenizerModule()
+** sets it to point to the ICU-based tokenizer implementation.
+*/
+void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+
+/*
+** Initialise the fts2 extension. If this extension is built as part
+** of the sqlite library, then this function is called directly by
+** SQLite. If fts2 is built as a dynamically loadable extension, this
+** function is called by the sqlite3_extension_init() entry point.
+*/
int sqlite3Fts2Init(sqlite3 *db){
- sqlite3_overload_function(db, "snippet", -1);
- sqlite3_overload_function(db, "offsets", -1);
- return sqlite3_create_module(db, "fts2", &fulltextModule, 0);
+ int rc = SQLITE_OK;
+ fts2Hash *pHash = 0;
+ const sqlite3_tokenizer_module *pSimple = 0;
+ const sqlite3_tokenizer_module *pPorter = 0;
+ const sqlite3_tokenizer_module *pIcu = 0;
+
+ /* Fetch the built-in tokenizer implementations. pIcu stays NULL unless
+ ** SQLITE_ENABLE_ICU is defined, in which case "icu" is not registered. */
+ sqlite3Fts2SimpleTokenizerModule(&pSimple);
+ sqlite3Fts2PorterTokenizerModule(&pPorter);
+#ifdef SQLITE_ENABLE_ICU
+ sqlite3Fts2IcuTokenizerModule(&pIcu);
+#endif
+
+ /* Allocate and initialise the hash-table used to store tokenizers. */
+ pHash = sqlite3_malloc(sizeof(fts2Hash));
+ if( !pHash ){
+ rc = SQLITE_NOMEM;
+ }else{
+ sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
+ }
+
+ /* Load the built-in tokenizers into the hash table. The key lengths
+ ** (7, 7, 4) include the nul terminator, matching the strlen()+1 length
+ ** used for look-ups. Any non-zero return from the insert is treated as
+ ** an allocation failure. */
+ if( rc==SQLITE_OK ){
+ if( sqlite3Fts2HashInsert(pHash, "simple", 7, (void *)pSimple)
+ || sqlite3Fts2HashInsert(pHash, "porter", 7, (void *)pPorter)
+ || (pIcu && sqlite3Fts2HashInsert(pHash, "icu", 4, (void *)pIcu))
+ ){
+ rc = SQLITE_NOMEM;
+ }
+ }
+
+ /* Create the virtual table wrapper around the hash-table and overload
+ ** the two scalar functions. If this is successful, register the
+ ** module with sqlite.
+ */
+ if( SQLITE_OK==rc
+ && SQLITE_OK==(rc = sqlite3Fts2InitHashTable(db, pHash, "fts2_tokenizer"))
+ && SQLITE_OK==(rc = sqlite3_overload_function(db, "snippet", -1))
+ && SQLITE_OK==(rc = sqlite3_overload_function(db, "offsets", -1))
+ ){
+ /* pHash is handed to the module together with hashDestroy as its
+ ** destructor. NOTE(review): whether hashDestroy runs if the
+ ** registration itself fails depends on sqlite3_create_module_v2();
+ ** confirm pHash cannot leak on that path. */
+ return sqlite3_create_module_v2(
+ db, "fts2", &fts2Module, (void *)pHash, hashDestroy
+ );
+ }
+
+ /* An error has occurred. Delete the hash table and return the error code.
+ ** NOTE(review): if sqlite3Fts2InitHashTable() succeeded before a later
+ ** step failed, the SQL functions it registered were given pHash as their
+ ** user data and are not unregistered here - confirm they cannot be
+ ** invoked after pHash is freed. */
+ assert( rc!=SQLITE_OK );
+ if( pHash ){
+ sqlite3Fts2HashClear(pHash);
+ sqlite3_free(pHash);
+ }
+ return rc;
}
#if !SQLITE_CORE
-int sqlite3_extension_init(sqlite3 *db, char **pzErrMsg,
- const sqlite3_api_routines *pApi){
+/*
+** Entry point used when fts2 is built outside the SQLite core (this
+** definition is compiled only when SQLITE_CORE is not set). It binds the
+** extension API table via SQLITE_EXTENSION_INIT2() and then delegates to
+** sqlite3Fts2Init(). pzErrMsg is not written by this function.
+*/
+int sqlite3_extension_init(
+ sqlite3 *db,
+ char **pzErrMsg,
+ const sqlite3_api_routines *pApi
+){
SQLITE_EXTENSION_INIT2(pApi)
return sqlite3Fts2Init(db);
}
--- /dev/null
+/*
+** 2007 June 22
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file implements a tokenizer for fts2 based on the ICU library.
+**
+** $Id: fts2_icu.c,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
+*/
+
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2)
+#ifdef SQLITE_ENABLE_ICU
+
+#include <assert.h>
+#include <string.h>
+#include "fts2_tokenizer.h"
+
+#include <unicode/ubrk.h>
+#include <unicode/ucol.h>
+#include <unicode/ustring.h>
+#include <unicode/utf16.h>
+
+typedef struct IcuTokenizer IcuTokenizer;
+typedef struct IcuCursor IcuCursor;
+
+/*
+** An ICU tokenizer instance. icuCreate() allocates the locale string (if
+** any) in the same block of memory as this structure.
+*/
+struct IcuTokenizer {
+ sqlite3_tokenizer base; /* Base class; must be the first member */
+ char *zLocale; /* Locale passed to ubrk_open(), or NULL if none given */
+};
+
+/*
+** Cursor over one input string. icuOpen() allocates aChar[] and aOffset[]
+** in the same block of memory as this structure; zBuffer is a separate
+** allocation that icuNext() grows on demand.
+*/
+struct IcuCursor {
+ sqlite3_tokenizer_cursor base; /* Base class; must be the first member */
+
+ UBreakIterator *pIter; /* ICU break-iterator object */
+ int nChar; /* Number of UChar elements used in aChar[] */
+ UChar *aChar; /* Case-folded copy of input using utf-16 encoding */
+ int *aOffset; /* Byte offsets into the utf-8 input, indexed by aChar[] position */
+
+ int nBuffer; /* Allocated size of zBuffer in bytes */
+ char *zBuffer; /* utf-8 text of the token most recently returned */
+
+ int iToken; /* Position value to report for the next token */
+};
+
+/*
+** Create a new tokenizer instance.
+*/
+static int icuCreate(
+  int argc,                            /* Number of entries in argv[] */
+  const char * const *argv,            /* Tokenizer creation arguments */
+  sqlite3_tokenizer **ppTokenizer      /* OUT: Created tokenizer */
+){
+  /* Bytes needed for a private copy of the locale name (including the
+  ** nul terminator), or 0 when no argument was supplied. */
+  int nLocale = (argc>0) ? (int)(strlen(argv[0])+1) : 0;
+  IcuTokenizer *pNew;
+
+  /* One allocation holds the tokenizer followed by the locale string. */
+  pNew = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+nLocale);
+  if( pNew==0 ) return SQLITE_NOMEM;
+  memset(pNew, 0, sizeof(IcuTokenizer));
+
+  if( nLocale!=0 ){
+    pNew->zLocale = (char *)&pNew[1];
+    memcpy(pNew->zLocale, argv[0], nLocale);
+  }
+
+  *ppTokenizer = (sqlite3_tokenizer *)pNew;
+  return SQLITE_OK;
+}
+
+/*
+** Destroy a tokenizer
+*/
+static int icuDestroy(sqlite3_tokenizer *pTokenizer){
+  /* The locale string shares the tokenizer's allocation (see icuCreate),
+  ** so a single free releases everything. */
+  sqlite3_free((IcuTokenizer *)pTokenizer);
+  return SQLITE_OK;
+}
+
+/*
+** Prepare to begin tokenizing a particular string. The input
+** string to be tokenized is pInput[0..nBytes-1]. A cursor
+** used to incrementally tokenize this string is returned in
+** *ppCursor.
+*/
+static int icuOpen(
+  sqlite3_tokenizer *pTokenizer,         /* The tokenizer */
+  const char *zInput,                    /* Input string */
+  int nInput,                            /* Length of zInput in bytes */
+  sqlite3_tokenizer_cursor **ppCursor    /* OUT: Tokenization cursor */
+){
+  IcuTokenizer *p = (IcuTokenizer *)pTokenizer;
+  IcuCursor *pCsr;
+
+  const int32_t opt = U_FOLD_CASE_DEFAULT;
+  UErrorCode status = U_ZERO_ERROR;
+  int nChar;
+
+  UChar32 c;
+  int iInput = 0;
+  int iOut = 0;
+
+  *ppCursor = 0;
+
+  /* Normalise the input description before it is used for sizing. A NULL
+  ** string is treated as empty and a negative length as "nul-terminated";
+  ** without this the nInput+1 arithmetic below could under-size the
+  ** aChar[]/aOffset[] arrays. */
+  if( zInput==0 ){
+    nInput = 0;
+    zInput = "";
+  }else if( nInput<0 ){
+    nInput = (int)strlen(zInput);
+  }
+
+  /* One allocation holds the cursor, the utf-16 copy of the input and the
+  ** offset table. The utf-16 form never needs more code units than the
+  ** utf-8 form has bytes, so nInput+1 entries suffice for aChar[]. */
+  nChar = nInput+1;
+  pCsr = (IcuCursor *)sqlite3_malloc(
+      sizeof(IcuCursor) +                /* IcuCursor */
+      nChar * sizeof(UChar) +            /* IcuCursor.aChar[] */
+      (nChar+1) * sizeof(int)            /* IcuCursor.aOffset[] */
+  );
+  if( !pCsr ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCsr, 0, sizeof(IcuCursor));
+  pCsr->aChar = (UChar *)&pCsr[1];
+  pCsr->aOffset = (int *)&pCsr->aChar[nChar];
+
+  /* Decode the utf-8 input one code point at a time, case-fold it and
+  ** append it to aChar[]. aOffset[i] records the byte offset in zInput at
+  ** which the text stored from aChar[i] onwards begins. The loop stops at
+  ** the end of input, at an embedded nul, or at an invalid sequence
+  ** (U8_NEXT yields a negative value for those). */
+  pCsr->aOffset[iOut] = iInput;
+  U8_NEXT(zInput, iInput, nInput, c);
+  while( c>0 ){
+    int isError = 0;
+    c = u_foldCase(c, opt);
+    U16_APPEND(pCsr->aChar, iOut, nChar, c, isError);
+    if( isError ){
+      sqlite3_free(pCsr);
+      return SQLITE_ERROR;
+    }
+    pCsr->aOffset[iOut] = iInput;
+
+    if( iInput<nInput ){
+      U8_NEXT(zInput, iInput, nInput, c);
+    }else{
+      c = 0;
+    }
+  }
+
+  pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status);
+  if( !U_SUCCESS(status) ){
+    sqlite3_free(pCsr);
+    return SQLITE_ERROR;
+  }
+  pCsr->nChar = iOut;
+
+  ubrk_first(pCsr->pIter);
+  *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
+  return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to icuOpen().
+*/
+static int icuClose(sqlite3_tokenizer_cursor *pCursor){
+  IcuCursor *pIcu = (IcuCursor *)pCursor;
+
+  /* Release the break iterator and the token buffer first; aChar[] and
+  ** aOffset[] live inside the cursor allocation and go with it. */
+  ubrk_close(pIcu->pIter);
+  sqlite3_free(pIcu->zBuffer);
+  sqlite3_free(pIcu);
+  return SQLITE_OK;
+}
+
+/*
+** Extract the next token from a tokenization cursor.
+*/
+static int icuNext(
+  sqlite3_tokenizer_cursor *pCursor,  /* Cursor returned by icuOpen */
+  const char **ppToken,               /* OUT: *ppToken is the token text */
+  int *pnBytes,                       /* OUT: Number of bytes in token */
+  int *piStartOffset,                 /* OUT: Starting offset of token */
+  int *piEndOffset,                   /* OUT: Ending offset of token */
+  int *piPosition                     /* OUT: Position integer of token */
+){
+  IcuCursor *pCsr = (IcuCursor *)pCursor;
+
+  int iStart = 0;
+  int iEnd = 0;
+  int nByte = 0;
+
+  /* Step the break iterator until it yields a span that is not empty
+  ** after leading whitespace has been trimmed. Returns SQLITE_DONE once
+  ** the iterator is exhausted. */
+  while( iStart==iEnd ){
+    UChar32 c;
+
+    iStart = ubrk_current(pCsr->pIter);
+    iEnd = ubrk_next(pCsr->pIter);
+    if( iEnd==UBRK_DONE ){
+      return SQLITE_DONE;
+    }
+
+    while( iStart<iEnd ){
+      int iWhite = iStart;
+      /* aChar[] holds utf-16 code units, so it must be decoded with the
+      ** utf-16 macro; the utf-8 macro would misread the buffer. */
+      U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
+      if( u_isspace(c) ){
+        iStart = iWhite;
+      }else{
+        break;
+      }
+    }
+    assert(iStart<=iEnd);
+  }
+
+  /* Convert aChar[iStart..iEnd) to utf-8 in zBuffer. The first pass may
+  ** run with a buffer that is too small; u_strToUTF8() still reports the
+  ** required size in nByte, so the buffer is grown and the conversion
+  ** repeated until it fits. */
+  do {
+    UErrorCode status = U_ZERO_ERROR;
+    if( nByte ){
+      char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte);
+      if( !zNew ){
+        return SQLITE_NOMEM;
+      }
+      pCsr->zBuffer = zNew;
+      pCsr->nBuffer = nByte;
+    }
+
+    u_strToUTF8(
+        pCsr->zBuffer, pCsr->nBuffer, &nByte,    /* Output vars */
+        &pCsr->aChar[iStart], iEnd-iStart,       /* Input vars */
+        &status                                  /* Output success/failure */
+    );
+  } while( nByte>pCsr->nBuffer );
+
+  *ppToken = pCsr->zBuffer;
+  *pnBytes = nByte;
+  *piStartOffset = pCsr->aOffset[iStart];
+  *piEndOffset = pCsr->aOffset[iEnd];
+  *piPosition = pCsr->iToken++;
+
+  return SQLITE_OK;
+}
+
+/*
+** The set of routines that implement the ICU tokenizer
+*/
+static const sqlite3_tokenizer_module icuTokenizerModule = {
+ 0, /* iVersion */
+ icuCreate, /* xCreate */
+ icuDestroy, /* xDestroy */
+ icuOpen, /* xOpen */
+ icuClose, /* xClose */
+ icuNext, /* xNext */
+};
+
+/*
+** Set *ppModule to point at the implementation of the ICU tokenizer.
+*/
+void sqlite3Fts2IcuTokenizerModule(sqlite3_tokenizer_module const **ppModule){
+  /* Hand out the address of the file-scope method table defined above. */
+  const sqlite3_tokenizer_module *pIcuModule = &icuTokenizerModule;
+  *ppModule = pIcuModule;
+}
+
+#endif /* defined(SQLITE_ENABLE_ICU) */
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS2) */
--- /dev/null
+
+#include "sqlite3.h"
+#include "sqlite3ext.h"
+SQLITE_EXTENSION_INIT1
+
+#include "fts2_hash.h"
+#include "fts2_tokenizer.h"
+#include <assert.h>
+
+/*
+** Implementation of the SQL scalar function for accessing the underlying
+** hash table. This function may be called as follows:
+**
+** SELECT <function-name>(<key-name>);
+** SELECT <function-name>(<key-name>, <pointer>);
+**
+** where <function-name> is the name passed as the second argument
+** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer').
+**
+** If the <pointer> argument is specified, it must be a blob value
+** containing a pointer to be stored as the hash data corresponding
+** to the string <key-name>. If <pointer> is not specified, then
+** the string <key-name> must already exist in the hash table. Otherwise,
+** an error is returned.
+**
+** Whether or not the <pointer> argument is specified, the value returned
+** is a blob containing the pointer stored as the hash data corresponding
+** to string <key-name> (after the hash-table is updated, if applicable).
+*/
+static void scalarFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  fts2Hash *pTokenizers;          /* Hash table supplied as user data */
+  const unsigned char *zKey;      /* Tokenizer name (first SQL argument) */
+  int nKey;                       /* Size of zKey including nul terminator */
+  void *pModule = 0;              /* Pointer value stored under zKey */
+
+  assert( argc==1 || argc==2 );
+
+  pTokenizers = (fts2Hash *)sqlite3_user_data(context);
+
+  zKey = sqlite3_value_text(argv[0]);
+  nKey = sqlite3_value_bytes(argv[0])+1;
+
+  if( argc!=2 ){
+    /* One-argument form: look the name up; it must already be present. */
+    pModule = sqlite3Fts2HashFind(pTokenizers, zKey, nKey);
+    if( pModule==0 ){
+      char *zMsg = sqlite3_mprintf("unknown tokenizer: %s", zKey);
+      sqlite3_result_error(context, zMsg, -1);
+      sqlite3_free(zMsg);
+      return;
+    }
+  }else{
+    /* Two-argument form: the blob must be exactly one pointer wide. Its
+    ** contents are stored as the hash data for zKey. */
+    void *pPrev;
+    if( sqlite3_value_bytes(argv[1])!=sizeof(pModule) ){
+      sqlite3_result_error(context, "argument type mismatch", -1);
+      return;
+    }
+    pModule = *(void **)sqlite3_value_blob(argv[1]);
+    pPrev = sqlite3Fts2HashInsert(pTokenizers, (void *)zKey, nKey, pModule);
+    /* Getting the new data pointer back is reported as out-of-memory. */
+    if( pPrev==pModule ){
+      sqlite3_result_error(context, "out of memory", -1);
+      return;
+    }
+  }
+
+  /* In both forms the result is a blob holding the pointer itself. */
+  sqlite3_result_blob(
+      context, (void *)&pModule, sizeof(pModule), SQLITE_TRANSIENT
+  );
+}
+
+#ifdef SQLITE_TEST
+
+#include <tcl.h>
+
+/*
+** Implementation of a special SQL scalar function for testing tokenizers
+** designed to be used in concert with the Tcl testing framework. This
+** function must be called with two arguments:
+**
+** SELECT <function-name>(<key-name>, <input-string>);
+** SELECT <function-name>(<key-name>, <pointer>);
+**
+** where <function-name> is the name passed as the second argument
+** to the sqlite3Fts2InitHashTable() function (e.g. 'fts2_tokenizer')
+** concatenated with the string '_test' (e.g. 'fts2_tokenizer_test').
+**
+** The return value is a string that may be interpreted as a Tcl
+** list. For each token in the <input-string>, three elements are
+** added to the returned list. The first is the token position, the
+** second is the token text (folded, stemmed, etc.) and the third is the
+** substring of <input-string> associated with the token. For example,
+** using the built-in "simple" tokenizer:
+**
+** SELECT fts2_tokenizer_test('simple', 'I don''t see how');
+**
+** will return the string:
+**
+** "{0 i I 1 dont don't 2 see see 3 how how}"
+**
+*/
+static void testFunc(
+  sqlite3_context *context,
+  int argc,
+  sqlite3_value **argv
+){
+  fts2Hash *pHash;
+  sqlite3_tokenizer_module *p;
+  sqlite3_tokenizer *pTokenizer = 0;
+  sqlite3_tokenizer_cursor *pCsr = 0;
+
+  const char *zErr = 0;           /* Static error text, or 0 for success */
+
+  const char *zName;
+  int nName;
+  const char *zInput;
+  int nInput;
+
+  const char *zToken;
+  int nToken;
+  int iStart;
+  int iEnd;
+  int iPos;
+
+  Tcl_Obj *pRet;
+
+  assert( argc==2 );
+
+  nName = sqlite3_value_bytes(argv[0]);
+  zName = (const char *)sqlite3_value_text(argv[0]);
+  nInput = sqlite3_value_bytes(argv[1]);
+  zInput = (const char *)sqlite3_value_text(argv[1]);
+
+  /* Hash keys include the nul terminator, hence nName+1. */
+  pHash = (fts2Hash *)sqlite3_user_data(context);
+  p = (sqlite3_tokenizer_module *)sqlite3Fts2HashFind(pHash, zName, nName+1);
+
+  if( !p ){
+    /* Distinct name from the outer zErr so the two are never confused. */
+    char *zMsg = sqlite3_mprintf("unknown tokenizer: %s", zName);
+    sqlite3_result_error(context, zMsg, -1);
+    sqlite3_free(zMsg);
+    return;
+  }
+
+  pRet = Tcl_NewObj();
+  Tcl_IncrRefCount(pRet);
+
+  if( SQLITE_OK!=p->xCreate(0, 0, &pTokenizer) ){
+    zErr = "error in xCreate()";
+    pTokenizer = 0;               /* Nothing to destroy at finish: */
+    goto finish;
+  }
+  pTokenizer->pModule = p;
+  if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
+    zErr = "error in xOpen()";
+    goto finish;
+  }
+  pCsr->pTokenizer = pTokenizer;
+
+  /* Append (position, token text, matching input substring) per token. */
+  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
+    zToken = &zInput[iStart];
+    nToken = iEnd-iStart;
+    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
+  }
+
+  if( SQLITE_OK!=p->xClose(pCsr) ){
+    zErr = "error in xClose()";
+    goto finish;
+  }
+  if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
+    zErr = "error in xDestroy()";
+  }
+  pTokenizer = 0;                 /* xDestroy() has been attempted */
+
+finish:
+  /* If xOpen() or xClose() failed the tokenizer is still alive; destroy
+  ** it here so the error paths do not leak it. */
+  if( pTokenizer ){
+    p->xDestroy(pTokenizer);
+  }
+  if( zErr ){
+    sqlite3_result_error(context, zErr, -1);
+  }else{
+    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
+  }
+  Tcl_DecrRefCount(pRet);
+}
+#endif
+
+/*
+** Set up SQL objects in database db used to access the contents of
+** the hash table pointed to by argument pHash. The hash table must
+** have been initialised to use string keys, and to take a private copy
+** of the key when a value is inserted. i.e. by a call similar to:
+**
+** sqlite3Fts2HashInit(pHash, FTS2_HASH_STRING, 1);
+**
+** This function adds a scalar function (see header comment above
+** scalarFunc() in this file for details) and, if ENABLE_TABLE is
+** defined at compilation time, a temporary virtual table (see header
+** comment above struct HashTableVtab) to the database schema. Both
+** provide read/write access to the contents of *pHash.
+**
+** The third argument to this function, zName, is used as the name
+** of both the scalar and, if created, the virtual table.
+*/
+int sqlite3Fts2InitHashTable(
+  sqlite3 *db,
+  fts2Hash *pHash,
+  const char *zName
+){
+  int rc;
+  void *pUser = (void *)pHash;    /* User data handed to each SQL function */
+  char *zTestName = 0;            /* "<zName>_test"; only set in test builds */
+
+#ifdef SQLITE_TEST
+  zTestName = sqlite3_mprintf("%s_test", zName);
+  if( zTestName==0 ){
+    return SQLITE_NOMEM;
+  }
+#endif
+
+  /* Register the 1- and 2-argument forms of the scalar function, stopping
+  ** at the first registration that fails. */
+  rc = sqlite3_create_function(
+      db, zName, 1, SQLITE_ANY, pUser, scalarFunc, 0, 0
+  );
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(
+        db, zName, 2, SQLITE_ANY, pUser, scalarFunc, 0, 0
+    );
+  }
+#ifdef SQLITE_TEST
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_create_function(
+        db, zTestName, 2, SQLITE_ANY, pUser, testFunc, 0, 0
+    );
+  }
+#endif
+
+  sqlite3_free(zTestName);
+  return rc;
+}
+
/* Tokenizer implementations will typically add additional fields */
};
-/*
-** Get the module for a tokenizer which generates tokens based on a
-** set of non-token characters. The default is to break tokens at any
-** non-alnum character, though the set of delimiters can also be
-** specified by the first argv argument to xCreate().
-*/
-/* TODO(shess) This doesn't belong here. Need some sort of
-** registration process.
-*/
-void sqlite3Fts2SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
-void sqlite3Fts2PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
-
#endif /* _FTS2_TOKENIZER_H_ */
2 COMPILATION AND USAGE
The easiest way to compile and use the ICU extension is to build
- and use it as a dynamically loadable SQLite extension.
+ and use it as a dynamically loadable SQLite extension. To do this
+ using gcc on *nix:
+ gcc -shared icu.c `icu-config --ldflags` -o libSqliteIcu.so
+ You may need to add "-I" flags so that gcc can find sqlite3ext.h
+ and sqlite3.h. The resulting shared lib, libSqliteIcu.so, may be
+ loaded into sqlite in the same way as any other dynamically loadable
+ extension.
3 BUGS, PROBLEMS AND SECURITY ISSUES
** May you share freely, never taking more than you give.
**
*************************************************************************
-** $Id: icu.c,v 1.5 2007/06/11 08:00:00 danielk1977 Exp $
+** $Id: icu.c,v 1.6 2007/06/22 15:21:16 danielk1977 Exp $
**
** This file implements an integration between the ICU library
** ("International Components for Unicode", an open-source library
#include <unicode/ucol.h>
#include <assert.h>
-#include "sqlite3.h"
#ifndef SQLITE_CORE
#include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
+#else
+ #include "sqlite3.h"
#endif
/*
nInput = sqlite3_value_bytes16(apArg[0]);
nOutput = nInput * 2 + 2;
- zOutput = sqlite3_malloc(nInput*2+2);
+ zOutput = sqlite3_malloc(nOutput);
if( !zOutput ){
return;
}
$(TOP)/ext/icu/icu.c \
$(TOP)/ext/fts2/fts2.c \
$(TOP)/ext/fts2/fts2_hash.c \
+ $(TOP)/ext/fts2/fts2_icu.c \
$(TOP)/ext/fts2/fts2_porter.c \
+ $(TOP)/ext/fts2/fts2_tokenizer.c \
$(TOP)/ext/fts2/fts2_tokenizer1.c
# Generated source code files
-C Clarify\sdocumentation\sof\sthe\scolumn\smetadata\sAPIs.\s\sMake\ssure\sthat\sthe\nnew\sdocumentation\sclaims\sare\stested.\s(CVS\s4107)
-D 2007-06-21T15:25:05
+C Extend\sfts2\sso\sthat\suser\sdefined\stokenizers\smay\sbe\sadded.\sAdd\sa\stokenizer\sthat\suses\sthe\sICU\slibrary\sif\savailable.\sDocumentation\sand\stests\sto\scome.\s(CVS\s4108)
+D 2007-06-22T15:21:16
F Makefile.in 7f7485a4cc039476a42e534b3f26ec90e2f9753e
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F ext/fts1/simple_tokenizer.c 1844d72f7194c3fd3d7e4173053911bf0661b70d
F ext/fts1/tokenizer.h 0c53421b832366d20d720d21ea3e1f6e66a36ef9
F ext/fts2/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts2/fts2.c 8f9bd5fce1a6900072ad9b65dd41fe8ba010f099
+F ext/fts2/fts2.c 841766f2f14d68e623404f9531d98afa0f7cbf05
F ext/fts2/fts2.h 591916a822cfb6426518fdbf6069359119bc46eb
F ext/fts2/fts2_hash.c b3f22116d4ef0bc8f2da6e3fdc435c86d0951a9b
F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e
+F ext/fts2/fts2_icu.c 45b54d1e075020b35db20f69d829f95ca0651111
F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d
-F ext/fts2/fts2_tokenizer.h 4c5ffe31d63622869eb6eec1503df7f6996fd1bd
+F ext/fts2/fts2_tokenizer.c 836373ee0fab4f8288a7815496529f25e4504881
+F ext/fts2/fts2_tokenizer.h 6d151c51382e8f6cf689c616bb697fe780478089
F ext/fts2/fts2_tokenizer1.c 5c979fe8815f95396beb22b627571da895a025af
F ext/fts2/mkfts2amal.tcl 2a9ec76b0760fe7f3669dca5bc0d60728bc1c977
-F ext/icu/README.txt a470afe5adf6534cc0bdafca31e6cf4d88c321fa
-F ext/icu/icu.c daab19e2c5221685688ecff2bb75bf9e0eea361d
+F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33
+F ext/icu/icu.c 61a345d8126686aa3487aa8d2d0f68abd655f7a4
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
-F main.mk 522c81a818f2f81eb5e904ce983710449c5d76ad
+F main.mk 9007943b573ddccc1bb39f8fcb7b2812f6cc8792
F mkdll.sh 37fa8a7412e51b5ab2bc6d4276135f022a0feffb
F mkopcodec.awk bd46ad001c98dfbab07b1713cb8e692fa0e5415d
F mkopcodeh.awk cde995d269aa06c94adbf6455bea0acedb913fa5
F src/delete.c 5c0d89b3ef7d48fe1f5124bfe8341f982747fe29
F src/experimental.c 1b2d1a6cd62ecc39610e97670332ca073c50792b
F src/expr.c 763ca2b39fe551a6dc7d37ba40544311622eee32
-F src/func.c 4668843e6f0d27653939e12fc32276fb8e38d21a
+F src/func.c 6b45261aa2c514f642201b90493af68469c04af6
F src/hash.c 67b23e14f0257b69a3e8aa663e4eeadc1a2b6fd5
F src/hash.h 1b3f7e2609141fd571f62199fc38687d262e9564
F src/insert.c e595ca26805dfb3a9ebaabc28e7947c479f3b14d
F src/legacy.c 388c71ad7fbcd898ba1bcbfc98a3ac954bfa5d01
F src/limits.h 71ab25f17e35e0a9f3f6f234b8ed49cc56731d35
-F src/loadext.c afe4f4755dc49c36ef505748bbdddecb9f1d02a2
-F src/main.c 797dc983716c1480f6af78a36be3add8806211a1
+F src/loadext.c b85b4e777cda9bf95475152ed240b6dfd2a0ecd9
+F src/main.c 65fc7de0b3c2e5b637c000ecf419c35de2525ef9
F src/malloc.c fa9bbccc4e6d099cd04c2518d238a1669c9d1020
F src/md5.c c5fdfa5c2593eaee2e32a5ce6c6927c986eaf217
F src/os.c 1f10b47acc1177fb9225edb4f5f0d25ed716f9cb
F src/select.c 33a258fc9c9dccb28ae2d3a02f1e1148d6433148
F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96
F src/shell.c 4b0fc3c76a9f23a1c963e01703c0fbbca1b5c34d
-F src/sqlite.h.in 475e0e3dbd34c151ca89423c97d878c99575c71a
-F src/sqlite3ext.h 7d0d363ea7327e817ef0dfe1b7eee1f171b72890
-F src/sqliteInt.h ed31d1a0311c1ffc018fa2e9035a6cf7985049c8
+F src/sqlite.h.in 6f290b660b2e7c3359968bb4b344ec31a1178746
+F src/sqlite3ext.h 95575e0d175a0271fe2c3232c0d11e8720ed6887
+F src/sqliteInt.h 6503239d26b1943227031aa005320ef09b9b92b7
F src/sqliteLimit.h f14609c27636ebc217c9603ade26dbdd7d0f6afa
F src/table.c a8de75bcedf84d4060d804264b067ab3b1a3561d
F src/tclsqlite.c 4bffe56752d2c24ade23340e46a91fd92c316e08
F src/vdbeblob.c 96f3572fdc45eda5be06e6372b612bc30742d9f0
F src/vdbefifo.c 3ca8049c561d5d67cbcb94dc909ae9bb68c0bf8f
F src/vdbemem.c ca4d3994507cb0a9504820293af69f5c778b4abd
-F src/vtab.c c5ebebf615b2f29499fbe97a584c4bb342632aa0
+F src/vtab.c 51d43cda45d25e6f3a15d19fe32992b7756e74db
F src/where.c 12387641659605318ae03d87f0687f223dfc9568
F tclinstaller.tcl 4356d9d94d2b5ed5e68f9f0c80c4df3048dd7617
F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2
F test/fts2m.test 4b30142ead6f3ed076e880a2a464064c5ad58c51
F test/fts2n.test a70357e72742681eaebfdbe9007b87ff3b771638
F test/fts2o.test 05ce2ac9111c29998418a584de02136a0ded471b
+F test/fts2token.test 8cfc9ee33361b93fa175197f25fefdd13dfb442e
F test/func.test 605989453d1b42cec1d05c17aa232dc98e3e04e6
F test/fuzz.test 62fc19dd36a427777fd671b569df07166548628a
F test/fuzz2.test ea38692ce2da99ad79fe0be5eb1a452c1c4d37bb
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
-P af4b914a2152ce021401b6f78bb88a5323aa6fc2
-R 9c91b2e377cf682d8693c478c169d4ef
-U drh
-Z 97256b78e966a6731710de3bdea5bd11
+P 2dafe08a91b5328a9d0df5ab29c3ff2d94ad5f6f
+R c1366eff2872139d79721716582129d6
+U danielk1977
+Z 5d0863f4b162f005704998d90939d28f
-2dafe08a91b5328a9d0df5ab29c3ff2d94ad5f6f
\ No newline at end of file
+68677e420c744b39ea9d7399819e0f376748886d
\ No newline at end of file
** sqliteRegisterBuildinFunctions() found at the bottom of the file.
** All other code has file scope.
**
-** $Id: func.c,v 1.160 2007/06/07 19:08:33 drh Exp $
+** $Id: func.c,v 1.161 2007/06/22 15:21:16 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include <ctype.h>
}
}
sqlite3RegisterDateTimeFunctions(db);
- sqlite3_overload_function(db, "MATCH", 2);
+ if( !sqlite3MallocFailed() ){
+ int rc = sqlite3_overload_function(db, "MATCH", 2);
+ assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
+ if( rc==SQLITE_NOMEM ){
+ sqlite3FailedMalloc();
+ }
+ }
#ifdef SQLITE_SSE
(void)sqlite3SseFunctions(db);
#endif
sqlite3_create_function,
sqlite3_create_function16,
sqlite3_create_module,
+ sqlite3_create_module_v2,
sqlite3_data_count,
sqlite3_db_handle,
sqlite3_declare_vtab,
** other files are for internal use by SQLite and should not be
** accessed by users of the library.
**
-** $Id: main.c,v 1.376 2007/05/08 20:37:39 drh Exp $
+** $Id: main.c,v 1.377 2007/06/22 15:21:16 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "os.h"
#ifndef SQLITE_OMIT_VIRTUALTABLE
for(i=sqliteHashFirst(&db->aModule); i; i=sqliteHashNext(i)){
Module *pMod = (Module *)sqliteHashData(i);
+ if( pMod->xDestroy ){
+ pMod->xDestroy(pMod->pAux);
+ }
sqliteFree(pMod);
}
sqlite3HashClear(&db->aModule);
db->aDb[1].safety_level = 1;
#endif
+ db->magic = SQLITE_MAGIC_OPEN;
+ if( sqlite3MallocFailed() ){
+ goto opendb_out;
+ }
+
/* Register all built-in functions, but do not attempt to read the
** database schema yet. This is delayed until the first time the database
** is accessed.
*/
- if( !sqlite3MallocFailed() ){
- sqlite3Error(db, SQLITE_OK, 0);
- sqlite3RegisterBuiltinFunctions(db);
- }
- db->magic = SQLITE_MAGIC_OPEN;
+ sqlite3Error(db, SQLITE_OK, 0);
+ sqlite3RegisterBuiltinFunctions(db);
/* Load automatic extensions - extensions that have been registered
** using the sqlite3_automatic_extension() API.
*/
(void)sqlite3AutoLoadExtensions(db);
+ if( sqlite3_errcode(db)!=SQLITE_OK ){
+ goto opendb_out;
+ }
#ifdef SQLITE_ENABLE_FTS1
- {
+ if( !sqlite3MallocFailed() ){
extern int sqlite3Fts1Init(sqlite3*);
- sqlite3Fts1Init(db);
+ rc = sqlite3Fts1Init(db);
}
#endif
#ifdef SQLITE_ENABLE_FTS2
- {
+ if( !sqlite3MallocFailed() && rc==SQLITE_OK ){
extern int sqlite3Fts2Init(sqlite3*);
- sqlite3Fts2Init(db);
+ rc = sqlite3Fts2Init(db);
}
#endif
#ifdef SQLITE_ENABLE_ICU
- if( !sqlite3MallocFailed() ){
+ if( !sqlite3MallocFailed() && rc==SQLITE_OK ){
extern int sqlite3IcuInit(sqlite3*);
- sqlite3IcuInit(db);
+ rc = sqlite3IcuInit(db);
}
#endif
+ sqlite3Error(db, rc, 0);
/* -DSQLITE_DEFAULT_LOCKING_MODE=1 makes EXCLUSIVE the default locking
** mode. -DSQLITE_DEFAULT_LOCKING_MODE=0 make NORMAL the default locking
** the version number) and changes its name to "sqlite3.h" as
** part of the build process.
**
-** @(#) $Id: sqlite.h.in,v 1.214 2007/06/21 15:25:05 drh Exp $
+** @(#) $Id: sqlite.h.in,v 1.215 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITE3_H_
#define _SQLITE3_H_
void * /* Client data for xCreate/xConnect */
);
+/*
+** This routine is identical to the sqlite3_create_module() method above,
+** except that it allows a destructor function to be specified. It is
+** even more experimental than the rest of the virtual tables API.
+*/
+int sqlite3_create_module_v2(
+ sqlite3 *db, /* SQLite connection to register module with */
+ const char *zName, /* Name of the module */
+ const sqlite3_module *, /* Methods for the module */
+ void *, /* Client data for xCreate/xConnect */
+ void(*xDestroy)(void*) /* Module destructor function */
+);
+
/*
** Every module implementation uses a subclass of the following structure
** to describe a particular instance of the module. Each subclass will
** as extensions by SQLite should #include this file instead of
** sqlite3.h.
**
-** @(#) $Id: sqlite3ext.h,v 1.10 2007/03/29 18:46:01 drh Exp $
+** @(#) $Id: sqlite3ext.h,v 1.11 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITE3EXT_H_
#define _SQLITE3EXT_H_
int (*create_function)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
int (*create_function16)(sqlite3*,const void*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*);
+ int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*,void (*xDestroy)(void *));
int (*data_count)(sqlite3_stmt*pStmt);
sqlite3 * (*db_handle)(sqlite3_stmt*);
int (*declare_vtab)(sqlite3*,const char*);
#define sqlite3_create_function sqlite3_api->create_function
#define sqlite3_create_function16 sqlite3_api->create_function16
#define sqlite3_create_module sqlite3_api->create_module
+#define sqlite3_create_module_v2 sqlite3_api->create_module_v2
#define sqlite3_data_count sqlite3_api->data_count
#define sqlite3_db_handle sqlite3_api->db_handle
#define sqlite3_declare_vtab sqlite3_api->declare_vtab
*************************************************************************
** Internal interface definitions for SQLite.
**
-** @(#) $Id: sqliteInt.h,v 1.574 2007/06/20 15:29:25 drh Exp $
+** @(#) $Id: sqliteInt.h,v 1.575 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef _SQLITEINT_H_
#define _SQLITEINT_H_
const sqlite3_module *pModule; /* Callback pointers */
const char *zName; /* Name passed to create_module() */
void *pAux; /* pAux passed to create_module() */
+ void (*xDestroy)(void *); /* Module destructor function */
};
/*
*************************************************************************
** This file contains code used to help implement virtual tables.
**
-** $Id: vtab.c,v 1.46 2007/05/04 13:15:57 drh Exp $
+** $Id: vtab.c,v 1.47 2007/06/22 15:21:16 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_VIRTUALTABLE
#include "sqliteInt.h"
-/*
-** External API function used to create a new virtual-table module.
-*/
-int sqlite3_create_module(
+static int createModule( /* Shared body of sqlite3_create_module() and sqlite3_create_module_v2() */
sqlite3 *db, /* Database in which module is registered */
const char *zName, /* Name assigned to this module */
const sqlite3_module *pModule, /* The definition of the module */
- void *pAux /* Context pointer for xCreate/xConnect */
-){
+ void *pAux, /* Context pointer for xCreate/xConnect */
+ void (*xDestroy)(void *) /* Destructor invoked on pAux; may be 0 for none */
+) {
int nName = strlen(zName);
Module *pMod = (Module *)sqliteMallocRaw(sizeof(Module) + nName + 1);
if( pMod ){
pMod->zName = zCopy;
pMod->pModule = pModule;
pMod->pAux = pAux;
+ pMod->xDestroy = xDestroy; /* Kept with pAux so the entry can be cleaned up later */
pMod = (Module *)sqlite3HashInsert(&db->aModule, zCopy, nName, (void*)pMod);
+ if( pMod && pMod->xDestroy ){ /* pMod is now whatever sqlite3HashInsert() handed back */
+ pMod->xDestroy(pMod->pAux); /* NOTE(review): presumably a displaced same-name module; confirm in hash.c */
+ }
sqliteFree(pMod);
sqlite3ResetInternalSchema(db, 0);
}
return sqlite3ApiExit(db, SQLITE_OK);
}
+
+/*
+** External API function used to create a new virtual-table module.
+**
+** Registers pModule under the name zName on connection db, with pAux as
+** the context pointer. No destructor is associated with pAux: this is a
+** thin wrapper that forwards to createModule() with xDestroy set to 0
+** and returns whatever createModule() returns.
+*/
+int sqlite3_create_module(
+ sqlite3 *db, /* Database in which module is registered */
+ const char *zName, /* Name assigned to this module */
+ const sqlite3_module *pModule, /* The definition of the module */
+ void *pAux /* Context pointer for xCreate/xConnect */
+){
+ return createModule(db, zName, pModule, pAux, 0); /* 0 == no destructor */
+}
+
+/*
+** External API function used to create a new virtual-table module,
+** with a destructor for the module's context pointer.
+**
+** Takes the same arguments as sqlite3_create_module() plus xDestroy.
+** All five arguments are forwarded unchanged to createModule(), which
+** stores xDestroy in the Module record next to pAux and invokes the
+** stored destructor on the pAux of any Module entry that
+** sqlite3HashInsert() hands back to it. The return value is whatever
+** createModule() returns.
+*/
+int sqlite3_create_module_v2(
+ sqlite3 *db, /* Database in which module is registered */
+ const char *zName, /* Name assigned to this module */
+ const sqlite3_module *pModule, /* The definition of the module */
+ void *pAux, /* Context pointer for xCreate/xConnect */
+ void (*xDestroy)(void *) /* Module destructor function */
+){
+ return createModule(db, zName, pModule, pAux, xDestroy);
+}
+
/*
** Lock the virtual table so that it cannot be disconnected.
** Locks nest. Every lock should have a corresponding unlock.
--- /dev/null
+# 2007 June 21
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library. The focus
+# of this script is testing the pluggable tokeniser feature of the
+# FTS2 module.
+#
+# $Id: fts2token.test,v 1.1 2007/06/22 15:21:16 danielk1977 Exp $
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If SQLITE_ENABLE_FTS2 is not defined (no fts2 capability), omit this file.
+ifcapable !fts2 {
+ finish_test
+ return
+}
+
+#--------------------------------------------------------------------------
+# Test cases fts2token-1.* are the warm-body test for the SQL scalar
+# function fts2_tokenizer(). The procedure is as follows:
+#
+# 1: Verify that there is no such fts2 tokenizer as 'blah'.
+#
+# 2: Query for the built-in tokenizer 'simple' and register the retrieved
+# value as tokenizer 'blah'; the registering call must not return NULL.
+#
+# 3: Test that the value returned for tokenizer 'blah' is now the
+# same as that retrieved for 'simple'.
+#
+# 4: Test that it is now possible to create an fts2 table using
+# tokenizer 'blah' (it was not possible in step 1).
+#
+# 5: Test that the table created to use tokenizer 'blah' is usable.
+#
+do_test fts2token-1.1 {
+ catchsql {
+ CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
+ }
+} {1 {unknown tokenizer: blah}}
+do_test fts2token-1.2 {
+ execsql {
+ SELECT fts2_tokenizer('blah', fts2_tokenizer('simple')) IS NULL;
+ }
+} {0}
+do_test fts2token-1.3 {
+ execsql {
+ SELECT fts2_tokenizer('blah') == fts2_tokenizer('simple');
+ }
+} {1}
+do_test fts2token-1.4 {
+ catchsql {
+ CREATE VIRTUAL TABLE t1 USING fts2(content, tokenize blah);
+ }
+} {0 {}}
+do_test fts2token-1.5 {
+ execsql {
+ INSERT INTO t1(content) VALUES('There was movement at the station');
+ INSERT INTO t1(content) VALUES('For the word has passed around');
+ INSERT INTO t1(content) VALUES('That the colt from ol regret had got away');
+ SELECT content FROM t1 WHERE content MATCH 'movement'
+ }
+} {{There was movement at the station}}
+
+#--------------------------------------------------------------------------
+# Test cases fts2token-2.* test error cases in the scalar function based
+# API for getting and setting tokenizers (so far: unknown-name lookup only).
+#
+do_test fts2token-2.1 {
+ catchsql {
+ SELECT fts2_tokenizer('nosuchtokenizer');
+ }
+} {1 {unknown tokenizer: nosuchtokenizer}}
+
+#--------------------------------------------------------------------------
+# Test cases fts2token-3.* run the built-in tokenizers 'simple', 'porter'
+# and (only if the icu capability is present) 'icu' over one string via
+# fts2_tokenizer_test(), as much to test it as the tokenizers themselves.
+#
+do_test fts2token-3.1 {
+ execsql {
+ SELECT fts2_tokenizer_test('simple', 'I don''t see how');
+ }
+} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
+do_test fts2token-3.2 {
+ execsql {
+ SELECT fts2_tokenizer_test('porter', 'I don''t see how');
+ }
+} {{0 i I 1 don don 2 t t 3 see see 4 how how}}
+
+ifcapable icu {
+ do_test fts2token-3.3 {
+ execsql {
+ SELECT fts2_tokenizer_test('icu', 'I don''t see how');
+ }
+ } {{0 i I 1 don't don't 2 see see 3 how how}}
+}
+
+finish_test