From: danielk1977 Date: Tue, 26 Jun 2007 12:54:07 +0000 (+0000) Subject: Reorganize comments in fts2_tokenizer.h. No code changes. (CVS 4132) X-Git-Tag: version-3.6.10~2042 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9ff802627a28e59b3c2b74cf9dba91bf9caf774b;p=thirdparty%2Fsqlite.git Reorganize comments in fts2_tokenizer.h. No code changes. (CVS 4132) FossilOrigin-Name: b331e30395e9fc90abe40ab802972a67648cf48e --- diff --git a/ext/fts2/fts2_tokenizer.h b/ext/fts2/fts2_tokenizer.h index 9b482abf00..8c256b2bed 100644 --- a/ext/fts2/fts2_tokenizer.h +++ b/ext/fts2/fts2_tokenizer.h @@ -27,42 +27,109 @@ #include "sqlite3.h" /* -** Structures used by the tokenizer interface. +** Structures used by the tokenizer interface. When a new tokenizer +** implementation is registered, the caller provides a pointer to +** an sqlite3_tokenizer_module containing pointers to the callback +** functions that make up an implementation. +** +** When an fts2 table is created, it passes any arguments passed to +** the tokenizer clause of the CREATE VIRTUAL TABLE statement to the +** sqlite3_tokenizer_module.xCreate() function of the requested tokenizer +** implementation. The xCreate() function in turn returns an +** sqlite3_tokenizer structure representing the specific tokenizer to +** be used for the fts2 table (customized by the tokenizer clause arguments). +** +** To tokenize an input buffer, the sqlite3_tokenizer_module.xOpen() +** method is called. It returns an sqlite3_tokenizer_cursor object +** that may be used to tokenize a specific input buffer based on +** the tokenization rules supplied by a specific sqlite3_tokenizer +** object. */ +typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; typedef struct sqlite3_tokenizer sqlite3_tokenizer; typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor; -typedef struct sqlite3_tokenizer_module sqlite3_tokenizer_module; struct sqlite3_tokenizer_module { - int iVersion; /* currently 0 */ /* - ** Create and destroy a tokenizer. argc/argv are passed down from - ** the fulltext virtual table creation to allow customization. + ** Structure version. Should always be set to 0. + */ + int iVersion; + + /* + ** Create a new tokenizer. The values in the argv[] array are the + ** arguments passed to the "tokenizer" clause of the CREATE VIRTUAL + ** TABLE statement that created the fts2 table. For example, if + ** the following SQL is executed: + ** + ** CREATE .. USING fts2( ... , tokenizer arg1 arg2) + ** + ** then argc is set to 2, and the argv[] array contains pointers + ** to the strings "arg1" and "arg2". + ** + ** This method should return either SQLITE_OK (0), or an SQLite error + ** code. If SQLITE_OK is returned, then *ppTokenizer should be set + ** to point at the newly created tokenizer structure. The generic + ** sqlite3_tokenizer.pModule variable should not be initialised by + ** this callback. The caller will do so. + */ + int (*xCreate)( + int argc, /* Size of argv array */ + const char *const*argv, /* Tokenizer argument strings */ + sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ + ); + + /* + ** Destroy an existing tokenizer. The fts2 module calls this method + ** exactly once for each successful call to xCreate(). */ - int (*xCreate)(int argc, const char *const*argv, - sqlite3_tokenizer **ppTokenizer); int (*xDestroy)(sqlite3_tokenizer *pTokenizer); /* - ** Tokenize a particular input. Call xOpen() to prepare to - ** tokenize, xNext() repeatedly until it returns SQLITE_DONE, then - ** xClose() to free any internal state. The pInput passed to - ** xOpen() must exist until the cursor is closed. The ppToken - ** result from xNext() is only valid until the next call to xNext() - ** or until xClose() is called. + ** Create a tokenizer cursor to tokenize an input buffer. The caller + ** is responsible for ensuring that the input buffer remains valid + ** until the cursor is closed (using the xClose() method). + */ + int (*xOpen)( + sqlite3_tokenizer *pTokenizer, /* Tokenizer object */ + const char *pInput, int nBytes, /* Input buffer */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Created tokenizer cursor */ + ); + + /* + ** Destroy an existing tokenizer cursor. The fts2 module calls this + ** method exactly once for each successful call to xOpen(). + */ + int (*xClose)(sqlite3_tokenizer_cursor *pCursor); + + /* + ** Retrieve the next token from the tokenizer cursor pCursor. This + ** method should either return SQLITE_OK and set the values of the + ** "OUT" variables identified below, or SQLITE_DONE to indicate that + ** the end of the buffer has been reached, or an SQLite error code. + ** + ** *ppToken should be set to point at a buffer containing the + ** normalized version of the token (i.e. after any case-folding and/or + ** stemming has been performed). *pnBytes should be set to the length + ** of this buffer in bytes. The input text that generated the token is + ** identified by the byte offsets returned in *piStartOffset and + ** *piEndOffset. + ** + ** The buffer *ppToken is set to point at is managed by the tokenizer + ** implementation. It is only required to be valid until the next call + ** to xNext() or xClose(). */ /* TODO(shess) current implementation requires pInput to be ** nul-terminated. This should either be fixed, or pInput/nBytes ** should be converted to zInput. */ - int (*xOpen)(sqlite3_tokenizer *pTokenizer, - const char *pInput, int nBytes, - sqlite3_tokenizer_cursor **ppCursor); - int (*xClose)(sqlite3_tokenizer_cursor *pCursor); - int (*xNext)(sqlite3_tokenizer_cursor *pCursor, - const char **ppToken, int *pnBytes, - int *piStartOffset, int *piEndOffset, int *piPosition); + int (*xNext)( + sqlite3_tokenizer_cursor *pCursor, /* Tokenizer cursor */ + const char **ppToken, int *pnBytes, /* OUT: Normalized text for token */ + int *piStartOffset, /* OUT: Byte offset of token in input buffer */ + int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */ + int *piPosition /* OUT: Number of tokens returned before this one */ + ); }; struct sqlite3_tokenizer { diff --git a/manifest b/manifest index 6cf7024c0b..2aad08a5fe 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Use\s(((i64)1)<<63)\sinstead\sof\sjust\s(1<<63)\sto\scompute\sthe\smost\snegative\s64-bit\sinteger.\s(CVS\s4131) -D 2007-06-26T12:52:34 +C Reorganize\scomments\sin\sfts2_tokenizer.h.\sNo\scode\schanges.\s(CVS\s4132) +D 2007-06-26T12:54:07 F Makefile.in 7f7485a4cc039476a42e534b3f26ec90e2f9753e F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 @@ -44,7 +44,7 @@ F ext/fts2/fts2_hash.h e283308156018329f042816eb09334df714e105e F ext/fts2/fts2_icu.c 45b54d1e075020b35db20f69d829f95ca0651111 F ext/fts2/fts2_porter.c 991a45463553c7318063fe7773368a6c0f39e35d F ext/fts2/fts2_tokenizer.c 5cec41326fabe65323945a46fa9495ee85c3d5fd -F ext/fts2/fts2_tokenizer.h 6d151c51382e8f6cf689c616bb697fe780478089 +F ext/fts2/fts2_tokenizer.h a7e46462d935a314b2682287f12f27530a3ee08e F ext/fts2/fts2_tokenizer1.c 719f6eb8cf94a8a5be6576c9b3d94d22deeea816 F ext/fts2/mkfts2amal.tcl 974d5d438cb3f7c4a652639262f82418c1e4cff0 F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33 @@ -516,7 +516,7 @@ F www/tclsqlite.tcl 8be95ee6dba05eabcd27a9d91331c803f2ce2130 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5 -P c6dfd9e43449b0b3528281d9e2e4971c6ba86ab5 -R ef164c42f461a60d1ea7875c9c81c3fc +P be2570c061e1e751d1a46450bd1186549146526e +R 6fa498c7a241cebb5a9c89bb113f95f1 U danielk1977 -Z 261217702720ef772fc27545ebd414f5 +Z ceb6a414b7b3e25a3dea78f60f54d2f2 diff --git a/manifest.uuid b/manifest.uuid index cc93836013..d9411509de 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -be2570c061e1e751d1a46450bd1186549146526e \ No newline at end of file +b331e30395e9fc90abe40ab802972a67648cf48e \ No newline at end of file