# define SQLITE_CORE 1
#endif
+#include "fts3Int.h"
+
#include <assert.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include "fts3.h"
-#include "fts3_hash.h"
-#include "fts3_tokenizer.h"
#ifndef SQLITE_CORE
# include "sqlite3ext.h"
SQLITE_EXTENSION_INIT1
#endif
-#include "fts3Int.h"
-
/* TODO(shess) MAN, this thing needs some refactoring. At minimum, it
/*
** Values that may be used as the first parameter to fts3DoclistMerge().
*/
-#define MERGE_AND 1 /* D + D -> D */
#define MERGE_NOT 2 /* D + D -> D */
-#define MERGE_OR 3 /* D + D -> D */
-#define MERGE_POS_OR 4 /* P + P -> P */
-#define MERGE_PHRASE 5 /* P + P -> D */
-#define MERGE_POS_PHRASE 6 /* P + P -> P */
-#define MERGE_NEAR 7 /* P + P -> D */
-#define MERGE_POS_NEAR 8 /* P + P -> P */
+#define MERGE_AND 3 /* D + D -> D */
+#define MERGE_OR 4 /* D + D -> D */
+#define MERGE_POS_OR 5 /* P + P -> P */
+#define MERGE_PHRASE 6 /* P + P -> D */
+#define MERGE_POS_PHRASE 7 /* P + P -> P */
+#define MERGE_NEAR 8 /* P + P -> D */
+#define MERGE_POS_NEAR 9 /* P + P -> P */
static int fts3DoclistMerge(
int mergetype, /* One of the MERGE_XXX constants */
char **paOut, /* OUT: Pointer to malloc'd result buffer */
int *pnOut /* OUT: Size of buffer at *paOut */
){
- int rc = SQLITE_OK;
+ int rc = SQLITE_OK; /* Return code */
+ /* Zero the output parameters. */
*paOut = 0;
*pnOut = 0;
nParam1 = pExpr->nNear+1;
nParam2 = nParam1+pLeft->pPhrase->nToken+pRight->pPhrase->nToken-2;
- aBuffer = sqlite3_malloc(nLeft + nRight);
+ aBuffer = sqlite3_malloc(nLeft+nRight+1);
rc = fts3DoclistMerge(mergetype, nParam1, nParam2, aBuffer,
pnOut, aLeft, nLeft, aRight, nRight
);
break;
}
- case FTSQUERY_NOT: {
- fts3DoclistMerge(MERGE_NOT, 0, 0, aLeft, pnOut,
+ case FTSQUERY_OR: {
+ /* Allocate a buffer for the output. The maximum size is the
+ ** sum of the sizes of the two input buffers. The +1 term is
+ ** so that a buffer of zero bytes is never allocated - this can
+ ** cause fts3DoclistMerge() to incorrectly return SQLITE_NOMEM.
+ */
+ char *aBuffer = sqlite3_malloc(nRight+nLeft+1);
+ rc = fts3DoclistMerge(MERGE_OR, 0, 0, aBuffer, pnOut,
aLeft, nLeft, aRight, nRight
);
- *paOut = aLeft;
+ *paOut = aBuffer;
+ sqlite3_free(aLeft);
break;
}
- case FTSQUERY_AND: {
- fts3DoclistMerge(MERGE_AND, 0, 0, aLeft, pnOut,
+ case FTSQUERY_AND:
+ case FTSQUERY_NOT: {
+ assert( FTSQUERY_NOT==MERGE_NOT && FTSQUERY_AND==MERGE_AND );
+ fts3DoclistMerge(pExpr->eType, 0, 0, aLeft, pnOut,
aLeft, nLeft, aRight, nRight
);
*paOut = aLeft;
break;
}
-
- case FTSQUERY_OR: {
- char *aBuffer = sqlite3_malloc(nRight+nLeft);
- rc = fts3DoclistMerge(MERGE_OR, 0, 0, aBuffer, pnOut,
- aLeft, nLeft, aRight, nRight
- );
- *paOut = aBuffer;
- sqlite3_free(aLeft);
- break;
- }
}
}
sqlite3_free(aRight);
#ifndef _FTSINT_H
#define _FTSINT_H
+#if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
+# define NDEBUG 1
+#endif
+
#include <sqlite3.h>
#include "fts3_tokenizer.h"
#include "fts3_hash.h"
-C Start\sreworking\sfts3\scode\sto\smatch\sthe\srest\sof\sSQLite\s(code\sconventions,\smalloc-failure\shandling\setc.).
-D 2009-11-13T10:36:21
+C Further\sOOM\stesting\sfor\sfts3\scode.\sAdd\sTcl\scode\simplementing\san\sintegrity-check\sfor\sfts3.
+D 2009-11-14T11:41:01
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in 53f3dfa49f28ab5b80cb083fb7c9051e596bcfa1
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
F ext/fts3/README.syntax a19711dc5458c20734b8e485e75fb1981ec2427a
F ext/fts3/README.tokenizers 998756696647400de63d5ba60e9655036cb966e9
F ext/fts3/README.txt 8c18f41574404623b76917b9da66fcb0ab38328d
-F ext/fts3/fts3.c 835061e6c5324f80f13396418f9294b4691ac813
+F ext/fts3/fts3.c 1de6c9d40f1534eb542020eba56fb517780ee968
F ext/fts3/fts3.h 3a10a0af180d502cecc50df77b1b22df142817fe
-F ext/fts3/fts3Int.h a6aa1a4ad280adf6487fbccacbbe986a2fabcb82
+F ext/fts3/fts3Int.h f8419da445790c0666d4b4d72dc15a07dd7ae93e
F ext/fts3/fts3_expr.c bdf11f3602f62f36f0e42823680bf22033dae0de
F ext/fts3/fts3_hash.c 1af1833a4d581ee8d668bb71f5a500f7a0104982
F ext/fts3/fts3_hash.h 39524725425078bf9e814e9569c74a8e5a21b9fb
F test/fts2r.test b154c30b63061d8725e320fba1a39e2201cadd5e
F test/fts2token.test d8070b241a15ff13592a9ae4a8b7c171af6f445a
F test/fts3.test f4f380d3717493605270dfa3b0fa893ea0afb18d
+F test/fts3_common.tcl 2a38db65c919b5aa7b88d9c204e19e3d66fd810a
F test/fts3aa.test 5327d4c1d9b6c61021696746cc9a6cdc5bf159c0
F test/fts3ab.test 09aeaa162aee6513d9ff336b6932211008b9d1f9
F test/fts3ac.test 356280144a2c92aa7b11474afadfe62a437fcd69
F test/fts3e.test 1f6c6ac9cc8b772ca256e6b22aaeed50c9350851
F test/fts3expr.test 05dab77387801e4900009917bb18f556037d82da
F test/fts3expr2.test 18da930352e5693eaa163a3eacf96233b7290d1a
-F test/fts3malloc.test 92dbea5665b6f333dd32886366481aa95ffaeb50
+F test/fts3malloc.test b169661934883d99dc33a1dde1959a0448bd4f11
F test/fts3near.test dc196dd17b4606f440c580d45b3d23aa975fd077
F test/func.test af106ed834001738246d276659406823e35cde7b
F test/func2.test 772d66227e4e6684b86053302e2d74a2500e1e0f
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
-P bdc45ba77fb77771c8ff46b8d6c2dd29e6d3b019
-R a0400ee87fd3b17fac8e469e29fd58ca
-T *bgcolor * #f3f4f6
-T *branch * fts3-refactor
-T *sym-fts3-refactor *
-T -sym-trunk *
+P 30a92f1132801c7582007ee625c577ea2ac31cdf
+R b57a3a365be0f7386d8b2b667cc40aff
U dan
-Z d56027263e4b0769a9172f5a73a4a788
+Z c9155f22386e8e971e228afde4beba2a
-30a92f1132801c7582007ee625c577ea2ac31cdf
\ No newline at end of file
+c27d46b33e8596b45c562c2742b05030e8899092
\ No newline at end of file
--- /dev/null
+# 2009 November 04
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# This file contains common code used by the fts3 tests. At one point
+# equivalent functionality was implemented in C code. But it is easier
+# to use Tcl.
+#
+
+#-------------------------------------------------------------------------
+# USAGE: fts3_integrity_check TBL
+#
+# This proc is used to verify that the full-text index is consistent with
+# the contents of the fts3 table. In other words, it checks that the
+# data in the %_content table matches that in the %_segdir and %_segments
+# tables.
+#
+# This is not an efficient procedure. It uses a lot of memory and a lot
+# of CPU. But it is better than not checking at all.
+#
+# The procedure is:
+#
+# 1) Read the entire full-text index from the %_segdir and %_segments
+# tables into memory. For each entry in the index, the following is
+# done:
+#
+# set C($iDocid,$iCol,$iPosition) $zTerm
+#
+# 2) Iterate through each column of each row of the %_content table.
+# Tokenize all documents, and check that for each token there is
+# a corresponding entry in the $C array. After checking a token,
+# [unset] the $C array entry.
+#
+# 3) Check that array $C is now empty.
+#
+#
+proc fts3_integrity_check {tbl} {
+
+  # Step 1: load the whole full-text index. Afterwards A($zTerm) is a list
+  # of doclist blobs for $zTerm, in the order fts3_read2 visited the
+  # segments (ORDER BY level ASC, idx DESC).
+  fts3_read2 $tbl 1 A
+
+  foreach zTerm [array names A] {
+    foreach doclist $A($zTerm) {
+      set docid 0
+      while {[string length $doclist]>0} {
+        set iCol 0
+        set iPos 0
+
+        # First varint of a doclist-entry is the docid. Delta-compressed
+        # with respect to the docid of the previous entry.
+        #
+        incr docid [gobble_varint doclist]
+        if {[info exists D($zTerm,$docid)]} {
+          # An entry for this (term, docid) pair was already processed from
+          # an earlier doclist. Only the first one seen is used, so consume
+          # the remainder of this entry and move on.
+          while {[set iDelta [gobble_varint doclist]] != 0} {}
+          continue
+        }
+        set D($zTerm,$docid) 1
+
+        # Gobble varints until the 0x00 that terminates the doclist-entry
+        # is found. A value of 1 introduces a new column number; any other
+        # value is a position delta stored with an offset of 2.
+        while {[set iDelta [gobble_varint doclist]] > 0} {
+          if {$iDelta == 1} {
+            set iCol [gobble_varint doclist]
+            set iPos 0
+          } else {
+            incr iPos $iDelta
+            incr iPos -2
+            set C($docid,$iCol,$iPos) $zTerm
+          }
+        }
+      }
+    }
+  }
+
+  # Step 2: tokenize every column of every row of the %_content table and
+  # tick each token off against array C.
+  db eval "SELECT * FROM ${tbl}_content" E {
+    set iCol 0
+    set iDoc $E(docid)
+    foreach col [lrange $E(*) 1 end] {
+      set c $E($col)
+      set sql {SELECT fts3_tokenizer_test('simple', $c)}
+
+      foreach {pos term dummy} [db one $sql] {
+        # A token with no index entry at all is reported as an error
+        # rather than being allowed to abort the check with a Tcl
+        # "no such element in array" exception.
+        if {![info exists C($iDoc,$iCol,$pos)]} {
+          set es "Error at docid=$iDoc col=$iCol pos=$pos. "
+          append es "Index is missing \"$term\""
+          lappend errors $es
+          continue
+        }
+
+        # Terms are compared as strings ("ne"). The numeric-capable "!="
+        # operator would consider tokens such as "007" and "7" equal.
+        if {$C($iDoc,$iCol,$pos) ne $term} {
+          set es "Error at docid=$iDoc col=$iCol pos=$pos. "
+          append es "Index has \"$C($iDoc,$iCol,$pos)\", document has \"$term\""
+          lappend errors $es
+        }
+        unset C($iDoc,$iCol,$pos)
+      }
+      incr iCol
+    }
+  }
+
+  # Step 3: anything still left in C is an index entry that has no
+  # matching token in the content table.
+  foreach c [array names C] {
+    lappend errors "Bad index entry: $c -> $C($c)"
+  }
+
+  # Returns "ok" when no problem was found, otherwise the error messages
+  # joined with newlines.
+  if {[info exists errors]} { return [join $errors "\n"] }
+  return "ok"
+}
+
+# USAGE: fts3_terms TBL WHERE
+#
+# Argument TBL must be the name of an FTS3 table. Argument WHERE is an
+# SQL expression that will be used as the WHERE clause when scanning
+# the %_segdir table. As in the following query:
+#
+# "SELECT * FROM ${TBL}_segdir WHERE ${WHERE}"
+#
+# This function returns a list of all terms present in the segments
+# selected by the statement above.
+#
+proc fts3_terms {tbl where} {
+  # fts3_read fills the named array with one element per term found in
+  # the segments selected by $where. Only the element names (the terms
+  # themselves) are of interest here.
+  fts3_read $tbl $where aSeg
+  set lTerm [array names aSeg]
+
+  # Return the terms in sorted order.
+  return [lsort $lTerm]
+}
+
+
+# USAGE: fts3_doclist TBL TERM WHERE
+#
+# Argument TBL must be the name of an FTS3 table. TERM is a term that may
+# or may not be present in the table. Argument WHERE is used to select a
+# subset of the b-tree segments in the associated full-text index as
+# described above for [fts3_terms].
+#
+# This function returns the results of merging the doclists associated
+# with TERM in the selected segments. Each doclist is an element of the
+# returned list. Each doclist is formatted as follows:
+#
+# [$docid ?$col[$off1 $off2...]?...]
+#
+# The formatting is odd for a Tcl command in order to be compatible with
+# the original C-language implementation. If argument WHERE is "1", then
+# any empty doclists are omitted from the returned list.
+#
+proc fts3_doclist {tbl term where} {
+  fts3_read $tbl $where a
+
+  # TERM may legitimately be absent from the selected segments. In that
+  # case there is nothing to merge, so return an empty result instead of
+  # failing on a read of a non-existent array element.
+  if {![info exists a($term)]} { return "" }
+
+  # Merge the doclists in the order fts3_read collected them. ret($docid)
+  # is overwritten (or removed) by each later doclist that mentions the
+  # same docid, so the last doclist processed wins.
+  foreach doclist $a($term) {
+    set docid 0
+
+    while {[string length $doclist]>0} {
+      set iCol 0
+      set iPos 0
+      set lPos [list]
+      set lCol [list]
+
+      # Docids are delta-encoded relative to the previous entry.
+      incr docid [gobble_varint doclist]
+
+      # Read varints up to the 0x00 terminator of this entry. A value of 1
+      # closes the current column and is followed by the next column
+      # number; any other value is a position delta stored with an offset
+      # of 2.
+      while {[set iDelta [gobble_varint doclist]] > 0} {
+        if {$iDelta == 1} {
+          lappend lCol [list $iCol $lPos]
+          set iPos 0
+          set lPos [list]
+          set iCol [gobble_varint doclist]
+        } else {
+          incr iPos $iDelta
+          incr iPos -2
+          lappend lPos $iPos
+        }
+      }
+
+      if {[llength $lPos]>0} {
+        lappend lCol [list $iCol $lPos]
+      }
+
+      # When WHERE is "1", an entry with no columns removes any earlier
+      # entry for the docid; otherwise empty entries are kept.
+      if {$where != "1" || [llength $lCol]>0} {
+        set ret($docid) $lCol
+      } else {
+        unset -nocomplain ret($docid)
+      }
+    }
+  }
+
+  # Format the merged result as "[docid col[off off ...] ...]" elements,
+  # ordered by docid.
+  set lDoc [list]
+  foreach docid [lsort -integer [array names ret]] {
+    set cols ""
+    foreach col $ret($docid) {
+      foreach {iCol lPos} $col {}
+      append cols " $iCol\[[join $lPos { }]\]"
+    }
+    lappend lDoc "\[${docid}${cols}\]"
+  }
+
+  return [join $lDoc " "]
+}
+
+###########################################################################
+
+proc gobble_varint {varname} {
+  # Link to the caller's buffer so that the consumed bytes can be removed
+  # from it in place.
+  upvar 1 $varname buf
+
+  # read_varint stores the decoded integer in iVal. Its result is used
+  # below as the number of leading bytes to drop from the buffer.
+  set nByte [read_varint $buf iVal]
+  set buf [string range $buf $nByte end]
+
+  return $iVal
+}
+# Remove the first $nLength characters from the variable named $varname in
+# the caller's scope and return them. If $nLength is zero an empty string
+# is returned and the variable is left unchanged.
+#
+proc gobble_string {varname nLength} {
+  upvar 1 $varname blob
+
+  # The expression is braced so that it is parsed once and $nLength is
+  # not re-substituted as expression text.
+  set ret [string range $blob 0 [expr {$nLength-1}]]
+  set blob [string range $blob $nLength end]
+  return $ret
+}
+
+# The argument is a blob of data representing an FTS3 segment leaf.
+# Return a list consisting of alternating terms (strings) and doclists
+# (blobs of data).
+#
+proc fts3_readleaf {blob} {
+  set zPrev ""
+  set terms [list]
+
+  # Each iteration consumes one (term, doclist) record from the front of
+  # the blob.
+  while {[string length $blob] > 0} {
+    # Terms are prefix-compressed against the previous term: nPrefix
+    # characters are reused from it and nSuffix new characters follow.
+    set nPrefix [gobble_varint blob]
+    set nSuffix [gobble_varint blob]
+
+    # Braced expression: parsed once, no double substitution.
+    set zTerm [string range $zPrev 0 [expr {$nPrefix-1}]]
+    append zTerm [gobble_string blob $nSuffix]
+
+    # The doclist is stored as a varint length followed by that many
+    # characters of data.
+    set nDoclist [gobble_varint blob]
+    set doclist [gobble_string blob $nDoclist]
+
+    lappend terms $zTerm $doclist
+    set zPrev $zTerm
+  }
+
+  return $terms
+}
+
+# Shared implementation of [fts3_read] and [fts3_read2].
+#
+# Clears the array named $varname in the caller's scope, then scans the
+# rows of ${tbl}_segdir that match the SQL expression $where, in the order
+# given by $zOrder (the text of an ORDER BY clause). For every term found
+# in a selected segment, the term's doclist blob is appended to the list
+# stored in array element ($term).
+#
+proc fts3_read_segments {tbl where zOrder varname} {
+  upvar 1 $varname a
+  array unset a
+  db eval " SELECT start_block, leaves_end_block, root
+            FROM ${tbl}_segdir WHERE $where
+            ORDER BY $zOrder
+  " {
+    if {$start_block == 0} {
+      # No separate leaf blocks: the root blob is itself read as a leaf.
+      foreach {t d} [fts3_readleaf $root] { lappend a($t) $d }
+    } else {
+      # leaves_end_block is the blockid of the LAST leaf node, so the
+      # range is inclusive at both ends. Using "<" here would silently
+      # drop the final leaf of every multi-block segment.
+      db eval " SELECT block
+                FROM ${tbl}_segments
+                WHERE blockid>=$start_block AND blockid<=$leaves_end_block
+                ORDER BY blockid
+      " {
+        foreach {t d} [fts3_readleaf $block] { lappend a($t) $d }
+      }
+    }
+  }
+}
+
+# USAGE: fts3_read2 TBL WHERE VARNAME
+#
+# Read the selected segments into array VARNAME, visiting segments in
+# "level ASC, idx DESC" order.
+#
+proc fts3_read2 {tbl where varname} {
+  upvar 1 $varname a
+  fts3_read_segments $tbl $where "level ASC, idx DESC" a
+}
+
+# USAGE: fts3_read TBL WHERE VARNAME
+#
+# Read the selected segments into array VARNAME, visiting segments in
+# "level DESC, idx ASC" order.
+#
+proc fts3_read {tbl where varname} {
+  upvar 1 $varname a
+  fts3_read_segments $tbl $where "level DESC, idx ASC" a
+}
+
source $testdir/tester.tcl
ifcapable !fts3 { finish_test ; return }
source $testdir/malloc_common.tcl
+source $testdir/fts3_common.tcl
+set sqlite_fts3_enable_parentheses 1
+
+if 0 {
do_malloc_test fts3_malloc-1.1 -sqlbody {
CREATE VIRTUAL TABLE ft USING fts3(a, b, c);
}
-
do_malloc_test fts3_malloc-1.2 -sqlprep {
CREATE VIRTUAL TABLE ft USING fts3(a, b, c);
} -sqlbody {
DROP TABLE ft;
}
-
-do_malloc_test fts3_malloc-1.3 -sqlprep {
- CREATE VIRTUAL TABLE ft USING fts3(content);
-} -sqlbody {
- INSERT INTO ft VALUES('one two three four');
-}
-
-do_malloc_test fts3_malloc-1.4 -tclprep {
- db eval {CREATE VIRTUAL TABLE ft USING fts3(a, b)}
- for {set i 0} {$i<16} {incr i} {
- db eval { INSERT INTO ft VALUES('one two', 'three four') }
- }
-} -sqlbody {
- INSERT INTO ft VALUES('one two', 'three four');
}
-proc do_write_test {sql} {
- uplevel [list db eval $sql]
-}
+set DO_MALLOC_TEST 0
-proc do_read_test {name sql result} {
+#-------------------------------------------------------------------------
+# This proc is used to test a single SELECT statement. Parameter $name is
+# passed a name for the test case (i.e. "fts3_malloc-1.4.1") and parameter
+# $sql is passed the text of the SELECT statement. Parameter $result is
+# set to the expected output if the SELECT statement is successfully
+# executed using [db eval].
+#
+# Example:
+#
+# do_select_test testcase-1.1 "SELECT 1+1, 1+2" {2 3}
+#
+# If global variable DO_MALLOC_TEST is set to a non-zero value, or if
+# it is not defined at all, then OOM testing is performed on the SELECT
+# statement. Each OOM test case is said to pass if either (a) executing
+# the SELECT statement succeeds and the results match those specified
+# by parameter $result, or (b) TCL throws an "out of memory" error.
+#
+# If DO_MALLOC_TEST is defined and set to zero, then the SELECT statement
+# is executed just once. In this case the test case passes if the results
+# match the expected results passed via parameter $result.
+#
+proc do_select_test {name sql result} {
- if {![info exists ::DO_MALLOC_TEST]} {
- set ::DO_MALLOC_TEST 1
- }
+ if {![info exists ::DO_MALLOC_TEST]} { set ::DO_MALLOC_TEST 1 }
- set answers [list [list 0 $result]]
if {$::DO_MALLOC_TEST } {
set answers [list {1 {out of memory}} [list 0 $result]]
set modes [list 100000 transient 1 persistent]
} else {
+ set answers [list [list 0 $result]]
set modes [list 0 nofail]
}
set str [join $answers " OR "]
}
}
+#-------------------------------------------------------------------------
+# Test a single write to the database. In this case a "write" is a
+# DELETE, UPDATE or INSERT statement.
+#
+# If OOM testing is performed, there are several acceptable outcomes:
+#
+# 1) The write succeeds. No error is returned.
+#
+# 2) An "out of memory" exception is thrown and:
+#
+# a) The statement has no effect, OR
+# b) The current transaction is rolled back, OR
+# c) The statement succeeds. This can only happen if the connection
+# is in auto-commit mode (after the statement is executed, so this
+# includes COMMIT statements).
+#
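+# If the write operation eventually succeeds, zero is returned. If a
+# transaction is rolled back, non-zero is returned.
+#
+# NOTE: the implementation below does not yet produce these values. Every
+# exit path is a bare [return], so callers currently receive an empty
+# string in all cases.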
+#
+# Parameter $name is the name to use for the test case (or test cases).
+# The second parameter, $tbl, should be the name of the database table
+# being modified. Parameter $sql contains the SQL statement to test.
+#
+proc do_write_test {name tbl sql} {
+  # Figure out a statement to get a checksum for table $tbl. The query is
+  # abandoned after its first row ("break"); it is only run to obtain the
+  # column names in V(*).
+  db eval "SELECT * FROM $tbl" V break
+  set cksumsql "SELECT md5sum([join [concat rowid $V(*)] ,]) FROM $tbl"
+
+  # Calculate the initial table checksum.
+  set cksum1 [db one $cksumsql]
+
+  if {![info exists ::DO_MALLOC_TEST]} { set ::DO_MALLOC_TEST 1 }
+
+  # $answers lists the acceptable results of [catchsql]; $modes is a flat
+  # list of (repeat-count, label) pairs, one pair per failure mode.
+  # NOTE(review): "-repeat 100000" is labelled transient and "-repeat 1"
+  # persistent; the labels look swapped. They are left unchanged here
+  # because they form part of the test-case names - confirm.
+  if {$::DO_MALLOC_TEST } {
+    set answers [list {1 {out of memory}} {0 {}}]
+    set modes [list 100000 transient 1 persistent]
+  } else {
+    set answers [list {0 {}}]
+    set modes [list 0 nofail]
+  }
+  set str [join $answers " OR "]
+
+  foreach {nRepeat zName} $modes {
+    for {set iFail 1} 1 {incr iFail} {
+      if {$::DO_MALLOC_TEST} {sqlite3_memdebug_fail $iFail -repeat $nRepeat}
+
+      set res [catchsql $sql]
+      set nFail [sqlite3_memdebug_fail -1 -benigncnt nBenign]
+      if {$nFail==0} {
+        # No allocation failure was reported for this run, so the write
+        # must have succeeded outright. The statement cannot be tested
+        # again after it has taken effect, hence the return.
+        do_test $name.$zName.$iFail [list set {} $res] {0 {}}
+        return
+      } else {
+        # An exact match is required: the default glob matching of
+        # [lsearch] would treat any "*", "?" or "\[" in $res as a pattern.
+        if {[lsearch -exact $answers $res]>=0} {
+          set res $str
+        }
+        do_test $name.$zName.$iFail [list set {} $res] $str
+
+        # Stop as soon as the table contents differ from the starting
+        # state. Checksums are hex strings, so compare them as strings;
+        # "!=" would compare numeric-looking values as numbers.
+        set cksum2 [db one $cksumsql]
+        if {$cksum1 ne $cksum2} return
+      }
+    }
+  }
+}
+
proc normal_list {l} {
set ret [list]
foreach elem $l {lappend ret $elem}
file delete -force test.db test.db-journal
sqlite3 db test.db
sqlite3_db_config_lookaside db 0 0 0
-set sqlite_fts3_enable_parentheses 1
-
do_test fts3_malloc-2.0 {
execsql { CREATE VIRTUAL TABLE ft USING fts3(a, b) }
}
21 {SELECT a FROM ft WHERE b MATCH 'neung NEAR song NEAR sahm'} {
- {one two three} {one two three four}
- {one two three five} {one two three four five}
+ {one two three} {one two three four}
+ {one two three five} {one two three four five}
}
} {
set result [normal_list $result]
- do_read_test fts3_malloc-2.$tn $sql $result
+ do_select_test fts3_malloc-2.$tn $sql $result
}
do_test fts3_malloc-3.0 {
} {}
foreach {tn sql result} {
1 "SELECT count(*) FROM ft" {1023}
+
2 "SELECT a FROM ft WHERE a MATCH 'one two three four five six seven eight'" {
{one two three four five six seven eight}
{one two three four five six seven eight nine}
{one two three four five six seven eight nine ten}
}
- 3 {SELECT count(*), sum(docid) FROM ft WHERE a MATCH 'o*'} {512 262144}
+ 3 {SELECT count(*), sum(docid) FROM ft WHERE a MATCH 'o*'} {
+ 512 262144
+ }
+
4 {SELECT count(*), sum(docid) FROM ft WHERE a MATCH '"two three four"'} {
128 66368
}
} {
-#set ::DO_MALLOC_TEST 0
set result [normal_list $result]
- do_read_test fts3_malloc-3.$tn $sql $result
+ do_select_test fts3_malloc-3.$tn $sql $result
}
+# Delete every row with docid of 32 or more before the write tests run
+# (presumably to keep the data set small - confirm).
+do_test fts3_malloc-4.0 {
+ execsql { DELETE FROM ft WHERE docid>=32 }
+} {}
+# Run each DELETE through do_write_test. The table passed is ft_content,
+# which is the table do_write_test checksums.
+foreach {tn sql} {
+ 1 "DELETE FROM ft WHERE ft MATCH 'one'"
+ 2 "DELETE FROM ft WHERE ft MATCH 'three'"
+ 3 "DELETE FROM ft WHERE ft MATCH 'five'"
+} {
+ do_write_test fts3_malloc-4.1.$tn ft_content $sql
+}
+# After the three deletes, only these values of column a are expected.
+do_test fts3_malloc-4.2 {
+ execsql { SELECT a FROM ft }
+} {two four {two four}}
+
+
finish_test