From: dan
Date: Mon, 5 Oct 2015 11:57:09 +0000 (+0000)
Subject: Add fts5txt2db.tcl, a tool for creating sample fts4/5 databases from text files.
X-Git-Tag: version-3.9.0~39
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2c1023df0971dbcfe7fd08525f4a746c1df7a6e7;p=thirdparty%2Fsqlite.git

Add fts5txt2db.tcl, a tool for creating sample fts4/5 databases from text files.

FossilOrigin-Name: 44f1ce30d1e446c9ee5f8bd8a62119e00356aa0e
---

diff --git a/ext/fts5/tool/fts5txt2db.tcl b/ext/fts5/tool/fts5txt2db.tcl
new file mode 100644
index 0000000000..23f607a801
--- /dev/null
+++ b/ext/fts5/tool/fts5txt2db.tcl
@@ -0,0 +1,156 @@
+#
+# Create a sample fts4 or fts5 database from one or more text files.
+#
+# The script calls [sqlite3] and [load_static_extension] without loading
+# any package, so the interpreter that runs it must already provide them.
+#
+
+# Print the usage message on stderr and exit with a failure status.
+#
+proc usage {} {
+  puts stderr "$::argv0 ?OPTIONS? DATABASE FILE1..."
+  puts stderr ""
+  puts stderr "Options are"
+  puts stderr "  -fts5"
+  puts stderr "  -fts4"
+  puts stderr "  -colsize <list of column sizes>"
+  puts stderr {
+This script is designed to create fts4/5 tables with more than one column.
+The -colsize option should be set to a Tcl list of integer values, one for
+each column in the table. Each value is the number of tokens that will be
+inserted into the column value for each row. For example, setting the -colsize
+option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
+tokens per row in each, respectively.
+
+Each "FILE" argument should be a text file. The contents of these text files is
+split on whitespace characters to form a list of tokens. The first N1 tokens
+are used for the first column of the first row, where N1 is the first element
+of the -colsize list. The next N2 are used for the second column of the first
+row, and so on. Rows are added to the table until the entire list of tokens
+is exhausted.
+}
+  exit -1
+}
+
+# Default option values: O(aColsize) is the list of per-column token
+# counts, O(tblname) the table name and O(fts) the FTS module to use.
+#
+set O(aColsize) [list 10 10 10]
+set O(tblname)  t1
+set O(fts)      fts5
+
+# Options that consume the next command line argument as their value.
+#
+set options_with_values {-colsize}
+
+# Parse the leading "-option" arguments. The loop stops at the first
+# argument that does not begin with "-", leaving $i at that argument.
+#
+for {set i 0} {$i < [llength $argv]} {incr i} {
+  set opt [lindex $argv $i]
+  if {[string index $opt 0] ne "-"} break
+
+  if {[lsearch -exact $options_with_values $opt]>=0} {
+    incr i
+    if {$i==[llength $argv]} usage
+    set val [lindex $argv $i]
+  }
+
+  switch -- $opt {
+    -colsize {
+      # Same array key that create_table and the insert loop read.
+      set O(aColsize) $val
+    }
+
+    -fts4 {
+      set O(fts) fts4
+    }
+
+    -fts5 {
+      set O(fts) fts5
+    }
+
+    default {
+      usage
+    }
+  }
+}
+
+if {$i > [llength $argv]-2} usage
+set O(db) [lindex $argv $i]
+set O(files) [lrange $argv [expr {$i+1}] end]
+
+# Check -colsize: a Tcl list of 1 to 26 non-negative integers (create_table
+# has 26 column names) with a non-zero total, since the insert loop only
+# terminates if every row consumes at least one token.
+#
+if {[catch {llength $O(aColsize)} nCol] || $nCol<1 || $nCol>26} usage
+set nRowToken 0
+foreach n $O(aColsize) {
+  if {![string is integer -strict $n] || $n<0} usage
+  incr nRowToken $n
+}
+if {$nRowToken==0} usage
+
+sqlite3 db $O(db)
+load_static_extension db fts5
+
+
+# Create the FTS table in the db. Return a list of the table columns.
+#
+proc create_table {} {
+  global O
+  set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]
+
+  set nCol [llength $O(aColsize)]
+  set cols [lrange $cols 0 [expr {$nCol-1}]]
+
+  set sql    "CREATE VIRTUAL TABLE IF NOT EXISTS $O(tblname) USING $O(fts) ("
+  append sql [join $cols ,]
+  append sql ");"
+
+  db eval $sql
+  return $cols
+}
+
+# Return a list of the whitespace-separated tokens in the named file.
+#
+proc readfile {file} {
+  set fd [open $file]
+  set data [read $fd]
+  close $fd
+  # Unlike [split], this yields no empty tokens for runs of whitespace.
+  regexp -all -inline {\S+} $data
+}
+
+
+# Load all the data into a big list of tokens.
+#
+set tokens [list]
+foreach f $O(files) {
+  set tokens [concat $tokens [readfile $f]]
+}
+
+set N [llength $tokens]
+set iTok 0
+set cols [create_table]
+
+# Bind each column value through the array element $row(<column>). Scalars
+# named after the columns would collide with other script variables, for
+# example the loop variables "c" and "s" used below.
+#
+set binds [list]
+foreach c $cols {
+  lappend binds "\$row($c)"
+}
+set sql "INSERT INTO $O(tblname) VALUES([join $binds {, }])"
+
+db eval BEGIN
+  while {$iTok < $N} {
+    foreach c $cols s $O(aColsize) {
+      set row($c) [lrange $tokens $iTok [expr {$iTok+$s-1}]]
+      incr iTok $s
+    }
+    db eval $sql
+  }
+db eval COMMIT
diff --git a/manifest b/manifest
index 3cf342d387..30906a1cee 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Update\sfts5\sto\ssupport\sthe\stable\sfunction\ssyntax.\s"...\sFROM\sfts5_tbl\sWHERE\sfts5_tbl\sMATCH\s?1\sAND\srank\sMATCH\s?1"\scan\snow\sbe\swritten\s"FROM\sfts5_tbl(?1,\s?2)".
-D 2015-10-03T15:38:57.855
+C Add\sfts5txt2db.tcl,\sa\stool\sfor\screating\ssample\sfts4/5\sdatabases\sfrom\stext\sfiles.
+D 2015-10-05T11:57:09.832
 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
 F Makefile.in 2143eeef6d0cc26006ae5fc4bb242a4a8b973412
 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -183,6 +183,7 @@ F ext/fts5/test/fts5unicode3.test 35c3d02aa7acf7d43d8de3bfe32c15ba96e8928e
 F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680
 F ext/fts5/test/fts5version.test 978f59541d8cef7e8591f8be2115ec5ccb863e2e
 F ext/fts5/test/fts5vocab.test cdf97b9678484e9bad5062edf9c9106e5c3b0c5c
+F ext/fts5/tool/fts5txt2db.tcl 3d19fb8ffb234031d33d7d2151acfbc55e9cfcc4
 F ext/fts5/tool/loadfts5.tcl 58e90407cc5c2b1770460119488fd7c0090d4dd3
 F ext/fts5/tool/mkfts5c.tcl 5745072c7de346e18c7f491e4c3281fe8a1cfe51
 F ext/fts5/tool/showfts5.tcl 9eaf6c3df352f98a2ab5ce1921dd94128ab1381d
@@ -1390,7 +1391,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P f653fce90846b700441e8fa5f1930c1ec5e38e31
-R 2a1e06ad741b8341871247b55aface69
+P 41d17d9e24303aed6d549ea23c62b93bf2f71691
+R 5e8c1254946bcba6c019e513b206067f
 U dan
-Z 2e1c3b96f1b7a9a0a6f0aa6c18cff068
+Z 56785af9a4f39d2ef982f38c897a3388
diff --git a/manifest.uuid b/manifest.uuid
index b69ffcc6e6..8d0a492b28 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-41d17d9e24303aed6d549ea23c62b93bf2f71691
\ No newline at end of file
+44f1ce30d1e446c9ee5f8bd8a62119e00356aa0e
\ No newline at end of file