From: dan Date: Tue, 26 Jan 2016 17:08:22 +0000 (+0000) Subject: Enhance fts5txt2db.tcl, a script used to generate fts5/fts4 databases for performance... X-Git-Tag: version-3.11.0~90^2~2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fee981e5b97a79b26308c2dc3f946264e4ee8028;p=thirdparty%2Fsqlite.git Enhance fts5txt2db.tcl, a script used to generate fts5/fts4 databases for performance testing. FossilOrigin-Name: c646e40350e5aa91abcf52de61fb31275bad38f9 --- diff --git a/ext/fts5/tool/fts5txt2db.tcl b/ext/fts5/tool/fts5txt2db.tcl index c22c5dae72..c6a58e6edd 100644 --- a/ext/fts5/tool/fts5txt2db.tcl +++ b/ext/fts5/tool/fts5txt2db.tcl @@ -1,13 +1,127 @@ -proc usage {} { - puts stderr "$::argv0 ?OPTIONS? DATABASE FILE1..." +#------------------------------------------------------------------------- +# Command line options processor. +# +proc command_line_error {O E {msg ""}} { + if {$msg != ""} { + puts stderr "Error: $msg" + puts stderr "" + } + + set L [list] + foreach o $O { + if {[llength $o]==1} { + lappend L [string toupper $o] + } + } + + puts stderr "Usage: $::argv0 ?SWITCHES? $L" puts stderr "" - puts stderr "Options are" - puts stderr " -fts5" - puts stderr " -fts4" - puts stderr " -colsize " - puts stderr { + puts stderr "Switches are:" + foreach o $O { + if {[llength $o]==3} { + foreach {a b c} $o {} + puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b] + } elseif {[llength $o]==2} { + foreach {a b} $o {} + puts stderr [format " -%-15s %s" $a $b] + } + } + puts stderr "" + puts stderr $E + exit -1 +} + +proc process_command_line {avar lArgs O E} { + + upvar $avar A + set zTrailing "" ;# True if ... is present in $O + set lPosargs [list] + + # Populate A() with default values. Also, for each switch in the command + # line spec, set an entry in the idx() array as follows: + # + # {tblname t1 "table name to use"} + # -> [set idx(-tblname) {tblname t1 "table name to use"} + # + # For each position parameter, append its name to $lPosargs. If the ... + # specifier is present, set $zTrailing to the name of the prefix. + # + foreach o $O { + set nm [lindex $o 0] + set nArg [llength $o] + switch -- $nArg { + 1 { + if {[string range $nm end-2 end]=="..."} { + set zTrailing [string range $nm 0 end-3] + } else { + lappend lPosargs $nm + } + } + 2 { + set A($nm) 0 + set idx(-$nm) $o + } + 3 { + set A($nm) [lindex $o 1] + set idx(-$nm) $o + } + default { + error "Error in command line specification" + } + } + } + + # Set explicitly specified option values + # + set nArg [llength $lArgs] + for {set i 0} {$i < $nArg} {incr i} { + set opt [lindex $lArgs $i] + if {[string range $opt 0 0]!="-" || $opt=="--"} break + set c [array names idx "${opt}*"] + if {[llength $c]==0} { command_line_error $O $E "Unrecognized option: $opt"} + if {[llength $c]>1} { command_line_error $O $E "Ambiguous option: $opt"} + + if {[llength $idx($c)]==3} { + if {$i==[llength $lArgs]-1} { + command_line_error $O $E "Option requires argument: $c" + } + incr i + set A([lindex $idx($c) 0]) [lindex $lArgs $i] + } else { + set A([lindex $idx($c) 0]) 1 + } + } + + # Deal with position arguments. + # + set nPosarg [llength $lPosargs] + set nRem [expr $nArg - $i] + if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} { + command_line_error $O $E + } + for {set j 0} {$j < $nPosarg} {incr j} { + set A([lindex $lPosargs $j]) [lindex $lArgs [expr $j+$i]] + } + if {$zTrailing!=""} { + set A($zTrailing) [lrange $lArgs [expr $j+$i] end] + } +} +# End of command line options processor. +#------------------------------------------------------------------------- + + +process_command_line A $argv { + {fts5 "use fts5"} + {fts4 "use fts4"} + {colsize "10 10 10" "list of column sizes"} + {tblname "t1" "table name to create"} + {detail "full" "Fts5 detail mode to use"} + {repeat 1 "Load each file this many times"} + database + file... +} { This script is designed to create fts4/5 tables with more than one column. The -colsize option should be set to a Tcl list of integer values, one for each column in the table. Each value is the number of tokens that will be @@ -22,59 +136,27 @@ of the -colsize list. The next N2 are used for the second column of the first row, and so on. Rows are added to the table until the entire list of tokens is exhausted. } - exit -1 -} - -set O(aColSize) [list 10 10 10] -set O(tblname) t1 -set O(fts) fts5 - - -set options_with_values {-colsize} - -for {set i 0} {$i < [llength $argv]} {incr i} { - set opt [lindex $argv $i] - if {[string range $opt 0 0]!="-"} break - - if {[lsearch $options_with_values $opt]>=0} { - incr i - if {$i==[llength $argv]} usage - set val [lindex $argv $i] - } - switch -- $opt { - -colsize { - set O(aColSize) $val - } - - -fts4 { - set O(fts) fts4 - } - - -fts5 { - set O(fts) fts5 - } - } +if {$A(fts4)} { + set A(fts) fts4 +} else { + set A(fts) fts5 } -if {$i > [llength $argv]-2} usage -set O(db) [lindex $argv $i] -set O(files) [lrange $argv [expr $i+1] end] - -sqlite3 db $O(db) +sqlite3 db $A(database) # Create the FTS table in the db. Return a list of the table columns. # proc create_table {} { - global O + global A set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z] - set nCol [llength $O(aColSize)] + set nCol [llength $A(colsize)] set cols [lrange $cols 0 [expr $nCol-1]] - set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $O(tblname) USING $O(fts) (" + set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) (" append sql [join $cols ,] - append sql ");" + if {$A(fts)=="fts5"} { append sql ",detail=$A(detail));" } db eval $sql return $cols @@ -89,27 +171,35 @@ proc readfile {file} { split $data } +proc repeat {L n} { + set res [list] + for {set i 0} {$i < $n} {incr i} { + set res [concat $res $L] + } + set res +} + # Load all the data into a big list of tokens. # set tokens [list] -foreach f $O(files) { - set tokens [concat $tokens [readfile $f]] +foreach f $A(file) { + set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]] } set N [llength $tokens] set i 0 set cols [create_table] -set sql "INSERT INTO $O(tblname) VALUES(\$[lindex $cols 0]" +set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])" foreach c [lrange $cols 1 end] { - append sql ", \$A($c)" + append sql ", \$R($c)" } append sql ")" db eval BEGIN while {$i < $N} { - foreach c $cols s $O(aColSize) { - set A($c) [lrange $tokens $i [expr $i+$s-1]] + foreach c $cols s $A(colsize) { + set R($c) [lrange $tokens $i [expr $i+$s-1]] incr i $s } db eval $sql diff --git a/manifest b/manifest index fbcb4d2d1b..32c610bbaf 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sproblem\swith\susing\sboth\sxPhraseFirst()\sand\sxPhraseFirstColumn()\swithin\sa\ssingle\sstatement\sin\sdetail=col\smode. -D 2016-01-23T18:51:59.865 +C Enhance\sfts5txt2db.tcl,\sa\sscript\sused\sto\sgenerate\sfts5/fts4\sdatabases\sfor\sperformance\stesting. +D 2016-01-26T17:08:22.193 F Makefile.in 027c1603f255390c43a426671055a31c0a65fdb4 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 1708a78eda223b6daa302b140037fcc214a779f9 @@ -189,7 +189,7 @@ F ext/fts5/test/fts5update.test 57c7012a7919889048947addae10e0613df45529 F ext/fts5/test/fts5version.test 978f59541d8cef7e8591f8be2115ec5ccb863e2e F ext/fts5/test/fts5vocab.test 480d780aa6b699816c5066225fbd86f3a0239477 F ext/fts5/tool/fts5speed.tcl aaee41894b552df8fbf8616aad003b2ea9ba3221 -F ext/fts5/tool/fts5txt2db.tcl c374c4c4797e8cdfadabdfaeeb5412dcd6686e84 +F ext/fts5/tool/fts5txt2db.tcl ae308338b2da1646dea456ab66706acdde8c714e F ext/fts5/tool/loadfts5.tcl 95b03429ee6b138645703c6ca192c3ac96eaf093 F ext/fts5/tool/mkfts5c.tcl d1c2a9ab8e0ec690a52316f33dd9b1d379942f45 F ext/fts5/tool/showfts5.tcl d54da0e067306663e2d5d523965ca487698e722c @@ -1419,7 +1419,7 @@ F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ceccc9ad788fb4da9120915741995b9f088f85ff -R e7d7753020b6f70b109a92a8ac037ba7 +P 72d53699bf0dcdb9d2a22e229989d7435f061399 +R e572f7330a1bdca73a606e620f2735b9 U dan -Z 648645363fb131dd10e5d7321a173b9d +Z 9f71d918f9bd09bf4fa550347b1ed369 diff --git a/manifest.uuid b/manifest.uuid index 1891619c18..0db14bcc12 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -72d53699bf0dcdb9d2a22e229989d7435f061399 \ No newline at end of file +c646e40350e5aa91abcf52de61fb31275bad38f9 \ No newline at end of file