From: drh Date: Fri, 13 Oct 2017 18:58:55 +0000 (+0000) Subject: Add the tcl/mkccode.tcl script used to construct a single C-language source X-Git-Tag: version-3.21.0~9^2~10 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=903b23022d6d39fbc18d619dfdead39277bed72d;p=thirdparty%2Fsqlite.git Add the tcl/mkccode.tcl script used to construct a single C-language source fiel for programs that combine C-code, SQLite, and TCL. Use this script to construct the sqlite3_analyzer program. FossilOrigin-Name: 298a3fddec459c4fd2b840bd363239dc627f1dda90e2d5e478846cb895a8ad82 --- diff --git a/Makefile.in b/Makefile.in index d474222fb8..e2934d80b1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1180,14 +1180,8 @@ valgrindtest: $(TESTPROGS) valgrindfuzz smoketest: $(TESTPROGS) fuzzcheck$(TEXE) ./testfixture$(TEXE) $(TOP)/test/main.test $(TESTOPTS) -sqlite3_analyzer.c: sqlite3.c $(TOP)/src/tclsqlite.c $(TOP)/tool/spaceanal.tcl - echo "#define TCLSH 2" > $@ - echo "#define SQLITE_ENABLE_DBSTAT_VTAB 1" >> $@ - cat sqlite3.c $(TOP)/src/tclsqlite.c >> $@ - echo "static const char *tclsh_main_loop(void){" >> $@ - echo "static const char *zMainloop = " >> $@ - $(TCLSH_CMD) $(TOP)/tool/tostr.tcl $(TOP)/tool/spaceanal.tcl >> $@ - echo "; return zMainloop; }" >> $@ +sqlite3_analyzer.c: sqlite3.c $(TOP)/src/tclsqlite.c $(TOP)/tool/spaceanal.tcl $(TOP)/tool/mkccode.tcl $(TOP)/tool/sqlite3_analyzer.c.in + $(TCLSH_CMD) $(TOP)/tool/mkccode.tcl $(TOP)/tool/sqlite3_analyzer.c.in >sqlite3_analyzer.c sqlite3_analyzer$(TEXE): sqlite3_analyzer.c $(LTLINK) sqlite3_analyzer.c -o $@ $(LIBTCL) $(TLIBS) diff --git a/Makefile.msc b/Makefile.msc index 0611d8a8b3..bfd368c558 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -2191,14 +2191,8 @@ smoketest: $(TESTPROGS) @set PATH=$(LIBTCLPATH);$(PATH) .\testfixture.exe $(TOP)\test\main.test $(TESTOPTS) -sqlite3_analyzer.c: $(SQLITE3C) $(SQLITE3H) $(TOP)\src\tclsqlite.c $(TOP)\tool\spaceanal.tcl $(SQLITE_TCL_DEP) - echo #define TCLSH 2 > $@ - echo #define SQLITE_ENABLE_DBSTAT_VTAB 1 >> $@ - copy $@ + $(SQLITE3C) + $(TOP)\src\tclsqlite.c $@ - echo static const char *tclsh_main_loop(void){ >> $@ - echo static const char *zMainloop = >> $@ - $(TCLSH_CMD) $(TOP)\tool\tostr.tcl $(TOP)\tool\spaceanal.tcl >> $@ - echo ; return zMainloop; } >> $@ +sqlite3_analyzer.c: $(SQLITE3C) $(SQLITE3H) $(TOP)\src\tclsqlite.c $(TOP)\tool\spaceanal.tcl $(TOP)\tool\mkccode.tcl $(TOP)\tool\sqlite3_analyzer.c.in $(SQLITE_TCL_DEP) + $(TCLSH_CMD) $(TOP)\tool\mkccode.tcl $(TOP)\tool\sqlite3_analyzer.c.in > $@ sqlite3_analyzer.exe: sqlite3_analyzer.c $(LIBRESOBJS) $(LTLINK) $(NO_WARN) -DBUILD_sqlite -I$(TCLINCDIR) sqlite3_analyzer.c \ diff --git a/main.mk b/main.mk index bdbb56e229..18d7cd3459 100644 --- a/main.mk +++ b/main.mk @@ -780,14 +780,8 @@ tclsqlite3: $(TOP)/src/tclsqlite.c libsqlite3.a $(TCCX) $(TCL_FLAGS) -DTCLSH=1 -o tclsqlite3 \ $(TOP)/src/tclsqlite.c libsqlite3.a $(LIBTCL) $(THREADLIB) -sqlite3_analyzer.c: sqlite3.c $(TOP)/src/tclsqlite.c $(TOP)/tool/spaceanal.tcl - echo "#define TCLSH 2" > $@ - echo "#define SQLITE_ENABLE_DBSTAT_VTAB 1" >> $@ - cat sqlite3.c $(TOP)/src/tclsqlite.c >> $@ - echo "static const char *tclsh_main_loop(void){" >> $@ - echo "static const char *zMainloop = " >> $@ - tclsh $(TOP)/tool/tostr.tcl $(TOP)/tool/spaceanal.tcl >> $@ - echo "; return zMainloop; }" >> $@ +sqlite3_analyzer.c: sqlite3.c $(TOP)/src/tclsqlite.c $(TOP)/tool/spaceanal.tcl $(TOP)/tool/sqlite3_analyzer.c.in $(TOP)/tool/mkccode.tcl + tclsh $(TOP)/tool/mkccode.tcl $(TOP)/tool/sqlite3_analyzer.c.in >sqlite3_analyzer.c sqlite3_analyzer$(EXE): sqlite3_analyzer.c $(TCCX) $(TCL_FLAGS) sqlite3_analyzer.c -o $@ $(LIBTCL) $(THREADLIB) diff --git a/manifest b/manifest index c45a7ce09e..669c1b284a 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -C Move\sa\sbunch\sof\sunrelated\stest\scode\sout\sof\stclsqlite.c\sand\sinto\ntest_*.c\sfiles.\s\sThere\sis\sstill\ssome\stest\scode\sin\stclsqlite.c,\sbut\sthe\samount\nis\sgreatly\sreduced. -D 2017-10-13T16:19:48.503 -F Makefile.in d9acf9cc5252f406c2a417c12475ef14fe49096d121bd025a230fe3b58e0f294 +C Add\sthe\stcl/mkccode.tcl\sscript\sused\sto\sconstruct\sa\ssingle\sC-language\ssource\nfiel\sfor\sprograms\sthat\scombine\sC-code,\sSQLite,\sand\sTCL.\s\sUse\sthis\sscript\sto\nconstruct\sthe\ssqlite3_analyzer\sprogram. +D 2017-10-13T18:58:55.897 +F Makefile.in a99044d778e2a83ba6cf78b88937d3d667cee425dca5915d73eeba1d56b98082 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 -F Makefile.msc c20c8f2945955931d0f790104b335b37ba511eeb0d75eded23b7f878077ee1f5 +F Makefile.msc b864c4f906031a37e425466d8214df2da2f01703956dad5290156f03548d7277 F README.md f5c87359573c4d255425e588a56554b50fdcc2afba4e017a2e02a43701456afd F VERSION f81232df28e2d3ff049feefad5fbd5489cc33697f6bd2ecf61af7f0dde3b83d0 F aclocal.m4 a5c22d164aff7ed549d53a90fa56d56955281f50 @@ -384,7 +384,7 @@ F ext/userauth/userauth.c 3410be31283abba70255d71fd24734e017a4497f F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 9311f1d83efd6818b7fbeb1256dbc5377159359528be4e7d925345e4d80446bf +F main.mk a1948e0509f143fe4395a449cf61039bb6c7db3a93ae6af65092a72556718026 F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 F mptest/config01.test 3c6adcbc50b991866855f1977ff172eb6d901271 F mptest/config02.test 4415dfe36c48785f751e16e32c20b077c28ae504 @@ -1593,6 +1593,7 @@ F tool/loadfts.c c3c64e4d5e90e8ba41159232c2189dba4be7b862 F tool/logest.c 11346aa019e2e77a00902aa7d0cabd27bd2e8cca F tool/max-limits.c cbb635fbb37ae4d05f240bfb5b5270bb63c54439 F tool/mkautoconfamal.sh e855df211ecbcc7131dee817110ff386cfb112f7 +F tool/mkccode.tcl 86463e68ce9c15d3041610fedd285ce32a5cf7a58fc88b3202b8b76837650dbe x F tool/mkctimec.tcl dd183b73ae1c28249669741c250525f0407e579a70482371668fd5f130d9feb3 F tool/mkkeywordhash.c 2e852ac0dfdc5af18886dc1ce7e9676d11714ae3df0a282dc7d90b3a0fe2033c F tool/mkmsvcmin.tcl cbd93f1cfa3a0a9ae56fc958510aa3fc3ac65e29cb111716199e3d0e66eefaa4 @@ -1631,11 +1632,11 @@ F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 7ce07da76b5e745783e703a834417d725b7d45fd F tool/split-sqlite3c.tcl d9be87f1c340285a3e081eb19b4a247981ed290c F tool/sqldiff.c 30879bbc8de686df4624e86adce2d8981f500904c1cfb55b5d1eea2ffd9341eb +F tool/sqlite3_analyzer.c.in 3d53f06e04619f43b7aebc01cc318ef215bfd09ea1947fe9e485bed6aa6cb4b9 F tool/srcck1.c 371de5363b70154012955544f86fdee8f6e5326f F tool/stack_usage.tcl f8e71b92cdb099a147dad572375595eae55eca43 F tool/symbols-mingw.sh 4dbcea7e74768305384c9fd2ed2b41bbf9f0414d F tool/symbols.sh c5a617b8c61a0926747a56c65f5671ef8ac0e148 -F tool/tostr.tcl 96022f35ada2194f6f8ccf6fd95809e90ed277c4 F tool/varint.c 5d94cb5003db9dbbcbcc5df08d66f16071aee003 F tool/vdbe-compress.tcl 5926c71f9c12d2ab73ef35c29376e756eb68361c F tool/vdbe_profile.tcl 246d0da094856d72d2c12efec03250d71639d19f @@ -1663,8 +1664,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P eaeeb09d4aa1dbccdd2488af8461e2a8c8a53d92c63fd56330be041ad72a9e4a ea2fa9ab9b3c97e001c1a2f3e24f3554b544070d5f915b9827eb07eb5ea8f00b -R ef5c1b585866b0d225763ce3d0edba7c -T +closed ea2fa9ab9b3c97e001c1a2f3e24f3554b544070d5f915b9827eb07eb5ea8f00b +P a9c4bc88fcf985a0bea14ed5381239cfb697886287998da04a10230b6858ab5d +R 78f34b33921be1c7a7c8587f43b33b81 U drh -Z 0c47718eb3eab9783f8c2bca1833cb14 +Z 180ea09ee34fefc9e97e7461c13719ef diff --git a/manifest.uuid b/manifest.uuid index 05172a0f2b..fabaeec3c0 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a9c4bc88fcf985a0bea14ed5381239cfb697886287998da04a10230b6858ab5d \ No newline at end of file +298a3fddec459c4fd2b840bd363239dc627f1dda90e2d5e478846cb895a8ad82 \ No newline at end of file diff --git a/tool/mkccode.tcl b/tool/mkccode.tcl new file mode 100755 index 0000000000..41b09f1e81 --- /dev/null +++ b/tool/mkccode.tcl @@ -0,0 +1,93 @@ +#!/usr/bin/tclsh +# +# Use this script to build C-language source code for a program that uses +# tclsqlite.c together with custom TCL scripts and/or C extensions for +# either SQLite or TCL. +# +# Usage example: +# +# tclsh mktclsqliteprog.tcl demoapp.c.in >demoapp.c +# +# The demoapp.c.in file contains a mixture of C code, TCL script, and +# processing directives used by mktclsqliteprog.tcl to build the final C-code +# output file. Most lines of demoapp.c.in are copied straight through into +# the output. The following control directives are recognized: +# +# BEGIN_STRING +# +# This marks the beginning of large string literal - usually a TCL +# script of some kind. Subsequent lines of text through the first +# line that begins with END_STRING are converted into a C-language +# string literal. +# +# INCLUDE path +# +# The path argument is the name of a file to be inserted in place of +# the INCLUDE line. The path can begin with $ROOT to signify the +# root of the SQLite source tree, or $HOME to signify the directory +# that contains the demoapp.c.in input script itself. If the path does +# not begin with either $ROOT or $HOME, then it is interpreted relative +# to the current working directory. +# +# If the INCLUDE occurs in the middle of BEGIN_STRING...END_STRING +# then all of the text in the input file is converted into C-language +# string literals. +# +# None of the control directives described above will nest. Only the +# top-level input file ("demoapp.c.in" in the example) is interpreted. +# referenced files are copied verbatim. +# +if {[llength $argv]!=1} { + puts stderr "Usage: $argv0 TEMPLATE >OUTPUT" + exit 1 +} +set infile [lindex $argv 0] +set ROOT [file normalize [file dir $argv0]/..] +set HOME [file normalize [file dir $infile]] +set in [open $infile rb] +puts [subst {/* DO NOT EDIT +** +** This file was generated by \"$argv0 $infile\". +** To make changes, edit $infile then rerun the generator +** command. +*/}] +set instr 0 +while {1} { + set line [gets $in] + if {[eof $in]} break + if {[regexp {^INCLUDE (.*)} $line all path]} { + regsub {^\$ROOT\y} $path $ROOT path + regsub {^\$HOME\y} $path $HOME path + set in2 [open $path rb] + puts "/* INCLUDE $path */" + if {$instr} { + while {1} { + set line [gets $in2] + if {[eof $in2]} break + set x [string map "\\\\ \\\\\\\\ \\\" \\\\\"" $line] + puts "\"$x\\n\"" + } + } else { + puts [read $in2] + } + puts "/* END $path */" + close $in2 + continue + } + if {[regexp {^BEGIN_STRING} $line]} { + set instr 1 + puts "/* BEGIN_STRING */" + continue + } + if {[regexp {^END_STRING} $line]} { + set instr 0 + puts "/* END_STRING */" + continue + } + if {$instr} { + set x [string map "\\\\ \\\\\\\\ \\\" \\\\\"" $line] + puts "\"$x\\n\"" + } else { + puts $line + } +} diff --git a/tool/sqlite3_analyzer.c.in b/tool/sqlite3_analyzer.c.in new file mode 100644 index 0000000000..54f323a427 --- /dev/null +++ b/tool/sqlite3_analyzer.c.in @@ -0,0 +1,903 @@ +/* +** Read an SQLite database file and analyze its space utilization. Generate +** text on standard output. +*/ +#define TCLSH 2 +#define SQLITE_ENABLE_DBSTAT_VTAB 1 +#undef SQLITE_THREADSAFE +#define SQLITE_THREADSAFE 0 +#undef SQLITE_ENABLE_COLUMN_METADATA +#define SQLITE_OMIT_DECLTYPE 1 +#define SQLITE_OMIT_DEPRECATED 1 +#define SQLITE_OMIT_PROGRESS_CALLBACK 1 +#define SQLITE_OMIT_SHARED_CACHE 1 +#define SQLITE_DEFAULT_MEMSTATUS 0 +#define SQLITE_MAX_EXPR_DEPTH 0 +#define SQLITE_OMIT_LOAD_EXTENSION 1 +INCLUDE sqlite3.c +INCLUDE $ROOT/src/tclsqlite.c + +static const char *tclsh_main_loop(void){ +return +BEGIN_STRING +# Run this TCL script using "testfixture" in order get a report that shows +# how much disk space is used by a particular data to actually store data +# versus how much space is unused. +# + +if {[catch { + +# Argument $tname is the name of a table within the database opened by +# database handle [db]. Return true if it is a WITHOUT ROWID table, or +# false otherwise. +# +proc is_without_rowid {tname} { + set t [string map {' ''} $tname] + db eval "PRAGMA index_list = '$t'" o { + if {$o(origin) == "pk"} { + set n $o(name) + if {0==[db one { SELECT count(*) FROM sqlite_master WHERE name=$n }]} { + return 1 + } + } + } + return 0 +} + +# Read and run TCL commands from standard input. Used to implement +# the --tclsh option. +# +proc tclsh {} { + set line {} + while {![eof stdin]} { + if {$line!=""} { + puts -nonewline "> " + } else { + puts -nonewline "% " + } + flush stdout + append line [gets stdin] + if {[info complete $line]} { + if {[catch {uplevel #0 $line} result]} { + puts stderr "Error: $result" + } elseif {$result!=""} { + puts $result + } + set line {} + } else { + append line \n + } + } +} + + +# Get the name of the database to analyze +# +proc usage {} { + set argv0 [file rootname [file tail [info nameofexecutable]]] + puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename" + puts stderr { +Analyze the SQLite3 database file specified by the "database-filename" +argument and output a report detailing size and storage efficiency +information for the database and its constituent tables and indexes. + +Options: + + --pageinfo Show how each page of the database-file is used + + --stats Output SQL text that creates a new database containing + statistics about the database that was analyzed + + --tclsh Run the built-in TCL interpreter interactively (for debugging) + + --version Show the version number of SQLite +} + exit 1 +} +set file_to_analyze {} +set flags(-pageinfo) 0 +set flags(-stats) 0 +set flags(-debug) 0 +append argv {} +foreach arg $argv { + if {[regexp {^-+pageinfo$} $arg]} { + set flags(-pageinfo) 1 + } elseif {[regexp {^-+stats$} $arg]} { + set flags(-stats) 1 + } elseif {[regexp {^-+debug$} $arg]} { + set flags(-debug) 1 + } elseif {[regexp {^-+tclsh$} $arg]} { + tclsh + exit 0 + } elseif {[regexp {^-+version$} $arg]} { + sqlite3 mem :memory: + puts [mem one {SELECT sqlite_version()||' '||sqlite_source_id()}] + mem close + exit 0 + } elseif {[regexp {^-} $arg]} { + puts stderr "Unknown option: $arg" + usage + } elseif {$file_to_analyze!=""} { + usage + } else { + set file_to_analyze $arg + } +} +if {$file_to_analyze==""} usage +set root_filename $file_to_analyze +regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename +if {![file exists $root_filename]} { + puts stderr "No such file: $root_filename" + exit 1 +} +if {![file readable $root_filename]} { + puts stderr "File is not readable: $root_filename" + exit 1 +} +set true_file_size [file size $root_filename] +if {$true_file_size<512} { + puts stderr "Empty or malformed database: $root_filename" + exit 1 +} + +# Compute the total file size assuming test_multiplexor is being used. +# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled +# +set extension [file extension $root_filename] +set pattern $root_filename +append pattern {[0-3][0-9][0-9]} +foreach f [glob -nocomplain $pattern] { + incr true_file_size [file size $f] + set extension {} +} +if {[string length $extension]>=2 && [string length $extension]<=4} { + set pattern [file rootname $root_filename] + append pattern {.[0-3][0-9][0-9]} + foreach f [glob -nocomplain $pattern] { + incr true_file_size [file size $f] + } +} + +# Open the database +# +if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} { + puts stderr "error trying to open $file_to_analyze: $msg" + exit 1 +} +if {$flags(-debug)} { + proc dbtrace {txt} {puts $txt; flush stdout;} + db trace ::dbtrace +} + +db eval {SELECT count(*) FROM sqlite_master} +set pageSize [expr {wide([db one {PRAGMA page_size}])}] + +if {$flags(-pageinfo)} { + db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} + db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} { + puts "$pageno $name $path" + } + exit 0 +} +if {$flags(-stats)} { + db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} + puts "BEGIN;" + puts "CREATE TABLE stats(" + puts " name STRING, /* Name of table or index */" + puts " path INTEGER, /* Path to page from root */" + puts " pageno INTEGER, /* Page number */" + puts " pagetype STRING, /* 'internal', 'leaf' or 'overflow' */" + puts " ncell INTEGER, /* Cells on page (0 for overflow) */" + puts " payload INTEGER, /* Bytes of payload on this page */" + puts " unused INTEGER, /* Bytes of unused space on this page */" + puts " mx_payload INTEGER, /* Largest payload size of all cells */" + puts " pgoffset INTEGER, /* Offset of page in file */" + puts " pgsize INTEGER /* Size of the page */" + puts ");" + db eval {SELECT quote(name) || ',' || + quote(path) || ',' || + quote(pageno) || ',' || + quote(pagetype) || ',' || + quote(ncell) || ',' || + quote(payload) || ',' || + quote(unused) || ',' || + quote(mx_payload) || ',' || + quote(pgoffset) || ',' || + quote(pgsize) AS x FROM stat} { + puts "INSERT INTO stats VALUES($x);" + } + puts "COMMIT;" + exit 0 +} + + +# In-memory database for collecting statistics. This script loops through +# the tables and indices in the database being analyzed, adding a row for each +# to an in-memory database (for which the schema is shown below). It then +# queries the in-memory db to produce the space-analysis report. +# +sqlite3 mem :memory: +if {$flags(-debug)} { + proc dbtrace {txt} {puts $txt; flush stdout;} + mem trace ::dbtrace +} +set tabledef {CREATE TABLE space_used( + name clob, -- Name of a table or index in the database file + tblname clob, -- Name of associated table + is_index boolean, -- TRUE if it is an index, false for a table + is_without_rowid boolean, -- TRUE if WITHOUT ROWID table + nentry int, -- Number of entries in the BTree + leaf_entries int, -- Number of leaf entries + depth int, -- Depth of the b-tree + payload int, -- Total amount of data stored in this table or index + ovfl_payload int, -- Total amount of data stored on overflow pages + ovfl_cnt int, -- Number of entries that use overflow + mx_payload int, -- Maximum payload size + int_pages int, -- Number of interior pages used + leaf_pages int, -- Number of leaf pages used + ovfl_pages int, -- Number of overflow pages used + int_unused int, -- Number of unused bytes on interior pages + leaf_unused int, -- Number of unused bytes on primary pages + ovfl_unused int, -- Number of unused bytes on overflow pages + gap_cnt int, -- Number of gaps in the page layout + compressed_size int -- Total bytes stored on disk +);} +mem eval $tabledef + +# Create a temporary "dbstat" virtual table. +# +db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat} +db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat + ORDER BY name, path} +db eval {DROP TABLE temp.stat} + +set isCompressed 0 +set compressOverhead 0 +set depth 0 +set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 } +foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] { + + set is_index [expr {$name!=$tblname}] + set is_without_rowid [is_without_rowid $name] + db eval { + SELECT + sum(ncell) AS nentry, + sum((pagetype=='leaf')*ncell) AS leaf_entries, + sum(payload) AS payload, + sum((pagetype=='overflow') * payload) AS ovfl_payload, + sum(path LIKE '%+000000') AS ovfl_cnt, + max(mx_payload) AS mx_payload, + sum(pagetype=='internal') AS int_pages, + sum(pagetype=='leaf') AS leaf_pages, + sum(pagetype=='overflow') AS ovfl_pages, + sum((pagetype=='internal') * unused) AS int_unused, + sum((pagetype=='leaf') * unused) AS leaf_unused, + sum((pagetype=='overflow') * unused) AS ovfl_unused, + sum(pgsize) AS compressed_size, + max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4) + AS depth + FROM temp.dbstat WHERE name = $name + } break + + set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] + set storage [expr {$total_pages*$pageSize}] + if {!$isCompressed && $storage>$compressed_size} { + set isCompressed 1 + set compressOverhead 14 + } + + # Column 'gap_cnt' is set to the number of non-contiguous entries in the + # list of pages visited if the b-tree structure is traversed in a top-down + # fashion (each node visited before its child-tree is passed). Any overflow + # chains present are traversed from start to finish before any child-tree + # is. + # + set gap_cnt 0 + set prev 0 + db eval { + SELECT pageno, pagetype FROM temp.dbstat + WHERE name=$name + ORDER BY pageno + } { + if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} { + incr gap_cnt + } + set prev $pageno + } + mem eval { + INSERT INTO space_used VALUES( + $name, + $tblname, + $is_index, + $is_without_rowid, + $nentry, + $leaf_entries, + $depth, + $payload, + $ovfl_payload, + $ovfl_cnt, + $mx_payload, + $int_pages, + $leaf_pages, + $ovfl_pages, + $int_unused, + $leaf_unused, + $ovfl_unused, + $gap_cnt, + $compressed_size + ); + } +} + +proc integerify {real} { + if {[string is double -strict $real]} { + return [expr {wide($real)}] + } else { + return 0 + } +} +mem function int integerify + +# Quote a string for use in an SQL query. Examples: +# +# [quote {hello world}] == {'hello world'} +# [quote {hello world's}] == {'hello world''s'} +# +proc quote {txt} { + return [string map {' ''} $txt] +} + +# Output a title line +# +proc titleline {title} { + if {$title==""} { + puts [string repeat * 79] + } else { + set len [string length $title] + set stars [string repeat * [expr 79-$len-5]] + puts "*** $title $stars" + } +} + +# Generate a single line of output in the statistics section of the +# report. +# +proc statline {title value {extra {}}} { + set len [string length $title] + set dots [string repeat . [expr 50-$len]] + set len [string length $value] + set sp2 [string range { } $len end] + if {$extra ne ""} { + set extra " $extra" + } + puts "$title$dots $value$sp2$extra" +} + +# Generate a formatted percentage value for $num/$denom +# +proc percent {num denom {of {}}} { + if {$denom==0.0} {return ""} + set v [expr {$num*100.0/$denom}] + set of {} + if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} { + return [format {%5.1f%% %s} $v $of] + } elseif {$v<0.1 || $v>99.9} { + return [format {%7.3f%% %s} $v $of] + } else { + return [format {%6.2f%% %s} $v $of] + } +} + +proc divide {num denom} { + if {$denom==0} {return 0.0} + return [format %.2f [expr double($num)/double($denom)]] +} + +# Generate a subreport that covers some subset of the database. +# the $where clause determines which subset to analyze. +# +proc subreport {title where showFrag} { + global pageSize file_pgcnt compressOverhead + + # Query the in-memory database for the sum of various statistics + # for the subset of tables/indices identified by the WHERE clause in + # $where. Note that even if the WHERE clause matches no rows, the + # following query returns exactly one row (because it is an aggregate). + # + # The results of the query are stored directly by SQLite into local + # variables (i.e. $nentry, $payload etc.). + # + mem eval " + SELECT + int(sum( + CASE WHEN (is_without_rowid OR is_index) THEN nentry + ELSE leaf_entries + END + )) AS nentry, + int(sum(payload)) AS payload, + int(sum(ovfl_payload)) AS ovfl_payload, + max(mx_payload) AS mx_payload, + int(sum(ovfl_cnt)) as ovfl_cnt, + int(sum(leaf_pages)) AS leaf_pages, + int(sum(int_pages)) AS int_pages, + int(sum(ovfl_pages)) AS ovfl_pages, + int(sum(leaf_unused)) AS leaf_unused, + int(sum(int_unused)) AS int_unused, + int(sum(ovfl_unused)) AS ovfl_unused, + int(sum(gap_cnt)) AS gap_cnt, + int(sum(compressed_size)) AS compressed_size, + int(max(depth)) AS depth, + count(*) AS cnt + FROM space_used WHERE $where" {} {} + + # Output the sub-report title, nicely decorated with * characters. + # + puts "" + titleline $title + puts "" + + # Calculate statistics and store the results in TCL variables, as follows: + # + # total_pages: Database pages consumed. + # total_pages_percent: Pages consumed as a percentage of the file. + # storage: Bytes consumed. + # payload_percent: Payload bytes used as a percentage of $storage. + # total_unused: Unused bytes on pages. + # avg_payload: Average payload per btree entry. + # avg_fanout: Average fanout for internal pages. + # avg_unused: Average unused bytes per btree entry. + # avg_meta: Average metadata overhead per entry. + # ovfl_cnt_percent: Percentage of btree entries that use overflow pages. + # + set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}] + set total_pages_percent [percent $total_pages $file_pgcnt] + set storage [expr {$total_pages*$pageSize}] + set payload_percent [percent $payload $storage {of storage consumed}] + set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}] + set avg_payload [divide $payload $nentry] + set avg_unused [divide $total_unused $nentry] + set total_meta [expr {$storage - $payload - $total_unused}] + set total_meta [expr {$total_meta + 4*($ovfl_pages - $ovfl_cnt)}] + set meta_percent [percent $total_meta $storage {of metadata}] + set avg_meta [divide $total_meta $nentry] + if {$int_pages>0} { + # TODO: Is this formula correct? + set nTab [mem eval " + SELECT count(*) FROM ( + SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0 + ) + "] + set avg_fanout [mem eval " + SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used + WHERE $where + "] + set avg_fanout [format %.2f $avg_fanout] + } + set ovfl_cnt_percent [percent $ovfl_cnt $nentry {of all entries}] + + # Print out the sub-report statistics. + # + statline {Percentage of total database} $total_pages_percent + statline {Number of entries} $nentry + statline {Bytes of storage consumed} $storage + if {$compressed_size!=$storage} { + set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}] + set pct [expr {$compressed_size*100.0/$storage}] + set pct [format {%5.1f%%} $pct] + statline {Bytes used after compression} $compressed_size $pct + } + statline {Bytes of payload} $payload $payload_percent + statline {Bytes of metadata} $total_meta $meta_percent + if {$cnt==1} {statline {B-tree depth} $depth} + statline {Average payload per entry} $avg_payload + statline {Average unused bytes per entry} $avg_unused + statline {Average metadata per entry} $avg_meta + if {[info exists avg_fanout]} { + statline {Average fanout} $avg_fanout + } + if {$showFrag && $total_pages>1} { + set fragmentation [percent $gap_cnt [expr {$total_pages-1}]] + statline {Non-sequential pages} $gap_cnt $fragmentation + } + statline {Maximum payload per entry} $mx_payload + statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent + if {$int_pages>0} { + statline {Index pages used} $int_pages + } + statline {Primary pages used} $leaf_pages + statline {Overflow pages used} $ovfl_pages + statline {Total pages used} $total_pages + if {$int_unused>0} { + set int_unused_percent [ + percent $int_unused [expr {$int_pages*$pageSize}] {of index space}] + statline "Unused bytes on index pages" $int_unused $int_unused_percent + } + statline "Unused bytes on primary pages" $leaf_unused [ + percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}] + statline "Unused bytes on overflow pages" $ovfl_unused [ + percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}] + statline "Unused bytes on all pages" $total_unused [ + percent $total_unused $storage {of all space}] + return 1 +} + +# Calculate the overhead in pages caused by auto-vacuum. +# +# This procedure calculates and returns the number of pages used by the +# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum, +# then 0 is returned. The two arguments are the size of the database file in +# pages and the page size used by the database (in bytes). +proc autovacuum_overhead {filePages pageSize} { + + # Set $autovacuum to non-zero for databases that support auto-vacuum. + set autovacuum [db one {PRAGMA auto_vacuum}] + + # If the database is not an auto-vacuum database or the file consists + # of one page only then there is no overhead for auto-vacuum. Return zero. + if {0==$autovacuum || $filePages==1} { + return 0 + } + + # The number of entries on each pointer map page. The layout of the + # database file is one pointer-map page, followed by $ptrsPerPage other + # pages, followed by a pointer-map page etc. The first pointer-map page + # is the second page of the file overall. + set ptrsPerPage [expr double($pageSize/5)] + + # Return the number of pointer map pages in the database. + return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))] +} + + +# Calculate the summary statistics for the database and store the results +# in TCL variables. They are output below. Variables are as follows: +# +# pageSize: Size of each page in bytes. +# file_bytes: File size in bytes. +# file_pgcnt: Number of pages in the file. +# file_pgcnt2: Number of pages in the file (calculated). +# av_pgcnt: Pages consumed by the auto-vacuum pointer-map. +# av_percent: Percentage of the file consumed by auto-vacuum pointer-map. +# inuse_pgcnt: Data pages in the file. +# inuse_percent: Percentage of pages used to store data. +# free_pgcnt: Free pages calculated as ( - ) +# free_pgcnt2: Free pages in the file according to the file header. +# free_percent: Percentage of file consumed by free pages (calculated). +# free_percent2: Percentage of file consumed by free pages (header). +# ntable: Number of tables in the db. +# nindex: Number of indices in the db. +# nautoindex: Number of indices created automatically. +# nmanindex: Number of indices created manually. +# user_payload: Number of bytes of payload in table btrees +# (not including sqlite_master) +# user_percent: $user_payload as a percentage of total file size. + +### The following, setting $file_bytes based on the actual size of the file +### on disk, causes this tool to choke on zipvfs databases. So set it based +### on the return of [PRAGMA page_count] instead. +if 0 { + set file_bytes [file size $file_to_analyze] + set file_pgcnt [expr {$file_bytes/$pageSize}] +} +set file_pgcnt [db one {PRAGMA page_count}] +set file_bytes [expr {$file_pgcnt * $pageSize}] + +set av_pgcnt [autovacuum_overhead $file_pgcnt $pageSize] +set av_percent [percent $av_pgcnt $file_pgcnt] + +set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used} +set inuse_pgcnt [expr wide([mem eval $sql])] +set inuse_percent [percent $inuse_pgcnt $file_pgcnt] + +set free_pgcnt [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}] +set free_percent [percent $free_pgcnt $file_pgcnt] +set free_pgcnt2 [db one {PRAGMA freelist_count}] +set free_percent2 [percent $free_pgcnt2 $file_pgcnt] + +set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}] + +set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}] +set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}] +set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'} +set nautoindex [db eval $sql] +set nmanindex [expr {$nindex-$nautoindex}] + +# set total_payload [mem eval "SELECT sum(payload) FROM space_used"] +set user_payload [mem one {SELECT int(sum(payload)) FROM space_used + WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}] +set user_percent [percent $user_payload $file_bytes] + +# Output the summary statistics calculated above. +# +puts "/** Disk-Space Utilization Report For $root_filename" +puts "" +statline {Page size in bytes} $pageSize +statline {Pages in the whole file (measured)} $file_pgcnt +statline {Pages in the whole file (calculated)} $file_pgcnt2 +statline {Pages that store data} $inuse_pgcnt $inuse_percent +statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2 +statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent +statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent +statline {Number of tables in the database} $ntable +statline {Number of indices} $nindex +statline {Number of defined indices} $nmanindex +statline {Number of implied indices} $nautoindex +if {$isCompressed} { + statline {Size of uncompressed content in bytes} $file_bytes + set efficiency [percent $true_file_size $file_bytes] + statline {Size of compressed file on disk} $true_file_size $efficiency +} else { + statline {Size of the file in bytes} $file_bytes +} +statline {Bytes of user payload stored} $user_payload $user_percent + +# Output table rankings +# +puts "" +titleline "Page counts for all tables with their indices" +puts "" +mem eval {SELECT tblname, count(*) AS cnt, + int(sum(int_pages+leaf_pages+ovfl_pages)) AS size + FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} { + statline [string toupper $tblname] $size [percent $size $file_pgcnt] +} +puts "" +titleline "Page counts for all tables and indices separately" +puts "" +mem eval { + SELECT + upper(name) AS nm, + int(int_pages+leaf_pages+ovfl_pages) AS size + FROM space_used + ORDER BY size+0 DESC, name} {} { + statline $nm $size [percent $size $file_pgcnt] +} +if {$isCompressed} { + puts "" + titleline "Bytes of disk space used after compression" + puts "" + set csum 0 + mem eval {SELECT tblname, + int(sum(compressed_size)) + + $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages) + AS csize + FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} { + incr csum $csize + statline [string toupper $tblname] $csize [percent $csize $true_file_size] + } + set overhead [expr {$true_file_size - $csum}] + if {$overhead>0} { + statline {Header and free space} $overhead [percent $overhead $true_file_size] + } +} + +# Output subreports +# +if {$nindex>0} { + subreport {All tables and indices} 1 0 +} +subreport {All tables} {NOT is_index} 0 +if {$nindex>0} { + subreport {All indices} {is_index} 0 +} +foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used + ORDER BY name}] { + set qn [quote $tbl] + set name [string toupper $tbl] + set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}] + if {$n>1} { + set idxlist [mem eval "SELECT name FROM space_used + WHERE tblname='$qn' AND is_index + ORDER BY 1"] + subreport "Table $name and all its indices" "tblname='$qn'" 0 + subreport "Table $name w/o any indices" "name='$qn'" 1 + if {[llength $idxlist]>1} { + subreport "Indices of table $name" "tblname='$qn' AND is_index" 0 + } + foreach idx $idxlist { + set qidx [quote $idx] + subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1 + } + } else { + subreport "Table $name" "name='$qn'" 1 + } +} + +# Output instructions on what the numbers above mean. +# +puts "" +titleline Definitions +puts { +Page size in bytes + + The number of bytes in a single page of the database file. + Usually 1024. + +Number of pages in the whole file +} +puts " The number of $pageSize-byte pages that go into forming the complete + database" +puts { +Pages that store data + + The number of pages that store data, either as primary B*Tree pages or + as overflow pages. The number at the right is the data pages divided by + the total number of pages in the file. + +Pages on the freelist + + The number of pages that are not currently in use but are reserved for + future use. The percentage at the right is the number of freelist pages + divided by the total number of pages in the file. + +Pages of auto-vacuum overhead + + The number of pages that store data used by the database to facilitate + auto-vacuum. This is zero for databases that do not support auto-vacuum. + +Number of tables in the database + + The number of tables in the database, including the SQLITE_MASTER table + used to store schema information. + +Number of indices + + The total number of indices in the database. + +Number of defined indices + + The number of indices created using an explicit CREATE INDEX statement. + +Number of implied indices + + The number of indices used to implement PRIMARY KEY or UNIQUE constraints + on tables. + +Size of the file in bytes + + The total amount of disk space used by the entire database files. + +Bytes of user payload stored + + The total number of bytes of user payload stored in the database. The + schema information in the SQLITE_MASTER table is not counted when + computing this number. The percentage at the right shows the payload + divided by the total file size. + +Percentage of total database + + The amount of the complete database file that is devoted to storing + information described by this category. + +Number of entries + + The total number of B-Tree key/value pairs stored under this category. + +Bytes of storage consumed + + The total amount of disk space required to store all B-Tree entries + under this category. The is the total number of pages used times + the pages size. + +Bytes of payload + + The amount of payload stored under this category. Payload is the data + part of table entries and the key part of index entries. The percentage + at the right is the bytes of payload divided by the bytes of storage + consumed. + +Bytes of metadata + + The amount of formatting and structural information stored in the + table or index. Metadata includes the btree page header, the cell pointer + array, the size field for each cell, the left child pointer or non-leaf + cells, the overflow pointers for overflow cells, and the rowid value for + rowid table cells. In other words, metadata is everything that is neither + unused space nor content. The record header in the payload is counted as + content, not metadata. + +Average payload per entry + + The average amount of payload on each entry. This is just the bytes of + payload divided by the number of entries. + +Average unused bytes per entry + + The average amount of free space remaining on all pages under this + category on a per-entry basis. This is the number of unused bytes on + all pages divided by the number of entries. + +Non-sequential pages + + The number of pages in the table or index that are out of sequence. + Many filesystems are optimized for sequential file access so a small + number of non-sequential pages might result in faster queries, + especially for larger database files that do not fit in the disk cache. + Note that after running VACUUM, the root page of each table or index is + at the beginning of the database file and all other pages are in a + separate part of the database file, resulting in a single non- + sequential page. + +Maximum payload per entry + + The largest payload size of any entry. + +Entries that use overflow + + The number of entries that user one or more overflow pages. + +Total pages used + + This is the number of pages used to hold all information in the current + category. This is the sum of index, primary, and overflow pages. + +Index pages used + + This is the number of pages in a table B-tree that hold only key (rowid) + information and no data. + +Primary pages used + + This is the number of B-tree pages that hold both key and data. + +Overflow pages used + + The total number of overflow pages used for this category. + +Unused bytes on index pages + + The total number of bytes of unused space on all index pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on index pages. + +Unused bytes on primary pages + + The total number of bytes of unused space on all primary pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on primary pages. + +Unused bytes on overflow pages + + The total number of bytes of unused space on all overflow pages. The + percentage at the right is the number of unused bytes divided by the + total number of bytes on overflow pages. + +Unused bytes on all pages + + The total number of bytes of unused space on all primary and overflow + pages. The percentage at the right is the number of unused bytes + divided by the total number of bytes. +} + +# Output a dump of the in-memory database. This can be used for more +# complex offline analysis. +# +titleline {} +puts "The entire text of this report can be sourced into any SQL database" +puts "engine for further analysis. All of the text above is an SQL comment." +puts "The data used to generate this report follows:" +puts "*/" +puts "BEGIN;" +puts $tabledef +unset -nocomplain x +mem eval {SELECT * FROM space_used} x { + puts -nonewline "INSERT INTO space_used VALUES" + set sep ( + foreach col $x(*) { + set v $x($col) + if {$v=="" || ![string is double $v]} {set v '[quote $v]'} + puts -nonewline $sep$v + set sep , + } + puts ");" +} +puts "COMMIT;" + +} err]} { + puts "ERROR: $err" + puts $errorInfo + exit 1 +} +END_STRING +; +} diff --git a/tool/tostr.tcl b/tool/tostr.tcl deleted file mode 100644 index cb06ee947f..0000000000 --- a/tool/tostr.tcl +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/tcl -# -# Convert input text into a C string -# -set in [open [lindex $argv 0] rb] -while {![eof $in]} { - set line [gets $in] - if {[eof $in]} break; - set x [string map "\\\\ \\\\\\\\ \\\" \\\\\"" $line] - puts "\"$x\\n\"" -} -close $in