*************************************************************************
** A TCL Interface to SQLite
**
-** $Id: tclsqlite.c,v 1.59 2004/02/25 22:51:06 rdc Exp $
+** $Id: tclsqlite.c,v 1.59.2.1 2004/06/19 11:57:40 drh Exp $
*/
#ifndef NO_TCL /* Omit this whole file if TCL is unavailable */
return Sqlite_Init(interp);
}
#endif
+/***************************************************************************
+** The remaining code is only included if the TCLSH macro is defined to
+** be an integer greater than 0
+*/
+#if defined(TCLSH) && TCLSH>0
/*
** If the macro TCLSH is defined and is one, then put in code for the
-** "main" routine that will initialize Tcl.
+** "main" routine that implement a interactive shell into which the user
+** can type TCL commands.
*/
-#if defined(TCLSH) && TCLSH==1
+#if TCLSH==1
static char zMainloop[] =
"set line {}\n"
"while {![eof stdin]} {\n"
"}\n"
"}\n"
;
+#endif /* TCLSH==1 */
-#define TCLSH_MAIN main /* Needed to fake out mktclapp */
-int TCLSH_MAIN(int argc, char **argv){
- Tcl_Interp *interp;
- Tcl_FindExecutable(argv[0]);
- interp = Tcl_CreateInterp();
+int Libsqlite_Init( Tcl_Interp *interp) {
+#ifdef TCL_THREADS
+ if (Thread_Init(interp) == TCL_ERROR) {
+ return TCL_ERROR;
+ }
+#endif
Sqlite_Init(interp);
#ifdef SQLITE_TEST
{
extern int Sqlitetest1_Init(Tcl_Interp*);
extern int Sqlitetest2_Init(Tcl_Interp*);
extern int Sqlitetest3_Init(Tcl_Interp*);
- extern int Sqlitetest4_Init(Tcl_Interp*);
extern int Md5_Init(Tcl_Interp*);
Sqlitetest1_Init(interp);
Sqlitetest2_Init(interp);
Sqlitetest3_Init(interp);
- Sqlitetest4_Init(interp);
Md5_Init(interp);
+ Tcl_StaticPackage(interp, "sqlite", Libsqlite_Init, Libsqlite_Init);
}
#endif
+ return TCL_OK;
+}
+
+#define TCLSH_MAIN main /* Needed to fake out mktclapp */
+#if TCLSH==1
+int TCLSH_MAIN(int argc, char **argv){
+#ifndef TCL_THREADS
+ Tcl_Interp *interp;
+ Tcl_FindExecutable(argv[0]);
+ interp = Tcl_CreateInterp();
+ Libsqlite_Init(interp);
if( argc>=2 ){
int i;
Tcl_SetVar(interp,"argv0",argv[1],TCL_GLOBAL_ONLY);
const char *zInfo = Tcl_GetVar(interp, "errorInfo", TCL_GLOBAL_ONLY);
if( zInfo==0 ) zInfo = interp->result;
fprintf(stderr,"%s: %s\n", *argv, zInfo);
- return 1;
+ return TCL_ERROR;
}
}else{
Tcl_GlobalEval(interp, zMainloop);
}
return 0;
+#else
+ Tcl_Main(argc, argv, Libsqlite_Init);
+#endif /* TCL_THREADS */
+ return 0;
+}
+#endif /* TCLSH==1 */
+
+
+/*
+** If the macro TCLSH is set to 2, then implement a space analysis tool.
+*/
+#if TCLSH==2
+static char zAnalysis[] =
+#include "spaceanal_tcl.h"
+;
+
+int main(int argc, char **argv){
+ Tcl_Interp *interp;
+ int i;
+ Tcl_FindExecutable(argv[0]);
+ interp = Tcl_CreateInterp();
+ Libsqlite_Init(interp);
+ Tcl_SetVar(interp,"argv0",argv[0],TCL_GLOBAL_ONLY);
+ Tcl_SetVar(interp,"argv", "", TCL_GLOBAL_ONLY);
+ for(i=1; i<argc; i++){
+ Tcl_SetVar(interp, "argv", argv[i],
+ TCL_GLOBAL_ONLY | TCL_LIST_ELEMENT | TCL_APPEND_VALUE);
+ }
+ if( Tcl_GlobalEval(interp, zAnalysis)!=TCL_OK ){
+ const char *zInfo = Tcl_GetVar(interp, "errorInfo", TCL_GLOBAL_ONLY);
+ if( zInfo==0 ) zInfo = interp->result;
+ fprintf(stderr,"%s: %s\n", *argv, zInfo);
+ return TCL_ERROR;
+ }
+ return 0;
}
+#endif /* TCLSH==2 */
+
#endif /* TCLSH */
-#endif /* !defined(NO_TCL) */
+#endif /* NO_TCL */
--- /dev/null
+# Run this TCL script using "testfixture" in order get a report that shows
+# how much disk space is used by a particular data to actually store data
+# versus how much space is unused.
+#
+
+# Get the name of the database to analyze
+#
+if {[llength $argv]!=1} {
+ puts stderr "Usage: $argv0 database-name"
+ exit 1
+}
+set file_to_analyze [lindex $argv 0]
+if {![file exists $file_to_analyze]} {
+ puts stderr "No such file: $file_to_analyze"
+ exit 1
+}
+if {![file readable $file_to_analyze]} {
+ puts stderr "File is not readable: $file_to_analyze"
+ exit 1
+}
+if {[file size $file_to_analyze]<2048} {
+ puts stderr "Empty or malformed database: $file_to_analyze"
+ exit 1
+}
+
+# Open the database
+#
+sqlite db [lindex $argv 0]
+set DB [btree_open [lindex $argv 0]]
+
+# In-memory database for collecting statistics
+#
+sqlite mem :memory:
+set tabledef\
+{CREATE TABLE space_used(
+ name clob, -- Name of a table or index in the database file
+ tblname clob, -- Name of associated table
+ is_index boolean, -- TRUE if it is an index, false for a table
+ nentry int, -- Number of entries in the BTree
+ payload int, -- Total amount of data stored in this table or index
+ mx_payload int, -- Maximum payload size
+ n_ovfl int, -- Number of entries that overflow
+ pri_pages int, -- Number of primary pages used
+ ovfl_pages int, -- Number of overflow pages used
+ pri_unused int, -- Number of unused bytes on primary pages
+ ovfl_unused int -- Number of unused bytes on overflow pages
+);}
+mem eval $tabledef
+
+# This query will be used to find the root page number for every index and
+# table in the database.
+#
+set sql {
+ SELECT name, tbl_name, type, rootpage
+ FROM sqlite_master WHERE type IN ('table','index')
+ UNION ALL
+ SELECT 'sqlite_master', 'sqlite_master', 'table', 2
+ ORDER BY 1
+}
+
+# Analyze every table in the database, one at a time.
+#
+foreach {name tblname type rootpage} [db eval $sql] {
+ puts stderr "Analyzing $name..."
+ set cursor [btree_cursor $DB $rootpage 0]
+ set go [btree_first $cursor]
+ set size 0
+ catch {unset pg_used}
+ set unused_ovfl 0
+ set n_overflow 0
+ set cnt_ovfl 0
+ set n_entry 0
+ set mx_size 0
+ set pg_used($rootpage) 1016
+ while {$go==0} {
+ incr n_entry
+ set payload [btree_payload_size $cursor]
+ incr size $payload
+ set stat [btree_cursor_dump $cursor]
+ set pgno [lindex $stat 0]
+ set freebytes [lindex $stat 4]
+ set pg_used($pgno) $freebytes
+ if {$payload>236} {
+ # if {[lindex $stat 8]==0} {error "overflow is empty with $payload"}
+ set n [expr {($payload-236+1019)/1020}]
+ incr n_overflow $n
+ incr cnt_ovfl
+ incr unused_ovfl [expr {$n*1020+236-$payload}]
+ } else {
+ # if {[lindex $stat 8]!=0} {error "overflow not empty with $payload"}
+ }
+ if {$payload>$mx_size} {set mx_size $payload}
+ set go [btree_next $cursor]
+ }
+ btree_close_cursor $cursor
+ set n_primary [llength [array names pg_used]]
+ set unused_primary 0
+ foreach x [array names pg_used] {incr unused_primary $pg_used($x)}
+ regsub -all ' $name '' name
+ set sql "INSERT INTO space_used VALUES('$name'"
+ regsub -all ' $tblname '' tblname
+ append sql ",'$tblname',[expr {$type=="index"}],$n_entry"
+ append sql ",$size,$mx_size,$cnt_ovfl,"
+ append sql "$n_primary,$n_overflow,$unused_primary,$unused_ovfl);"
+ mem eval $sql
+}
+
+# Generate a single line of output in the statistics section of the
+# report.
+#
+proc statline {title value {extra {}}} {
+ set len [string length $title]
+ set dots [string range {......................................} $len end]
+ set len [string length $value]
+ set sp2 [string range { } $len end]
+ if {$extra ne ""} {
+ set extra " $extra"
+ }
+ puts "$title$dots $value$sp2$extra"
+}
+
+# Generate a formatted percentage value for $num/$denom
+#
+proc percent {num denom} {
+ if {$denom==0.0} {return ""}
+ set v [expr {$num*100.0/$denom}]
+ if {$v>1.0 && $v<99.0} {
+ return [format %4.1f%% $v]
+ } elseif {$v<0.1 || $v>99.9} {
+ return [format %6.3f%% $v]
+ } else {
+ return [format %5.2f%% $v]
+ }
+}
+
+# Generate a subreport that covers some subset of the database.
+# the $where clause determines which subset to analyze.
+#
+proc subreport {title where} {
+ set hit 0
+ mem eval "SELECT sum(nentry) AS nentry, \
+ sum(payload) AS payload, \
+ sum(CASE is_index WHEN 1 THEN 0 ELSE payload-4*nentry END) \
+ AS data, \
+ max(mx_payload) AS mx_payload, \
+ sum(n_ovfl) as n_ovfl, \
+ sum(pri_pages) AS pri_pages, \
+ sum(ovfl_pages) AS ovfl_pages, \
+ sum(pri_unused) AS pri_unused, \
+ sum(ovfl_unused) AS ovfl_unused \
+ FROM space_used WHERE $where" {} {set hit 1}
+ if {!$hit} {return 0}
+ puts ""
+ set len [string length $title]
+ incr len 5
+ set stars "***********************************"
+ append stars $stars
+ set stars [string range $stars $len end]
+ puts "*** $title $stars"
+ puts ""
+ statline "Percentage of total database" \
+ [percent [expr {$pri_pages+$ovfl_pages}] $::file_pgcnt]
+ statline "Number of entries" $nentry
+ set storage [expr {($pri_pages+$ovfl_pages)*1024}]
+ statline "Bytes of storage consumed" $storage
+ statline "Bytes of payload" $payload [percent $payload $storage]
+ statline "Bytes of data" $data [percent $data $storage]
+ set key [expr {$payload-$data}]
+ statline "Bytes of key" $key [percent $key $storage]
+ set avgpay [expr {$nentry>0?$payload/$nentry:0}]
+ statline "Average payload per entry" $avgpay
+ set avgunused [expr {$nentry>0?($pri_unused+$ovfl_unused)/$nentry:0}]
+ statline "Average unused bytes per entry" $avgunused
+ statline "Average fanout" \
+ [format %.2f [expr {$pri_pages==0?0:($nentry+0.0)/$pri_pages}]]
+ statline "Maximum payload per entry" $mx_payload
+ statline "Entries that use overflow" $n_ovfl [percent $n_ovfl $nentry]
+ statline "Total pages used" [set allpgs [expr {$pri_pages+$ovfl_pages}]]
+ statline "Primary pages used" $pri_pages ;# [percent $pri_pages $allpgs]
+ statline "Overflow pages used" $ovfl_pages ;# [percent $ovfl_pages $allpgs]
+ statline "Unused bytes on primary pages" $pri_unused \
+ [percent $pri_unused [expr {$pri_pages*1024}]]
+ statline "Unused bytes on overflow pages" $ovfl_unused \
+ [percent $ovfl_unused [expr {$ovfl_pages*1024}]]
+ set allunused [expr {$ovfl_unused+$pri_unused}]
+ statline "Unused bytes on all pages" $allunused \
+ [percent $allunused [expr {$allpgs*1024}]]
+ return 1
+}
+
+# Output summary statistics:
+#
+puts "/** Disk-Space Utilization Report For $file_to_analyze"
+puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]"
+puts ""
+set fsize [file size [lindex $argv 0]]
+set file_pgcnt [expr {$fsize/1024}]
+set usedcnt [mem eval {SELECT sum(pri_pages+ovfl_pages) FROM space_used}]
+set freecnt [expr {$file_pgcnt-$usedcnt-1}]
+set freecnt2 [lindex [btree_get_meta $DB] 0]
+statline {Pages in the whole file (measured)} $file_pgcnt
+set file_pgcnt2 [expr {$usedcnt+$freecnt2+1}]
+statline {Pages in the whole file (calculated)} $file_pgcnt2
+statline {Pages that store data} $usedcnt [percent $usedcnt $file_pgcnt]
+statline {Pages on the freelist (per header)}\
+ $freecnt2 [percent $freecnt2 $file_pgcnt]
+statline {Pages on the freelist (calculated)}\
+ $freecnt [percent $freecnt $file_pgcnt]
+statline {Header pages} 1 [percent 1 $file_pgcnt]
+
+set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
+statline {Number of tables in the database} $ntable
+set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
+set autoindex [db eval {SELECT count(*) FROM sqlite_master
+ WHERE type='index' AND name LIKE '(% autoindex %)'}]
+set manindex [expr {$nindex-$autoindex}]
+statline {Number of indices} $nindex
+statline {Number of named indices} $manindex [percent $manindex $nindex]
+statline {Automatically generated indices} $autoindex \
+ [percent $autoindex $nindex]
+
+set bytes_data [mem eval "SELECT sum(payload-4*nentry) FROM space_used
+ WHERE NOT is_index AND name!='sqlite_master'"]
+set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
+statline "Size of the file in bytes" $fsize
+statline "Bytes of payload stored" $total_payload \
+ [percent $total_payload $fsize]
+statline "Bytes of user data stored" $bytes_data \
+ [percent $bytes_data $fsize]
+
+# Output table rankings
+#
+puts ""
+puts "*** Page counts for all tables with their indices ********************"
+puts ""
+mem eval {SELECT tblname, count(*) AS cnt, sum(pri_pages+ovfl_pages) AS size
+ FROM space_used GROUP BY tblname ORDER BY size DESC, tblname} {} {
+ statline [string toupper $tblname] $size [percent $size $file_pgcnt]
+}
+
+# Output subreports
+#
+if {$nindex>0} {
+ subreport {All tables and indices} 1
+}
+subreport {All tables} {NOT is_index}
+if {$nindex>0} {
+ subreport {All indices} {is_index}
+}
+foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index
+ ORDER BY name}] {
+ regsub ' $tbl '' qn
+ set name [string toupper $tbl]
+ set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"]
+ if {$n>1} {
+ subreport "Table $name and all its indices" "tblname='$qn'"
+ subreport "Table $name w/o any indices" "name='$qn'"
+ subreport "Indices of table $name" "tblname='$qn' AND is_index"
+ } else {
+ subreport "Table $name" "name='$qn'"
+ }
+}
+
+# Output instructions on what the numbers above mean.
+#
+puts {
+*** Definitions ******************************************************
+
+Number of pages in the whole file
+
+ The number of 1024-byte pages that go into forming the complete database
+
+Pages that store data
+
+ The number of pages that store data, either as primary B*Tree pages or
+ as overflow pages. The number at the right is the data pages divided by
+ the total number of pages in the file.
+
+Pages on the freelist
+
+ The number of pages that are not currently in use but are reserved for
+ future use. The percentage at the right is the number of freelist pages
+ divided by the total number of pages in the file.
+
+Header pages
+
+ The number of pages of header overhead in the database. This value is
+ always 1. The percentage at the right is the number of header pages
+ divided by the total number of pages in the file.
+
+Number of tables in the database
+
+ The number of tables in the database, including the SQLITE_MASTER table
+ used to store schema information.
+
+Number of indices
+
+ The total number of indices in the database.
+
+Number of named indices
+
+ The number of indices created using an explicit CREATE INDEX statement.
+
+Automatically generated indices
+
+ The number of indices used to implement PRIMARY KEY or UNIQUE constraints
+ on tables.
+
+Size of the file in bytes
+
+ The total amount of disk space used by the entire database files.
+
+Bytes of payload stored
+
+ The total number of bytes of payload stored in the database. Payload
+ includes both key and data. The content of the SQLITE_MASTER table is
+ counted when computing this number. The percentage at the right shows
+ the payload divided by the total file size.
+
+Bytes of user data stored
+
+ The total number of bytes of data stored in the database, not counting
+ the database schema information stored in the SQLITE_MASTER table. The
+ percentage at the right is the user data size divided by the total file
+ size.
+
+Percentage of total database
+
+ The amount of the complete database file that is devoted to storing
+ information described by this category.
+
+Number of entries
+
+ The total number of B*Tree key/value pairs stored under this category.
+
+Bytes of storage consumed
+
+ The total amount of disk space required to store all B*Tree entries
+ under this category. The is the total number of pages used times
+ the pages size (1024).
+
+Bytes of payload
+
+ The amount of payload stored under this category. Payload is the sum
+ of keys and data. Each table entry has 4 bytes of key and an arbitrary
+ amount of data. Each index entry has 4 or more bytes of key and no
+ data. The percentage at the right is the bytes of payload divided by
+ the bytes of storage consumed.
+
+Bytes of data
+
+ The amount of data stored under this category. The data space reported
+ includes formatting information such as nul-terminators and field-lengths
+ that are stored with the data. The percentage at the right is the bytes
+ of data divided by bytes of storage consumed.
+
+Bytes of key
+
+ The sum of the sizes of all keys under this category. The percentage at
+ the right is the bytes of key divided by the bytes of storage consumed.
+
+Average payload per entry
+
+ The average amount of payload on each entry. This is just the bytes of
+ payload divided by the number of entries.
+
+Average unused bytes per entry
+
+ The average amount of free space remaining on all pages under this
+ category on a per-entry basis. This is the number of unused bytes on
+ all pages divided by the number of entries.
+
+Maximum payload per entry
+
+ The largest payload size of any entry.
+
+Entries that use overflow
+
+ Up to 236 bytes of payload for each entry are stored directly in the
+ primary B*Tree page. Any additional payload is stored on a linked list
+ of overflow pages. This is the number of entries that exceed 236 bytes
+ in size. The value to the right is the number of entries that overflow
+ divided by the total number of entries.
+
+Total pages used
+
+ This is the number of 1024 byte pages used to hold all information in
+ the current category. This is the sum of primary and overflow pages.
+
+Primary pages used
+
+ This is the number of primary B*Tree pages used.
+
+Overflow pages used
+
+ The total number of overflow pages used for this category.
+
+Unused bytes on primary pages
+
+ The total number of bytes of unused space on all primary pages. The
+ percentage at the right is the number of unused bytes divided by the
+ total number of bytes on primary pages.
+
+Unused bytes on overflow pages
+
+ The total number of bytes of unused space on all overflow pages. The
+ percentage at the right is the number of unused bytes divided by the
+ total number of bytes on overflow pages.
+
+Unused bytes on all pages
+
+ The total number of bytes of unused space on all primary and overflow
+ pages. The percentage at the right is the number of unused bytes
+ divided by the total number of bytes.
+}
+
+# Output the database
+#
+puts "**********************************************************************"
+puts "The entire text of this report can be sourced into any SQL database"
+puts "engine for further analysis. All of the text above is an SQL comment."
+puts "The data used to generate this report follows:"
+puts "*/"
+puts "BEGIN;"
+puts $tabledef
+unset -nocomplain x
+mem eval {SELECT * FROM space_used} x {
+ puts -nonewline "INSERT INTO space_used VALUES("
+ regsub ' $x(name) '' qn
+ regsub ' $x(tblname) '' qtn
+ puts -nonewline "'$qn','$qtn',"
+ puts -nonewline "$x(is_index),$x(nentry),$x(payload),$x(mx_payload),"
+ puts -nonewline "$x(n_ovfl),$x(pri_pages),$x(ovfl_pages),$x(pri_unused),"
+ puts "$x(ovfl_unused));"
+}
+puts "COMMIT;"