-C Explicit\scollations\salways\soverride\simplicit\scollations.\s\sThis\sis\nbackwards\scompatible\ssince\sSQLite\shas\snot\spreviously\ssupported\nexplicit\scollations.\sNeed\sto\sadd\stests\sof\sthis\snew\sbehavior.\s(CVS\s3633)
-D 2007-02-07T13:09:46
+C Changes\sto\ssupport\sfragmentation\sanalysis\sin\ssqlite3_analyzer.\s(CVS\s3634)
+D 2007-02-10T19:22:36
F Makefile.in 7fa74bf4359aa899da5586e394d17735f221315f
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
F src/analyze.c 7d2b7ab9a9c2fd6e55700f69064dfdd3e36d7a8a
F src/attach.c b11eb4d5d3fb99a10a626956bccc7215f6b68b16
F src/auth.c 902f4722661c796b97f007d9606bd7529c02597f
-F src/btree.c 51aef6a4b18df165b83b332befd1447c011b4389
+F src/btree.c 4d4bef16fbf4f53ec3b161cfe5bb19bbc27a281d
F src/btree.h 066444ee25bd6e6accb997bfd2cf5ace14dbcd00
F src/build.c 6bd68dc730b01c1727738f8e4b5c730eb0ddb421
F src/callback.c 31d22b4919c7645cbcbb1591ce2453e8c677c558
F src/tclsqlite.c cd2b3b86ab07c0e0779f6c6e71e72c6c7dc1e704
F src/test1.c cb314bfa3e9251b545fa3669ec80a8c8a0a86310
F src/test2.c ca74a1d8aeb7d9606e8f6b762c5daf85c1a3f92b
-F src/test3.c 875126eab6749f9d9e2b60b6ee6a65825b3d1fed
+F src/test3.c ed494a126221c4b9f66f8f0445554ad749764709
F src/test4.c 8b784cd82de158a2317cb4ac4bc86f91ad315e25
F src/test5.c 7162f8526affb771c4ed256826eee7bb9eca265f
F src/test6.c 60a02961ceb7b3edc25f5dc5c1ac2556622a76de
F test/where3.test 0a30fe9808b0fa01c46d0fcf4fac0bf6cf75bb30
F test/where4.test 3fcf53c5ea7af1db3980b3293c2a45b56605f26a
F tool/diffdb.c 7524b1b5df217c20cd0431f6789851a4e0cb191b
+F tool/fragck.tcl 5265a95126abcf6ab357f7efa544787e5963f439
F tool/lemon.c 2938bec507110397c937bd8a03b0c9596a709a04
F tool/lempar.c fdc1672e97f72f72e76553038501da40fec9d251
F tool/memleak.awk 4e7690a51bf3ed757e611273d43fe3f65b510133
F tool/showdb.c a086a3d788c7a23cb008317c3180ceb19f20bce0
F tool/showjournal.c ec3b171be148656827c4949fbfb8ab4370822f87
F tool/space_used.tcl f714c41a59e326b8b9042f415b628b561bafa06b
-F tool/spaceanal.tcl f4ca4843c137db16124d680523f466044d5f0ba2
+F tool/spaceanal.tcl 2614b0c929852547138d608f58535f33a49d27ec
F tool/speedtest.tcl 06c76698485ccf597b9e7dbb1ac70706eb873355
F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F www/arch.fig d5f9752a4dbf242e9cfffffd3f5762b6c63b3bcf
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
-P 2bd4b62a20219f939ac2ac22440dc7fc0449f766
-R 7101481de1358da4b073980f7790ac38
+P 3638823a629164e4158f76d03ff2cea1eab34e9d
+R 51a9ab370b25f99fa741aa9c670f3eea
U drh
-Z 02cb54f9b44027c71a4480afa634e551
+Z bcbf10610bf7092c876385c4549956cc
-3638823a629164e4158f76d03ff2cea1eab34e9d
\ No newline at end of file
+bd6bc3b8f06919000fb082087dff7bbd335d07e9
\ No newline at end of file
** May you share freely, never taking more than you give.
**
*************************************************************************
-** $Id: btree.c,v 1.334 2007/01/27 02:24:55 drh Exp $
+** $Id: btree.c,v 1.335 2007/02/10 19:22:36 drh Exp $
**
** This file implements a external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
** aResult[7] = Header size in bytes
** aResult[8] = Local payload size
** aResult[9] = Parent page number
+** aResult[10]= Page number of the first overflow page
**
** This routine is used for testing and debugging only.
*/
}else{
aResult[9] = pPage->pParent->pgno;
}
+ if( tmpCur.info.iOverflow ){
+ aResult[10] = get4byte(&tmpCur.info.pCell[tmpCur.info.iOverflow]);
+ }else{
+ aResult[10] = 0;
+ }
releaseTempCursor(&tmpCur);
return SQLITE_OK;
}
** is not included in the SQLite library. It is used for automated
** testing of the SQLite library.
**
-** $Id: test3.c,v 1.69 2007/01/27 02:24:56 drh Exp $
+** $Id: test3.c,v 1.70 2007/02/10 19:22:36 drh Exp $
*/
#include "sqliteInt.h"
#include "pager.h"
}
pBt = sqlite3TextToPtr(argv[1]);
nRoot = argc-2;
- aRoot = malloc( sizeof(int)*(argc-2) );
+ aRoot = (int*)malloc( sizeof(int)*(argc-2) );
for(i=0; i<argc-2; i++){
if( Tcl_GetInt(interp, argv[i+2], &aRoot[i]) ) return TCL_ERROR;
}
#else
zResult = 0;
#endif
- free(aRoot);
+ free((void*)aRoot);
if( zResult ){
Tcl_AppendResult(interp, zResult, 0);
sqliteFree(zResult);
** aResult[7] = Header size in bytes
** aResult[8] = Local payload size
** aResult[9] = Parent page number
+** aResult[10]= Page number of the first overflow page
*/
static int btree_cursor_info(
void *NotUsed,
int rc;
int i, j;
int up;
- int aResult[10];
+ int aResult[11];
char zBuf[400];
if( argc!=2 && argc!=3 ){
return SQLITE_OK;
}
+/*
+** Decode the 4-byte big-endian unsigned integer stored at p[0]..p[3].
+**
+** Derived from the routine of the same name in btree.c.  Each byte is
+** widened to u32 before it is shifted: shifting the promoted (signed)
+** int left by 24 bits is undefined when the top byte is 0x80 or larger.
+*/
+static u32 get4byte(unsigned char *p){
+  return ((u32)p[0]<<24) | ((u32)p[1]<<16) | ((u32)p[2]<<8) | (u32)p[3];
+}
+
+/*
+** Usage:   btree_ovfl_info BTREE CURSOR
+**
+** Given a cursor, return the sequence of page numbers that form the
+** overflow chain for the data of the entry that the cursor is pointing
+** to.  The result is a TCL list with one element per overflow page, in
+** chain order.  The list is empty if the entry has no overflow pages.
+**
+** An error is returned if CURSOR does not belong to BTREE, if the cursor
+** information cannot be obtained, or if an overflow page cannot be
+** fetched from the pager.
+*/
+static int btree_ovfl_info(
+  void *NotUsed,
+  Tcl_Interp *interp,    /* The TCL interpreter that invoked this command */
+  int argc,              /* Number of arguments */
+  const char **argv      /* Text of each argument */
+){
+  Btree *pBt;            /* The btree named by argv[1] */
+  BtCursor *pCur;        /* The cursor named by argv[2] */
+  Pager *pPager;         /* Pager used to fetch the overflow pages */
+  int rc;                /* Result code from sqlite3BtreeCursorInfo() */
+  int n;                 /* Upper bound on the number of overflow pages */
+  int ovflSize;          /* Payload bytes carried by one overflow page */
+  u32 pgno;              /* Page number of the current overflow page */
+  void *pPage;           /* Content of the current overflow page */
+  int aResult[11];       /* Output of sqlite3BtreeCursorInfo() */
+  char zElem[100];       /* Text rendering of pgno */
+  Tcl_DString str;       /* The list being accumulated */
+
+  if( argc!=3 ){
+    Tcl_AppendResult(interp, "wrong # args: should be \"", argv[0],
+                     " BTREE CURSOR\"", 0);
+    return TCL_ERROR;
+  }
+  pBt = sqlite3TextToPtr(argv[1]);
+  pCur = sqlite3TextToPtr(argv[2]);
+  if( (*(void**)pCur) != (void*)pBt ){
+    Tcl_AppendResult(interp, "Cursor ", argv[2], " does not belong to btree ",
+                     argv[1], 0);
+    return TCL_ERROR;
+  }
+  pPager = sqlite3BtreePager(pBt);
+  rc = sqlite3BtreeCursorInfo(pCur, aResult, 0);
+  if( rc ){
+    Tcl_AppendResult(interp, errorName(rc), 0);
+    return TCL_ERROR;
+  }
+
+  /* The first 4 bytes of every overflow page hold the number of the next
+  ** page in the chain (see the get4byte() call below), so only the
+  ** remainder of the usable space carries payload.  Dividing by the full
+  ** usable size would under-count the pages and truncate the list.
+  */
+  ovflSize = sqlite3BtreeGetPageSize(pBt) - sqlite3BtreeGetReserve(pBt) - 4;
+  Tcl_DStringInit(&str);
+  n = aResult[6] - aResult[8];     /* Total payload minus local payload */
+  n = (n + ovflSize - 1)/ovflSize;
+  pgno = (u32)aResult[10];
+
+  /* Walk the chain.  It ends at a zero next-page pointer; n is a safety
+  ** bound so that a corrupt chain cannot loop forever.
+  */
+  while( pgno && n-- ){
+    sprintf(zElem, "%u", (unsigned int)pgno);
+    Tcl_DStringAppendElement(&str, zElem);
+    if( sqlite3pager_get(pPager, pgno, &pPage)!=SQLITE_OK ){
+      Tcl_DStringFree(&str);
+      Tcl_AppendResult(interp, "unable to get page ", zElem, 0);
+      return TCL_ERROR;
+    }
+    pgno = get4byte((unsigned char*)pPage);
+    sqlite3pager_unref(pPage);
+  }
+  Tcl_DStringResult(interp, &str);
+  return TCL_OK;
+}
+
/*
** The command is provided for the purpose of setting breakpoints.
** in regression test scripts.
{ "btree_from_db", (Tcl_CmdProc*)btree_from_db },
{ "btree_set_cache_size", (Tcl_CmdProc*)btree_set_cache_size },
{ "btree_cursor_info", (Tcl_CmdProc*)btree_cursor_info },
+ { "btree_ovfl_info", (Tcl_CmdProc*)btree_ovfl_info },
{ "btree_cursor_list", (Tcl_CmdProc*)btree_cursor_list },
};
int i;
--- /dev/null
+# Run this TCL script using "testfixture" to get a report that shows
+# the sequence of database pages used by a particular table or index.
+# This information is used for fragmentation analysis.
+#
+
+# Check the command line.  Exactly two arguments are required: the name
+# of the database file and the name of the table or index to report on.
+#
+if {[llength $argv]!=2} {
+  puts stderr "Usage: $argv0 database-name table-or-index-name"
+  exit 1
+}
+foreach {file_to_analyze objname} $argv break
+
+# The database file must exist, be readable, and hold at least 512 bytes.
+#
+if {![file exists $file_to_analyze]} {
+  puts stderr "No such file: $file_to_analyze"
+  exit 1
+}
+if {![file readable $file_to_analyze]} {
+  puts stderr "File is not readable: $file_to_analyze"
+  exit 1
+}
+if {[file size $file_to_analyze]<512} {
+  puts stderr "Empty or malformed database: $file_to_analyze"
+  exit 1
+}
+
+# Open the database twice: as the SQL connection "db" and, through
+# [btree_open], as the btree handle kept in $DB.
+#
+sqlite3 db $file_to_analyze
+set DB [btree_open $file_to_analyze 1000 0]
+
+# Wrapper around the [btree_cursor_info] command.
+#
+#   arrayvar   Name of an array variable in the scope of the caller.
+#              The results are stored in this array.
+#   csr        An open btree cursor returned by [btree_cursor].
+#   up         Optional, default 0.  If non-zero, info is returned for
+#              the page that lies $up levels upwards in the tree
+#              structure ($up==1 is the parent page, $up==2 the
+#              grandparent, and so on).
+#
+# The following entries of the array are filled in, in the order that
+# [btree_cursor_info] returns them:
+#
+#   $arrayvar(page_no)             =  The page number
+#   $arrayvar(entry_no)            =  The entry number
+#   $arrayvar(page_entries)        =  Total number of entries on this page
+#   $arrayvar(cell_size)           =  Cell size (local payload + header)
+#   $arrayvar(page_freebytes)      =  Number of free bytes on this page
+#   $arrayvar(page_freeblocks)     =  Number of free blocks on the page
+#   $arrayvar(payload_bytes)       =  Total payload size (local + overflow)
+#   $arrayvar(header_bytes)        =  Header size in bytes
+#   $arrayvar(local_payload_bytes) =  Local payload size
+#   $arrayvar(parent)              =  Parent page number
+#   $arrayvar(first_ovfl)          =  Page number of the first overflow page
+#
+proc cursor_info {arrayvar csr {up 0}} {
+  upvar $arrayvar a
+
+  # Build the list of destination array elements, one per returned value.
+  set targets {}
+  foreach field {
+    page_no entry_no page_entries cell_size page_freebytes
+    page_freeblocks payload_bytes header_bytes local_payload_bytes
+    parent first_ovfl
+  } {
+    lappend targets a($field)
+  }
+
+  # A single pass of [foreach] assigns every element; [break] stops it
+  # from looping again should more values than names come back.
+  foreach $targets [btree_cursor_info $csr $up] break
+}
+
+# Record the page size of the database in the global variable $pageSize.
+#
+set pageSize [db eval {PRAGMA page_size}]
+
+# Work out the root page and the type (table or index) of the object to
+# be analyzed.  Ordinary objects are looked up in sqlite_master; the
+# sqlite_master table itself is hard-wired to root page 1, type table.
+#
+if {![string equal $objname sqlite_master]} {
+  # The [break] body stops after the first matching row, leaving its
+  # columns in the variables $rootpage and $type.
+  db eval {
+    SELECT rootpage, type FROM sqlite_master
+    WHERE name=$objname
+  } break
+  if {![info exists rootpage]} {
+    puts stderr "no such table or index: $objname"
+    exit 1
+  }
+  if {[lsearch -exact {table index} $type]<0} {
+    puts stderr "$objname is something other than a table or index"
+    exit 1
+  }
+  if {![string is integer -strict $rootpage]} {
+    puts stderr "invalid root page for $objname: $rootpage"
+    exit 1
+  }
+} else {
+  set rootpage 1
+  set type table
+}
+
+# The cursor $csr is pointing to an entry.  Print one line describing
+# the page that lies $up levels above the page containing that entry.
+# With $up==0 the page containing the entry itself is described.
+#
+# Each page is reported at most once: the global array $seen records
+# the page numbers already printed, and a repeat call is a no-op.
+# Ancestors of a page are always printed before the page itself.
+#
+proc page_info {csr up} {
+  global seen
+  cursor_info ci $csr $up
+  set pg $ci(page_no)
+  if {[info exists seen($pg)]} {
+    return
+  }
+  set seen($pg) 1
+
+  # Report the ancestors first.
+  #
+  if {$ci(parent)} {
+    page_info $csr [expr {$up+1}]
+  }
+
+  # Count the levels from this page up to the page that has no parent.
+  # That page is depth 1.
+  #
+  set depth 1
+  for {set lvl $up} {$ci(parent)} {incr depth} {
+    incr lvl
+    cursor_info ci $csr $lvl
+  }
+
+  # Print the result.
+  #
+  puts [format {LEVEL %d: %6d} $depth $pg]
+}
+
+
+
+
+# Visit every entry of the object in cursor order.  For each entry,
+# report the btree page(s) that lead to it (each page only once) and
+# then the overflow pages of the entry, numbered from 1.
+#
+set csr [btree_cursor $DB $rootpage 0]
+btree_first $csr
+while {![btree_eof $csr]} {
+  page_info $csr 0
+  set ovfl_index 0
+  foreach pg [btree_ovfl_info $DB $csr] {
+    incr ovfl_index
+    puts [format {OVFL %3d: %6d} $ovfl_index $pg]
+  }
+  btree_next $csr
+}
+exit 0
exit 1
}
+# Maximum distance between pages before we consider it a "gap"
+#
+set MAXGAP 3
+
# Open the database
#
sqlite3 db [lindex $argv 0]
ovfl_pages int, -- Number of overflow pages used
int_unused int, -- Number of unused bytes on interior pages
leaf_unused int, -- Number of unused bytes on primary pages
- ovfl_unused int -- Number of unused bytes on overflow pages
+ ovfl_unused int, -- Number of unused bytes on overflow pages
+ gap_cnt int -- Number of gaps in the page layout
);}
mem eval $tabledef
a(payload_bytes) \
a(header_bytes) \
a(local_payload_bytes) \
- a(parent) ] [btree_cursor_info $csr $up] {}
+ a(parent) \
+ a(first_ovfl) ] [btree_cursor_info $csr $up] break
}
# Determine the page-size of the database. This global variable is used
set ovfl_pages $wideZero ;# Number of overflow pages used
set leaf_pages $wideZero ;# Number of leaf pages
set int_pages $wideZero ;# Number of interior pages
+ set gap_cnt 0 ;# Number of holes in the page sequence
+ set prev_pgno 0 ;# Last page number seen
# As the btree is traversed, the array variable $seen($pgno) is set to 1
# the first time page $pgno is encountered.
set n [expr {int(ceil($ovfl/($pageSize-4.0)))}]
incr ovfl_pages $n
incr unused_ovfl [expr {$n*($pageSize-4) - $ovfl}]
+ set pglist [btree_ovfl_info $DB $csr]
+ } else {
+ set pglist {}
}
# If this is the first table entry analyzed for the page, then update
set seen($ci(page_no)) 1
incr leaf_pages
incr unused_leaf $ci(page_freebytes)
+ set pglist "$ci(page_no) $pglist"
# Now check if the page has a parent that has not been analyzed. If
# so, update the $int_pages, $cnt_int_entry and $unused_int statistics
incr int_pages
incr cnt_int_entry $ci(page_entries)
incr unused_int $ci(page_freebytes)
+
+ # parent pages come before their first child
+ set pglist "$ci(page_no) $pglist"
+ }
+ }
+
+ # Check the page list for fragmentation
+ #
+ foreach pg $pglist {
+ if {($pg<$prev_pgno || $pg>$prev_pgno+$MAXGAP) && $prev_pgno>0} {
+ incr gap_cnt
}
+ set prev_pgno $pg
}
}
btree_close_cursor $csr
append sql ",$unused_int"
append sql ",$unused_leaf"
append sql ",$unused_ovfl"
+ append sql ",$gap_cnt"
append sql );
mem eval $sql
}
set mx_payload $wideZero ;# Maximum payload size
set ovfl_pages $wideZero ;# Number of overflow pages used
set leaf_pages $wideZero ;# Number of leaf pages
+ set gap_cnt 0 ;# Number of holes in the page sequence
+ set prev_pgno 0 ;# Last page number seen
# As the btree is traversed, the array variable $seen($pgno) is set to 1
# the first time page $pgno is encountered.
set seen($ci(page_no)) 1
incr leaf_pages
incr unused_leaf $ci(page_freebytes)
+ set pg $ci(page_no)
+ if {$prev_pgno>0 && ($prev_pgno<$pg-$MAXGAP || $prev_pgno>$pg)} {
+ incr gap_cnt
+ }
+ set prev_pgno $ci(page_no)
}
}
btree_close_cursor $csr
append sql ",0"
append sql ",$unused_leaf"
append sql ",$unused_ovfl"
+ append sql ",$gap_cnt"
append sql );
mem eval $sql
}
int(sum(ovfl_pages)) AS ovfl_pages,
int(sum(leaf_unused)) AS leaf_unused,
int(sum(int_unused)) AS int_unused,
- int(sum(ovfl_unused)) AS ovfl_unused
+ int(sum(ovfl_unused)) AS ovfl_unused,
+ int(sum(gap_cnt)) AS gap_cnt
FROM space_used WHERE $where" {} {}
# Output the sub-report title, nicely decorated with * characters.
set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
set avg_payload [divide $payload $nleaf]
set avg_unused [divide $total_unused $nleaf]
+ set fragmentation [percent $gap_cnt $total_pages {fragmentation}]
if {$int_pages>0} {
# TODO: Is this formula correct?
set nTab [mem eval "
if {[info exists avg_fanout]} {
statline {Average fanout} $avg_fanout
}
+ if {$total_pages>1} {
+ statline {Fragmentation} $fragmentation
+ }
statline {Maximum payload per entry} $mx_payload
statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
if {$int_pages>0} {