From: dan Date: Mon, 6 Aug 2012 18:50:11 +0000 (+0000) Subject: When reusing pages as part of creating a new index, allocate the leaves from each... X-Git-Tag: version-3.7.14~46^2~1 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=428c218c90cce7bbff210a19ee01ecdc01511076;p=thirdparty%2Fsqlite.git When reusing pages as part of creating a new index, allocate the leaves from each free-list trunk page in ascending order, instead of trying to maximize localization for each individual allocation. This increases the chance that pages will be written to disk in ascending order by a large CREATE INDEX statement, improving overall performance. FossilOrigin-Name: d045f8b2d44e388d8c4549ff02d4ca7eff4e2038 --- diff --git a/manifest b/manifest index 7722412b09..34d8efeb55 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Update\ssorter-coalesce-writes\sbranch\swith\slatest\strunk\schanges. -D 2012-08-06T18:10:09.480 +C When\sreusing\spages\sas\spart\sof\screating\sa\snew\sindex,\sallocate\sthe\sleaves\sfrom\seach\sfree-list\strunk\spage\sin\sascending\sorder,\sinstead\sof\strying\sto\smaximize\slocalization\sfor\seach\sindividual\sallocation.\sThis\sincreases\sthe\schance\sthat\spages\swill\sbe\swritten\sto\sdisk\sin\sascending\sorder\sby\sa\slarge\sCREATE\sINDEX\sstatement,\simproving\soverall\sperformance. 
+D 2012-08-06T18:50:11.492 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in abd5c10d21d1395f140d9e50ea999df8fa4d6376 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -117,16 +117,16 @@ F sqlite.pc.in 42b7bf0d02e08b9e77734a47798d1a55a9e0716b F sqlite3.1 6be1ad09113570e1fc8dcaff84c9b0b337db5ffc F sqlite3.pc.in ae6f59a76e862f5c561eb32a380228a02afc3cad F src/alter.c 149cc80d9257971b0bff34e58fb2263e01998289 -F src/analyze.c a4790912e504c8ddac273445e7aba39bbce50881 +F src/analyze.c 7553068d21e32a57fc33ab6b2393fc8c1ba41410 F src/attach.c 577bf5675b0c50495fc28549f2fcbdb1bac71143 F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34 F src/backup.c 5b31b24d6814b11de763debf342c8cd0a15a4910 F src/bitvec.c 26675fe8e431dc555e6f2d0e11e651d172234aa1 F src/btmutex.c 976f45a12e37293e32cae0281b15a21d48a8aaa7 -F src/btree.c f0b71054103cb77eb5e782088c16998ec4f06624 -F src/btree.h 48a013f8964f12d944d90e4700df47b72dd6d923 -F src/btreeInt.h 38a639c0542c29fe8331a221c4aed0cb8686249e -F src/build.c 47c4506afe4bcb4ed1f4b5357582d1cb3402f8ad +F src/btree.c 1d366468b6f30234d76bf1da43e038d6f3ba2c9c +F src/btree.h 4aee02e879211bfcfd3f551769578d2e940ab6c2 +F src/btreeInt.h 4e5c2bd0f9b36b2a815a6d84f771a61a65830621 +F src/build.c 0f6b40ad6211dcaba6159d0f9a297f0704f22142 F src/callback.c 0cb4228cdcd827dcc5def98fb099edcc9142dbcd F src/complete.c dc1d136c0feee03c2f7550bafc0d29075e36deac F src/ctime.c 500d019da966631ad957c37705642be87524463b @@ -180,7 +180,7 @@ F src/select.c a365da6d7a6d7d8a10ad60ca71837ab5e9369466 F src/shell.c 076e1c90d594644f36027c8ecff9a392cf2d3a06 F src/sqlite.h.in 3e8035bc406b1571a5cc8ea46bcc831201676f1a F src/sqlite3ext.h 6904f4aadf976f95241311fbffb00823075d9477 -F src/sqliteInt.h ed41801550b0b8fb8217fcfd2e362118062b30c0 +F src/sqliteInt.h c8169801f8bbfdf5873cc6fa45cb5df720c04db4 F src/sqliteLimit.h 164b0e6749d31e0daa1a4589a169d31c0dec7b3d F src/status.c 35939e7e03abf1b7577ce311f48f682c40de3208 F src/table.c 
2cd62736f845d82200acfa1287e33feb3c15d62e @@ -227,7 +227,7 @@ F src/test_superlock.c 2b97936ca127d13962c3605dbc9a4ef269c424cd F src/test_syscall.c a992d8c80ea91fbf21fb2dd570db40e77dd7e6ae F src/test_tclvar.c f4dc67d5f780707210d6bb0eb6016a431c04c7fa F src/test_thread.c e286f2173563f2a1747c24bcda6b9d030bf4f4e4 -F src/test_vfs.c da6d0d982b11756c94c1760196355d33d03ff745 +F src/test_vfs.c c6260ef238c1142c8f8bd402db02216afd182ae3 F src/test_vfstrace.c 6b28adb2a0e8ecd0f2e3581482e1f658b11b4067 F src/test_wholenumber.c 3d2b9ed1505c40ad5c5ca2ad16ae7a289d6cc251 F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 @@ -237,7 +237,7 @@ F src/update.c d3076782c887c10e882996550345da9c4c9f9dea F src/utf.c 890c67dcfcc7a74623c95baac7535aadfe265e84 F src/util.c 0af2e515dc0dabacec931bca39525f6c3f1c5455 F src/vacuum.c 587a52bb8833d7ac15af8916f25437e2575028bd -F src/vdbe.c f5ad3c06dc3fe647097065829c013f3f1b9eadca +F src/vdbe.c 75da79cdcd58481825a06f045bc2f5ea3966eeae F src/vdbe.h 18f581cac1f4339ec3299f3e0cc6e11aec654cdb F src/vdbeInt.h 986b6b11a13c517337355009e5438703ba5b0a40 F src/vdbeapi.c 88ea823bbcb4320f5a6607f39cd7c2d3cc4c26b1 @@ -531,6 +531,7 @@ F test/index.test b5429732b3b983fa810e3ac867d7ca85dae35097 F test/index2.test ee83c6b5e3173a3d7137140d945d9a5d4fdfb9d6 F test/index3.test 423a25c789fc8cc51aaf2a4370bbdde2d9e9eed7 F test/index4.test 1e299862024012e0165531cce251572f7f084d15 +F test/index5.test edc8c64ca78bee140c21ce3836820fadf47906bb F test/indexedby.test be501e381b82b2f8ab406309ba7aac46e221f4ad F test/indexfault.test 31d4ab9a7d2f6e9616933eb079722362a883eb1d F test/init.test 15c823093fdabbf7b531fe22cf037134d09587a7 @@ -1008,7 +1009,7 @@ F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh fbc018d67fd7395f440c28f33ef0f94420226381 F tool/win/sqlite.vsix 67d8a99aceb56384a81b3f30d6c71743146d2cc9 -P f4b3fded231231ef15bde98d2a996b4e16415d4c 541e9310a7b88e0b40c6530947803527f28e51de 
-R 29178e6d938c0a18dd53fdfdd0d9db73 +P 214f8cda1727e0eee51605be487d4cf4f2dfb6db +R 3158683ecf6a85f9d8794482d2318bc6 U dan -Z ebf9b1cca66bb54b576393e824c02bfc +Z 52fb98b324ddd516442d0480eabbc4d5 diff --git a/manifest.uuid b/manifest.uuid index 5a544c351b..c85d23ae1e 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -214f8cda1727e0eee51605be487d4cf4f2dfb6db \ No newline at end of file +d045f8b2d44e388d8c4549ff02d4ca7eff4e2038 \ No newline at end of file diff --git a/src/analyze.c b/src/analyze.c index 810ed54d85..632fdc1ac1 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -176,7 +176,7 @@ static void openStatTable( "CREATE TABLE %Q.%s(%s)", pDb->zName, zTab, aTable[i].zCols ); aRoot[i] = pParse->regRoot; - aCreateTbl[i] = 1; + aCreateTbl[i] = OPFLAG_P2ISREG; }else{ /* The table already exists. If zWhere is not NULL, delete all entries ** associated with the table zWhere. If zWhere is NULL, delete the diff --git a/src/btree.c b/src/btree.c index c16eca5f64..7f1fd7e148 100644 --- a/src/btree.c +++ b/src/btree.c @@ -5926,7 +5926,8 @@ static int balance_nonroot( MemPage *pParent, /* Parent page of siblings being balanced */ int iParentIdx, /* Index of "the page" in pParent */ u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */ - int isRoot /* True if pParent is a root-page */ + int isRoot, /* True if pParent is a root-page */ + int bBulk /* True if this call is part of a bulk load */ ){ BtShared *pBt; /* The whole database */ int nCell = 0; /* Number of cells in apCell[] */ @@ -6257,7 +6258,7 @@ static int balance_nonroot( if( rc ) goto balance_cleanup; }else{ assert( i>0 ); - rc = allocateBtreePage(pBt, &pNew, &pgno, pgno, 0); + rc = allocateBtreePage(pBt, &pNew, &pgno, (bBulk ? 1 : pgno), 0); if( rc ) goto balance_cleanup; apNew[i] = pNew; nNew++; @@ -6707,7 +6708,7 @@ static int balance(BtCursor *pCur){ ** pSpace buffer passed to the latter call to balance_nonroot(). 
*/ u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); - rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1); + rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, pCur->hints); if( pFree ){ /* If pFree is not NULL, it points to the pSpace buffer used ** by a previous call to balance_nonroot(). Its contents are @@ -8294,3 +8295,13 @@ int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ pBt->btsFlags &= ~BTS_NO_WAL; return rc; } + +/* +** set the mask of hint flags for cursor pCsr. Currently the only valid +** values are 0 and BTREE_BULKLOAD. +*/ +void sqlite3BtreeCursorHints(BtCursor *pCsr, unsigned int mask){ + assert( mask==BTREE_BULKLOAD || mask==0 ); + pCsr->hints = mask; +} + diff --git a/src/btree.h b/src/btree.h index 9832001b7f..95897d5662 100644 --- a/src/btree.h +++ b/src/btree.h @@ -135,6 +135,12 @@ int sqlite3BtreeUpdateMeta(Btree*, int idx, u32 value); #define BTREE_USER_VERSION 6 #define BTREE_INCR_VACUUM 7 +/* +** Values that may be OR'd together to form the second argument of an +** sqlite3BtreeCursorHints() call. +*/ +#define BTREE_BULKLOAD 0x00000001 + int sqlite3BtreeCursor( Btree*, /* BTree containing table to open */ int iTable, /* Index of root page */ @@ -178,8 +184,8 @@ struct Pager *sqlite3BtreePager(Btree*); int sqlite3BtreePutData(BtCursor*, u32 offset, u32 amt, void*); void sqlite3BtreeCacheOverflow(BtCursor *); void sqlite3BtreeClearCursor(BtCursor *); - int sqlite3BtreeSetVersion(Btree *pBt, int iVersion); +void sqlite3BtreeCursorHints(BtCursor *, unsigned int mask); #ifndef NDEBUG int sqlite3BtreeCursorIsValid(BtCursor*); diff --git a/src/btreeInt.h b/src/btreeInt.h index 0d21497966..b157decec7 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -510,6 +510,7 @@ struct BtCursor { #ifndef SQLITE_OMIT_INCRBLOB u8 isIncrblobHandle; /* True if this cursor is an incr. 
io handle */ #endif + u8 hints; /* As configured by sqlite3BtreeCursorHints() */ i16 iPage; /* Index of current page in apPage */ u16 aiIdx[BTCURSOR_MAX_DEPTH]; /* Current index in apPage[i] */ MemPage *apPage[BTCURSOR_MAX_DEPTH]; /* Pages from root to current page */ diff --git a/src/build.c b/src/build.c index 9f13b7b11c..776ffa4db3 100644 --- a/src/build.c +++ b/src/build.c @@ -1581,7 +1581,7 @@ void sqlite3EndTable( assert(pParse->nTab==1); sqlite3VdbeAddOp3(v, OP_OpenWrite, 1, pParse->regRoot, iDb); - sqlite3VdbeChangeP5(v, 1); + sqlite3VdbeChangeP5(v, OPFLAG_P2ISREG); pParse->nTab = 2; sqlite3SelectDestInit(&dest, SRT_Table, 1); sqlite3Select(pParse, pSelect, &dest); @@ -2397,9 +2397,7 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){ pKey = sqlite3IndexKeyinfo(pParse, pIndex); sqlite3VdbeAddOp4(v, OP_OpenWrite, iIdx, tnum, iDb, (char *)pKey, P4_KEYINFO_HANDOFF); - if( memRootPage>=0 ){ - sqlite3VdbeChangeP5(v, 1); - } + sqlite3VdbeChangeP5(v, OPFLAG_BULKCSR|((memRootPage>=0)?OPFLAG_P2ISREG:0)); #ifndef SQLITE_OMIT_MERGE_SORT /* Open the sorter cursor if we are to use one. 
*/ diff --git a/src/sqliteInt.h b/src/sqliteInt.h index c525ebd16f..b3cdb2363c 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -2317,6 +2317,8 @@ struct AuthContext { #define OPFLAG_CLEARCACHE 0x20 /* Clear pseudo-table cache in OP_Column */ #define OPFLAG_LENGTHARG 0x40 /* OP_Column only used for length() */ #define OPFLAG_TYPEOFARG 0x80 /* OP_Column only used for typeof() */ +#define OPFLAG_BULKCSR 0x01 /* OP_Open** used to open bulk cursor */ +#define OPFLAG_P2ISREG 0x02 /* P2 to OP_Open** is a register number */ /* * Each trigger present in the database schema is stored as an instance of diff --git a/src/test_vfs.c b/src/test_vfs.c index fd2aa9fb07..93c556b56e 100644 --- a/src/test_vfs.c +++ b/src/test_vfs.c @@ -361,7 +361,8 @@ static int tvfsWrite( if( p->pScript && p->mask&TESTVFS_WRITE_MASK ){ tvfsExecTcl(p, "xWrite", - Tcl_NewStringObj(pFd->zFilename, -1), pFd->pShmId, 0 + Tcl_NewStringObj(pFd->zFilename, -1), pFd->pShmId, + Tcl_NewWideIntObj(iOfst) ); tvfsResultCode(p, &rc); } diff --git a/src/vdbe.c b/src/vdbe.c index 19c0255b8f..12e7325143 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -3120,6 +3120,9 @@ case OP_OpenWrite: { VdbeCursor *pCur; Db *pDb; + assert( (pOp->p5&(OPFLAG_P2ISREG|OPFLAG_BULKCSR))==pOp->p5 ); + assert( pOp->opcode==OP_OpenWrite || pOp->p5==0 ); + if( p->expired ){ rc = SQLITE_ABORT; break; @@ -3143,7 +3146,7 @@ case OP_OpenWrite: { }else{ wrFlag = 0; } - if( pOp->p5 ){ + if( pOp->p5 & OPFLAG_P2ISREG ){ assert( p2>0 ); assert( p2<=p->nMem ); pIn2 = &aMem[p2]; @@ -3174,6 +3177,8 @@ case OP_OpenWrite: { pCur->isOrdered = 1; rc = sqlite3BtreeCursor(pX, p2, wrFlag, pKeyInfo, pCur->pCursor); pCur->pKeyInfo = pKeyInfo; + assert( OPFLAG_BULKCSR==BTREE_BULKLOAD ); + sqlite3BtreeCursorHints(pCur->pCursor, (pOp->p5 & OPFLAG_BULKCSR)); /* Since it performs no memory allocation or IO, the only value that ** sqlite3BtreeCursor() may return is SQLITE_OK. 
*/
diff --git a/test/index5.test b/test/index5.test
new file mode 100644
index 0000000000..c8e94b3985
--- /dev/null
+++ b/test/index5.test
@@ -0,0 +1,82 @@
+# 2012 August 6
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+# Verify that CREATE INDEX, when it reuses free-list pages, writes the new
+# index's pages to the database file mostly in ascending order.
+#
+
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+set ::testprefix index5
+
+# Populate t1, then create and drop an index so that the pages it used are
+# available for reuse by the CREATE INDEX in test 1.2.
+do_test 1.1 {
+  execsql {
+    PRAGMA page_size = 1024;
+    CREATE TABLE t1(x);
+    BEGIN;
+  }
+  for {set i 0} {$i < 100000} {incr i} {
+    execsql { INSERT INTO t1 VALUES(randstr(100,100)) }
+  }
+  execsql COMMIT
+  execsql {
+    CREATE INDEX i1 ON t1(x);
+    DROP INDEX I1;
+    PRAGMA main.page_size;
+  }
+} {1024}
+
+db close
+testvfs tvfs
+tvfs filter xWrite
+tvfs script write_cb
+# xWrite callback: record the page number (byte offset / 1024, the page size
+# set in test 1.1) of every write made to the file named test.db.
+proc write_cb {xCall file handle iOfst} {
+  if {[file tail $file]=="test.db"} {
+    lappend ::write_list [expr {$iOfst/1024}]
+  }
+}
+
+do_test 1.2 {
+  sqlite3 db test.db -vfs tvfs
+  set ::write_list [list]
+  execsql { CREATE INDEX i1 ON t1(x) }
+} {}
+
+# Classify each consecutive pair of recorded writes as one page forward, one
+# page backward, or non-contiguous. Forward steps must outnumber backward ones.
+do_test 1.3 {
+  set nForward 0
+  set nBackward 0
+  set nNoncont 0
+  set iPrev [lindex $::write_list 0]
+  for {set i 1} {$i < [llength $::write_list]} {incr i} {
+    set iNext [lindex $::write_list $i]
+    if {$iNext==($iPrev+1)} {
+      incr nForward
+    } elseif {$iNext==($iPrev-1)} {
+      incr nBackward
+    } else {
+      incr nNoncont
+    }
+    set iPrev $iNext
+  }
+
+  expr {$nForward > $nBackward}
+} {1}
+db close
+tvfs delete
+
+finish_test
+