From: dan Date: Fri, 28 Apr 2017 10:20:03 +0000 (+0000) Subject: Use a different free-list format for server-mode databases in order to reduce X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=aa189cf00f68e90b11cbfd3f047e80cfedaef9e0;p=thirdparty%2Fsqlite.git Use a different free-list format for server-mode databases in order to reduce contention. FossilOrigin-Name: 778e8a102d8dc7b0fa006c2d90b0a8cd36ebc1e16bd17477f2d536fc0cef4bf3 --- diff --git a/manifest b/manifest index 529f619085..b96dd6adf4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Do\snot\swrite\smaster\sjournal\sfilenames\sinto\sserver-mode\sjournal\sfiles.\sUse\nSQLITE_MUTEX_STATIC_APP1\sto\sprotect\scritical\ssections\sin\sserver.c. -D 2017-04-27T14:12:43.330 +C Use\sa\sdifferent\sfree-list\sformat\sfor\sserver-mode\sdatabases\sin\sorder\sto\sreduce\ncontention. +D 2017-04-28T10:20:03.891 F Makefile.in 1cc758ce3374a32425e4d130c2fe7b026b20de5b8843243de75f087c0a2661fb F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 6a8c838220f7c00820e1fc0ac1bccaaa8e5676067e1dbfa1bafa7a4ffecf8ae6 @@ -345,7 +345,7 @@ F src/auth.c 930b376a9c56998557367e6f7f8aaeac82a2a792 F src/backup.c faf17e60b43233c214aae6a8179d24503a61e83b F src/bitvec.c 17ea48eff8ba979f1f5b04cc484c7bb2be632f33 F src/btmutex.c 0e9ce2d56159b89b9bc8e197e023ee11e39ff8ca -F src/btree.c a0d9a1c782ff3d22df5d217a4fa7125dd69ad5849caa51c4442c10246ca8ae27 +F src/btree.c c721037ad34eb8a0544fb6a00594ab8ff52dabcdc38d12646ca093d1f2bed521 F src/btree.h 80f518c0788be6cec8d9f8e13bd8e380df299d2b5e4ac340dc887b0642647cfc F src/btreeInt.h a392d353104b4add58b4a59cb185f5d5693dde832c565b77d8d4c343ed98f610 F src/build.c 4026a9c554b233e50c5e9ad46963e676cf54dd2306d952aa1eaa07a1bc9ce14f @@ -389,8 +389,8 @@ F src/os_setup.h 0dbaea40a7d36bf311613d31342e0b99e2536586 F src/os_unix.c 30e2c43e4955db990e5b5a81e901f8aa74cc8820 F src/os_win.c 2a6c73eef01c51a048cc4ddccd57f981afbec18a F src/os_win.h 7b073010f1451abe501be30d12f6bc599824944a -F src/pager.c ba9b474f886b79b1e8de087240179c74198a3a11de6b16cc48e14084f562c38d -F src/pager.h 54f5321bc48768610a3a000d2f821fb849cbda35339abd185ceb5f214faf10bf +F src/pager.c 2bea7ef08f65fbc2419925870dd4fb0736951623759a0cb5cb664956d1ce53c1 +F src/pager.h 4ef462a008f9c44f201298aefaa2fa9ace9657eba1aec623211893dd7f83290d F src/parse.y 0513387ce02fea97897d8caef82d45f347818593f24f1bdc48e0c530a8af122d F src/pcache.c 62835bed959e2914edd26afadfecce29ece0e870 F src/pcache.h 2cedcd8407eb23017d92790b112186886e179490 @@ -403,7 +403,7 @@ F src/random.c 80f5d666f23feb3e6665a6ce04c7197212a88384 F src/resolve.c 3e518b962d932a997fae373366880fc028c75706 F src/rowset.c 7b7e7e479212e65b723bf40128c7b36dc5afdfac F src/select.c bf8ab605e49717c222136380453cfb7eda564f8e500d5ff6a01341ea59fefe80 -F src/server.c 1e930af2d0181600d36126b60c44456741c7503188d88eb324679a688ba6da90 +F src/server.c 350729b7a6d2975a0d8fd4a2493bf4a4554893485882f17a3cdbe1df51a5f111 F src/shell.c 21b79c0e1b93f8e35fd7b4087d6ba438326c3d7e285d0dd51dfd741475f858a1 F src/sqlite.h.in f6e3734fee6d334a896cff8f43446ca2e47dcf7e39135b3e3a596bf23cdbbc97 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 @@ -1109,7 +1109,7 @@ F test/selectE.test a8730ca330fcf40ace158f134f4fe0eb00c7edbf F test/selectF.test 21c94e6438f76537b72532fa9fd4710cdd455fc3 F test/selectG.test e8600e379589e85e9fefd2fe4d44a4cdd63f6982 F test/server1.test 46803bd3fe8b99b30dbc5ff38ffc756f5c13a118 -F test/server2.test bdb66c32c1233b77195bcfac08f44d6cfed24cc4f44ffff8e825a5d8cc2bc624 +F test/server2.test 11dda300ebef43b4abe0fdc086b6f51b964beddb529fb65bc1f026db5895c36e F test/server3.test c33343f2f6bc23f2b4e2f047c3d083579f0cfac2795e0f1eb226ab34758967c0 F test/servercrash.test 816c132b26af008067cab2913783f67006d4003e3988f3f3ee1075742f6e0a6c F test/session.test 78fa2365e93d3663a6e933f86e7afc395adf18be @@ -1580,7 +1580,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P d5b5326df25b85b1c3926cd693bcde1ca08e6e03b8aea151a82d222fc9c23dd6 -R c361ed394cbf7d5d3ff166b253e6e420 +P 3144ae40d2eb63dfd5587579a49163ea1add5947d624daa478ada13339495af4 +R b065c920a1928240f3f2f2172780f454 U dan -Z 609d068095d8e4dd56c2feec5f2f04e4 +Z 9c7cf36faff07e17279e8854885ab036 diff --git a/manifest.uuid b/manifest.uuid index 3828257dc2..751b2072a4 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -3144ae40d2eb63dfd5587579a49163ea1add5947d624daa478ada13339495af4 \ No newline at end of file +778e8a102d8dc7b0fa006c2d90b0a8cd36ebc1e16bd17477f2d536fc0cef4bf3 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 35e47173fa..23839b7c24 100644 --- a/src/btree.c +++ b/src/btree.c @@ -5616,6 +5616,239 @@ int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ return SQLITE_OK; } +#ifdef SQLITE_SERVER_EDITION + +#define SERVER_DEFAULT_FREELISTS 16 +#define SERVER_DEFAULT_FREELIST_SIZE 128 + +/* +** Allocate the free-node and the first SERVER_DEFAULT_FREELISTS +** trunk pages. +*/ +static int allocateServerFreenode(BtShared *pBt){ + int rc; + MemPage *pPage1 = pBt->pPage1; + + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc==SQLITE_OK ){ + Pgno pgnoNode = (++pBt->nPage); + MemPage *pNode = 0; + int i; + + put4byte(&pPage1->aData[32], pgnoNode); + rc = btreeGetUnusedPage(pBt, pgnoNode, &pNode, PAGER_GET_NOCONTENT); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pNode->pDbPage); + } + if( rc==SQLITE_OK ){ + put4byte(&pNode->aData[0], 0); + put4byte(&pNode->aData[4], SERVER_DEFAULT_FREELISTS); + } + for(i=0; rc==SQLITE_OK && inPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; + pgnoTrunk = pBt->nPage; + + rc = btreeGetUnusedPage(pBt, pgnoTrunk, &pTrunk, PAGER_GET_NOCONTENT); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pTrunk->pDbPage); + } + if( rc==SQLITE_OK ){ + memset(pTrunk->aData, 0, 8); + put4byte(&pNode->aData[8+i*4], pgnoTrunk); + } + releasePage(pTrunk); + } + releasePage(pNode); + } + + return rc; +} + +/* +** Return a reference to the first trunk page in one of the database free-lists. +** Allocate the database free-lists if required. +*/ +static int findServerTrunk(BtShared *pBt, int bAlloc, MemPage **ppTrunk){ + MemPage *pPage1 = pBt->pPage1; + MemPage *pNode = 0; /* The node page */ + MemPage *pTrunk = 0; /* The returned page */ + Pgno iNode; /* Page number of node page */ + int rc = SQLITE_OK; + + /* If the node page and free-list trunks have not yet been allocated, allocate + ** them now. */ + pPage1 = pBt->pPage1; + iNode = get4byte(&pPage1->aData[32]); + if( iNode==0 ){ + rc = allocateServerFreenode(pBt); + iNode = get4byte(&pPage1->aData[32]); + } + + /* Grab the node page */ + if( rc==SQLITE_OK ){ + rc = btreeGetUnusedPage(pBt, iNode, &pNode, 0); + } + if( rc==SQLITE_OK ){ + int nList; /* Number of free-lists in this db */ + int i; + + /* Try to lock a free-list trunk. If bAlloc is true, it has to be a + ** free-list trunk with at least one entry in the free-list. */ + nList = (int)get4byte(&pNode->aData[4]); + for(i=0; iaData[8+i*4]); + if( SQLITE_OK==sqlite3PagerWritelock(pBt->pPager, iTrunk) ){ + rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); + if( rc==SQLITE_OK && bAlloc ){ + if( !get4byte(&pTrunk->aData[0]) && !get4byte(&pTrunk->aData[4]) ){ + releasePage(pTrunk); + pTrunk = 0; + } + } + if( rc!=SQLITE_OK || pTrunk ) break; + } + } + + /* No free pages in any free-list. Or perhaps we were locked out. In + ** either case, try to allocate more from the end of the file now. */ + if( i==nList ){ + assert( rc==SQLITE_OK && pTrunk==0 ); + rc = sqlite3PagerWrite(pPage1->pDbPage); + for(i=0; rc==SQLITE_OK && iaData[8+i*4]); + rc = btreeGetUnusedPage(pBt, iTrunk, &pT, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pT->pDbPage); + } + if( rc==SQLITE_OK ){ + int iPg = get4byte(&pT->aData[4]); + for(/*no-op*/; iPgnPage==PENDING_BYTE_PAGE(pBt) ) pBt->nPage++; + put4byte(&pT->aData[8+iPg*4], pBt->nPage); + } + put4byte(&pT->aData[4], iPg); + if( pTrunk==0 ){ + pTrunk = pT; + pT = 0; + } + } + releasePage(pT); + } + if( rc==SQLITE_OK ){ + MemPage *pLast = 0; + rc = btreeGetUnusedPage(pBt, pBt->nPage, &pLast, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pLast->pDbPage); + releasePage(pLast); + put4byte(28 + (u8*)pPage1->aData, pBt->nPage); + } + } + } + } + + releasePage(pNode); + if( rc==SQLITE_OK ){ + assert( pTrunk ); + rc = sqlite3PagerWrite(pTrunk->pDbPage); + } + if( rc!=SQLITE_OK ){ + releasePage(pTrunk); + pTrunk = 0; + } + *ppTrunk = pTrunk; + return rc; +} + +static int allocateServerPage( + BtShared *pBt, /* The btree */ + MemPage **ppPage, /* Store pointer to the allocated page here */ + Pgno *pPgno, /* Store the page number here */ + Pgno nearby, /* Search for a page near this one */ + u8 eMode /* BTALLOC_EXACT, BTALLOC_LT, or BTALLOC_ANY */ +){ + int rc; /* Return code */ + MemPage *pTrunk = 0; /* The node page */ + Pgno pgnoNew = 0; + + assert( eMode==BTALLOC_ANY ); + assert( sqlite3_mutex_held(pBt->mutex) ); + + rc = findServerTrunk(pBt, 1, &pTrunk); + if( rc==SQLITE_OK ){ + int nFree; /* Number of free pages on this trunk page */ + nFree = (int)get4byte(&pTrunk->aData[4]); + if( nFree==0 ){ + pgnoNew = get4byte(&pTrunk->aData[0]); + assert( pgnoNew ); + }else{ + nFree--; + pgnoNew = get4byte(&pTrunk->aData[8+4*nFree]); + put4byte(&pTrunk->aData[4], (u32)nFree); + releasePage(pTrunk); + pTrunk = 0; + } + } + + if( rc==SQLITE_OK ){ + MemPage *pNew = 0; + rc = btreeGetUnusedPage(pBt, pgnoNew, &pNew, pTrunk?0:PAGER_GET_NOCONTENT); + if( rc==SQLITE_OK ){ + rc = sqlite3PagerWrite(pNew->pDbPage); + } + if( rc==SQLITE_OK && pTrunk ){ + memcpy(pTrunk->aData, pNew->aData, pBt->usableSize); + } + *ppPage = pNew; + *pPgno = pgnoNew; + } + + releasePage(pTrunk); + return rc; +} + +static int freeServerPage2(BtShared *pBt, MemPage *pPage, Pgno iPage){ + int rc; /* Return code */ + MemPage *pTrunk = 0; /* The node page */ + + assert( sqlite3_mutex_held(pBt->mutex) ); + + rc = findServerTrunk(pBt, 0, &pTrunk); + if( rc==SQLITE_OK ){ + int nFree; /* Number of free pages on this trunk page */ + nFree = (int)get4byte(&pTrunk->aData[4]); + if( nFree>=((pBt->usableSize / 4) - 2) ){ + if( pPage==0 ){ + rc = btreeGetUnusedPage(pBt, iPage, &pPage, 0); + }else{ + sqlite3PagerRef(pPage->pDbPage); + } + rc = sqlite3PagerWrite(pPage->pDbPage); + if( rc==SQLITE_OK ){ + memcpy(pPage->aData, pTrunk->aData, pBt->usableSize); + put4byte(&pTrunk->aData[0], iPage); + put4byte(&pTrunk->aData[4], 0); + } + releasePage(pPage); + }else{ + put4byte(&pTrunk->aData[8+nFree*4], iPage); + put4byte(&pTrunk->aData[4], (u32)nFree+1); + } + } + + return rc; +} + +#else +# define allocateServerPage(v, w, x, y, z) SQLITE_OK +# define freeServerPage2(x, y, z) SQLITE_OK +#endif /* SQLITE_SERVER_EDITION */ + /* ** Allocate a new page from the database file. ** @@ -5653,6 +5886,10 @@ static int allocateBtreePage( MemPage *pPrevTrunk = 0; Pgno mxPage; /* Total size of the database file */ + if( sqlite3PagerIsServer(pBt->pPager) ){ + return allocateServerPage(pBt, ppPage, pPgno, nearby, eMode); + } + assert( sqlite3_mutex_held(pBt->mutex) ); assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); pPage1 = pBt->pPage1; @@ -5980,12 +6217,6 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ pPage = btreePageLookup(pBt, iPage); } - /* Increment the free page count on pPage1 */ - rc = sqlite3PagerWrite(pPage1->pDbPage); - if( rc ) goto freepage_out; - nFree = get4byte(&pPage1->aData[36]); - put4byte(&pPage1->aData[36], nFree+1); - if( pBt->btsFlags & BTS_SECURE_DELETE ){ /* If the secure_delete option is enabled, then ** always fully overwrite deleted information with zeros. @@ -5997,6 +6228,17 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ } memset(pPage->aData, 0, pPage->pBt->pageSize); } + + if( sqlite3PagerIsServer(pBt->pPager) ){ + rc = freeServerPage2(pBt, pPage, iPage); + goto freepage_out; + } + + /* Increment the free page count on pPage1 */ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc ) goto freepage_out; + nFree = get4byte(&pPage1->aData[36]); + put4byte(&pPage1->aData[36], nFree+1); /* If the database supports auto-vacuum, write an entry in the pointer-map ** to indicate that the page is free. @@ -9442,6 +9684,49 @@ end_of_check: #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ #ifndef SQLITE_OMIT_INTEGRITY_CHECK + +#if !defined(SQLITE_OMIT_INTEGRITY_CHECK) && defined(SQLITE_SERVER_EDITION) +static void checkServerList(IntegrityCk *pCheck){ + u32 pgnoNode = get4byte(&pCheck->pBt->pPage1->aData[32]); + if( pgnoNode ){ + DbPage *pNode = 0; + u8 *aNodeData; + u32 nList; /* Number of free-lists */ + int i; + + checkRef(pCheck, pgnoNode); + if( sqlite3PagerGet(pCheck->pPager, (Pgno)pgnoNode, &pNode, 0) ){ + checkAppendMsg(pCheck, "failed to get node page %d", pgnoNode); + return; + } + aNodeData = sqlite3PagerGetData(pNode); + nList = get4byte(&aNodeData[4]); + for(i=0; ipPager, (Pgno)pgnoTrunk, &pTrunk, 0) ){ + checkAppendMsg(pCheck, "failed to get page %d", pgnoTrunk); + pgnoTrunk = 0; + }else{ + u8 *aTrunkData = sqlite3PagerGetData(pTrunk); + int nLeaf = (int)get4byte(&aTrunkData[4]); + int iLeaf; + for(iLeaf=0; iLeafpPage1->aData[32]), - get4byte(&pBt->pPage1->aData[36])); +#ifdef SQLITE_SERVER_EDITION + if( sqlite3PagerIsServer(pBt->pPager) ){ + checkServerList(&sCheck); + }else +#endif + { + checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]), + get4byte(&pBt->pPage1->aData[36])); + } sCheck.zPfx = 0; /* Check all the tables. diff --git a/src/pager.c b/src/pager.c index 4d44048b9d..c4d467c1dd 100644 --- a/src/pager.c +++ b/src/pager.c @@ -7661,4 +7661,13 @@ int sqlite3PagerWalFramesize(Pager *pPager){ } #endif +#ifdef SQLITE_SERVER_EDITION +int sqlite3PagerIsServer(Pager *pPager){ + return pagerIsServer(pPager); +} +int sqlite3PagerWritelock(Pager *pPager, Pgno pgno){ + return sqlite3ServerLock(pPager->pServer, pgno, 1); +} +#endif + #endif /* SQLITE_OMIT_DISKIO */ diff --git a/src/pager.h b/src/pager.h index e9f37512fb..ae8fe4bd8e 100644 --- a/src/pager.h +++ b/src/pager.h @@ -238,6 +238,8 @@ void *sqlite3PagerCodec(DbPage *); #ifdef SQLITE_SERVER_EDITION int sqlite3PagerRollbackJournal(Pager*, int); + int sqlite3PagerIsServer(Pager *pPager); + int sqlite3PagerWritelock(Pager *pPager, Pgno); #endif #endif /* SQLITE_PAGER_H */ diff --git a/src/server.c b/src/server.c index 3b8a8f00ea..bf1e94f9b6 100644 --- a/src/server.c +++ b/src/server.c @@ -480,10 +480,9 @@ int sqlite3ServerLock(Server *p, Pgno pgno, int bWrite){ v = *pSlot; } + n = v | (1 << p->iClient); if( bWrite ){ - n = v | ((p->iClient+1) << HMA_CLIENT_SLOTS); - }else{ - n = v | (1 << p->iClient); + n = n | ((p->iClient+1) << HMA_CLIENT_SLOTS); } if( __sync_val_compare_and_swap(pSlot, v, n)==v ) break; v = *pSlot; diff --git a/test/server2.test b/test/server2.test index e52ce71644..adfd76859b 100644 --- a/test/server2.test +++ b/test/server2.test @@ -41,7 +41,8 @@ do_test 1.4 { db2 close file exists test.db-hma } {1} -do_test 1.5 { +integrity_check 1.5 +do_test 1.6 { db close file exists test.db-hma } {0}