From: dan Date: Wed, 26 Apr 2017 20:45:00 +0000 (+0000) Subject: Experimental implementation of pessimistic page-level locking based on X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=1381bbc2622e557ab39aea12de4d9afc49a452a3;p=thirdparty%2Fsqlite.git Experimental implementation of pessimistic page-level locking based on rollback mode. FossilOrigin-Name: 64ecf7c7e512827e8a5a42f9f3ad92ff57ec868820e3943dbc74d5823f9a889d --- diff --git a/main.mk b/main.mk index 54f223bb56..57d08bc377 100644 --- a/main.mk +++ b/main.mk @@ -69,6 +69,7 @@ LIBOBJ+= vdbe.o parse.o \ notify.o opcodes.o os.o os_unix.o os_win.o \ pager.o pcache.o pcache1.o pragma.o prepare.o printf.o \ random.o resolve.o rowset.o rtree.o select.o sqlite3rbu.o status.o \ + server.o \ table.o threads.o tokenize.o treeview.o trigger.o \ update.o userauth.o util.o vacuum.o \ vdbeapi.o vdbeaux.o vdbeblob.o vdbemem.o vdbesort.o \ @@ -144,6 +145,8 @@ SRC = \ $(TOP)/src/resolve.c \ $(TOP)/src/rowset.c \ $(TOP)/src/select.c \ + $(TOP)/src/server.c \ + $(TOP)/src/server.h \ $(TOP)/src/status.c \ $(TOP)/src/shell.c \ $(TOP)/src/sqlite.h.in \ diff --git a/manifest b/manifest index 8c73ad3567..134a033d5c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\snew\stest\sfile\scachespill.test. -D 2017-04-26T17:21:33.779 +C Experimental\simplementation\sof\spessimistic\spage-level\slocking\sbased\son\nrollback\smode. 
+D 2017-04-26T20:45:00.409 F Makefile.in 1cc758ce3374a32425e4d130c2fe7b026b20de5b8843243de75f087c0a2661fb F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 6a8c838220f7c00820e1fc0ac1bccaaa8e5676067e1dbfa1bafa7a4ffecf8ae6 @@ -326,7 +326,7 @@ F ext/userauth/userauth.c 3410be31283abba70255d71fd24734e017a4497f F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 9abb506e717887d57f754bae139b85c1a06d6f2ac25b589f3e792e310567f278 +F main.mk 0116e0f17f9bb71a111a41fc560c677bba20c557e4c4c11cebf66124c472b22d F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 F mptest/config01.test 3c6adcbc50b991866855f1977ff172eb6d901271 F mptest/config02.test 4415dfe36c48785f751e16e32c20b077c28ae504 @@ -389,8 +389,8 @@ F src/os_setup.h 0dbaea40a7d36bf311613d31342e0b99e2536586 F src/os_unix.c 30e2c43e4955db990e5b5a81e901f8aa74cc8820 F src/os_win.c 2a6c73eef01c51a048cc4ddccd57f981afbec18a F src/os_win.h 7b073010f1451abe501be30d12f6bc599824944a -F src/pager.c ff1232b3088a39806035ecfac4fffeb22717d80b -F src/pager.h f2a99646c5533ffe11afa43e9e0bea74054e4efa +F src/pager.c 80f36e7fa7413b227c8fefd9c23169984e648134308c4730681167cb54121384 +F src/pager.h 54f5321bc48768610a3a000d2f821fb849cbda35339abd185ceb5f214faf10bf F src/parse.y 0513387ce02fea97897d8caef82d45f347818593f24f1bdc48e0c530a8af122d F src/pcache.c 62835bed959e2914edd26afadfecce29ece0e870 F src/pcache.h 2cedcd8407eb23017d92790b112186886e179490 @@ -403,11 +403,12 @@ F src/random.c 80f5d666f23feb3e6665a6ce04c7197212a88384 F src/resolve.c 3e518b962d932a997fae373366880fc028c75706 F src/rowset.c 7b7e7e479212e65b723bf40128c7b36dc5afdfac F src/select.c bf8ab605e49717c222136380453cfb7eda564f8e500d5ff6a01341ea59fefe80 +F src/server.c bbc32009028b3fb778b9bb2ea426e9acd7b8a5904f808075502bc43e75f50da0 F src/shell.c 21b79c0e1b93f8e35fd7b4087d6ba438326c3d7e285d0dd51dfd741475f858a1 -F src/sqlite.h.in 
40233103e3e4e10f8a63523498d0259d232e42aba478e2d3fb914799185aced6 +F src/sqlite.h.in f6e3734fee6d334a896cff8f43446ca2e47dcf7e39135b3e3a596bf23cdbbc97 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 58fd0676d3111d02e62e5a35992a7d3da5d3f88753acc174f2d37b774fbbdd28 -F src/sqliteInt.h 9affb53bb405dcea1d86e85198ebaf6232a684cc2b2af6b3c181869f1c8f3e93 +F src/sqliteInt.h 88b07d07c2fc9b8abe8e58b0b1a687ab0531265c76285998819402f25ba6aedb F src/sqliteLimit.h 1513bfb7b20378aa0041e7022d04acb73525de35b80b252f1b83fedb4de6a76b F src/status.c a9e66593dfb28a9e746cba7153f84d49c1ddc4b1 F src/table.c b46ad567748f24a326d9de40e5b9659f96ffff34 @@ -473,7 +474,7 @@ F src/vdbe.c 1eba07ffa0e4ff5e14ceca17e366c26f58c11d12cc56e55a9a13157a93533b7e F src/vdbe.h f7d1456e28875c2dcb964056589b5b7149ab7edf39edeca801596a39bb3d3848 F src/vdbeInt.h c070bc5c8b913bda0ceaa995cd4d939ded5e4fc96cf7c3c1c602d41b871f8ade F src/vdbeapi.c 5b08d82592bcff4470601fe78aaabebd50837860 -F src/vdbeaux.c 6b3f6ce909e206d4c918988b13b7fa687e92b4471d137e0f2a37edac80ec60be +F src/vdbeaux.c 3cf14d047121c2dfabe85d874262a36a4d8983616fc0da7faff197ed3dd0617a F src/vdbeblob.c 359891617358deefc85bef7bcf787fa6b77facb9 F src/vdbemem.c 2c70f8f5de6c71fb99a22c5b83be9fab5c47cdd8e279fa44a8c00cfed06d7e89 F src/vdbesort.c e72fe02a2121386ba767ede8942e9450878b8fc873abf3d1b6824485f092570c @@ -1033,7 +1034,7 @@ F test/parser1.test 391b9bf9a229547a129c61ac345ed1a6f5eb1854 F test/pcache.test c8acbedd3b6fd0f9a7ca887a83b11d24a007972b F test/pcache2.test af7f3deb1a819f77a6d0d81534e97d1cf62cd442 F test/percentile.test 4243af26b8f3f4555abe166f723715a1f74c77ff -F test/permutations.test 8aaa22a0f428a7e6b8446b97bc7691a273eaeff5dc290fb9129bf79fa9813a6e +F test/permutations.test d037d2a4d00087dc8e8446e6d392357dc5b739d1b6d08c3c1cdb7a1f072f5db7 F test/pragma.test 1e94755164a3a3264cd39836de4bebcb7809e5f8 F test/pragma2.test e5d5c176360c321344249354c0c16aec46214c9f F test/pragma3.test 14c12bc5352b1e100e0b6b44f371053a81ccf8ed @@ -1108,6 
+1109,9 @@ F test/selectE.test a8730ca330fcf40ace158f134f4fe0eb00c7edbf F test/selectF.test 21c94e6438f76537b72532fa9fd4710cdd455fc3 F test/selectG.test e8600e379589e85e9fefd2fe4d44a4cdd63f6982 F test/server1.test 46803bd3fe8b99b30dbc5ff38ffc756f5c13a118 +F test/server2.test 68480691c8b16de757396fc43523329e0f275f53f48afa81d6362485ed7aa22f +F test/server3.test c33343f2f6bc23f2b4e2f047c3d083579f0cfac2795e0f1eb226ab34758967c0 +F test/servercrash.test 816c132b26af008067cab2913783f67006d4003e3988f3f3ee1075742f6e0a6c F test/session.test 78fa2365e93d3663a6e933f86e7afc395adf18be F test/shared.test 1da9dbad400cee0d93f252ccf76e1ae007a63746 F test/shared2.test 03eb4a8d372e290107d34b6ce1809919a698e879 @@ -1189,7 +1193,7 @@ F test/temptable.test d2c9b87a54147161bcd1822e30c1d1cd891e5b30 F test/temptable2.test cd396beb41117a5302fff61767c35fa4270a0d5e F test/temptable3.test d11a0974e52b347e45ee54ef1923c91ed91e4637 F test/temptrigger.test 38f0ca479b1822d3117069e014daabcaacefffcc -F test/tester.tcl 581f0185434daf7026ccede4c07e8d1479186ec5 +F test/tester.tcl d74fefbba7cdd4ad2846c73ed0e02734809d8f824bf25f44a8dbe43dd8a74283 F test/thread001.test 9f22fd3525a307ff42a326b6bc7b0465be1745a5 F test/thread002.test e630504f8a06c00bf8bbe68528774dd96aeb2e58 F test/thread003.test ee4c9efc3b86a6a2767516a37bd64251272560a7 @@ -1517,7 +1521,7 @@ F tool/mkopts.tcl 66ac10d240cc6e86abd37dc908d50382f84ff46e F tool/mkpragmatab.tcl 32bb40741df11bddc8451de9ea4d130e7b4476d8064794b1cf402ac110840fba F tool/mkspeedsql.tcl a1a334d288f7adfe6e996f2e712becf076745c97 F tool/mksqlite3c-noext.tcl fef88397668ae83166735c41af99d79f56afaabb -F tool/mksqlite3c.tcl 06b2e6a0f21cc0a5d70fbbd136b3e0a96470645e +F tool/mksqlite3c.tcl 44d4b9b9d7398801164e66f866f1da0ee1e12e47d1b718f1bbf09a19fd2f3a44 F tool/mksqlite3h.tcl b9836752c3d08f9fab2dfc0017ca9fd5d90ac863 F tool/mksqlite3internalh.tcl eb994013e833359137eb53a55acdad0b5ae1049b F tool/mkvsix.tcl b9e0777a213c23156b6542842c238479e496ebf5 @@ -1576,7 +1580,10 @@ F 
vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 63d9ca5c7392e1efe3330689fe750310a952287e843b3242178724e8561fae0e -R c4490df1c59aee5f8635e7285ee6a7e5 +P 2d0b64316d66a362f5891ceb71a1fd8e4607732274b09b0a8472b97eef68ebc2 +R ba2951238a6a64860b2d30a7e3110580 +T *branch * server-edition +T *sym-server-edition * +T -sym-trunk * U dan -Z 2b0ae6051b3e0017c3cacc8dc5ffb6e4 +Z fdb999cd81f23109f6505ea72ca50eac diff --git a/manifest.uuid b/manifest.uuid index 44877c7f80..3b2427e063 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -2d0b64316d66a362f5891ceb71a1fd8e4607732274b09b0a8472b97eef68ebc2 \ No newline at end of file +64ecf7c7e512827e8a5a42f9f3ad92ff57ec868820e3943dbc74d5823f9a889d \ No newline at end of file diff --git a/src/pager.c b/src/pager.c index 40c4dd9d88..7d8db16870 100644 --- a/src/pager.c +++ b/src/pager.c @@ -706,6 +706,9 @@ struct Pager { Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ char *zWal; /* File name for write-ahead log */ #endif +#ifdef SQLITE_SERVER_EDITION + Server *pServer; +#endif }; /* @@ -836,6 +839,12 @@ int sqlite3PagerUseWal(Pager *pPager, Pgno pgno){ # define pagerBeginReadTransaction(z) SQLITE_OK #endif +#ifdef SQLITE_SERVER_EDITION +# define pagerIsServer(x) ((x)->pServer!=0) +#else +# define pagerIsServer(x) 0 +#endif + #ifndef NDEBUG /* ** Usage: @@ -1132,6 +1141,7 @@ static int pagerUnlockDb(Pager *pPager, int eLock){ assert( !pPager->exclusiveMode || pPager->eLock==eLock ); assert( eLock==NO_LOCK || eLock==SHARED_LOCK ); assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 ); + assert( eLock!=NO_LOCK || pagerIsServer(pPager)==0 ); if( isOpen(pPager->fd) ){ assert( pPager->eLock>=eLock ); rc = pPager->noLock ? 
SQLITE_OK : sqlite3OsUnlock(pPager->fd, eLock); @@ -1807,6 +1817,12 @@ static void pager_unlock(Pager *pPager){ pPager->pInJournal = 0; releaseAllSavepoints(pPager); +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + sqlite3ServerEnd(pPager->pServer); + pPager->eState = PAGER_OPEN; + }else +#endif if( pagerUseWal(pPager) ){ assert( !isOpen(pPager->jfd) ); sqlite3WalEndReadTransaction(pPager->pWal); @@ -2105,6 +2121,11 @@ static int pager_end_transaction(Pager *pPager, int hasMaster, int bCommit){ if( rc==SQLITE_NOTFOUND ) rc = SQLITE_OK; } +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + rc2 = sqlite3ServerReleaseWriteLocks(pPager->pServer); + }else +#endif if( !pPager->exclusiveMode && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0)) ){ @@ -4078,6 +4099,13 @@ int sqlite3PagerClose(Pager *pPager, sqlite3 *db){ (db && (db->flags & SQLITE_NoCkptOnClose) ? 0 : pTmp) ); pPager->pWal = 0; +#endif +#ifdef SQLITE_SERVER_EDITION + if( pPager->pServer ){ + sqlite3ServerDisconnect(pPager->pServer, pPager->fd); + pPager->pServer = 0; + sqlite3_free(pPager->zJournal); + } #endif pager_reset(pPager); if( MEMDB ){ @@ -5051,6 +5079,78 @@ static int hasHotJournal(Pager *pPager, int *pExists){ return rc; } +#ifdef SQLITE_SERVER_EDITION +static int pagerServerConnect(Pager *pPager){ + int rc = SQLITE_OK; + if( pPager->tempFile==0 ){ + int iClient = 0; + pPager->noLock = 1; + pPager->journalMode = PAGER_JOURNALMODE_PERSIST; + rc = sqlite3ServerConnect(pPager, &pPager->pServer, &iClient); + if( rc==SQLITE_OK ){ + pPager->zJournal = sqlite3_mprintf( + "%s-journal%d", pPager->zFilename, iClient + ); + if( pPager->zJournal==0 ){ + rc = SQLITE_NOMEM_BKPT; + } + } + } + return rc; +} + +int sqlite3PagerRollbackJournal(Pager *pPager, int iClient){ + int rc; + char *zJrnl = sqlite3_mprintf("%s-journal%d", pPager->zFilename, iClient); + + if( zJrnl ){ + int bExists = 0; + sqlite3_file *jfd = 0; + sqlite3_vfs * const pVfs = pPager->pVfs; + + rc = 
sqlite3OsAccess(pVfs, zJrnl, SQLITE_ACCESS_EXISTS, &bExists); + if( rc==SQLITE_OK && bExists ){ + int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL; + rc = sqlite3OsOpenMalloc(pVfs, zJrnl, &jfd, flags, &flags); + } + assert( rc==SQLITE_OK || jfd==0 ); + if( jfd ){ + sqlite3_file *saved_jfd = pPager->jfd; + u8 saved_eState = pPager->eState; + u8 saved_eLock = pPager->eLock; + i64 saved_journalOff = pPager->journalOff; + i64 saved_journalHdr = pPager->journalHdr; + + pPager->eLock = EXCLUSIVE_LOCK; + pPager->eState = PAGER_WRITER_DBMOD; + pPager->jfd = jfd; + rc = pagerSyncHotJournal(pPager); + if( rc==SQLITE_OK ) rc = pager_playback(pPager, 1); + + pPager->jfd = saved_jfd; + pPager->eState = saved_eState; + pPager->eLock = saved_eLock; + pPager->journalOff = saved_journalOff; + pPager->journalHdr = saved_journalHdr; + + sqlite3OsCloseFree(jfd); + if( rc==SQLITE_OK ){ + rc = sqlite3OsDelete(pVfs, zJrnl, 0); + } + } + sqlite3_free(zJrnl); + }else{ + rc = SQLITE_NOMEM_BKPT; + } + + return rc; +} + +#else +# define pagerServerConnect(pPager) SQLITE_OK +#endif + + /* ** This function is called to obtain a shared lock on the database file. 
** It is illegal to call sqlite3PagerGet() until after this function @@ -5090,7 +5190,9 @@ int sqlite3PagerSharedLock(Pager *pPager){ assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER ); assert( pPager->errCode==SQLITE_OK ); - if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){ + if( !pagerUseWal(pPager) + && !pagerIsServer(pPager) + && pPager->eState==PAGER_OPEN ){ int bHotJournal = 1; /* True if there exists a hot journal-file */ assert( !MEMDB ); @@ -5269,8 +5371,19 @@ int sqlite3PagerSharedLock(Pager *pPager){ #ifndef SQLITE_OMIT_WAL assert( pPager->pWal==0 || rc==SQLITE_OK ); #endif + + if( rc==SQLITE_OK && pagerUseWal(pPager)==0 ){ + rc = pagerServerConnect(pPager); + } } +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + assert( rc==SQLITE_OK ); + pager_reset(pPager); + rc = sqlite3ServerBegin(pPager->pServer); + }else +#endif if( pagerUseWal(pPager) ){ assert( rc==SQLITE_OK ); rc = pagerBeginReadTransaction(pPager); @@ -5564,6 +5677,12 @@ int sqlite3PagerGet( DbPage **ppPage, /* Write a pointer to the page here */ int flags /* PAGER_GET_XXX flags */ ){ +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + int rc = sqlite3ServerLock(pPager->pServer, pgno, 0); + if( rc!=SQLITE_OK ) return rc; + } +#endif return pPager->xGet(pPager, pgno, ppPage, flags); } @@ -5866,6 +5985,13 @@ static int pager_write(PgHdr *pPg){ assert( pPager->readOnly==0 ); CHECK_PAGE(pPg); +#ifdef SQLITE_SERVER_EDITION + if( pagerIsServer(pPager) ){ + rc = sqlite3ServerLock(pPager->pServer, pPg->pgno, 1); + if( rc!=SQLITE_OK ) return rc; + } +#endif + /* The journal file needs to be opened. Higher level routines have already ** obtained the necessary locks to begin the write-transaction, but the ** rollback journal might not yet be open. Open it now if this is the case. 
@@ -6144,7 +6270,10 @@ static int pager_incr_changecounter(Pager *pPager, int isDirectMode){ # define DIRECT_MODE isDirectMode #endif - if( !pPager->changeCountDone && ALWAYS(pPager->dbSize>0) ){ + if( 0==pagerIsServer(pPager) + && !pPager->changeCountDone + && ALWAYS(pPager->dbSize>0) + ){ PgHdr *pPgHdr; /* Reference to page 1 */ assert( !pPager->tempFile && isOpen(pPager->fd) ); diff --git a/src/pager.h b/src/pager.h index 585ef29497..e9f37512fb 100644 --- a/src/pager.h +++ b/src/pager.h @@ -236,4 +236,8 @@ void *sqlite3PagerCodec(DbPage *); # define enable_simulated_io_errors() #endif +#ifdef SQLITE_SERVER_EDITION + int sqlite3PagerRollbackJournal(Pager*, int); +#endif + #endif /* SQLITE_PAGER_H */ diff --git a/src/server.c b/src/server.c new file mode 100644 index 0000000000..9bcac9db9d --- /dev/null +++ b/src/server.c @@ -0,0 +1,485 @@ +/* +** 2017 April 24 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + +#include "sqliteInt.h" + +/* +** HMA file layout: +** +** 4 bytes - DMS slot. All connections read-lock this slot. +** +** 16*4 bytes - locking slots. Connections hold a read-lock on a locking slot +** when they are connected, a write lock when they have an open +** transaction. +** +** N*4 bytes - Page locking slots. N is HMA_PAGELOCK_SLOTS. +** +** Page lock slot format: +** +** Least significant HMA_CLIENT_SLOTS used for read-locks. If bit 0 is set, +** client 0 holds a read-lock. +** +** If (v) is the value of the locking slot and (v>>HMA_CLIENT_SLOTS) is +** not zero, then the write-lock holder is client ((v>>HMA_CLIENT_SLOTS)-1). 
+** +*/ + +#ifdef SQLITE_SERVER_EDITION + +#define HMA_CLIENT_SLOTS 16 +#define HMA_PAGELOCK_SLOTS (256*1024) + +#define HMA_FILE_SIZE (4 + 4*HMA_CLIENT_SLOTS + 4*HMA_PAGELOCK_SLOTS) + +#include "unistd.h" +#include "fcntl.h" +#include "sys/mman.h" +#include "sys/types.h" +#include "sys/stat.h" + +typedef struct ServerHMA ServerHMA; + +struct ServerGlobal { + sqlite3_mutex *mutex; + ServerHMA *pHma; +}; +static struct ServerGlobal g_server; + +/* +** There is one instance of the following structure for each distinct +** HMA file opened by clients within this process. +*/ +struct ServerHMA { + char *zName; /* hma file path */ + int fd; /* Fd open on hma file */ + int nClient; /* Current number of clients */ + Server *aClient[HMA_CLIENT_SLOTS]; /* Local (this process) clients */ + u32 *aMap; /* MMapped hma file */ + ServerHMA *pNext; /* Next HMA in this process */ + + dev_t st_dev; + ino_t st_ino; +}; + +struct Server { + ServerHMA *pHma; /* Hma file object */ + int iClient; /* Client id */ + Pager *pPager; /* Associated pager object */ + + int nAlloc; /* Allocated size of aLock[] array */ + int nLock; /* Number of entries in aLock[] */ + u32 *aLock; /* Page numbers of locks to release at end of transaction */ +}; + +#define SERVER_WRITE_LOCK 3 +#define SERVER_READ_LOCK 2 +#define SERVER_NO_LOCK 1 + +static int posixLock(int fd, int iSlot, int eLock, int bBlock){ + int res; + struct flock l; + short aType[4] = {0, F_UNLCK, F_RDLCK, F_WRLCK}; + assert( eLock==SERVER_WRITE_LOCK + || eLock==SERVER_READ_LOCK + || eLock==SERVER_NO_LOCK + ); + memset(&l, 0, sizeof(l)); + l.l_type = aType[eLock]; + l.l_whence = SEEK_SET; + l.l_start = iSlot*sizeof(u32); + l.l_len = 1; + + res = fcntl(fd, (bBlock ? F_SETLKW : F_SETLK), &l); + return (res==0 ? 
SQLITE_OK : SQLITE_BUSY); +} + +static int serverMapFile(ServerHMA *p){ + assert( p->aMap==0 ); + p->aMap = mmap(0, HMA_FILE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, p->fd, 0); + if( p->aMap==0 ){ + return SQLITE_ERROR; + } + return SQLITE_OK; +} + + +static void serverDecrHmaRefcount(ServerHMA *pHma){ + if( pHma ){ + pHma->nClient--; + if( pHma->nClient<=0 ){ + ServerHMA **pp; + if( pHma->aMap ) munmap(pHma->aMap, HMA_FILE_SIZE); + if( pHma->fd>=0 ) close(pHma->fd); + for(pp=&g_server.pHma; *pp!=pHma; pp=&(*pp)->pNext); + *pp = pHma->pNext; + sqlite3_free(pHma); + } + } +} + + +static int serverOpenHma(Pager *pPager, const char *zPath, ServerHMA **ppHma){ + struct stat sStat; /* Structure populated by stat() */ + int res; /* result of stat() */ + int rc = SQLITE_OK; /* Return code */ + ServerHMA *pHma = 0; + + assert( sqlite3_mutex_held(g_server.mutex) ); + + res = stat(zPath, &sStat); + if( res!=0 ){ + sqlite3_log(SQLITE_CANTOPEN, "Failed to stat(%s)", zPath); + rc = SQLITE_ERROR; + }else{ + for(pHma=g_server.pHma; pHma; pHma=pHma->pNext){ + if( sStat.st_dev==pHma->st_dev && sStat.st_ino==pHma->st_ino ) break; + } + if( pHma==0 ){ + int nPath = strlen(zPath); + int nByte = sizeof(ServerHMA) + nPath+1 + 4; + + pHma = (ServerHMA*)sqlite3_malloc(nByte); + if( pHma==0 ){ + rc = SQLITE_NOMEM; + }else{ + int i; + memset(pHma, 0, nByte); + pHma->zName = (char*)&pHma[1]; + pHma->nClient = 1; + pHma->st_dev = sStat.st_dev; + pHma->st_ino = sStat.st_ino; + pHma->pNext = g_server.pHma; + g_server.pHma = pHma; + + memcpy(pHma->zName, zPath, nPath); + memcpy(&pHma->zName[nPath], "-hma", 5); + + pHma->fd = open(pHma->zName, O_RDWR|O_CREAT, 0644); + if( pHma->fd<0 ){ + sqlite3_log(SQLITE_CANTOPEN, "Failed to open(%s)", pHma->zName); + rc = SQLITE_ERROR; + } + + if( rc==SQLITE_OK ){ + /* Write-lock the DMS slot. If successful, initialize the hma file. 
*/ + rc = posixLock(pHma->fd, 0, SERVER_WRITE_LOCK, 0); + if( rc==SQLITE_OK ){ + res = ftruncate(pHma->fd, HMA_FILE_SIZE); + if( res!=0 ){ + sqlite3_log(SQLITE_CANTOPEN, + "Failed to ftruncate(%s)", pHma->zName + ); + rc = SQLITE_ERROR; + } + if( rc==SQLITE_OK ){ + rc = serverMapFile(pHma); + } + if( rc==SQLITE_OK ){ + memset(pHma->aMap, 0, HMA_FILE_SIZE); + }else{ + rc = SQLITE_ERROR; + } + }else{ + rc = serverMapFile(pHma); + } + for(i=0; rc==SQLITE_OK && ifd, 0, SERVER_READ_LOCK, 1); + } + } + + if( rc!=SQLITE_OK ){ + serverDecrHmaRefcount(pHma); + pHma = 0; + } + } + }else{ + pHma->nClient++; + } + } + + *ppHma = pHma; + return rc; +} + +static u32 *serverPageLockSlot(Server *p, Pgno pgno){ + int iSlot = pgno % HMA_PAGELOCK_SLOTS; + return &p->pHma->aMap[1 + HMA_CLIENT_SLOTS + iSlot]; +} +static u32 *serverClientSlot(Server *p, int iClient){ + return &p->pHma->aMap[1 + iClient]; +} + +/* +** Close the "connection" and *-hma file. This deletes the object passed +** as the first argument. +*/ +void sqlite3ServerDisconnect(Server *p, sqlite3_file *dbfd){ + if( p->pHma ){ + ServerHMA *pHma = p->pHma; + sqlite3_mutex_enter(g_server.mutex); + if( p->iClient>=0 ){ + u32 *pSlot = serverClientSlot(p, p->iClient); + *pSlot = 0; + assert( pHma->aClient[p->iClient]==p ); + pHma->aClient[p->iClient] = 0; + posixLock(pHma->fd, p->iClient+1, SERVER_NO_LOCK, 0); + } + if( dbfd + && pHma->nClient==1 + && SQLITE_OK==sqlite3OsLock(dbfd, SQLITE_LOCK_EXCLUSIVE) + ){ + unlink(pHma->zName); + } + serverDecrHmaRefcount(pHma); + sqlite3_mutex_leave(g_server.mutex); + } + sqlite3_free(p->aLock); + sqlite3_free(p); +} + +static int serverRollbackClient(Server *p, int iBlock){ + int rc; + + sqlite3_log(SQLITE_NOTICE, "Rolling back failed client %d", iBlock); + + /* Roll back any journal file for client iBlock. */ + rc = sqlite3PagerRollbackJournal(p->pPager, iBlock); + + /* Clear any locks held by client iBlock from the HMA file. 
*/ + if( rc==SQLITE_OK ){ + int i; + for(i=0; i>HMA_CLIENT_SLOTS)==iBlock+1 ){ + n = n & ((1<iClient = -1; + p->pPager = pPager; + + sqlite3_mutex_enter(g_server.mutex); + rc = serverOpenHma(pPager, zPath, &p->pHma); + + /* File is now mapped. Find a free client slot. */ + if( rc==SQLITE_OK ){ + int i; + Server **aClient = p->pHma->aClient; + int fd = p->pHma->fd; + for(i=0; iHMA_CLIENT_SLOTS ){ + rc = SQLITE_BUSY; + }else{ + u32 *pSlot = serverClientSlot(p, i); + *piClient = p->iClient = i; + aClient[i] = p; + *pSlot = 1; + } + } + } + + sqlite3_mutex_leave(g_server.mutex); + } + + if( rc!=SQLITE_OK ){ + sqlite3ServerDisconnect(p, 0); + p = 0; + } + *ppOut = p; + return rc; +} + +static int serverOvercomeLock(Server *p, int bWrite, u32 v, int *pbRetry){ + int rc = SQLITE_OK; + int bLocal = 0; + int iBlock = ((int)(v>>HMA_CLIENT_SLOTS))-1; + + if( iBlock<0 ){ + for(iBlock=0; iBlockiClient && (v & (1<pHma->aClient[iBlock] ){ + bLocal = 1; + }else{ + rc = posixLock(p->pHma->fd, iBlock+1, SERVER_WRITE_LOCK, 0); + } + + if( bLocal==0 && rc==SQLITE_OK ){ + rc = serverRollbackClient(p, iBlock); + + /* Release the lock on slot iBlock */ + posixLock(p->pHma->fd, iBlock+1, SERVER_NO_LOCK, 0); + if( rc==SQLITE_OK ){ + *pbRetry = 1; + } + }else{ + assert( rc==SQLITE_OK || rc==SQLITE_BUSY ); + rc = SQLITE_OK; + } + sqlite3_mutex_leave(g_server.mutex); + + return rc; +} + +/* +** Begin a transaction. +*/ +int sqlite3ServerBegin(Server *p){ + return posixLock(p->pHma->fd, p->iClient+1, SERVER_WRITE_LOCK, 0); +} + +/* +** End a transaction (and release all locks). 
+*/ +int sqlite3ServerEnd(Server *p){ + int i; + for(i=0; inLock; i++){ + u32 *pSlot = serverPageLockSlot(p, p->aLock[i]); + while( 1 ){ + u32 v = *pSlot; + u32 n = v; + if( (v>>HMA_CLIENT_SLOTS)==p->iClient+1 ){ + n = n & ((1 << HMA_CLIENT_SLOTS)-1); + } + n = n & ~(1 << p->iClient); + if( __sync_val_compare_and_swap(pSlot, v, n)==v ) break; + } + } + p->nLock = 0; + return posixLock(p->pHma->fd, p->iClient+1, SERVER_READ_LOCK, 0); +} + +/* +** Release all write-locks. +*/ +int sqlite3ServerReleaseWriteLocks(Server *p){ + int rc = SQLITE_OK; + return rc; +} + +/* +** Lock page pgno for reading (bWrite==0) or writing (bWrite==1). +*/ +int sqlite3ServerLock(Server *p, Pgno pgno, int bWrite){ + int rc = SQLITE_OK; + + /* Grow the aLock[] array, if required */ + if( p->nLock==p->nAlloc ){ + int nNew = p->nAlloc ? p->nAlloc*2 : 128; + u32 *aNew; + aNew = (u32*)sqlite3_realloc(p->aLock, sizeof(u32)*nNew); + if( aNew==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + p->aLock = aNew; + p->nAlloc = nNew; + } + } + if( rc==SQLITE_OK ){ + u32 *pSlot = serverPageLockSlot(p, pgno); + u32 v = *pSlot; + + /* Check if the required lock is already held. If so, exit this function + ** early. Otherwise, add an entry to the aLock[] array to record the fact + ** that the lock may need to be released. */ + if( bWrite ){ + int iLock = ((int)(v>>HMA_CLIENT_SLOTS)) - 1; + if( iLock==p->iClient ) goto server_lock_out; + if( iLock<0 ){ + p->aLock[p->nLock++] = pgno; + } + }else{ + if( v & (1<iClient) ) goto server_lock_out; + p->aLock[p->nLock++] = pgno; + } + + while( 1 ){ + u32 n; + + while( (bWrite && (v & ~(1 << p->iClient))) || (v >> HMA_CLIENT_SLOTS) ){ + int bRetry = 0; + rc = serverOvercomeLock(p, bWrite, v, &bRetry); + if( rc!=SQLITE_OK ) goto server_lock_out; + if( bRetry==0 ){ + /* There is a conflicting lock. Cannot obtain this lock. 
*/ + sqlite3_log(SQLITE_BUSY_DEADLOCK, "Conflict at page %d", (int)pgno); + rc = SQLITE_BUSY_DEADLOCK; + goto server_lock_out; + } + v = *pSlot; + } + + if( bWrite ){ + n = v | ((p->iClient+1) << HMA_CLIENT_SLOTS); + }else{ + n = v | (1 << p->iClient); + } + if( __sync_val_compare_and_swap(pSlot, v, n)==v ) break; + v = *pSlot; + } + } + +server_lock_out: + return rc; +} + +#endif /* ifdef SQLITE_SERVER_EDITION */ diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 99d381d9d0..60e5edc989 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -497,6 +497,7 @@ int sqlite3_exec( #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) #define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8)) +#define SQLITE_BUSY_DEADLOCK (SQLITE_BUSY | (3<<8)) #define SQLITE_CANTOPEN_NOTEMPDIR (SQLITE_CANTOPEN | (1<<8)) #define SQLITE_CANTOPEN_ISDIR (SQLITE_CANTOPEN | (2<<8)) #define SQLITE_CANTOPEN_FULLPATH (SQLITE_CANTOPEN | (3<<8)) diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 40660aed99..dba69939dc 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1101,6 +1101,7 @@ typedef int VList; #include "pcache.h" #include "os.h" #include "mutex.h" +#include "server.h" /* The SQLITE_EXTRA_DURABLE compile-time option used to set the default ** synchronous setting to EXTRA. It is no longer supported. diff --git a/src/vdbeaux.c b/src/vdbeaux.c index 00a5ec91a9..e79675a060 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -2639,12 +2639,13 @@ int sqlite3VdbeHalt(Vdbe *p){ /* Check for one of the special errors */ mrc = p->rc & 0xff; isSpecialError = mrc==SQLITE_NOMEM || mrc==SQLITE_IOERR - || mrc==SQLITE_INTERRUPT || mrc==SQLITE_FULL; + || mrc==SQLITE_INTERRUPT || mrc==SQLITE_FULL + || p->rc==SQLITE_BUSY_DEADLOCK; if( isSpecialError ){ - /* If the query was read-only and the error code is SQLITE_INTERRUPT, - ** no rollback is necessary. 
Otherwise, at least a savepoint - ** transaction must be rolled back to restore the database to a - ** consistent state. + /* If the query was read-only and the error code is SQLITE_INTERRUPT + ** or SQLITE_BUSY_DEADLOCK, no rollback is necessary. Otherwise, at + ** least a savepoint transaction must be rolled back to restore the + ** database to a consistent state. ** ** Even if the statement is read-only, it is important to perform ** a statement or transaction rollback operation. If the error @@ -2653,7 +2654,7 @@ int sqlite3VdbeHalt(Vdbe *p){ ** pagerStress() in pager.c), the rollback is required to restore ** the pager to a consistent state. */ - if( !p->readOnly || mrc!=SQLITE_INTERRUPT ){ + if( !p->readOnly || (mrc!=SQLITE_INTERRUPT && mrc!=SQLITE_BUSY) ){ if( (mrc==SQLITE_NOMEM || mrc==SQLITE_FULL) && p->usesStmtJournal ){ eStatementOp = SAVEPOINT_ROLLBACK; }else{ diff --git a/test/permutations.test b/test/permutations.test index 92eb2eec07..4f5ff1165f 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -273,6 +273,14 @@ test_suite "fts5" -prefix "" -description { All FTS5 tests. } -files [glob -nocomplain $::testdir/../ext/fts5/test/*.test] +test_suite "server" -prefix "" -description { + All server-edition tests. +} -files [ + test_set \ + [glob -nocomplain $::testdir/server*.test] \ + -exclude *server1.test +] + test_suite "fts5-light" -prefix "" -description { All FTS5 tests. } -files [ diff --git a/test/server2.test b/test/server2.test new file mode 100644 index 0000000000..b1a6fa067e --- /dev/null +++ b/test/server2.test @@ -0,0 +1,101 @@ +# 2017 April 25 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is testing the server mode of SQLite. +# + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix server2 + +#------------------------------------------------------------------------- +# Check that the *-hma file is deleted correctly. +# +do_execsql_test 1.0 { + CREATE TABLE t1(a, b); +} {} +do_test 1.1 { + file exists test.db-hma +} {1} +do_test 1.2 { + db close + file exists test.db-hma +} {0} +do_test 1.3 { + sqlite3 db test.db + db eval { CREATE TABLE t2(a, b) } + sqlite3 db2 test.db + db2 eval { CREATE TABLE t3(a, b) } + file exists test.db-hma +} {1} +do_test 1.4 { + db2 close + file exists test.db-hma +} {1} +do_test 1.5 { + db close + file exists test.db-hma +} {0} + + +#------------------------------------------------------------------------- +# +reset_db +sqlite3 db2 test.db + +do_execsql_test 2.0 { + CREATE TABLE t1(a, b); + CREATE TABLE t2(c, d); +} + +# Two concurrent transactions committed. +# +do_test 2.1 { + db eval { + BEGIN; + INSERT INTO t1 VALUES(1, 2); + } + db2 eval { + BEGIN; + INSERT INTO t2 VALUES(3, 4); + } +} {} +do_test 2.2 { + lsort [glob test.db*] +} {test.db test.db-hma test.db-journal0 test.db-journal1} +do_test 2.3.1 { db eval COMMIT } {} +do_test 2.3.2 { db2 eval COMMIT } {} +do_execsql_test 2.4 {SELECT * FROM t1, t2} {1 2 3 4} +do_test 2.5 { + lsort [glob test.db*] +} {test.db test.db-hma test.db-journal0 test.db-journal1} + +do_test 2.6 { + execsql {BEGIN} + execsql {INSERT INTO t1 VALUES(5, 6)} + + execsql {BEGIN} db2 + catchsql {INSERT INTO t1 VALUES(7, 8)} db2 +} {1 {database is locked}} +do_test 2.7 { + # Transaction is automatically rolled back in this case. 
+ sqlite3_get_autocommit db2 +} {1} +do_test 2.8 { + execsql COMMIT + execsql { SELECT * FROM t1 } db2 +} {1 2 5 6} + + + +finish_test + diff --git a/test/server3.test b/test/server3.test new file mode 100644 index 0000000000..85d69e28b4 --- /dev/null +++ b/test/server3.test @@ -0,0 +1,45 @@ +# 2017 April 25 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this script is testing the server mode of SQLite. +# + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +set testprefix server3 + +db close + +do_multiclient_test tn { + do_test $tn.1 { + sql1 { CREATE TABLE t1(a, b) } + sql2 { CREATE TABLE t2(a, b) } + } {} + + do_test $tn.2 { + sql1 { + INSERT INTO t2 VALUES(1, 2); + BEGIN; + INSERT INTO t1 VALUES(1, 2); + } + } {} + + do_test $tn.3 { csql2 { SELECT * FROM t1 } } {1 {database is locked}} + do_test $tn.4 { csql2 { SELECT * FROM t1 } } {1 {database is locked}} + do_test $tn.5 { sql2 { SELECT * FROM t2 } } {1 2} + + +} + +finish_test + diff --git a/test/servercrash.test b/test/servercrash.test new file mode 100644 index 0000000000..d7cc15f4ea --- /dev/null +++ b/test/servercrash.test @@ -0,0 +1,68 @@ +# 2017 April 27 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# + + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix servercrash + +ifcapable !crashtest { + finish_test + return +} +do_not_use_codec + +do_execsql_test 1.0 { + PRAGMA page_siBlockze = 4096; + PRAGMA auto_vacuum = OFF; + CREATE TABLE t1(a, b); + CREATE TABLE t2(c, d); + + INSERT INTO t1 VALUES(1, 2), (3, 4); + INSERT INTO t2 VALUES(1, 2), (3, 4); +} + +for {set i 0} {$i < 10} {incr i} { + do_test 1.$i.1 { + crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) } + } {1 {child process exited abnormally}} + + do_execsql_test 1.$i.2 { + SELECT * FROM t1 + } {1 2 3 4} +} + +for {set i 0} {$i < 10} {incr i} { + do_test 2.$i.1 { + crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) } + } {1 {child process exited abnormally}} + + do_test 2.$i.2 { + sqlite3 dbX test.db + execsql { SELECT * FROM t1 } dbX + } {1 2 3 4} + dbX close +} + +db close +for {set i 0} {$i < 10} {incr i} { + do_test 3.$i.1 { + crashsql -delay 1 -file test.db { INSERT INTO t1 VALUES(5, 6) } + } {1 {child process exited abnormally}} + + sqlite3 db test.db + do_execsql_test 3.$i.2 { SELECT * FROM t1 } {1 2 3 4} + db close +} + +finish_test + diff --git a/test/tester.tcl b/test/tester.tcl index dc6547d033..df27023f6f 100644 --- a/test/tester.tcl +++ b/test/tester.tcl @@ -586,6 +586,10 @@ proc reset_db {} { forcedelete test.db forcedelete test.db-journal forcedelete test.db-wal + for {set i 0} {$i < 16} {incr i} { + forcedelete test.db-journal$i + } + sqlite3 db ./test.db set ::DB [sqlite3_connection_pointer db] if {[info exists ::SETUP_SQL]} { diff --git a/tool/mksqlite3c.tcl b/tool/mksqlite3c.tcl index 55179c4b80..214498c52a 100644 --- a/tool/mksqlite3c.tcl +++ b/tool/mksqlite3c.tcl @@ -114,6 +114,7 @@ foreach hdr { pcache.h pragma.h rtree.h + server.h sqlite3session.h sqlite3.h sqlite3ext.h @@ -319,6 +320,7 @@ foreach file { rowset.c pager.c wal.c + server.c btmutex.c btree.c