From 9663411f8f0042078a7e4531790fb57a66cbb8be Mon Sep 17 00:00:00 2001 From: drh Date: Tue, 19 Jun 2018 13:45:48 +0000 Subject: [PATCH] Initial attempt to get SQLite working with OFD locks on Linux. The code here does not function correctly. This is an incremental check-in for a work in progress. FossilOrigin-Name: 148f8dec9a26f11d343bfeb558fd12ba18d7f5d4d69da58fc8aa22f88e13c408 --- manifest | 19 ++++--- manifest.uuid | 2 +- src/os_unix.c | 145 +++++++++++++++++++++++++++++++++++------------- src/sqlite.h.in | 12 ++++ src/test1.c | 36 +++++++++++- 5 files changed, 164 insertions(+), 50 deletions(-) diff --git a/manifest b/manifest index 3668ff9309..93bfbd8c0a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Minor\schange\sto\sthe\sinput\sgrammar\sto\smake\sthe\sparser\stables\sslightly\ssmaller. -D 2018-06-19T11:15:19.013 +C Initial\sattempt\sto\sget\sSQLite\sworking\swith\sOFD\slocks\son\sLinux.\sThe\scode\shere\ndoes\snot\sfunction\scorrectly.\sThis\sis\san\sincremental\scheck-in\sfor\sa\swork\sin\nprogress. +D 2018-06-19T13:45:48.548 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F Makefile.in bfc40f350586923e0419d2ea4b559c37ec10ee4b6e210e08c14401f8e340f0da @@ -479,7 +479,7 @@ F src/os.c 8aeb0b0f40f8f5b0da03fe49706695adaf42d2f516ab95abc72e86c245e119de F src/os.h 48388821692e87da174ea198bf96b1b2d9d83be5dfc908f673ee21fafbe0d432 F src/os_common.h b2f4707a603e36811d9b1a13278bffd757857b85 F src/os_setup.h 0dbaea40a7d36bf311613d31342e0b99e2536586 -F src/os_unix.c c230a7a24766320d8414afd087edcd43e499fb45e86361f6f4f464f343d965a9 +F src/os_unix.c da3e1802d4c8946d76350a5f58347ae6c25a1ff028b059bc3060efa7e90a969e F src/os_win.c ac29c25cde4cfb4adacc59cdec4aa45698ca0e29164ea127859585ccd9faa354 F src/os_win.h 7b073010f1451abe501be30d12f6bc599824944a F src/pager.c 1bb6a57fa0465296a4d6109a1a64610a0e7adde1f3acf3ef539a9d972908ce8f @@ -497,7 +497,7 @@ F src/resolve.c 14602f46800ba182ea6a490e0f304127d29ac1f724bdadcc639e25d3223fcf6e F src/rowset.c 7b7e7e479212e65b723bf40128c7b36dc5afdfac F src/select.c 8d3176c5258cc83942815ebe75b4c1f8dcf62b5e0f4d37373a14ebf23c046f9f F src/shell.c.in 8578421c5fb2a972461b2a996f7173646e55e0dbd2a2eee30c8f5dc7d3dbadfd -F src/sqlite.h.in 19de593baa0667854730e7b8bc2e3039c20ee80a4d537e9b5ec2038947fe3daf +F src/sqlite.h.in a97be4beec01ea557809fe79a8e6befefa370bd7e3ba0c1a1abfe4c94ae59888 F src/sqlite3.rc 5121c9e10c3964d5755191c80dd1180c122fc3a8 F src/sqlite3ext.h 9887b27e69c01e79c2cbe74ef73bf01af5b5703d6a7f0a4371e386d7249cb1c7 F src/sqliteInt.h c8d304712b6b4e2150cbb5355b1a80427eee7b3ea1a6b5827afadf557360d7fe @@ -505,7 +505,7 @@ F src/sqliteLimit.h 1513bfb7b20378aa0041e7022d04acb73525de35b80b252f1b83fedb4de6 F src/status.c 46e7aec11f79dad50965a5ca5fa9de009f7d6bde08be2156f1538a0a296d4d0e F src/table.c b46ad567748f24a326d9de40e5b9659f96ffff34 F src/tclsqlite.c 916a92de77ec5cbe27818ca194d8cf0c58aa7ad5b87527098f6aa5a6068800ce -F src/test1.c 51aa5f3030217ca45eb62e90944838794d4faaae7a8f60e0330ae01f30bc997b +F src/test1.c 553365d346ed17c099fef671204cdec6e40e701af62ef4f5527e4f2a08b98bb0 F src/test2.c 3efb99ab7f1fc8d154933e02ae1378bac9637da5 F src/test3.c 61798bb0d38b915067a8c8e03f5a534b431181f802659a6616f9b4ff7d872644 F src/test4.c 18ec393bb4d0ad1de729f0b94da7267270f3d8e6 @@ -1731,7 +1731,10 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P 39434262d5cf1af197ce0abb1f1ee84ee0797823e290a493c5bf8376fbe287a6 -R b6cf2872dc6b136d2f9f02df3a45f74e +P 320fa69e6aa2a7d67f6444d6c13de9893e27b85c36a933b06da113d753b6aafc +R c7d7dcd11c67967d0f404fe07c1adca6 +T *branch * ofd-locks +T *sym-ofd-locks * +T -sym-trunk * U drh -Z ca3608404058acf123d3efb518cfb804 +Z d5c78e27187e28437ed1607871e9e53d diff --git a/manifest.uuid b/manifest.uuid index dda5fdd11b..362f9ad44d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -320fa69e6aa2a7d67f6444d6c13de9893e27b85c36a933b06da113d753b6aafc \ No newline at end of file +148f8dec9a26f11d343bfeb558fd12ba18d7f5d4d69da58fc8aa22f88e13c408 \ No newline at end of file diff --git a/src/os_unix.c b/src/os_unix.c index 0dd461da0a..066ed7856d 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -178,6 +178,17 @@ */ #define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY)) +/* +** Are OFD locks supported? +*/ +#if defined(F_OFD_SETLK) && defined(F_OFD_GETLK) +# define HAVE_OFD_LOCKS 1 +#else +# define HAVE_OFD_LOCKS 0 +# define F_OFD_SETLK 0 /* Fake value so we can use the identifier */ +# define F_OFD_GETLK 0 /* Fake value so we can use the identifier */ +#endif + /* Forward references */ typedef struct unixShm unixShm; /* Connection shared memory */ typedef struct unixShmNode unixShmNode; /* Shared memory instance */ @@ -214,6 +225,7 @@ struct unixFile { const char *zPath; /* Name of the file */ unixShm *pShm; /* Shared memory segment information */ int szChunk; /* Configured by FCNTL_CHUNK_SIZE */ + int eGetLk, eSetLk; #if SQLITE_MAX_MMAP_SIZE>0 int nFetchOut; /* Number of outstanding xFetch refs */ sqlite3_int64 mmapSize; /* Usable size of mapping at pMapRegion */ @@ -748,9 +760,9 @@ static int lockTrace(int fd, int op, struct flock *p){ char *zOpName, *zType; int s; int savedErrno; - if( op==F_GETLK ){ + if( op==F_GETLK || op==F_OFD_GETLK ){ zOpName = "GETLK"; - }else if( op==F_SETLK ){ + }else if( op==F_SETLK || op==F_OFD_SETLK ){ zOpName = "SETLK"; }else{ s = osFcntl(fd, op, p); @@ -772,7 +784,10 @@ static int lockTrace(int fd, int op, struct flock *p){ sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n", threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len, (int)p->l_pid, s); - if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){ + if( s==(-1) + && (op==F_SETLK || op==F_OFD_SETLK) + && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) + ){ struct flock l2; l2 = *p; osFcntl(fd, F_GETLK, &l2); @@ -994,11 +1009,12 @@ static void vxworksReleaseFileId(struct vxworksFileId *pId){ *************************** Posix Advisory Locking **************************** ** ** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996) -** section 6.5.2.2 lines 483 through 490 specify that when a process +** section 6.5.2.2 lines 483 through 490 says that when a process ** sets or clears a lock, that operation overrides any prior locks set -** by the same process. It does not explicitly say so, but this implies -** that it overrides locks set by the same process using a different -** file descriptor. Consider this test case: +** by the *same process*. That means that if two different threads +** open the same file using different file descriptors, then POSIX +** advisory locking will not work to coordinate access between those two +** threads. Consider this test case: ** ** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644); ** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644); @@ -1006,11 +1022,12 @@ static void vxworksReleaseFileId(struct vxworksFileId *pId){ ** Suppose ./file1 and ./file2 are really the same file (because ** one is a hard or symbolic link to the other) then if you set ** an exclusive lock on fd1, then try to get an exclusive lock -** on fd2, it works. I would have expected the second lock to +** on fd2, it works. In a reasonable system, the second lock would ** fail since there was already a lock on the file due to fd1. -** But not so. Since both locks came from the same process, the -** second overrides the first, even though they were on different -** file descriptors opened on different file names. +** But this is not the case in POSIX advisory locking. Since both +** locks came from the same process, the second overrides the first, +** even though they were on different file descriptors opened on +** different file names. ** ** This means that we cannot use POSIX locks to synchronize file access ** among competing threads of the same process. POSIX locks will work fine @@ -1030,21 +1047,17 @@ static void vxworksReleaseFileId(struct vxworksFileId *pId){ ** For VxWorks, we have to use the alternative unique ID system based on ** canonical filename and implemented in the previous division.) ** -** The sqlite3_file structure for POSIX is no longer just an integer file -** descriptor. It is now a structure that holds the integer file -** descriptor and a pointer to a structure that describes the internal -** locks on the corresponding inode. There is one locking structure -** per inode, so if the same inode is opened twice, both unixFile structures -** point to the same locking structure. The locking structure keeps -** a reference count (so we will know when to delete it) and a "cnt" -** field that tells us its internal lock status. cnt==0 means the -** file is unlocked. cnt==-1 means the file has an exclusive lock. -** cnt>0 means there are cnt shared locks on the file. -** -** Any attempt to lock or unlock a file first checks the locking -** structure. The fcntl() system call is only invoked to set a -** POSIX lock if the internal lock structure transitions between -** a locked and an unlocked state. +** The sqlite3_file object for POSIX (a.k.a. the unixFile object) is more +** than just an integer file descriptor. It also holds a pointer +** a pointer to another object (unixInodeInfo) that describes the +** internal locks on the corresponding inode. There is one +** unixInodeInfo object per inode, so if the same inode is opened twice, +** both unixFile objects point to the same lunixInodeInfo. The unixInodeInfo +** keeps a reference count (nRef) so we will know when to delete it. +** +** Any attempt to lock or unlock a unixFile first checks the unixInodeInfo. +** The fcntl() system call is only invoked to set a POSIX lock if the +** unixInodeInfo transitions between a locked and an unlocked state. ** ** But wait: there are yet more problems with POSIX advisory locks. ** @@ -1052,14 +1065,14 @@ static void vxworksReleaseFileId(struct vxworksFileId *pId){ ** all locks on that file that are owned by the current process are ** released. To work around this problem, each unixInodeInfo object ** maintains a count of the number of pending locks on tha inode. -** When an attempt is made to close an unixFile, if there are -** other unixFile open on the same inode that are holding locks, the call +** When an attempt is made to close an unixFile, if there are other +** unixFile objcts open on the same inode that are holding locks, the call ** to close() the file descriptor is deferred until all of the locks clear. ** The unixInodeInfo structure keeps a list of file descriptors that need to ** be closed and that list is walked (and cleared) when the last lock ** clears. ** -** Yet another problem: LinuxThreads do not play well with posix locks. +** LinuxThreads: ** ** Many older versions of linux use the LinuxThreads library which is ** not posix compliant. Under LinuxThreads, a lock created by thread @@ -1078,6 +1091,26 @@ static void vxworksReleaseFileId(struct vxworksFileId *pId){ ** LinuxThreads provided that (1) there is no more than one connection ** per database file in the same process and (2) database connections ** do not move across threads. +** +** OFD Locks: +** +** Recent unix-like OSes have added support for Open File Description or "OFD" +** locks. (This is not a typo: the name is "Open File Description" not +** "Open File Descriptor". "-ion" not "-or". There is a subtle difference +** between "Discription" and "Descriptor" which is described on the Linux +** fcntl manpage and will not be repeated here.) The main difference +** between OFD locks and POSIX locks is that OFD locks are associated +** with a single open() system call and do not interfere with with file +** descriptors obtained from different open() system calls in the same +** process. In other words, OFD locks fix the brokenness of POSIX locks. +** +** As of 2018-06-19, SQLite will use OFD locks if they are available. +** But the older work-arounds for POSIX locks are still here in the code +** since SQLite also needs to work on systems that do not support +** OFD locks. Someday, perhaps, all unix systems will have reliable +** support for OFD locks, and at that time we can omit the unixInodeInfo +** object and all of its associated complication. But for now we still +** have to support the older POSIX lock work-around hack. */ /* @@ -1452,7 +1485,8 @@ static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ lock.l_start = RESERVED_BYTE; lock.l_len = 1; lock.l_type = F_WRLCK; - if( osFcntl(pFile->h, F_GETLK, &lock) ){ + lock.l_pid = 0; + if( osFcntl(pFile->h, pFile->eGetLk, &lock) ){ rc = SQLITE_IOERR_CHECKRESERVEDLOCK; storeLastErrno(pFile, errno); } else if( lock.l_type!=F_UNLCK ){ @@ -1482,14 +1516,15 @@ static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){ ** attempt to set the lock. */ #ifndef SQLITE_ENABLE_SETLK_TIMEOUT -# define osSetPosixAdvisoryLock(h,x,t) osFcntl(h,F_SETLK,x) +# define osSetAdvisoryLock(h,e,x,t) osFcntl(h,e,x) #else -static int osSetPosixAdvisoryLock( +static int osSetAdvisoryLock( int h, /* The file descriptor on which to take the lock */ + int eSetLk, /* ioctl verb for setting the lock */ struct flock *pLock, /* The description of the lock */ unixFile *pFile /* Structure holding timeout value */ ){ - int rc = osFcntl(h,F_SETLK,pLock); + int rc = osFcntl(h,eSetLk,pLock); while( rc<0 && pFile->iBusyTimeout>0 ){ /* On systems that support some kind of blocking file lock with a timeout, ** make appropriate changes here to invoke that blocking file lock. On @@ -1497,7 +1532,7 @@ static int osSetPosixAdvisoryLock( ** lock once every millisecond until either the timeout expires, or until ** the lock is obtained. */ usleep(1000); - rc = osFcntl(h,F_SETLK,pLock); + rc = osFcntl(h,eSetLk,pLock); pFile->iBusyTimeout--; } return rc; @@ -1537,7 +1572,8 @@ static int unixFileLock(unixFile *pFile, struct flock *pLock){ lock.l_start = SHARED_FIRST; lock.l_len = SHARED_SIZE; lock.l_type = F_WRLCK; - rc = osSetPosixAdvisoryLock(pFile->h, &lock, pFile); + lock.l_pid = 0; + rc = osSetAdvisoryLock(pFile->h, pFile->eSetLk, &lock, pFile); if( rc<0 ) return rc; pInode->bProcessLock = 1; pInode->nLock++; @@ -1545,7 +1581,8 @@ static int unixFileLock(unixFile *pFile, struct flock *pLock){ rc = 0; } }else{ - rc = osSetPosixAdvisoryLock(pFile->h, pLock, pFile); + pLock->l_pid = 0; + rc = osSetAdvisoryLock(pFile->h, pFile->eSetLk, pLock, pFile); } return rc; } @@ -1664,8 +1701,10 @@ static int unixLock(sqlite3_file *id, int eFileLock){ ** has a SHARED or RESERVED lock, then increment reference counts and ** return SQLITE_OK. */ - if( eFileLock==SHARED_LOCK && - (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) ){ + if( eFileLock==SHARED_LOCK + && pFile->eGetLk==F_GETLK + && (pInode->eFileLock==SHARED_LOCK || pInode->eFileLock==RESERVED_LOCK) + ){ assert( eFileLock==SHARED_LOCK ); assert( pFile->eFileLock==0 ); assert( pInode->nShared>0 ); @@ -3911,6 +3950,15 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; } #endif + case SQLITE_FCNTL_OFD_LOCKS: { + int x = *(int*)pArg; + if( x==0 ){ + pFile->eSetLk = F_SETLK; + pFile->eGetLk = F_GETLK; + } + *(int*)pArg = pFile->eSetLk==F_OFD_SETLK; + return SQLITE_OK; + } #if SQLITE_MAX_MMAP_SIZE>0 case SQLITE_FCNTL_MMAP_SIZE: { i64 newLimit = *(i64*)pArg; @@ -4230,7 +4278,8 @@ static int unixShmSystemLock( f.l_whence = SEEK_SET; f.l_start = ofst; f.l_len = n; - rc = osSetPosixAdvisoryLock(pShmNode->h, &f, pFile); + f.l_pid = 0; + rc = osSetAdvisoryLock(pShmNode->h, pFile->eSetLk, &f, pFile); rc = (rc!=(-1)) ? SQLITE_OK : SQLITE_BUSY; } @@ -4355,7 +4404,7 @@ static int unixLockSharedMemory(unixFile *pDbFd, unixShmNode *pShmNode){ lock.l_start = UNIX_SHM_DMS; lock.l_len = 1; lock.l_type = F_WRLCK; - if( osFcntl(pShmNode->h, F_GETLK, &lock)!=0 ) { + if( osFcntl(pShmNode->h, pDbFd->eGetLk, &lock)!=0 ) { rc = SQLITE_IOERR_LOCK; }else if( lock.l_type==F_UNLCK ){ if( pShmNode->isReadonly ){ @@ -5412,6 +5461,22 @@ static int fillInUnixFile( OSTRACE(("OPEN %-3d %s\n", h, zFilename)); pNew->h = h; + pNew->eSetLk = F_SETLK; + pNew->eGetLk = F_GETLK; +#if HAVE_OFD_LOCKS + { + struct flock lock; + lock.l_whence = SEEK_SET; + lock.l_start = RESERVED_BYTE; + lock.l_len = 1; + lock.l_type = F_WRLCK; + lock.l_pid = 0; + if( osFcntl(h, F_OFD_GETLK, &lock)==0 ){ + pNew->eSetLk = F_OFD_SETLK; + pNew->eGetLk = F_OFD_GETLK; + } + } +#endif pNew->pVfs = pVfs; pNew->zPath = zFilename; pNew->ctrlFlags = (u8)ctrlFlags; diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 7d664177e4..cf2e6e14f6 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -1073,6 +1073,17 @@ struct sqlite3_io_methods { ** a file lock using the xLock or xShmLock methods of the VFS to wait ** for up to M milliseconds before failing, where M is the single ** unsigned integer parameter. +** +**
  • [[SQLITE_FCNTL_OFD_LOCKS]] +** The [SQLITE_FCNTL_OFD_LOCKS] opcode will query whether or not OFD +** locking is currently being used for an open file, or disable the use +** of OFD locking on the file. The argument is a pointer to an integer +** in the callers context. If that integer is initially -1, then it is +** set to 1 or 0 if the system is or is not using OFD locks for the file. +** If the integer is initially 0, then OFD locks are disabled for the file. +** This file-control is intended for testing and validation use only. +** Applications that strive for correctness and error-free operation should +** not mess with this file-control. ** */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -1108,6 +1119,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_COMMIT_ATOMIC_WRITE 32 #define SQLITE_FCNTL_ROLLBACK_ATOMIC_WRITE 33 #define SQLITE_FCNTL_LOCK_TIMEOUT 34 +#define SQLITE_FCNTL_OFD_LOCKS 35 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE diff --git a/src/test1.c b/src/test1.c index f2511d259a..63920bd26a 100644 --- a/src/test1.c +++ b/src/test1.c @@ -5963,6 +5963,40 @@ static int SQLITE_TCLAPI file_control_persist_wal( return TCL_OK; } +/* +** tclcmd: file_control_ofd_locks DB ?DISABLE? +** +** Run sqlite3_file_control() to query the OFD lock capability. Return +** true if OFD locks are available and false if not. +** +** If the DISABLE argument is true, then disable OFD locking, if it is +** enabled. The returned value will show that OFD locks are disabled. +*/ +static int SQLITE_TCLAPI file_control_ofd_locks( + ClientData clientData, /* Pointer to sqlite3_enable_XXX function */ + Tcl_Interp *interp, /* The TCL interpreter that invoked this command */ + int objc, /* Number of arguments */ + Tcl_Obj *CONST objv[] /* Command arguments */ +){ + sqlite3 *db; + int rc; + int b = 0; + + if( objc!=2 && objc!=3 ){ + Tcl_AppendResult(interp, "wrong # args: should be \"", + Tcl_GetStringFromObj(objv[0], 0), " DB ?DISABLE?", 0); + return TCL_ERROR; + } + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){ + return TCL_ERROR; + } + if( objc==3 && Tcl_GetIntFromObj(interp, objv[2], &b) ) return TCL_ERROR; + b = b ? 0 : -1; + rc = sqlite3_file_control(db,NULL,SQLITE_FCNTL_OFD_LOCKS,(void*)&b); + Tcl_AppendResult(interp, (rc==SQLITE_OK && b) ? "1" : "0", (char*)0); + return TCL_OK; +} + /* ** tclcmd: file_control_powersafe_overwrite DB PSOW-FLAG ** @@ -5995,7 +6029,6 @@ static int SQLITE_TCLAPI file_control_powersafe_overwrite( return TCL_OK; } - /* ** tclcmd: file_control_vfsname DB ?AUXDB? ** @@ -7705,6 +7738,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ { "file_control_win32_set_handle", file_control_win32_set_handle, 0 }, #endif { "file_control_persist_wal", file_control_persist_wal, 0 }, + { "file_control_ofd_locks", file_control_ofd_locks, 0 }, { "file_control_powersafe_overwrite",file_control_powersafe_overwrite,0}, { "file_control_vfsname", file_control_vfsname, 0 }, { "file_control_tempfilename", file_control_tempfilename, 0 }, -- 2.47.2