**
******************************************************************************
**
-** This file contains code that is specific to Unix systems.
+** This file contains the VFS implementation for unix-like operating systems
+** including Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
**
-** $Id: os_unix.c,v 1.221 2008/11/25 12:07:41 danielk1977 Exp $
+** There are actually several different VFS implementations in this file.
+** The differences are in the way that file locking is done. The default
+** implementation uses Posix Advisory Locks. Alternative implementations
+** use flock(), dot-files, various proprietary locking schemas, or simply
+** skip locking all together.
+**
+** This source file is grouped into divisions where the logic for various
+** subfunctions is contained within the appropriate division. PLEASE
+** KEEP THE STRUCTURE OF THIS FILE INTACT. New code should be placed
+** in the correct division and should be clearly labeled.
+**
+** The current set of divisions is as follows:
+**
+** * General-purpose declarations and utility functions.
+** * Unique file ID logic used by VxWorks.
+** * Various locking primitive implementations:
+** + for Posix Advisory Locks
+** + for no-op locks
+** + for dot-file locks
+** + for flock() locking
+** + for named semaphore locks (VxWorks only)
+** + for AFP filesystem locks (MacOSX only)
+** + for proxy locks (MacOSX only)
+** * The routine used to detect an appropriate locking style
+** * sqlite3_file methods not associated with locking
+** * Implementations of sqlite3_os_init() and sqlite3_os_end()
+**
+** $Id: os_unix.c,v 1.222 2008/11/28 15:37:20 drh Exp $
*/
#include "sqliteInt.h"
#if SQLITE_OS_UNIX /* This file is used on unix only */
/*
-** If SQLITE_ENABLE_LOCKING_STYLE is defined and is non-zero, then several
-** alternative locking implementations are provided:
+** This module implements the following locking styles:
**
-** * POSIX locking (the default),
-** * No locking,
-** * Dot-file locking,
-** * flock() locking,
-** * AFP locking (OSX only),
-** * Named POSIX semaphores (VXWorks only),
-** * proxy locking.
+** 1. POSIX locking (the default),
+** 2. No locking,
+** 3. Dot-file locking,
+** 4. flock() locking,
+** 5. AFP locking (OSX only),
+** 6. Named POSIX semaphores (VXWorks only),
+** 7. proxy locking. (OSX only)
+**
+** Styles 4, 5, and 7 are only available if SQLITE_ENABLE_LOCKING_STYLE
+** is defined to 1. The SQLITE_ENABLE_LOCKING_STYLE also enables automatic
+** selection of the appropriate locking style based on the filesystem
+** where the database is located.
**
** SQLITE_ENABLE_LOCKING_STYLE only works on a Mac. It is turned on by
** default on a Mac and disabled on all other posix platforms.
*/
#define MAX_PATHNAME 512
+/*
+** The locking styles are associated with the different file locking
+** capabilities supported by different file systems.
+**
+** POSIX support for shared and exclusive byte-range locks
+**
+** NONE no locking will be attempted, this is only used for
+** read-only file systems currently
+**
+** DOTLOCK isn't a true locking style, it refers to the use of a special
+** file named the same as the database file with a '.lock'
+** extension, this can be used on file systems that do not
+** offer any reliable file locking
+**
+** FLOCK only a single file-global exclusive lock (Not on VxWorks)
+**
+** NAMEDSEM similar to DOTLOCK but uses a named semaphore instead of an
+** indicator file. (VxWorks only)
+**
+** AFP support exclusive byte-range locks (MacOSX only)
+**
+** PROXY uses a second file to represent the lock state of the database
+** file which is never actually locked, a third file controls
+** access to the proxy (MacOSX only)
+**
+** Note that because FLOCK and NAMEDSEM are never used together, they
+** share the same code number (4). The locking mode numbering is
+** chosen so that the set of locking modes forms a contiguous range of
+** integers from 1 to N. On generic unix systems without flock() support,
+** the modes are 1..3. On generic unix with flock() support, the modes
+** are 1..4. On VxWorks, the modes are 1..4. On MacOSX the modes
+** are 1..6.
+*/
+#define LOCKING_STYLE_POSIX 1 /* Shared and exclusive byte-range locks */
+#define LOCKING_STYLE_NONE 2 /* No locking is attempted */
+#define LOCKING_STYLE_DOTFILE 3 /* Uses a separate '.lock' file */
+#define LOCKING_STYLE_FLOCK 4 /* Single file-global exclusive lock */
+#define LOCKING_STYLE_NAMEDSEM 4 /* Named semaphore; same value as FLOCK */
+#define LOCKING_STYLE_AFP 5 /* AFP byte-range locks (MacOSX only) */
+#define LOCKING_STYLE_PROXY 6 /* Lock state kept in a second file */
+
+#define LOCKING_STYLE_AUTOMATIC 0 /* Choose lock style automatically */
+
+/*
+** Only set the lastErrno if the error code is a real error and not
+** a normal expected return code of SQLITE_BUSY or SQLITE_OK.
+**
+** The argument is parenthesized at each use so that compound expressions
+** (for example "a | b" or a conditional expression) expand as intended.
+** The argument is evaluated twice, so it must not have side effects.
+*/
+#define IS_LOCK_ERROR(x) (((x) != SQLITE_OK) && ((x) != SQLITE_BUSY))
+
/*
** The unixFile structure is subclass of sqlite3_file specific for the unix
int dirfd; /* File descriptor for the directory */
unsigned char locktype; /* The type of lock held on this fd */
int lastErrno; /* The unix errno from the last I/O error */
-#if SQLITE_ENABLE_LOCKING_STYLE
void *lockingContext; /* Locking style specific state */
- int oflags; /* The flags specified at open */
-#endif
-#if SQLITE_THREADSAFE
+ int openFlags; /* The flags specified at open */
+#if SQLITE_THREADSAFE && defined(__linux__)
pthread_t tid; /* The thread that "owns" this unixFile */
#endif
#if OS_VXWORKS
sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
}
-/************************************************************************
-*********** Posix Advisory Locking And Thread Interaction ***************
-*************************************************************************
-**
-** Here is the dirt on POSIX advisory locks: ANSI STD 1003.1 (1996)
-** section 6.5.2.2 lines 483 through 490 specify that when a process
-** sets or clears a lock, that operation overrides any prior locks set
-** by the same process. It does not explicitly say so, but this implies
-** that it overrides locks set by the same process using a different
-** file descriptor. Consider this test case:
-**
-** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
-** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
-**
-** Suppose ./file1 and ./file2 are really the same file (because
-** one is a hard or symbolic link to the other) then if you set
-** an exclusive lock on fd1, then try to get an exclusive lock
-** on fd2, it works. I would have expected the second lock to
-** fail since there was already a lock on the file due to fd1.
-** But not so. Since both locks came from the same process, the
-** second overrides the first, even though they were on different
-** file descriptors opened on different file names.
-**
-** Bummer. If you ask me, this is broken. Badly broken. It means
-** that we cannot use POSIX locks to synchronize file access among
-** competing threads of the same process. POSIX locks will work fine
-** to synchronize access for threads in separate processes, but not
-** threads within the same process.
-**
-** To work around the problem, SQLite has to manage file locks internally
-** on its own. Whenever a new database is opened, we have to find the
-** specific inode of the database file (the inode is determined by the
-** st_dev and st_ino fields of the stat structure that fstat() fills in)
-** and check for locks already existing on that inode. When locks are
-** created or removed, we have to look at our own internal record of the
-** locks to see if another thread has previously set a lock on that same
-** inode.
-**
-** The sqlite3_file structure for POSIX is no longer just an integer file
-** descriptor. It is now a structure that holds the integer file
-** descriptor and a pointer to a structure that describes the internal
-** locks on the corresponding inode. There is one locking structure
-** per inode, so if the same inode is opened twice, both unixFile structures
-** point to the same locking structure. The locking structure keeps
-** a reference count (so we will know when to delete it) and a "cnt"
-** field that tells us its internal lock status. cnt==0 means the
-** file is unlocked. cnt==-1 means the file has an exclusive lock.
-** cnt>0 means there are cnt shared locks on the file.
-**
-** Any attempt to lock or unlock a file first checks the locking
-** structure. The fcntl() system call is only invoked to set a
-** POSIX lock if the internal lock structure transitions between
-** a locked and an unlocked state.
-**
-** 2004-Jan-11:
-** More recent discoveries about POSIX advisory locks. (The more
-** I discover, the more I realize the a POSIX advisory locks are
-** an abomination.)
-**
-** If you close a file descriptor that points to a file that has locks,
-** all locks on that file that are owned by the current process are
-** released. To work around this problem, each unixFile structure contains
-** a pointer to an unixOpenCnt structure. There is one unixOpenCnt structure
-** per open inode, which means that multiple unixFile can point to a single
-** unixOpenCnt. When an attempt is made to close an unixFile, if there are
-** other unixFile open on the same inode that are holding locks, the call
-** to close() the file descriptor is deferred until all of the locks clear.
-** The unixOpenCnt structure keeps a list of file descriptors that need to
-** be closed and that list is walked (and cleared) when the last lock
-** clears.
-**
-** First, under Linux threads, because each thread has a separate
-** process ID, lock operations in one thread do not override locks
-** to the same file in other threads. Linux threads behave like
-** separate processes in this respect. But, if you close a file
-** descriptor in linux threads, all locks are cleared, even locks
-** on other threads and even though the other threads have different
-** process IDs. Linux threads is inconsistent in this respect.
-** (I'm beginning to think that linux threads is an abomination too.)
-** The consequence of this all is that the hash table for the unixLockInfo
-** structure has to include the process id as part of its key because
-** locks in different threads are treated as distinct. But the
-** unixOpenCnt structure should not include the process id in its
-** key because close() clears lock on all threads, not just the current
-** thread. Were it not for this goofiness in linux threads, we could
-** combine the unixLockInfo and unixOpenCnt structures into a single structure.
-**
-** 2004-Jun-28:
-** On some versions of linux, threads can override each others locks.
-** On others not. Sometimes you can change the behavior on the same
-** system by setting the LD_ASSUME_KERNEL environment variable. The
-** POSIX standard is silent as to which behavior is correct, as far
-** as I can tell, so other versions of unix might show the same
-** inconsistency. There is no little doubt in my mind that posix
-** advisory locks and linux threads are profoundly broken.
-**
-** To work around the inconsistencies, we have to test at runtime
-** whether or not threads can override each others locks. This test
-** is run once, the first time any lock is attempted. A static
-** variable is set to record the results of this test for future
-** use.
-*/
-
-/*
-** Set or check the unixFile.tid field. This field is set when an unixFile
-** is first opened. All subsequent uses of the unixFile verify that the
-** same thread is operating on the unixFile. Some operating systems do
-** not allow locks to be overridden by other threads and that restriction
-** means that sqlite3* database handles cannot be moved from one thread
-** to another. This logic makes sure a user does not try to do that
-** by mistake.
-**
-** Version 3.3.1 (2006-01-15): unixFile can be moved from one thread to
-** another as long as we are running on a system that supports threads
-** overriding each others locks (which now the most common behavior)
-** or if no locks are held. But the unixFile.pLock field needs to be
-** recomputed because its key includes the thread-id. See the
-** transferOwnership() function below for additional information
-*/
-#if SQLITE_THREADSAFE
-# define SET_THREADID(X) (X)->tid = pthread_self()
-# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
- !pthread_equal((X)->tid, pthread_self()))
-#else
-# define SET_THREADID(X)
-# define CHECK_THREADID(X) 0
-#endif
+#ifdef SQLITE_DEBUG
/*
-** An instance of the following structure serves as the key used
-** to locate a particular unixOpenCnt structure given its inode. This
-** is the same as the unixLockKey except that the thread ID is omitted.
+** Helper function for printing out trace information from debugging
+** binaries. This returns the string representation of the supplied
+** integer lock-type.
*/
-struct unixFileId {
- dev_t dev; /* Device number */
-#if OS_VXWORKS
- struct vxworksFileId *pId; /* Unique file ID for vxworks. */
-#else
- ino_t ino; /* Inode number */
+static const char *locktypeName(int locktype){
+  /* Start from the fallback name and overwrite it when the lock type
+  ** is one of the recognized values. */
+  const char *zName = "ERROR";
+  switch( locktype ){
+    case NO_LOCK:        zName = "NONE";      break;
+    case SHARED_LOCK:    zName = "SHARED";    break;
+    case RESERVED_LOCK:  zName = "RESERVED";  break;
+    case PENDING_LOCK:   zName = "PENDING";   break;
+    case EXCLUSIVE_LOCK: zName = "EXCLUSIVE"; break;
+    default:             break;
+  }
+  return zName;
+}
#endif
-};
+#ifdef SQLITE_LOCK_TRACE
/*
-** An instance of the following structure serves as the key used
-** to locate a particular unixLockInfo structure given its inode.
+** Print out information about all locking operations.
**
-** If threads cannot override each others locks, then we set the
-** unixLockKey.tid field to the thread ID. If threads can override
-** each others locks then tid is always set to zero. tid is omitted
-** if we compile without threading support.
+** This routine is used for troubleshooting locks on multithreaded
+** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
+** command-line option on the compiler. This code is normally
+** turned off.
*/
-struct unixLockKey {
- struct unixFileId fid; /* Unique identifier for the file */
-#if SQLITE_THREADSAFE
- pthread_t tid; /* Thread ID or zero if threads can override each other */
-#endif
-};
+/* Return a printable name for the l_type field of a struct flock. */
+static const char *lockTraceTypeName(int lockType){
+  if( lockType==F_RDLCK ) return "RDLCK";
+  if( lockType==F_WRLCK ) return "WRLCK";
+  if( lockType==F_UNLCK ) return "UNLCK";
+  assert( 0 );
+  return "?";  /* Reached only when assert() is compiled out */
+}
+
+/*
+** Invoke fcntl(fd, op, p), print a trace line describing the call, and
+** return the fcntl() result. The errno value left by that fcntl() call
+** is restored before returning so the trace output cannot disturb it.
+*/
+static int lockTrace(int fd, int op, struct flock *p){
+  const char *zOpName;   /* Printable name of op */
+  const char *zType;     /* Printable name of the lock type */
+  int s;                 /* Result of the traced fcntl() call */
+  int savedErrno;        /* errno immediately after the traced call */
+  if( op==F_GETLK ){
+    zOpName = "GETLK";
+  }else if( op==F_SETLK ){
+    zOpName = "SETLK";
+  }else{
+    /* Not a lock query or request: trace it without decoding *p */
+    s = fcntl(fd, op, p);
+    savedErrno = errno;
+    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
+    errno = savedErrno;
+    return s;
+  }
+  zType = lockTraceTypeName(p->l_type);
+  assert( p->l_whence==SEEK_SET );
+  s = fcntl(fd, op, p);
+  savedErrno = errno;
+  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
+     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
+     (int)p->l_pid, s);
+  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
+    /* The lock request failed: query with F_GETLK and report the result */
+    struct flock l2;
+    l2 = *p;
+    fcntl(fd, F_GETLK, &l2);
+    zType = lockTraceTypeName(l2.l_type);
+    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
+       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
+  }
+  errno = savedErrno;
+  return s;
+}
+#define fcntl lockTrace
+#endif /* SQLITE_LOCK_TRACE */
-/*
-** An instance of the following structure is allocated for each open
-** inode on each thread with a different process ID. (Threads have
-** different process IDs on linux, but not on most other unixes.)
-**
-** A single inode can have multiple file descriptors, so each unixFile
-** structure contains a pointer to an instance of this object and this
-** object keeps a count of the number of unixFile pointing to it.
-*/
-struct unixLockInfo {
- struct unixLockKey lockKey; /* The lookup key */
- int cnt; /* Number of SHARED locks held */
- int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
- int nRef; /* Number of pointers to this structure */
- struct unixLockInfo *pNext; /* List of all unixLockInfo objects */
- struct unixLockInfo *pPrev; /* .... doubly linked */
-};
-/*
-** An instance of the following structure is allocated for each open
-** inode. This structure keeps track of the number of locks on that
-** inode. If a close is attempted against an inode that is holding
-** locks, the close is deferred until all locks clear by adding the
-** file descriptor to be closed to the pending list.
-*/
-struct unixOpenCnt {
- struct unixFileId fileId; /* The lookup key */
- int nRef; /* Number of pointers to this structure */
- int nLock; /* Number of outstanding locks */
- int nPending; /* Number of pending close() operations */
- int *aPending; /* Malloced space holding fd's awaiting a close() */
-#if OS_VXWORKS
- sem_t *pSem; /* Named POSIX semaphore */
- char aSemName[MAX_PATHNAME+1]; /* Name of that semaphore */
-#endif
- struct unixOpenCnt *pNext, *pPrev; /* List of all unixOpenCnt objects */
-};
/*
-** List of all unixLockInfo and unixOpenCnt objects. This used to be a hash
-** table. But the number of objects is rarely more than a dozen and
-** never exceeds a few thousand. And lookup is not on a critical
-** path so a simple linked list will suffice.
+** This routine translates a standard POSIX errno code into something
+** useful to the clients of the sqlite3 functions. Specifically, it is
+** intended to translate a variety of "try again" errors into SQLITE_BUSY
+** and a variety of "please close the file descriptor NOW" errors into
+** SQLITE_IOERR
+**
+** Errors during initialization of locks, or file system support for locks,
+** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
*/
-static struct unixLockInfo *lockList = 0;
-static struct unixOpenCnt *openList = 0;
+/*
+** posixError is the errno value to translate. sqliteIOErr is the code
+** returned for every errno that is not mapped to something more specific
+** below; it also decides how EACCES is treated.
+*/
+static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
+ switch (posixError) {
+ case 0:
+ return SQLITE_OK;
+
+ case EAGAIN:
+ case ETIMEDOUT:
+ case EBUSY:
+ case EINTR:
+ case ENOLCK:
+ /* random NFS retry error, unless during file system support
+ * introspection, in which it actually means what it says */
+ return SQLITE_BUSY;
+
+ case EACCES:
+ /* EACCES is like EAGAIN during locking operations, but not any other time*/
+ if( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
+ (sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
+ (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
+ (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
+ return SQLITE_BUSY;
+ }
+ /* else fall through */
+ /* Outside of locking operations EACCES is reported like EPERM */
+ case EPERM:
+ return SQLITE_PERM;
+
+ case EDEADLK:
+ return SQLITE_IOERR_BLOCKED;
+
+ /* Every case from here down shares the "return sqliteIOErr" at the
+ * bottom of the switch */
+#if EOPNOTSUPP!=ENOTSUP
+ case EOPNOTSUPP:
+ /* something went terribly awry, unless during file system support
+ * introspection, in which it actually means what it says */
+#endif
+#ifdef ENOTSUP
+ case ENOTSUP:
+ /* invalid fd, unless during file system support introspection, in which
+ * it actually means what it says */
+#endif
+ case EIO:
+ case EBADF:
+ case EINVAL:
+ case ENOTCONN:
+ case ENODEV:
+ case ENXIO:
+ case ENOENT:
+ case ESTALE:
+ case ENOSYS:
+ /* these should force the client to close the file and reconnect */
+
+ /* Any errno not listed above is also reported as sqliteIOErr */
+ default:
+ return sqliteIOErr;
+ }
+}
-/*
-** This variable records whether or not threads can override each others
-** locks.
+
+
+/******************************************************************************
+****************** Begin Unique File ID Utility Used By VxWorks ***************
**
-** 0: No. Threads cannot override each others locks.
-** 1: Yes. Threads can override each others locks.
-** -1: We don't know yet.
+** On most versions of unix, we can get a unique ID for a file by concatenating
+** the device number and the inode number. But this does not work on VxWorks.
+** On VxWorks, a unique file id must be based on the canonical filename.
**
-** On some systems, we know at compile-time if threads can override each
-** others locks. On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
-** will be set appropriately. On other systems, we have to check at
-** runtime. On these latter systems, SQLTIE_THREAD_OVERRIDE_LOCK is
-** undefined.
+** A pointer to an instance of the following structure can be used as a
+** unique file ID in VxWorks. Each instance of this structure contains
+** a copy of the canonical filename. There is also a reference count.
+** The structure is reclaimed when the number of pointers to it drops to
+** zero.
**
-** This variable normally has file scope only. But during testing, we make
-** it a global so that the test code can change its value in order to verify
-** that the right stuff happens in either case.
+** There are never very many files open at one time and lookups are not
+** a performance-critical path, so it is sufficient to put these
+** structures on a linked list.
*/
-#ifndef SQLITE_THREAD_OVERRIDE_LOCK
-# define SQLITE_THREAD_OVERRIDE_LOCK -1
-#endif
-#ifdef SQLITE_TEST
-int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
-#else
-static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
-#endif
+struct vxworksFileId {
+ struct vxworksFileId *pNext; /* Next in a list of them all */
+ int nRef; /* Number of references to this one */
+ int nName; /* Length of the zCanonicalName[] string, not counting the terminator */
+ char *zCanonicalName; /* Canonical filename; vxworksFindFileId() points this
+ ** at the bytes immediately following the structure
+ ** in the same allocation */
+};
-/*
-** This structure holds information passed into individual test
-** threads by the testThreadLockingBehavior() routine.
+#if OS_VXWORKS
+/*
+** All unique filenames are held on a linked list headed by this
+** variable:
*/
-struct threadTestData {
- int fd; /* File to be locked */
- struct flock lock; /* The locking operation */
- int result; /* Result of the locking operation */
-};
+static struct vxworksFileId *vxworksFileList = 0; /* Read and modified between unixEnterMutex() and unixLeaveMutex() */
-#if SQLITE_THREADSAFE && defined(__linux__)
/*
-** This function is used as the main routine for a thread launched by
-** testThreadLockingBehavior(). It tests whether the shared-lock obtained
-** by the main thread in testThreadLockingBehavior() conflicts with a
-** hypothetical write-lock obtained by this thread on the same file.
+** Simplify a filename into its canonical form
+** by making the following changes:
**
-** The write-lock is not actually acquired, as this is not possible if
-** the file is open in read-only mode (see ticket #3472).
-*/
-static void *threadLockingTest(void *pArg){
- struct threadTestData *pData = (struct threadTestData*)pArg;
- pData->result = fcntl(pData->fd, F_GETLK, &pData->lock);
- return pArg;
+** * removing any trailing and duplicate /
+** * removing /./
+** * removing /A/../
+**
+** Changes are made in-place. Return the new name length.
+**
+** The original filename is in z[0..n-1]. Return the number of
+** characters in the simplified name.
+*/
+static int vxworksSimplifyName(char *z, int n){
+  int i, j;   /* i reads the original name, j writes the simplified name */
+  /* Drop trailing slashes but always keep at least one character */
+  while( n>1 && z[n-1]=='/' ){ n--; }
+  for(i=j=0; i<n; i++){
+    if( z[i]=='/' ){
+      /* Every look-ahead is bounded by n. z[n] may still hold a trailing
+      ** slash that was trimmed above; treating it as a duplicate would
+      ** reduce an all-slash name such as "//" to an empty string. */
+      if( i+1<n && z[i+1]=='/' ) continue;
+      if( i+2<n && z[i+1]=='.' && z[i+2]=='/' ){
+        /* Skip "/." and let the following slash be processed normally */
+        i += 1;
+        continue;
+      }
+      if( i+3<n && z[i+1]=='.' && z[i+2]=='.' && z[i+3]=='/' ){
+        /* "/../": back the output up over the previous path component */
+        while( j>0 && z[j-1]!='/' ){ j--; }
+        if( j>0 ){ j--; }
+        i += 2;
+        continue;
+      }
+    }
+    z[j++] = z[i];
+  }
+  z[j] = 0;
+  return j;
}
-#endif /* SQLITE_THREADSAFE && defined(__linux__) */
-
-#if SQLITE_THREADSAFE && defined(__linux__)
/*
-** This procedure attempts to determine whether or not threads
-** can override each others locks then sets the
-** threadsOverrideEachOthersLocks variable appropriately.
+** Find a unique file ID for the given absolute pathname. Return
+** a pointer to the vxworksFileId object. This pointer is the unique
+** file ID.
+**
+** The nRef field of the vxworksFileId object is incremented before
+** the object is returned. A new vxworksFileId object is created
+** and added to the global list if necessary.
+**
+** If a memory allocation error occurs, return NULL.
*/
-static void testThreadLockingBehavior(int fd_orig){
- int fd;
- int rc;
- struct threadTestData d;
- struct flock l;
- pthread_t t;
+static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
+ struct vxworksFileId *pNew; /* search key and new file ID */
+ struct vxworksFileId *pCandidate; /* For looping over existing file IDs */
+ int n; /* Length of zAbsoluteName string */
- fd = dup(fd_orig);
- if( fd<0 ) return;
- memset(&l, 0, sizeof(l));
- l.l_type = F_RDLCK;
- l.l_len = 1;
- l.l_start = 0;
- l.l_whence = SEEK_SET;
- rc = fcntl(fd_orig, F_SETLK, &l);
- if( rc!=0 ) return;
- memset(&d, 0, sizeof(d));
- d.fd = fd;
- d.lock = l;
- d.lock.l_type = F_WRLCK;
- pthread_create(&t, 0, threadLockingTest, &d);
- pthread_join(t, 0);
- close(fd);
- if( d.result!=0 ) return;
- threadsOverrideEachOthersLocks = (d.lock.l_type==F_UNLCK);
-}
-#else /* if !SQLITE_THREADSAFE || !defined(__linux__) */
-/*
-** On anything other than linux, assume threads override each others locks.
-*/
-static void testThreadLockingBehavior(int fd_orig){
- UNUSED_PARAMETER(fd_orig);
- threadsOverrideEachOthersLocks = 1;
-}
-#endif /* SQLITE_THERADSAFE && defined(__linux__) */
+ assert( zAbsoluteName[0]=='/' );
+ n = strlen(zAbsoluteName);
+ /* A single allocation holds the structure followed by a copy of the
+ ** name and its terminator. The copy is then simplified in place. */
+ pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) );
+ if( pNew==0 ) return 0;
+ pNew->zCanonicalName = (char*)&pNew[1];
+ memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
+ n = vxworksSimplifyName(pNew->zCanonicalName, n);
-/*
-** Release a unixLockInfo structure previously allocated by findLockInfo().
-*/
-static void releaseLockInfo(struct unixLockInfo *pLock){
- if( pLock ){
- pLock->nRef--;
- if( pLock->nRef==0 ){
- if( pLock->pPrev ){
- assert( pLock->pPrev->pNext==pLock );
- pLock->pPrev->pNext = pLock->pNext;
- }else{
- assert( lockList==pLock );
- lockList = pLock->pNext;
- }
- if( pLock->pNext ){
- assert( pLock->pNext->pPrev==pLock );
- pLock->pNext->pPrev = pLock->pPrev;
- }
- sqlite3_free(pLock);
+ /* Search for an existing entry that matches the canonical name.
+ ** If found, increment the reference count and return a pointer to
+ ** the existing file ID.
+ */
+ unixEnterMutex();
+ for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
+ if( pCandidate->nName==n
+ && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
+ ){
+ /* The search key is no longer needed once a match is found */
+ sqlite3_free(pNew);
+ pCandidate->nRef++;
+ unixLeaveMutex();
+ return pCandidate;
}
}
-}
+ /* No match was found. We will make a new file ID */
+ pNew->nRef = 1;
+ pNew->nName = n;
+ pNew->pNext = vxworksFileList;
+ vxworksFileList = pNew;
+ unixLeaveMutex();
+ return pNew;
+}
/*
-** Release a unixOpenCnt structure previously allocated by findLockInfo().
+** Decrement the reference count on a vxworksFileId object. Free
+** the object when the reference count reaches zero.
*/
-static void releaseOpenCnt(struct unixOpenCnt *pOpen){
- if( pOpen ){
- pOpen->nRef--;
- if( pOpen->nRef==0 ){
- if( pOpen->pPrev ){
- assert( pOpen->pPrev->pNext==pOpen );
- pOpen->pPrev->pNext = pOpen->pNext;
- }else{
- assert( openList==pOpen );
- openList = pOpen->pNext;
- }
- if( pOpen->pNext ){
- assert( pOpen->pNext->pPrev==pOpen );
- pOpen->pNext->pPrev = pOpen->pPrev;
- }
- sqlite3_free(pOpen->aPending);
- sqlite3_free(pOpen);
- }
+static void vxworksReleaseFileId(struct vxworksFileId *pId){
+  unixEnterMutex();
+  assert( pId->nRef>0 );
+  if( --pId->nRef==0 ){
+    /* Last reference dropped: locate the link that points at pId,
+    ** splice pId out of the list, then free it. */
+    struct vxworksFileId **ppLink = &vxworksFileList;
+    while( *ppLink && *ppLink!=pId ){
+      ppLink = &(*ppLink)->pNext;
+    }
+    assert( *ppLink==pId );
+    *ppLink = pId->pNext;
+    sqlite3_free(pId);
}
+  unixLeaveMutex();
}
+#endif /* OS_VXWORKS */
+/*************** End of Unique File ID Utility Used By VxWorks ****************
+******************************************************************************/
-/*
-** Given a file descriptor, locate unixLockInfo and unixOpenCnt structures that
-** describes that file descriptor. Create new ones if necessary. The
-** return values might be uninitialized if an error occurs.
+/******************************************************************************
+*************************** Posix Advisory Locking ****************************
**
-** Return an appropriate error code.
+** POSIX advisory locks are broken by design. ANSI STD 1003.1 (1996)
+** section 6.5.2.2 lines 483 through 490 specify that when a process
+** sets or clears a lock, that operation overrides any prior locks set
+** by the same process. It does not explicitly say so, but this implies
+** that it overrides locks set by the same process using a different
+** file descriptor. Consider this test case:
+**
+** int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
+** int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
+**
+** Suppose ./file1 and ./file2 are really the same file (because
+** one is a hard or symbolic link to the other) then if you set
+** an exclusive lock on fd1, then try to get an exclusive lock
+** on fd2, it works. I would have expected the second lock to
+** fail since there was already a lock on the file due to fd1.
+** But not so. Since both locks came from the same process, the
+** second overrides the first, even though they were on different
+** file descriptors opened on different file names.
+**
+** This means that we cannot use POSIX locks to synchronize file access
+** among competing threads of the same process. POSIX locks will work fine
+** to synchronize access for threads in separate processes, but not
+** threads within the same process.
+**
+** To work around the problem, SQLite has to manage file locks internally
+** on its own. Whenever a new database is opened, we have to find the
+** specific inode of the database file (the inode is determined by the
+** st_dev and st_ino fields of the stat structure that fstat() fills in)
+** and check for locks already existing on that inode. When locks are
+** created or removed, we have to look at our own internal record of the
+** locks to see if another thread has previously set a lock on that same
+** inode.
+**
+** The sqlite3_file structure for POSIX is no longer just an integer file
+** descriptor. It is now a structure that holds the integer file
+** descriptor and a pointer to a structure that describes the internal
+** locks on the corresponding inode. There is one locking structure
+** per inode, so if the same inode is opened twice, both unixFile structures
+** point to the same locking structure. The locking structure keeps
+** a reference count (so we will know when to delete it) and a "cnt"
+** field that tells us its internal lock status. cnt==0 means the
+** file is unlocked. cnt==-1 means the file has an exclusive lock.
+** cnt>0 means there are cnt shared locks on the file.
+**
+** Any attempt to lock or unlock a file first checks the locking
+** structure. The fcntl() system call is only invoked to set a
+** POSIX lock if the internal lock structure transitions between
+** a locked and an unlocked state.
+**
+** But wait: there are yet more problems with POSIX advisory locks.
+**
+** If you close a file descriptor that points to a file that has locks,
+** all locks on that file that are owned by the current process are
+** released. To work around this problem, each unixFile structure contains
+** a pointer to an unixOpenCnt structure. There is one unixOpenCnt structure
+** per open inode, which means that multiple unixFile can point to a single
+** unixOpenCnt. When an attempt is made to close an unixFile, if there are
+** other unixFile open on the same inode that are holding locks, the call
+** to close() the file descriptor is deferred until all of the locks clear.
+** The unixOpenCnt structure keeps a list of file descriptors that need to
+** be closed and that list is walked (and cleared) when the last lock
+** clears.
+**
+** Yet another problem with posix locks and threads:
+**
+** Many older versions of linux use the LinuxThreads library which is
+** not posix compliant. Under LinuxThreads, a lock created by thread
+** A cannot be modified or overridden by a different thread B.
+** Only thread A can modify the lock. Locking behavior is correct
+** if the application uses the newer Native Posix Thread Library (NPTL)
+** on linux - with NPTL a lock created by thread A can override locks
+** in thread B. But there is no way to know at compile-time which
+** threading library is being used. So there is no way to know at
+** compile-time whether or not thread A can override locks on thread B.
+** We have to do a run-time check to discover the behavior of the
+** current process.
+**
+** On systems where thread A is unable to modify locks created by
+** thread B, we have to keep track of which thread created each
+** lock. So there is an extra field in the key to the unixLockInfo
+** structure to record this information. And on those systems it
+** is illegal to begin a transaction in one thread and finish it
+** in another. For this latter restriction, there is no work-around.
+** It is a limitation of LinuxThreads.
*/
-static int findLockInfo(
- unixFile *pFile, /* Unix file with file desc used in the key */
- struct unixLockInfo **ppLock, /* Return the unixLockInfo structure here */
- struct unixOpenCnt **ppOpen /* Return the unixOpenCnt structure here */
-){
- int rc; /* System call return code */
- int fd; /* The file descriptor for pFile */
- struct unixLockKey lockKey; /* Lookup key for the unixLockInfo structure */
- struct unixFileId fileId; /* Lookup key for the unixOpenCnt struct */
- struct stat statbuf; /* Low-level file information */
- struct unixLockInfo *pLock; /* Candidate unixLockInfo object */
- struct unixOpenCnt *pOpen; /* Candidate unixOpenCnt object */
- /* Get low-level information about the file that we can used to
- ** create a unique name for the file.
- */
- fd = pFile->h;
- rc = fstat(fd, &statbuf);
- if( rc!=0 ){
- pFile->lastErrno = errno;
-#ifdef EOVERFLOW
- if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
+/*
+** Set or check the unixFile.tid field. This field is set when an unixFile
+** is first opened. All subsequent uses of the unixFile verify that the
+** same thread is operating on the unixFile. Some operating systems do
+** not allow locks to be overridden by other threads and that restriction
+** means that sqlite3* database handles cannot be moved from one thread
+** to another while locks are held.
+**
+** Version 3.3.1 (2006-01-15): unixFile can be moved from one thread to
+** another as long as we are running on a system that supports threads
+** overriding each others locks (which is now the most common behavior)
+** or if no locks are held. But the unixFile.pLock field needs to be
+** recomputed because its key includes the thread-id. See the
+** transferOwnership() function below for additional information
+*/
+#if SQLITE_THREADSAFE && defined(__linux__)
+/* SET_THREADID stores the calling thread's ID in the tid field of X. */
+# define SET_THREADID(X) (X)->tid = pthread_self()
+/* CHECK_THREADID is true when threadsOverrideEachOthersLocks is zero and
+** the calling thread differs from the thread recorded in X->tid. */
+# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
+ !pthread_equal((X)->tid, pthread_self()))
+#else
+/* In other builds nothing is recorded and the check is always false. */
+# define SET_THREADID(X)
+# define CHECK_THREADID(X) 0
#endif
- return SQLITE_IOERR;
- }
-
- /* On OS X on an msdos filesystem, the inode number is reported
- ** incorrectly for zero-size files. See ticket #3260. To work
- ** around this problem (we consider it a bug in OS X, not SQLite)
- ** we always increase the file size to 1 by writing a single byte
- ** prior to accessing the inode number. The one byte written is
- ** an ASCII 'S' character which also happens to be the first byte
- ** in the header of every SQLite database. In this way, if there
- ** is a race condition such that another thread has already populated
- ** the first page of the database, no damage is done.
- */
- if( statbuf.st_size==0 ){
- write(fd, "S", 1);
- rc = fstat(fd, &statbuf);
- if( rc!=0 ){
- pFile->lastErrno = errno;
- return SQLITE_IOERR;
- }
- }
- memset(&lockKey, 0, sizeof(lockKey));
- lockKey.fid.dev = statbuf.st_dev;
+/*
+** An instance of the following structure serves as the key used
+** to locate a particular unixOpenCnt structure given its inode. This
+** is the same as the unixLockKey except that the thread ID is omitted.
+*/
+struct unixFileId {
+ dev_t dev; /* Device number */
#if OS_VXWORKS
- lockKey.fid.pId = pFile->pId;
+ struct vxworksFileId *pId; /* Unique file ID for vxworks. */
#else
- lockKey.fid.ino = statbuf.st_ino;
-#endif
-#if SQLITE_THREADSAFE
- if( threadsOverrideEachOthersLocks<0 ){
- testThreadLockingBehavior(fd);
- }
- lockKey.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
-#endif
- fileId = lockKey.fid;
- if( ppLock!=0 ){
- pLock = lockList;
- while( pLock && memcmp(&lockKey, &pLock->lockKey, sizeof(lockKey)) ){
- pLock = pLock->pNext;
- }
- if( pLock==0 ){
- pLock = sqlite3_malloc( sizeof(*pLock) );
- if( pLock==0 ){
- rc = SQLITE_NOMEM;
- goto exit_findlockinfo;
- }
- pLock->lockKey = lockKey;
- pLock->nRef = 1;
- pLock->cnt = 0;
- pLock->locktype = 0;
- pLock->pNext = lockList;
- pLock->pPrev = 0;
- if( lockList ) lockList->pPrev = pLock;
- lockList = pLock;
- }else{
- pLock->nRef++;
- }
- *ppLock = pLock;
- }
- if( ppOpen!=0 ){
- pOpen = openList;
- while( pOpen && memcmp(&fileId, &pOpen->fileId, sizeof(fileId)) ){
- pOpen = pOpen->pNext;
- }
- if( pOpen==0 ){
- pOpen = sqlite3_malloc( sizeof(*pOpen) );
- if( pOpen==0 ){
- releaseLockInfo(pLock);
- rc = SQLITE_NOMEM;
- goto exit_findlockinfo;
- }
- pOpen->fileId = fileId;
- pOpen->nRef = 1;
- pOpen->nLock = 0;
- pOpen->nPending = 0;
- pOpen->aPending = 0;
- pOpen->pNext = openList;
- pOpen->pPrev = 0;
- if( openList ) openList->pPrev = pOpen;
- openList = pOpen;
-#if OS_VXWORKS
- pOpen->pSem = NULL;
- pOpen->aSemName[0] = '\0';
+ ino_t ino; /* Inode number */
#endif
- }else{
- pOpen->nRef++;
- }
- *ppOpen = pOpen;
- }
-
-exit_findlockinfo:
- return rc;
-}
-/**************************************************************************
-******************** End of the posix lock work-around ********************
-**************************************************************************/
+};
-/**************************************************************************
-**************** Begin Unique File ID Utility Used By VxWorks *************
-***************************************************************************
-**
-** The inode numbers of files are meaningless in VxWorks. Inodes cannot
-** be used to find a unique identifier for a file. A unique file id
-** must be based on the canonical filename.
-**
-** A pointer to an instance of the following structure can be used as a
-** unique file ID in VxWorks. Each instance of this structure contains
-** a copy of the canonical filename. There is also a reference count.
-** The structure is reclaimed when the number of pointers to it drops to
-** zero.
+/*
+** An instance of the following structure serves as the key used
+** to locate a particular unixLockInfo structure given its inode.
**
-** There are never very many files open at one time and lookups are not
-** a performance-critical path, so it is sufficient to put these
-** structures on a linked list.
+** If threads cannot override each others locks (LinuxThreads), then we
+** set the unixLockKey.tid field to the thread ID. If threads can override
+** each others locks (Posix and NPTL) then tid is always set to zero.
+** tid is omitted if we compile without threading support or on an OS
+** other than linux.
*/
-struct vxworksFileId {
- struct vxworksFileId *pNext; /* Next in a list of them all */
- int nRef; /* Number of references to this one */
- int nName; /* Length of the zCanonicalName[] string */
- char *zCanonicalName; /* Canonical filename */
+struct unixLockKey {
+ struct unixFileId fid; /* Unique identifier for the file */
+#if SQLITE_THREADSAFE && defined(__linux__)
+ pthread_t tid; /* Thread ID of lock owner. Zero if not using LinuxThreads */
+#endif
};
-#if OS_VXWORKS
-/*
-** All unique filesname are held on a linked list headed by this
-** variable:
+/*
+** An instance of the following structure is allocated for each open
+** inode on each thread with a different process ID. (Threads have
+** different process IDs on some versions of linux, but not on most
+** other unixes.)
+**
+** A single inode can have multiple file descriptors, so each unixFile
+** structure contains a pointer to an instance of this object and this
+** object keeps a count of the number of unixFile pointing to it.
*/
-static struct vxworksFileId *vxworksFileList = 0;
-#endif
+struct unixLockInfo {
+ struct unixLockKey lockKey; /* The lookup key */
+ int cnt; /* Number of SHARED locks held */
+ int locktype; /* One of SHARED_LOCK, RESERVED_LOCK etc. */
+ int nRef; /* Number of pointers to this structure */
+ struct unixLockInfo *pNext; /* List of all unixLockInfo objects */
+ struct unixLockInfo *pPrev; /* .... doubly linked */
+};
+/*
+** An instance of the following structure is allocated for each open
+** inode. This structure keeps track of the number of locks on that
+** inode. If a close is attempted against an inode that is holding
+** locks, the close is deferred until all locks clear by adding the
+** file descriptor to be closed to the pending list.
+*/
+struct unixOpenCnt {
+ struct unixFileId fileId; /* The lookup key */
+ int nRef; /* Number of pointers to this structure */
+ int nLock; /* Number of outstanding locks */
+ int nPending; /* Number of pending close() operations */
+ int *aPending; /* Malloced space holding fd's awaiting a close() */
#if OS_VXWORKS
+ sem_t *pSem; /* Named POSIX semaphore */
+ char aSemName[MAX_PATHNAME+1]; /* Name of that semaphore */
+#endif
+ struct unixOpenCnt *pNext, *pPrev; /* List of all unixOpenCnt objects */
+};
+
/*
-** Simplify a filename into its canonical form
-** by making the following changes:
-**
-** * removing any trailing and duplicate /
-** * removing /./
-** * removing /A/../
-**
-** Changes are made in-place. Return the new name length.
-**
-** The original filename is in z[0..n-1]. Return the number of
-** characters in the simplified name.
+** List of all unixLockInfo and unixOpenCnt objects. This used to be a hash
+** table. But the number of objects is rarely more than a dozen and
+** never exceeds a few thousand. And lookup is not on a critical
+** path so a simple linked list will suffice.
*/
-static int vxworksSimplifyName(char *z, int n){
- int i, j;
- while( n>1 && z[n-1]=='/' ){ n--; }
- for(i=j=0; i<n; i++){
- if( z[i]=='/' ){
- if( z[i+1]=='/' ) continue;
- if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
- i += 1;
- continue;
- }
- if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
- while( j>0 && z[j-1]!='/' ){ j--; }
- if( j>0 ){ j--; }
- i += 2;
- continue;
- }
- }
- z[j++] = z[i];
- }
- z[j] = 0;
- return j;
-}
-#endif /* OS_VXWORKS */
+static struct unixLockInfo *lockList = 0;
+static struct unixOpenCnt *openList = 0;
-#if OS_VXWORKS
/*
-** Find a unique file ID for the given absolute pathname. Return
-** a pointer to the vxworksFileId object. This pointer is the unique
-** file ID.
+** This variable records whether or not threads can override each others
+** locks.
**
-** The nRef field of the vxworksFileId object is incremented before
-** the object is returned. A new vxworksFileId object is created
-** and added to the global list if necessary.
+** 0: No. Threads cannot override each others locks.
+** 1: Yes. Threads can override each others locks.
+** -1: We don't know yet.
**
-** If a memory allocation error occurs, return NULL.
+** On some systems, we know at compile-time if threads can override each
+** others locks. On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
+** will be set appropriately. On other systems, we have to check at
+** runtime. On these latter systems, SQLITE_THREAD_OVERRIDE_LOCK is
+** undefined.
+**
+** This variable normally has file scope only. But during testing, we make
+** it a global so that the test code can change its value in order to verify
+** that the right stuff happens in either case.
*/
-static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
- struct vxworksFileId *pNew; /* search key and new file ID */
- struct vxworksFileId *pCandidate; /* For looping over existing file IDs */
- int n; /* Length of zAbsoluteName string */
-
- assert( zAbsoluteName[0]=='/' );
- n = strlen(zAbsoluteName);
- pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) );
- if( pNew==0 ) return 0;
- pNew->zCanonicalName = (char*)&pNew[1];
- memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
- n = vxworksSimplifyName(pNew->zCanonicalName, n);
-
- /* Search for an existing entry that matching the canonical name.
- ** If found, increment the reference count and return a pointer to
- ** the existing file ID.
- */
- unixEnterMutex();
- for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
- if( pCandidate->nName==n
- && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
- ){
- sqlite3_free(pNew);
- pCandidate->nRef++;
- unixLeaveMutex();
- return pCandidate;
- }
- }
-
- /* No match was found. We will make a new file ID */
- pNew->nRef = 1;
- pNew->nName = n;
- pNew->pNext = vxworksFileList;
- vxworksFileList = pNew;
- unixLeaveMutex();
- return pNew;
-}
-#endif /* OS_VXWORKS */
+#ifndef SQLITE_THREAD_OVERRIDE_LOCK
+# define SQLITE_THREAD_OVERRIDE_LOCK -1
+#endif
+#ifdef SQLITE_TEST
+int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
+#else
+static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
+#endif
-#if OS_VXWORKS
/*
-** Decrement the reference count on a vxworksFileId object. Free
-** the object when the reference count reaches zero.
+** This structure holds information passed into individual test
+** threads by the testThreadLockingBehavior() routine.
*/
-static void vxworksReleaseFileId(struct vxworksFileId *pId){
- unixEnterMutex();
- assert( pId->nRef>0 );
- pId->nRef--;
- if( pId->nRef==0 ){
- struct vxworksFileId **pp;
- for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
- assert( *pp==pId );
- *pp = pId->pNext;
- sqlite3_free(pId);
- }
- unixLeaveMutex();
+struct threadTestData {
+ int fd; /* File to be locked */
+ struct flock lock; /* The locking operation */
+ int result; /* Result of the locking operation */
+};
+
+#if SQLITE_THREADSAFE && defined(__linux__)
+/*
+** This function is used as the main routine for a thread launched by
+** testThreadLockingBehavior(). It tests whether the shared-lock obtained
+** by the main thread in testThreadLockingBehavior() conflicts with a
+** hypothetical write-lock obtained by this thread on the same file.
+**
+** The write-lock is not actually acquired, as this is not possible if
+** the file is open in read-only mode (see ticket #3472).
+*/
+static void *threadLockingTest(void *pArg){
+ struct threadTestData *pData = (struct threadTestData*)pArg;
+ pData->result = fcntl(pData->fd, F_GETLK, &pData->lock);
+ return pArg;
}
-#endif /* OS_VXWORKS */
-/**************************************************************************
-************** End of Unique File ID Utility Used By VxWorks **************
-**************************************************************************/
+#endif /* SQLITE_THREADSAFE && defined(__linux__) */
-#ifdef SQLITE_TEST
-/* simulate multiple hosts by creating unique hostid file paths */
-int sqlite3_hostid_num = 0;
-#endif
+#if SQLITE_THREADSAFE && defined(__linux__)
/*
-** The locking styles are associated with the different file locking
-** capabilities supported by different file systems.
-**
-** POSIX support for shared and exclusive byte-range locks
-**
-** AFP support exclusive byte-range locks
-**
-** FLOCK only a single file-global exclusive lock
-**
-** DOTLOCK isn't a true locking style, it refers to the use of a special
-** file named the same as the database file with a '.lock'
-** extension, this can be used on file systems that do not
-** offer any reliable file locking
-**
-** NONE no locking will be attempted, this is only used for
-** read-only file systems currently
-**
-** NAMEDSEM similar to DOTLOCK but uses a named semaphore instead of an
-** indicator file.
-**
-** PROXY uses a second file to represent the lock state of the database
-** file which is never actually locked, a third file controls
-** access to the proxy
+** This procedure attempts to determine whether or not threads
+** can override each others locks then sets the
+** threadsOverrideEachOthersLocks variable appropriately.
*/
-#define LOCKING_STYLE_POSIX 1
-#define LOCKING_STYLE_NONE 2
-#define LOCKING_STYLE_DOTFILE 3
-#define LOCKING_STYLE_FLOCK 4
-#define LOCKING_STYLE_AFP 5
-#define LOCKING_STYLE_NAMEDSEM 6
-#define LOCKING_STYLE_PROXY 7
+static void testThreadLockingBehavior(int fd_orig){
+  int fd;                    /* Duplicate of fd_orig handed to the test thread */
+  int rc;                    /* Result of fcntl() and pthread_create() */
+  struct threadTestData d;   /* Arguments for, and result from, the test thread */
+  struct flock l;            /* Read-lock placed on the first byte of the file */
+  pthread_t t;               /* The test thread */
+
+  /* Outline: take a read-lock on byte 0 through fd_orig in this thread,
+  ** then have a second thread ask (F_GETLK, via threadLockingTest()) whether
+  ** a write-lock on the same byte would conflict. If the answer comes back
+  ** as F_UNLCK the two threads share locks and the flag is set to 1,
+  ** otherwise it is set to 0. On any failure the flag is left unchanged.
+  */
+  fd = dup(fd_orig);
+  if( fd<0 ) return;
+  memset(&l, 0, sizeof(l));
+  l.l_type = F_RDLCK;
+  l.l_len = 1;
+  l.l_start = 0;
+  l.l_whence = SEEK_SET;
+  rc = fcntl(fd_orig, F_SETLK, &l);
+  if( rc!=0 ){
+    /* Release the duplicate descriptor before bailing out so that it is
+    ** not leaked. */
+    close(fd);
+    return;
+  }
+  memset(&d, 0, sizeof(d));
+  d.fd = fd;
+  d.lock = l;
+  d.lock.l_type = F_WRLCK;
+  /* Only join a thread that was actually started: if pthread_create()
+  ** fails, t is never initialized and must not be passed to pthread_join().
+  */
+  rc = pthread_create(&t, 0, threadLockingTest, &d);
+  if( rc==0 ){
+    pthread_join(t, 0);
+  }
+  close(fd);
+  /* d.result is still the zero written by memset() when no thread ran, so
+  ** the pthread_create() status must be checked as well before trusting
+  ** d.lock. */
+  if( rc!=0 || d.result!=0 ) return;
+  threadsOverrideEachOthersLocks = (d.lock.l_type==F_UNLCK);
+}
+#elif SQLITE_THREADSAFE
/*
-** Only set the lastErrno if the error code is a real error and not
-** a normal expected return code of SQLITE_BUSY or SQLITE_OK
+** On anything other than linux, assume threads override each others locks.
*/
-#define IS_LOCK_ERROR(x) ((x != SQLITE_OK) && (x != SQLITE_BUSY))
+static void testThreadLockingBehavior(int fd_orig){
+ UNUSED_PARAMETER(fd_orig);
+ threadsOverrideEachOthersLocks = 1;
+}
+#endif /* SQLITE_THREADSAFE && defined(__linux__) */
-#ifdef SQLITE_LOCK_TRACE
/*
-** Print out information about all locking operations.
-**
-** This routine is used for troubleshooting locks on multithreaded
-** platforms. Enable by compiling with the -DSQLITE_LOCK_TRACE
-** command-line option on the compiler. This code is normally
-** turned off.
-*/
-static int lockTrace(int fd, int op, struct flock *p){
- char *zOpName, *zType;
- int s;
- int savedErrno;
- if( op==F_GETLK ){
- zOpName = "GETLK";
- }else if( op==F_SETLK ){
- zOpName = "SETLK";
- }else{
- s = fcntl(fd, op, p);
- sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
- return s;
- }
- if( p->l_type==F_RDLCK ){
- zType = "RDLCK";
- }else if( p->l_type==F_WRLCK ){
- zType = "WRLCK";
- }else if( p->l_type==F_UNLCK ){
- zType = "UNLCK";
- }else{
- assert( 0 );
- }
- assert( p->l_whence==SEEK_SET );
- s = fcntl(fd, op, p);
- savedErrno = errno;
- sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
- threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
- (int)p->l_pid, s);
- if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
- struct flock l2;
- l2 = *p;
- fcntl(fd, F_GETLK, &l2);
- if( l2.l_type==F_RDLCK ){
- zType = "RDLCK";
- }else if( l2.l_type==F_WRLCK ){
- zType = "WRLCK";
- }else if( l2.l_type==F_UNLCK ){
- zType = "UNLCK";
- }else{
- assert( 0 );
- }
- sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
- zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
- }
- errno = savedErrno;
- return s;
-}
-#define fcntl lockTrace
-#endif /* SQLITE_LOCK_TRACE */
-
-
-#if SQLITE_ENABLE_LOCKING_STYLE
-/*
-** The proxyLockingContext has the path and file structures for the remote
-** and local proxy files in it
-*/
-typedef struct proxyLockingContext proxyLockingContext;
-struct proxyLockingContext {
- unixFile *conchFile;
- char *conchFilePath;
- unixFile *lockProxy;
- char *lockProxyPath;
- char *dbPath;
- int conchHeld;
- void *oldLockingContext; /* preserve the original locking context for close */
- sqlite3_io_methods const *pOldMethod; /* ditto pMethod */
-};
-
-static int getDbPathForUnixFile(unixFile *pFile, char *dbPath);
-static int getLockPath(const char *dbPath, char *lPath, size_t maxLen);
-static sqlite3_io_methods *ioMethodForLockingStyle(int style);
-static int createProxyUnixFile(const char *path, unixFile **ppFile);
-static int fillInUnixFile(sqlite3_vfs *pVfs, int h, int dirfd, sqlite3_file *pId, const char *zFilename, int noLock, int isDelete);
-static int takeConch(unixFile *pFile);
-static int releaseConch(unixFile *pFile);
-static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf);
-
-/*
-** Tests a byte-range locking query to see if byte range locks are
-** supported, if not we fall back to dotlockLockingStyle.
-** On vxWorks we fall back to namedsemLockingStyle.
-*/
-static int testLockingStyle(int fd){
- struct flock lockInfo;
-
- /* Test byte-range lock using fcntl(). If the call succeeds,
- ** assume that the file-system supports POSIX style locks.
- */
- lockInfo.l_len = 1;
- lockInfo.l_start = 0;
- lockInfo.l_whence = SEEK_SET;
- lockInfo.l_type = F_RDLCK;
- if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) {
- return LOCKING_STYLE_POSIX;
- }
-
- /* Testing for flock() can give false positives. So if if the above
- ** test fails, then we fall back to using dot-file style locking (or
- ** named-semaphore locking on vxworks).
- */
- return (OS_VXWORKS ? LOCKING_STYLE_NAMEDSEM : LOCKING_STYLE_DOTFILE);
-}
-#endif
-
-/*
-** If SQLITE_ENABLE_LOCKING_STYLE is defined, this function Examines the
-** f_fstypename entry in the statfs structure as returned by stat() for
-** the file system hosting the database file and selects the appropriate
-** locking style based on its value. These values and assignments are
-** based on Darwin/OSX behavior and have not been thoroughly tested on
-** other systems.
-**
-** If SQLITE_ENABLE_LOCKING_STYLE is not defined, this function always
-** returns LOCKING_STYLE_POSIX.
-*/
-#if SQLITE_ENABLE_LOCKING_STYLE
-static int detectLockingStyle(
- sqlite3_vfs *pVfs,
- const char *filePath,
- int fd
-){
-#if OS_VXWORKS
- if( !filePath ){
- return LOCKING_STYLE_NONE;
- }
- if( pVfs->pAppData ){
- return SQLITE_PTR_TO_INT(pVfs->pAppData);
- }
- if (access(filePath, 0) != -1){
- return testLockingStyle(fd);
- }
-#else
- struct Mapping {
- const char *zFilesystem;
- int eLockingStyle;
- } aMap[] = {
- { "hfs", LOCKING_STYLE_POSIX },
- { "ufs", LOCKING_STYLE_POSIX },
- { "afpfs", LOCKING_STYLE_AFP },
-#ifdef SQLITE_ENABLE_AFP_LOCKING_SMB
- { "smbfs", LOCKING_STYLE_AFP },
-#else
- { "smbfs", LOCKING_STYLE_FLOCK },
-#endif
- { "webdav", LOCKING_STYLE_NONE },
- { 0, 0 }
- };
- int i;
- struct statfs fsInfo;
-
- if( !filePath ){
- return LOCKING_STYLE_NONE;
- }
- if( pVfs && pVfs->pAppData ){
- return SQLITE_PTR_TO_INT(pVfs->pAppData);
- }
-
- if( statfs(filePath, &fsInfo) != -1 ){
- if( fsInfo.f_flags & MNT_RDONLY ){
- return LOCKING_STYLE_NONE;
- }
- for(i=0; aMap[i].zFilesystem; i++){
- if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
- return aMap[i].eLockingStyle;
- }
- }
- }
-
- /* Default case. Handles, amongst others, "nfs". */
- return testLockingStyle(fd);
-#endif /* if OS_VXWORKS */
- return LOCKING_STYLE_POSIX;
-}
-#else
- #define detectLockingStyle(x,y,z) LOCKING_STYLE_POSIX
-#endif /* if SQLITE_ENABLE_LOCKING_STYLE */
-
-#ifdef SQLITE_DEBUG
-/*
-** Helper function for printing out trace information from debugging
-** binaries. This returns the string represetation of the supplied
-** integer lock-type.
-*/
-static const char *locktypeName(int locktype){
- switch( locktype ){
- case NO_LOCK: return "NONE";
- case SHARED_LOCK: return "SHARED";
- case RESERVED_LOCK: return "RESERVED";
- case PENDING_LOCK: return "PENDING";
- case EXCLUSIVE_LOCK: return "EXCLUSIVE";
- }
- return "ERROR";
-}
-#endif
-
-/*
-** If we are currently in a different thread than the thread that the
-** unixFile argument belongs to, then transfer ownership of the unixFile
-** over to the current thread.
+** If we are currently in a different thread than the thread that the
+** unixFile argument belongs to, then transfer ownership of the unixFile
+** over to the current thread.
**
** A unixFile is only owned by a thread on systems where one thread is
** unable to override locks created by a different thread. RedHat9 is
** If the unixFile is locked and an ownership is wrong, then return
** SQLITE_MISUSE. SQLITE_OK is returned if everything works.
*/
-#if SQLITE_THREADSAFE
+#if SQLITE_THREADSAFE && defined(__linux__)
static int transferOwnership(unixFile *pFile){
int rc;
pthread_t hSelf;
/*
-** Seek to the offset passed as the second argument, then read cnt
-** bytes into pBuf. Return the number of bytes actually read.
-**
-** NB: If you define USE_PREAD or USE_PREAD64, then it might also
-** be necessary to define _XOPEN_SOURCE to be 500. This varies from
-** one system to another. Since SQLite does not define USE_PREAD
-** any any form by default, we will not attempt to define _XOPEN_SOURCE.
-** See tickets #2741 and #2681.
-**
-** To avoid stomping the errno value on a failed read the lastErrno value
-** is set before returning.
+** Release a unixLockInfo structure previously allocated by findLockInfo().
*/
-static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
- int got;
- i64 newOffset;
- TIMER_START;
-#if defined(USE_PREAD)
- got = pread(id->h, pBuf, cnt, offset);
- SimulateIOError( got = -1 );
-#elif defined(USE_PREAD64)
- got = pread64(id->h, pBuf, cnt, offset);
- SimulateIOError( got = -1 );
-#else
- newOffset = lseek(id->h, offset, SEEK_SET);
- SimulateIOError( newOffset-- );
- if( newOffset!=offset ){
- if( newOffset == -1 ){
- ((unixFile*)id)->lastErrno = errno;
- }else{
- ((unixFile*)id)->lastErrno = 0;
+static void releaseLockInfo(struct unixLockInfo *pLock){
+ if( pLock ){
+ pLock->nRef--;
+ if( pLock->nRef==0 ){
+ if( pLock->pPrev ){
+ assert( pLock->pPrev->pNext==pLock );
+ pLock->pPrev->pNext = pLock->pNext;
+ }else{
+ assert( lockList==pLock );
+ lockList = pLock->pNext;
+ }
+ if( pLock->pNext ){
+ assert( pLock->pNext->pPrev==pLock );
+ pLock->pNext->pPrev = pLock->pPrev;
+ }
+ sqlite3_free(pLock);
}
- return -1;
}
- got = read(id->h, pBuf, cnt);
-#endif
- TIMER_END;
- if( got<0 ){
- ((unixFile*)id)->lastErrno = errno;
- }
- OSTRACE5("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
- return got;
}
/*
-** Read data from a file into a buffer. Return SQLITE_OK if all
-** bytes were read successfully and SQLITE_IOERR if anything goes
-** wrong.
+** Release a unixOpenCnt structure previously allocated by findLockInfo().
*/
-static int unixRead(
- sqlite3_file *id,
- void *pBuf,
- int amt,
- sqlite3_int64 offset
-){
- int got;
- assert( id );
- got = seekAndRead((unixFile*)id, offset, pBuf, amt);
- if( got==amt ){
- return SQLITE_OK;
- }else if( got<0 ){
- /* lastErrno set by seekAndRead */
- return SQLITE_IOERR_READ;
- }else{
- ((unixFile*)id)->lastErrno = 0; /* not a system error */
- /* Unread parts of the buffer must be zero-filled */
- memset(&((char*)pBuf)[got], 0, amt-got);
- return SQLITE_IOERR_SHORT_READ;
+static void releaseOpenCnt(struct unixOpenCnt *pOpen){
+ if( pOpen ){
+ pOpen->nRef--;
+ if( pOpen->nRef==0 ){
+ if( pOpen->pPrev ){
+ assert( pOpen->pPrev->pNext==pOpen );
+ pOpen->pPrev->pNext = pOpen->pNext;
+ }else{
+ assert( openList==pOpen );
+ openList = pOpen->pNext;
+ }
+ if( pOpen->pNext ){
+ assert( pOpen->pNext->pPrev==pOpen );
+ pOpen->pNext->pPrev = pOpen->pPrev;
+ }
+ sqlite3_free(pOpen->aPending);
+ sqlite3_free(pOpen);
+ }
}
}
+
/*
-** Seek to the offset in id->offset then read cnt bytes into pBuf.
-** Return the number of bytes actually read. Update the offset.
+** Given a file descriptor, locate unixLockInfo and unixOpenCnt structures that
+** describe that file descriptor. Create new ones if necessary. The
+** return values might be uninitialized if an error occurs.
**
-** To avoid stomping the errno value on a failed write the lastErrno value
-** is set before returning.
+** Return an appropriate error code.
*/
-static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
- int got;
- i64 newOffset;
- TIMER_START;
-#if defined(USE_PREAD)
- got = pwrite(id->h, pBuf, cnt, offset);
-#elif defined(USE_PREAD64)
- got = pwrite64(id->h, pBuf, cnt, offset);
+static int findLockInfo(
+ unixFile *pFile, /* Unix file with file desc used in the key */
+ struct unixLockInfo **ppLock, /* Return the unixLockInfo structure here */
+ struct unixOpenCnt **ppOpen /* Return the unixOpenCnt structure here */
+){
+ int rc; /* System call return code */
+ int fd; /* The file descriptor for pFile */
+ struct unixLockKey lockKey; /* Lookup key for the unixLockInfo structure */
+ struct unixFileId fileId; /* Lookup key for the unixOpenCnt struct */
+ struct stat statbuf; /* Low-level file information */
+ struct unixLockInfo *pLock; /* Candidate unixLockInfo object */
+ struct unixOpenCnt *pOpen; /* Candidate unixOpenCnt object */
+
+ /* Get low-level information about the file that we can use to
+ ** create a unique name for the file.
+ */
+ fd = pFile->h;
+ rc = fstat(fd, &statbuf);
+ if( rc!=0 ){
+ pFile->lastErrno = errno;
+#ifdef EOVERFLOW
+ if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
+#endif
+ return SQLITE_IOERR;
+ }
+
+ /* On OS X on an msdos filesystem, the inode number is reported
+ ** incorrectly for zero-size files. See ticket #3260. To work
+ ** around this problem (we consider it a bug in OS X, not SQLite)
+ ** we always increase the file size to 1 by writing a single byte
+ ** prior to accessing the inode number. The one byte written is
+ ** an ASCII 'S' character which also happens to be the first byte
+ ** in the header of every SQLite database. In this way, if there
+ ** is a race condition such that another thread has already populated
+ ** the first page of the database, no damage is done.
+ */
+ if( statbuf.st_size==0 ){
+ write(fd, "S", 1);
+ rc = fstat(fd, &statbuf);
+ if( rc!=0 ){
+ pFile->lastErrno = errno;
+ return SQLITE_IOERR;
+ }
+ }
+
+ memset(&lockKey, 0, sizeof(lockKey));
+ lockKey.fid.dev = statbuf.st_dev;
+#if OS_VXWORKS
+ lockKey.fid.pId = pFile->pId;
#else
- newOffset = lseek(id->h, offset, SEEK_SET);
- if( newOffset!=offset ){
- if( newOffset == -1 ){
- ((unixFile*)id)->lastErrno = errno;
+ lockKey.fid.ino = statbuf.st_ino;
+#endif
+#if SQLITE_THREADSAFE && defined(__linux__)
+ if( threadsOverrideEachOthersLocks<0 ){
+ testThreadLockingBehavior(fd);
+ }
+ lockKey.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
+#endif
+ fileId = lockKey.fid;
+ if( ppLock!=0 ){
+ pLock = lockList;
+ while( pLock && memcmp(&lockKey, &pLock->lockKey, sizeof(lockKey)) ){
+ pLock = pLock->pNext;
+ }
+ if( pLock==0 ){
+ pLock = sqlite3_malloc( sizeof(*pLock) );
+ if( pLock==0 ){
+ rc = SQLITE_NOMEM;
+ goto exit_findlockinfo;
+ }
+ pLock->lockKey = lockKey;
+ pLock->nRef = 1;
+ pLock->cnt = 0;
+ pLock->locktype = 0;
+ pLock->pNext = lockList;
+ pLock->pPrev = 0;
+ if( lockList ) lockList->pPrev = pLock;
+ lockList = pLock;
}else{
- ((unixFile*)id)->lastErrno = 0;
+ pLock->nRef++;
}
- return -1;
+ *ppLock = pLock;
}
- got = write(id->h, pBuf, cnt);
+ if( ppOpen!=0 ){
+ pOpen = openList;
+ while( pOpen && memcmp(&fileId, &pOpen->fileId, sizeof(fileId)) ){
+ pOpen = pOpen->pNext;
+ }
+ if( pOpen==0 ){
+ pOpen = sqlite3_malloc( sizeof(*pOpen) );
+ if( pOpen==0 ){
+ releaseLockInfo(pLock);
+ rc = SQLITE_NOMEM;
+ goto exit_findlockinfo;
+ }
+ pOpen->fileId = fileId;
+ pOpen->nRef = 1;
+ pOpen->nLock = 0;
+ pOpen->nPending = 0;
+ pOpen->aPending = 0;
+ pOpen->pNext = openList;
+ pOpen->pPrev = 0;
+ if( openList ) openList->pPrev = pOpen;
+ openList = pOpen;
+#if OS_VXWORKS
+ pOpen->pSem = NULL;
+ pOpen->aSemName[0] = '\0';
#endif
- TIMER_END;
- if( got<0 ){
- ((unixFile*)id)->lastErrno = errno;
+ }else{
+ pOpen->nRef++;
+ }
+ *ppOpen = pOpen;
}
- OSTRACE5("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
- return got;
+exit_findlockinfo:
+ return rc;
}
/*
-** Write data from a buffer into a file. Return SQLITE_OK on success
-** or some other error code on failure.
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, set *pResOut
+** to a non-zero value otherwise *pResOut is set to zero. The return value
+** is set to SQLITE_OK unless an I/O error occurs during lock checking.
*/
-static int unixWrite(
- sqlite3_file *id,
- const void *pBuf,
- int amt,
- sqlite3_int64 offset
-){
- int wrote = 0;
- assert( id );
- assert( amt>0 );
- while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
- amt -= wrote;
- offset += wrote;
- pBuf = &((char*)pBuf)[wrote];
+static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
+ int rc = SQLITE_OK;
+ int reserved = 0;
+ unixFile *pFile = (unixFile*)id;
+
+ SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+
+ assert( pFile );
+ unixEnterMutex(); /* Because pFile->pLock is shared across threads */
+
+ /* Check if a thread in this process holds such a lock */
+ if( pFile->pLock->locktype>SHARED_LOCK ){
+ reserved = 1;
}
- SimulateIOError(( wrote=(-1), amt=1 ));
- SimulateDiskfullError(( wrote=0, amt=1 ));
- if( amt>0 ){
- if( wrote<0 ){
- /* lastErrno set by seekAndWrite */
- return SQLITE_IOERR_WRITE;
- }else{
- ((unixFile*)id)->lastErrno = 0; /* not a system error */
- return SQLITE_FULL;
+
+ /* Otherwise see if some other process holds it.
+ */
+ if( !reserved ){
+ struct flock lock;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = RESERVED_BYTE;
+ lock.l_len = 1;
+ lock.l_type = F_WRLCK;
+ if (-1 == fcntl(pFile->h, F_GETLK, &lock)) {
+ int tErrno = errno;
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
+ pFile->lastErrno = tErrno;
+ } else if( lock.l_type!=F_UNLCK ){
+ reserved = 1;
}
}
- return SQLITE_OK;
-}
+
+ unixLeaveMutex();
+ OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
-#ifdef SQLITE_TEST
-/*
-** Count the number of fullsyncs and normal syncs. This is used to test
-** that syncs and fullsyncs are occuring at the right times.
-*/
-int sqlite3_sync_count = 0;
-int sqlite3_fullsync_count = 0;
-#endif
+ *pResOut = reserved;
+ return rc;
+}
/*
-** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
-** Otherwise use fsync() in its place.
-*/
-#ifndef HAVE_FDATASYNC
-# define fdatasync fsync
-#endif
-
-/*
-** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
-** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently
-** only available on Mac OS X. But that could change.
+** Lock the file with the lock specified by parameter locktype - one
+** of the following:
+**
+** (1) SHARED_LOCK
+** (2) RESERVED_LOCK
+** (3) PENDING_LOCK
+** (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between. The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal. The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+** UNLOCKED -> SHARED
+** SHARED -> RESERVED
+** SHARED -> (PENDING) -> EXCLUSIVE
+** RESERVED -> (PENDING) -> EXCLUSIVE
+** PENDING -> EXCLUSIVE
+**
+** This routine will only increase a lock. Use the sqlite3OsUnlock()
+** routine to lower a locking level.
*/
-#ifdef F_FULLFSYNC
-# define HAVE_FULLFSYNC 1
-#else
-# define HAVE_FULLFSYNC 0
-#endif
+static int unixLock(sqlite3_file *id, int locktype){
+ /* The following describes the implementation of the various locks and
+ ** lock transitions in terms of the POSIX advisory shared and exclusive
+ ** lock primitives (called read-locks and write-locks below, to avoid
+ ** confusion with SQLite lock names). The algorithms are complicated
+ ** slightly in order to be compatible with windows systems simultaneously
+ ** accessing the same database file, in case that is ever required.
+ **
+ ** Symbols defined in os.h identify the 'pending byte' and the 'reserved
+ ** byte', each a single byte at a well known offset, and the 'shared byte
+ ** range', a range of 510 bytes at a well known offset.
+ **
+ ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
+ ** byte'. If this is successful, a random byte from the 'shared byte
+ ** range' is read-locked and the lock on the 'pending byte' released.
+ **
+ ** A process may only obtain a RESERVED lock after it has a SHARED lock.
+ ** A RESERVED lock is implemented by grabbing a write-lock on the
+ ** 'reserved byte'.
+ **
+ ** A process may only obtain a PENDING lock after it has obtained a
+ ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
+ ** on the 'pending byte'. This ensures that no new SHARED locks can be
+ ** obtained, but existing SHARED locks are allowed to persist. A process
+ ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
+ ** This property is used by the algorithm for rolling back a journal file
+ ** after a crash.
+ **
+ ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
+ ** implemented by obtaining a write-lock on the entire 'shared byte
+ ** range'. Since all other locks require a read-lock on one of the bytes
+ ** within this range, this ensures that no other locks are held on the
+ ** database.
+ **
+ ** The reason a single byte cannot be used instead of the 'shared byte
+ ** range' is that some versions of windows do not support read-locks. By
+ ** locking a random byte from a range, concurrent SHARED locks may exist
+ ** even if the locking primitive used is always a write-lock.
+ */
+ int rc = SQLITE_OK;
+ unixFile *pFile = (unixFile*)id;
+ struct unixLockInfo *pLock = pFile->pLock;
+ struct flock lock;
+ int s;
+ assert( pFile );
+ OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
+ locktypeName(locktype), locktypeName(pFile->locktype),
+ locktypeName(pLock->locktype), pLock->cnt , getpid());
-/*
-** The fsync() system call does not work as advertised on many
-** unix systems. The following procedure is an attempt to make
-** it work better.
-**
-** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
-** for testing when we want to run through the test suite quickly.
-** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
-** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
-** or power failure will likely corrupt the database file.
-*/
-static int full_fsync(int fd, int fullSync, int dataOnly){
- int rc;
+ /* If there is already a lock of this type or more restrictive on the
+ ** unixFile, do nothing. Don't use the end_lock: exit path, as
+ ** unixEnterMutex() hasn't been called yet.
+ */
+ if( pFile->locktype>=locktype ){
+ OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
+ locktypeName(locktype));
+ return SQLITE_OK;
+ }
- /* The following "ifdef/elif/else/" block has the same structure as
- ** the one below. It is replicated here solely to avoid cluttering
- ** up the real code with the UNUSED_PARAMETER() macros.
+ /* Make sure the locking sequence is correct
*/
-#ifdef SQLITE_NO_SYNC
- UNUSED_PARAMETER(fd);
- UNUSED_PARAMETER(fullSync);
- UNUSED_PARAMETER(dataOnly);
-#elif HAVE_FULLFSYNC
- UNUSED_PARAMETER(dataOnly);
-#else
- UNUSED_PARAMETER(fullSync);
-#endif
+ assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
+ assert( locktype!=PENDING_LOCK );
+ assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
- /* Record the number of times that we do a normal fsync() and
- ** FULLSYNC. This is used during testing to verify that this procedure
- ** gets called with the correct arguments.
+ /* This mutex is needed because pFile->pLock is shared across threads
*/
-#ifdef SQLITE_TEST
- if( fullSync ) sqlite3_fullsync_count++;
- sqlite3_sync_count++;
-#endif
+ unixEnterMutex();
- /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
- ** no-op
+ /* Make sure the current thread owns the pFile.
*/
-#ifdef SQLITE_NO_SYNC
- rc = SQLITE_OK;
-#elif HAVE_FULLFSYNC
- if( fullSync ){
- rc = fcntl(fd, F_FULLFSYNC, 0);
- }else{
- rc = 1;
+ rc = transferOwnership(pFile);
+ if( rc!=SQLITE_OK ){
+ unixLeaveMutex();
+ return rc;
}
- /* If the FULLFSYNC failed, fall back to attempting an fsync().
- * It shouldn't be possible for fullfsync to fail on the local
- * file system (on OSX), so failure indicates that FULLFSYNC
- * isn't supported for this file system. So, attempt an fsync
- * and (for now) ignore the overhead of a superfluous fcntl call.
- * It'd be better to detect fullfsync support once and avoid
- * the fcntl call every time sync is called.
- */
- if( rc ) rc = fsync(fd);
+ pLock = pFile->pLock;
-#else
- if( dataOnly ){
- rc = fdatasync(fd);
- if( OS_VXWORKS && rc==-1 && errno==ENOTSUP ){
- rc = fsync(fd);
- }
- }else{
- rc = fsync(fd);
+ /* If some thread using this PID has a lock via a different unixFile*
+ ** handle that precludes the requested lock, return BUSY.
+ */
+ if( (pFile->locktype!=pLock->locktype &&
+ (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
+ ){
+ rc = SQLITE_BUSY;
+ goto end_lock;
}
-#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
- if( OS_VXWORKS && rc!= -1 ){
- rc = 0;
+ /* If a SHARED lock is requested, and some thread using this PID already
+ ** has a SHARED or RESERVED lock, then increment reference counts and
+ ** return SQLITE_OK.
+ */
+ if( locktype==SHARED_LOCK &&
+ (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
+ assert( locktype==SHARED_LOCK );
+ assert( pFile->locktype==0 );
+ assert( pLock->cnt>0 );
+ pFile->locktype = SHARED_LOCK;
+ pLock->cnt++;
+ pFile->pOpen->nLock++;
+ goto end_lock;
}
- return rc;
-}
-/*
-** Make sure all writes to a particular file are committed to disk.
-**
-** If dataOnly==0 then both the file itself and its metadata (file
-** size, access time, etc) are synced. If dataOnly!=0 then only the
-** file data is synced.
-**
-** Under Unix, also make sure that the directory entry for the file
-** has been created by fsync-ing the directory that contains the file.
-** If we do not do this and we encounter a power failure, the directory
-** entry for the journal might not exist after we reboot. The next
-** SQLite to access the file will not know that the journal exists (because
-** the directory entry for the journal was never created) and the transaction
-** will not roll back - possibly leading to database corruption.
-*/
-static int unixSync(sqlite3_file *id, int flags){
- int rc;
- unixFile *pFile = (unixFile*)id;
+ lock.l_len = 1L;
- int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
- int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
+ lock.l_whence = SEEK_SET;
- /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
- assert((flags&0x0F)==SQLITE_SYNC_NORMAL
- || (flags&0x0F)==SQLITE_SYNC_FULL
- );
+ /* A PENDING lock is needed before acquiring a SHARED lock and before
+ ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
+ ** be released.
+ */
+ if( locktype==SHARED_LOCK
+ || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
+ ){
+ lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
+ lock.l_start = PENDING_BYTE;
+ s = fcntl(pFile->h, F_SETLK, &lock);
+ if( s==(-1) ){
+ int tErrno = errno;
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+ if( IS_LOCK_ERROR(rc) ){
+ pFile->lastErrno = tErrno;
+ }
+ goto end_lock;
+ }
+ }
- /* Unix cannot, but some systems may return SQLITE_FULL from here. This
- ** line is to test that doing so does not cause any problems.
+
+ /* If control gets to this point, then actually go ahead and make
+ ** operating system calls for the specified lock.
*/
- SimulateDiskfullError( return SQLITE_FULL );
+ if( locktype==SHARED_LOCK ){
+ int tErrno = 0;
+ assert( pLock->cnt==0 );
+ assert( pLock->locktype==0 );
- assert( pFile );
- OSTRACE2("SYNC %-3d\n", pFile->h);
- rc = full_fsync(pFile->h, isFullsync, isDataOnly);
- SimulateIOError( rc=1 );
- if( rc ){
- pFile->lastErrno = errno;
- return SQLITE_IOERR_FSYNC;
- }
- if( pFile->dirfd>=0 ){
- int err;
- OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
- HAVE_FULLFSYNC, isFullsync);
-#ifndef SQLITE_DISABLE_DIRSYNC
- /* The directory sync is only attempted if full_fsync is
- ** turned off or unavailable. If a full_fsync occurred above,
- ** then the directory sync is superfluous.
- */
- if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
- /*
- ** We have received multiple reports of fsync() returning
- ** errors when applied to directories on certain file systems.
- ** A failed directory sync is not a big deal. So it seems
- ** better to ignore the error. Ticket #1657
- */
- /* pFile->lastErrno = errno; */
- /* return SQLITE_IOERR; */
+ /* Now get the read-lock */
+ lock.l_start = SHARED_FIRST;
+ lock.l_len = SHARED_SIZE;
+ if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
+ tErrno = errno;
}
-#endif
- err = close(pFile->dirfd); /* Only need to sync once, so close the */
- if( err==0 ){ /* directory when we are done */
- pFile->dirfd = -1;
+ /* Drop the temporary PENDING lock */
+ lock.l_start = PENDING_BYTE;
+ lock.l_len = 1L;
+ lock.l_type = F_UNLCK;
+ if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
+ if( s != -1 ){
+ /* This could happen with a network mount */
+ tErrno = errno;
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
+ if( IS_LOCK_ERROR(rc) ){
+ pFile->lastErrno = tErrno;
+ }
+ goto end_lock;
+ }
+ }
+ if( s==(-1) ){
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+ if( IS_LOCK_ERROR(rc) ){
+ pFile->lastErrno = tErrno;
+ }
}else{
- pFile->lastErrno = errno;
- rc = SQLITE_IOERR_DIR_CLOSE;
+ pFile->locktype = SHARED_LOCK;
+ pFile->pOpen->nLock++;
+ pLock->cnt = 1;
}
- }
- return rc;
-}
-
-/*
-** Truncate an open file to a specified size
-*/
-static int unixTruncate(sqlite3_file *id, i64 nByte){
- int rc;
- assert( id );
- SimulateIOError( return SQLITE_IOERR_TRUNCATE );
- rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
- if( rc ){
- ((unixFile*)id)->lastErrno = errno;
- return SQLITE_IOERR_TRUNCATE;
+ }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
+ /* We are trying for an exclusive lock but another thread in this
+ ** same process is still holding a shared lock. */
+ rc = SQLITE_BUSY;
}else{
- return SQLITE_OK;
+ /* The request was for a RESERVED or EXCLUSIVE lock. It is
+ ** assumed that there is a SHARED or greater lock on the file
+ ** already.
+ */
+ assert( 0!=pFile->locktype );
+ lock.l_type = F_WRLCK;
+ switch( locktype ){
+ case RESERVED_LOCK:
+ lock.l_start = RESERVED_BYTE;
+ break;
+ case EXCLUSIVE_LOCK:
+ lock.l_start = SHARED_FIRST;
+ lock.l_len = SHARED_SIZE;
+ break;
+ default:
+ assert(0);
+ }
+ s = fcntl(pFile->h, F_SETLK, &lock);
+ if( s==(-1) ){
+ int tErrno = errno;
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+ if( IS_LOCK_ERROR(rc) ){
+ pFile->lastErrno = tErrno;
+ }
+ }
}
-}
-
-/*
-** Determine the current size of a file in bytes
-*/
-static int unixFileSize(sqlite3_file *id, i64 *pSize){
- int rc;
- struct stat buf;
- assert( id );
- rc = fstat(((unixFile*)id)->h, &buf);
- SimulateIOError( rc=1 );
- if( rc!=0 ){
- ((unixFile*)id)->lastErrno = errno;
- return SQLITE_IOERR_FSTAT;
+
+ if( rc==SQLITE_OK ){
+ pFile->locktype = locktype;
+ pLock->locktype = locktype;
+ }else if( locktype==EXCLUSIVE_LOCK ){
+ pFile->locktype = PENDING_LOCK;
+ pLock->locktype = PENDING_LOCK;
}
- *pSize = buf.st_size;
- /* When opening a zero-size database, the findLockInfo() procedure
- ** writes a single byte into that file in order to work around a bug
- ** in the OS-X msdos filesystem. In order to avoid problems with upper
- ** layers, we need to report this file size as zero even though it is
- ** really 1. Ticket #3260.
- */
- if( *pSize==1 ) *pSize = 0;
-
-
- return SQLITE_OK;
-}
-
-/*
-** This routine translates a standard POSIX errno code into something
-** useful to the clients of the sqlite3 functions. Specifically, it is
-** intended to translate a variety of "try again" errors into SQLITE_BUSY
-** and a variety of "please close the file descriptor NOW" errors into
-** SQLITE_IOERR
-**
-** Errors during initialization of locks, or file system support for locks,
-** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
-*/
-static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
- switch (posixError) {
- case 0:
- return SQLITE_OK;
-
- case EAGAIN:
- case ETIMEDOUT:
- case EBUSY:
- case EINTR:
- case ENOLCK:
- /* random NFS retry error, unless during file system support
- * introspection, in which it actually means what it says */
- return SQLITE_BUSY;
-
- case EACCES:
- /* EACCES is like EAGAIN during locking operations, but not any other time*/
- if( (sqliteIOErr == SQLITE_IOERR_LOCK) ||
- (sqliteIOErr == SQLITE_IOERR_UNLOCK) ||
- (sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
- (sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
- return SQLITE_BUSY;
- }
- /* else fall through */
- case EPERM:
- return SQLITE_PERM;
-
- case EDEADLK:
- return SQLITE_IOERR_BLOCKED;
-
-#if EOPNOTSUPP!=ENOTSUP
- case EOPNOTSUPP:
- /* something went terribly awry, unless during file system support
- * introspection, in which it actually means what it says */
-#endif
-#ifdef ENOTSUP
- case ENOTSUP:
- /* invalid fd, unless during file system support introspection, in which
- * it actually means what it says */
-#endif
- case EIO:
- case EBADF:
- case EINVAL:
- case ENOTCONN:
- case ENODEV:
- case ENXIO:
- case ENOENT:
- case ESTALE:
- case ENOSYS:
- /* these should force the client to close the file and reconnect */
-
- default:
- return sqliteIOErr;
- }
+end_lock:
+ unixLeaveMutex();
+ OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
+ rc==SQLITE_OK ? "ok" : "failed");
+ return rc;
}
/*
-** This routine checks if there is a RESERVED lock held on the specified
-** file by this or any other process. If such a lock is held, set *pResOut
-** to a non-zero value otherwise *pResOut is set to zero. The return value
-** is set to SQLITE_OK unless an I/O error occurs during lock checking.
+** Lower the locking level on file descriptor pFile to locktype. locktype
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
*/
-static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
+static int unixUnlock(sqlite3_file *id, int locktype){
+ struct unixLockInfo *pLock;
+ struct flock lock;
int rc = SQLITE_OK;
- int reserved = 0;
unixFile *pFile = (unixFile*)id;
-
- SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+ int h;
assert( pFile );
- unixEnterMutex(); /* Because pFile->pLock is shared across threads */
+ OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
+ pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
- /* Check if a thread in this process holds such a lock */
- if( pFile->pLock->locktype>SHARED_LOCK ){
- reserved = 1;
+ assert( locktype<=SHARED_LOCK );
+ if( pFile->locktype<=locktype ){
+ return SQLITE_OK;
}
-
- /* Otherwise see if some other process holds it.
- */
- if( !reserved ){
- struct flock lock;
- lock.l_whence = SEEK_SET;
- lock.l_start = RESERVED_BYTE;
- lock.l_len = 1;
- lock.l_type = F_WRLCK;
- if (-1 == fcntl(pFile->h, F_GETLK, &lock)) {
- int tErrno = errno;
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
- pFile->lastErrno = tErrno;
- } else if( lock.l_type!=F_UNLCK ){
- reserved = 1;
- }
- }
-
- unixLeaveMutex();
- OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
-
- *pResOut = reserved;
- return rc;
-}
-
-/*
-** Lock the file with the lock specified by parameter locktype - one
-** of the following:
-**
-** (1) SHARED_LOCK
-** (2) RESERVED_LOCK
-** (3) PENDING_LOCK
-** (4) EXCLUSIVE_LOCK
-**
-** Sometimes when requesting one lock state, additional lock states
-** are inserted in between. The locking might fail on one of the later
-** transitions leaving the lock state different from what it started but
-** still short of its goal. The following chart shows the allowed
-** transitions and the inserted intermediate states:
-**
-** UNLOCKED -> SHARED
-** SHARED -> RESERVED
-** SHARED -> (PENDING) -> EXCLUSIVE
-** RESERVED -> (PENDING) -> EXCLUSIVE
-** PENDING -> EXCLUSIVE
-**
-** This routine will only increase a lock. Use the sqlite3OsUnlock()
-** routine to lower a locking level.
-*/
-static int unixLock(sqlite3_file *id, int locktype){
- /* The following describes the implementation of the various locks and
- ** lock transitions in terms of the POSIX advisory shared and exclusive
- ** lock primitives (called read-locks and write-locks below, to avoid
- ** confusion with SQLite lock names). The algorithms are complicated
- ** slightly in order to be compatible with windows systems simultaneously
- ** accessing the same database file, in case that is ever required.
- **
- ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
- ** byte', each single bytes at well known offsets, and the 'shared byte
- ** range', a range of 510 bytes at a well known offset.
- **
- ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
- ** byte'. If this is successful, a random byte from the 'shared byte
- ** range' is read-locked and the lock on the 'pending byte' released.
- **
- ** A process may only obtain a RESERVED lock after it has a SHARED lock.
- ** A RESERVED lock is implemented by grabbing a write-lock on the
- ** 'reserved byte'.
- **
- ** A process may only obtain a PENDING lock after it has obtained a
- ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
- ** on the 'pending byte'. This ensures that no new SHARED locks can be
- ** obtained, but existing SHARED locks are allowed to persist. A process
- ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
- ** This property is used by the algorithm for rolling back a journal file
- ** after a crash.
- **
- ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
- ** implemented by obtaining a write-lock on the entire 'shared byte
- ** range'. Since all other locks require a read-lock on one of the bytes
- ** within this range, this ensures that no other locks are held on the
- ** database.
- **
- ** The reason a single byte cannot be used instead of the 'shared byte
- ** range' is that some versions of windows do not support read-locks. By
- ** locking a random byte from a range, concurrent SHARED locks may exist
- ** even if the locking primitive used is always a write-lock.
- */
- int rc = SQLITE_OK;
- unixFile *pFile = (unixFile*)id;
- struct unixLockInfo *pLock = pFile->pLock;
- struct flock lock;
- int s;
-
- assert( pFile );
- OSTRACE7("LOCK %d %s was %s(%s,%d) pid=%d\n", pFile->h,
- locktypeName(locktype), locktypeName(pFile->locktype),
- locktypeName(pLock->locktype), pLock->cnt , getpid());
-
- /* If there is already a lock of this type or more restrictive on the
- ** unixFile, do nothing. Don't use the end_lock: exit path, as
- ** unixEnterMutex() hasn't been called yet.
- */
- if( pFile->locktype>=locktype ){
- OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
- locktypeName(locktype));
- return SQLITE_OK;
- }
-
- /* Make sure the locking sequence is correct
- */
- assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
- assert( locktype!=PENDING_LOCK );
- assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
-
- /* This mutex is needed because pFile->pLock is shared across threads
- */
- unixEnterMutex();
-
- /* Make sure the current thread owns the pFile.
- */
- rc = transferOwnership(pFile);
- if( rc!=SQLITE_OK ){
- unixLeaveMutex();
- return rc;
- }
- pLock = pFile->pLock;
-
- /* If some thread using this PID has a lock via a different unixFile*
- ** handle that precludes the requested lock, return BUSY.
- */
- if( (pFile->locktype!=pLock->locktype &&
- (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
- ){
- rc = SQLITE_BUSY;
- goto end_lock;
- }
-
- /* If a SHARED lock is requested, and some thread using this PID already
- ** has a SHARED or RESERVED lock, then increment reference counts and
- ** return SQLITE_OK.
- */
- if( locktype==SHARED_LOCK &&
- (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
- assert( locktype==SHARED_LOCK );
- assert( pFile->locktype==0 );
- assert( pLock->cnt>0 );
- pFile->locktype = SHARED_LOCK;
- pLock->cnt++;
- pFile->pOpen->nLock++;
- goto end_lock;
- }
-
- lock.l_len = 1L;
-
- lock.l_whence = SEEK_SET;
-
- /* A PENDING lock is needed before acquiring a SHARED lock and before
- ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
- ** be released.
- */
- if( locktype==SHARED_LOCK
- || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
- ){
- lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
- lock.l_start = PENDING_BYTE;
- s = fcntl(pFile->h, F_SETLK, &lock);
- if( s==(-1) ){
- int tErrno = errno;
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
- if( IS_LOCK_ERROR(rc) ){
- pFile->lastErrno = tErrno;
- }
- goto end_lock;
- }
- }
-
-
- /* If control gets to this point, then actually go ahead and make
- ** operating system calls for the specified lock.
- */
- if( locktype==SHARED_LOCK ){
- int tErrno = 0;
- assert( pLock->cnt==0 );
- assert( pLock->locktype==0 );
-
- /* Now get the read-lock */
- lock.l_start = SHARED_FIRST;
- lock.l_len = SHARED_SIZE;
- if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
- tErrno = errno;
- }
- /* Drop the temporary PENDING lock */
- lock.l_start = PENDING_BYTE;
- lock.l_len = 1L;
- lock.l_type = F_UNLCK;
- if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
- if( s != -1 ){
- /* This could happen with a network mount */
- tErrno = errno;
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
- if( IS_LOCK_ERROR(rc) ){
- pFile->lastErrno = tErrno;
- }
- goto end_lock;
- }
- }
- if( s==(-1) ){
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
- if( IS_LOCK_ERROR(rc) ){
- pFile->lastErrno = tErrno;
- }
- }else{
- pFile->locktype = SHARED_LOCK;
- pFile->pOpen->nLock++;
- pLock->cnt = 1;
- }
- }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
- /* We are trying for an exclusive lock but another thread in this
- ** same process is still holding a shared lock. */
- rc = SQLITE_BUSY;
- }else{
- /* The request was for a RESERVED or EXCLUSIVE lock. It is
- ** assumed that there is a SHARED or greater lock on the file
- ** already.
- */
- assert( 0!=pFile->locktype );
- lock.l_type = F_WRLCK;
- switch( locktype ){
- case RESERVED_LOCK:
- lock.l_start = RESERVED_BYTE;
- break;
- case EXCLUSIVE_LOCK:
- lock.l_start = SHARED_FIRST;
- lock.l_len = SHARED_SIZE;
- break;
- default:
- assert(0);
- }
- s = fcntl(pFile->h, F_SETLK, &lock);
- if( s==(-1) ){
- int tErrno = errno;
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
- if( IS_LOCK_ERROR(rc) ){
- pFile->lastErrno = tErrno;
- }
- }
- }
-
- if( rc==SQLITE_OK ){
- pFile->locktype = locktype;
- pLock->locktype = locktype;
- }else if( locktype==EXCLUSIVE_LOCK ){
- pFile->locktype = PENDING_LOCK;
- pLock->locktype = PENDING_LOCK;
- }
-
-end_lock:
- unixLeaveMutex();
- OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
- rc==SQLITE_OK ? "ok" : "failed");
- return rc;
-}
-
-/*
-** Lower the locking level on file descriptor pFile to locktype. locktype
-** must be either NO_LOCK or SHARED_LOCK.
-**
-** If the locking level of the file descriptor is already at or below
-** the requested locking level, this routine is a no-op.
-*/
-static int unixUnlock(sqlite3_file *id, int locktype){
- struct unixLockInfo *pLock;
- struct flock lock;
- int rc = SQLITE_OK;
- unixFile *pFile = (unixFile*)id;
- int h;
-
- assert( pFile );
- OSTRACE7("UNLOCK %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
- pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
-
- assert( locktype<=SHARED_LOCK );
- if( pFile->locktype<=locktype ){
- return SQLITE_OK;
- }
- if( CHECK_THREADID(pFile) ){
- return SQLITE_MISUSE;
+ if( CHECK_THREADID(pFile) ){
+ return SQLITE_MISUSE;
}
unixEnterMutex();
h = pFile->h;
return rc;
}
+/************** End of the posix advisory lock implementation *****************
+******************************************************************************/
-#if SQLITE_ENABLE_LOCKING_STYLE
-#if !OS_VXWORKS
-#pragma mark AFP support
-
-/*
- ** The afpLockingContext structure contains all afp lock specific state
- */
-typedef struct afpLockingContext afpLockingContext;
-struct afpLockingContext {
- unsigned long long sharedByte;
- const char *dbPath;
-};
+/******************************************************************************
+****************************** No-op Locking **********************************
+**
+** Of the various locking implementations available, this is by far the
+** simplest: locking is ignored. No attempt is made to lock the database
+** file for reading or writing.
+**
+** This locking mode is appropriate for use on read-only databases
+** (for example, databases that are burned onto CD-ROM). It can
+** also be used if the application employs some external mechanism to
+** prevent simultaneous access of the same database by two or more
+** database connections. But there is a serious risk of database
+** corruption if this locking mode is used in situations where multiple
+** database connections are accessing the same database file at the same
+** time and one or more of those connections are writing.
+*/
-struct ByteRangeLockPB2
-{
- unsigned long long offset; /* offset to first byte to lock */
- unsigned long long length; /* nbr of bytes to lock */
- unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
- unsigned char unLockFlag; /* 1 = unlock, 0 = lock */
- unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */
- int fd; /* file desc to assoc this lock with */
-};
+/*
+** The nolockLockingContext is void
+*/
+typedef void nolockLockingContext;
-#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
+static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
+ UNUSED_PARAMETER(NotUsed);
+ *pResOut = 0;
+ return SQLITE_OK;
+}
-/*
- ** Return SQLITE_OK on success, SQLITE_BUSY on failure.
- */
-static int _AFPFSSetLock(
- const char *path,
- unixFile *pFile,
- unsigned long long offset,
- unsigned long long length,
- int setLockFlag
-){
- struct ByteRangeLockPB2 pb;
- int err;
-
- pb.unLockFlag = setLockFlag ? 0 : 1;
- pb.startEndFlag = 0;
- pb.offset = offset;
- pb.length = length;
- pb.fd = pFile->h;
- //SimulateIOErrorBenign(1);
- //SimulateIOError( pb.fd=(-1) )
- //SimulateIOErrorBenign(0);
-
- OSTRACE6("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n",
- (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), offset, length);
- err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
- if ( err==-1 ) {
- int rc;
- int tErrno = errno;
- OSTRACE4("AFPSETLOCK failed to fsctl() '%s' %d %s\n", path, tErrno, strerror(tErrno));
-#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
- rc = SQLITE_BUSY;
-#else
- rc = sqliteErrorFromPosixError(tErrno, setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
-#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
- if( IS_LOCK_ERROR(rc) ){
- pFile->lastErrno = tErrno;
- }
- return rc;
- } else {
- return SQLITE_OK;
- }
+static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
+ UNUSED_PARAMETER2(NotUsed, NotUsed2);
+ return SQLITE_OK;
}
-/* AFP-style reserved lock checking following the behavior of
+static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
+ UNUSED_PARAMETER2(NotUsed, NotUsed2);
+ return SQLITE_OK;
+}
+
+/*
+** Close a file.
+*/
+static int nolockClose(sqlite3_file *id) {
+ int rc;
+ if( OS_VXWORKS ) unixEnterMutex();
+ rc = closeUnixFile(id);
+ if( OS_VXWORKS ) unixLeaveMutex();
+ return rc;
+}
+
+/******************* End of the no-op lock implementation *********************
+******************************************************************************/
+
+/******************************************************************************
+************************* Begin dot-file Locking ******************************
+**
+** The dotfile locking implementation uses the existence of separate lock
+** files in order to control access to the database. This works on just
+** about every filesystem imaginable. But there are serious downsides:
+**
+** (1) There is zero concurrency. A single reader blocks all other
+** connections from reading or writing the database.
+**
+** (2) An application crash or power loss can leave stale lock files
+** sitting around that need to be cleared manually.
+**
+** Nevertheless, a dotlock is an appropriate locking mode for use if no
+** other locking strategy is available.
+*/
+
+/*
+** The file suffix added to the database filename in order to create the
+** lock file.
+*/
+#define DOTLOCK_SUFFIX ".lock"
+
+/* Dotlock-style reserved lock checking following the behavior of
** unixCheckReservedLock, see the unixCheckReservedLock function comments */
-static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
+static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
-
+
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
- afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
-
+
/* Check if a thread in this process holds such a lock */
if( pFile->locktype>SHARED_LOCK ){
reserved = 1;
}
- /* Otherwise see if some other process holds it.
- */
+ /* Otherwise see if some other process holds it. */
if( !reserved ){
- /* lock the RESERVED byte */
- int lrc = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
- if( SQLITE_OK==lrc ){
- /* if we succeeded in taking the reserved lock, unlock it to restore
- ** the original state */
- lrc = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
- } else {
- /* if we failed to get the lock then someone else must have it */
+ char *zLockFile = (char *)pFile->lockingContext;
+ struct stat statBuf;
+
+ if( lstat(zLockFile, &statBuf)==0 ){
+ /* file exists, someone else has the lock */
reserved = 1;
- }
- if( IS_LOCK_ERROR(lrc) ){
- rc=lrc;
+ }else{
+ /* file does not exist, we could have it if we want it */
+ int tErrno = errno;
+ if( ENOENT != tErrno ){
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
+ pFile->lastErrno = tErrno;
+ }
}
}
-
OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
-
+
*pResOut = reserved;
return rc;
}
-/* AFP-style locking following the behavior of unixLock, see the unixLock
-** function comments for details of lock management. */
-static int afpLock(sqlite3_file *id, int locktype){
- int rc = SQLITE_OK;
+static int dotlockLock(sqlite3_file *id, int locktype) {
unixFile *pFile = (unixFile*)id;
- afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+ int fd;
+ char *zLockFile = (char *)pFile->lockingContext;
+ int rc=SQLITE_OK;
+
+ /* if we already have a lock, it is exclusive.
+ ** Just adjust level and punt on outta here. */
+ if (pFile->locktype > NO_LOCK) {
+ pFile->locktype = locktype;
+#if !OS_VXWORKS
+ /* Always update the timestamp on the old file */
+ utimes(zLockFile, NULL);
+#endif
+ rc = SQLITE_OK;
+ goto dotlock_end_lock;
+ }
- assert( pFile );
- OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h,
- locktypeName(locktype), locktypeName(pFile->locktype), getpid());
+ /* check to see if lock file already exists */
+ struct stat statBuf;
+ if (lstat(zLockFile,&statBuf) == 0){
+ rc = SQLITE_BUSY; /* it does, busy */
+ goto dotlock_end_lock;
+ }
+
+ /* grab an exclusive lock */
+ fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
+ if( fd<0 ){
+ /* failed to open/create the file, someone else may have stolen the lock */
+ int tErrno = errno;
+ if( EEXIST == tErrno ){
+ rc = SQLITE_BUSY;
+ } else {
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+ if( IS_LOCK_ERROR(rc) ){
+ pFile->lastErrno = tErrno;
+ }
+ }
+ goto dotlock_end_lock;
+ }
+ if( close(fd) ){
+ pFile->lastErrno = errno;
+ rc = SQLITE_IOERR_CLOSE;
+ }
+
+ /* got it, set the type and return ok */
+ pFile->locktype = locktype;
- /* If there is already a lock of this type or more restrictive on the
- ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
- ** unixEnterMutex() hasn't been called yet.
- */
- if( pFile->locktype>=locktype ){
- OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
- locktypeName(locktype));
+ dotlock_end_lock:
+ return rc;
+}
+
+/*
+** Lower the dot-file lock held on file "id" to locktype, which must be
+** NO_LOCK or SHARED_LOCK (see the assert below).
+**
+** If the file is already at the requested level this is a no-op. A
+** request for SHARED_LOCK only records the new level. Otherwise the lock
+** file named by pFile->lockingContext is unlinked and the level is set to
+** NO_LOCK.
+**
+** If unlink() fails with ENOENT, SQLITE_OK is returned. Any other unlink()
+** failure returns the code produced by sqliteErrorFromPosixError(), and
+** errno is saved in pFile->lastErrno when IS_LOCK_ERROR(rc) is true.
+*/
+static int dotlockUnlock(sqlite3_file *id, int locktype) {
+ unixFile *pFile = (unixFile*)id;
+ char *zLockFile = (char *)pFile->lockingContext;
+
+ assert( pFile );
+ OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
+ pFile->locktype, getpid());
+ assert( locktype<=SHARED_LOCK );
+
+ /* no-op if possible */
+ if( pFile->locktype==locktype ){
return SQLITE_OK;
}
-
- /* Make sure the locking sequence is correct
- */
- assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
- assert( locktype!=PENDING_LOCK );
- assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
- /* This mutex is needed because pFile->pLock is shared across threads
- */
- unixEnterMutex();
-
- /* Make sure the current thread owns the pFile.
- */
- rc = transferOwnership(pFile);
- if( rc!=SQLITE_OK ){
- unixLeaveMutex();
- return rc;
+ /* shared can just be set because we always have an exclusive */
+ if (locktype==SHARED_LOCK) {
+ pFile->locktype = locktype;
+ return SQLITE_OK;
}
-
- /* A PENDING lock is needed before acquiring a SHARED lock and before
- ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
- ** be released.
- */
- if( locktype==SHARED_LOCK
- || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
- ){
- int failed;
- failed = _AFPFSSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
- if (failed) {
- rc = failed;
- goto afp_end_lock;
+
+ /* no, really, unlock. */
+ if (unlink(zLockFile) ) {
+ /* rc must start out as SQLITE_OK: when errno is ENOENT it is never
+ ** assigned below, yet it is still tested and returned. */
+ int rc = SQLITE_OK;
+ int tErrno = errno;
+ if( ENOENT != tErrno ){
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
}
+ if( IS_LOCK_ERROR(rc) ){
+ pFile->lastErrno = tErrno;
+ }
+ return rc;
}
-
- /* If control gets to this point, then actually go ahead and make
- ** operating system calls for the specified lock.
- */
- if( locktype==SHARED_LOCK ){
- int lk, lrc1, lrc2, lrc1Errno;
-
- /* Now get the read-lock SHARED_LOCK */
- /* note that the quality of the randomness doesn't matter that much */
- lk = random();
- context->sharedByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
- lrc1 = _AFPFSSetLock(context->dbPath, pFile,
- SHARED_FIRST+context->sharedByte, 1, 1);
- if( IS_LOCK_ERROR(lrc1) ){
- lrc1Errno = pFile->lastErrno;
- }
- /* Drop the temporary PENDING lock */
- lrc2 = _AFPFSSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
-
- if( IS_LOCK_ERROR(lrc1) ) {
- pFile->lastErrno = lrc1Errno;
- rc = lrc1;
- goto afp_end_lock;
- } else if( IS_LOCK_ERROR(lrc2) ){
- rc = lrc2;
- goto afp_end_lock;
- } else if( lrc1 != SQLITE_OK ) {
- rc = lrc1;
- } else {
- pFile->locktype = SHARED_LOCK;
- pFile->pOpen->nLock++;
- }
- }else{
- /* The request was for a RESERVED or EXCLUSIVE lock. It is
- ** assumed that there is a SHARED or greater lock on the file
- ** already.
- */
- int failed = 0;
- assert( 0!=pFile->locktype );
- if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
- /* Acquire a RESERVED lock */
- failed = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
- }
- if (!failed && locktype == EXCLUSIVE_LOCK) {
- /* Acquire an EXCLUSIVE lock */
-
- /* Remove the shared lock before trying the range. we'll need to
- ** reestablish the shared lock if we can't get the afpUnlock
- */
- if( !(failed = _AFPFSSetLock(context->dbPath, pFile, SHARED_FIRST +
- context->sharedByte, 1, 0)) ){
- int failed2 = SQLITE_OK;
- /* now attemmpt to get the exclusive lock range */
- failed = _AFPFSSetLock(context->dbPath, pFile, SHARED_FIRST,
- SHARED_SIZE, 1);
- if( failed && (failed2 = _AFPFSSetLock(context->dbPath, pFile,
- SHARED_FIRST + context->sharedByte, 1, 1)) ){
- /* Can't reestablish the shared lock. Sqlite can't deal, this is
- ** a critical I/O error
- */
- rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 :
- SQLITE_IOERR_LOCK;
- goto afp_end_lock;
- }
- }else{
- rc = failed;
- }
- }
- if( failed ){
- rc = failed;
- }
- }
-
- if( rc==SQLITE_OK ){
- pFile->locktype = locktype;
- }else if( locktype==EXCLUSIVE_LOCK ){
- pFile->locktype = PENDING_LOCK;
- }
-
-afp_end_lock:
- unixLeaveMutex();
- OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
- rc==SQLITE_OK ? "ok" : "failed");
- return rc;
-}
-
-/*
-** Lower the locking level on file descriptor pFile to locktype. locktype
-** must be either NO_LOCK or SHARED_LOCK.
-**
-** If the locking level of the file descriptor is already at or below
-** the requested locking level, this routine is a no-op.
-*/
-static int afpUnlock(sqlite3_file *id, int locktype) {
- int rc = SQLITE_OK;
- unixFile *pFile = (unixFile*)id;
- afpLockingContext *pCtx = (afpLockingContext *) pFile->lockingContext;
-
- assert( pFile );
- OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
- pFile->locktype, getpid());
-
- assert( locktype<=SHARED_LOCK );
- if( pFile->locktype<=locktype ){
- return SQLITE_OK;
- }
- if( CHECK_THREADID(pFile) ){
- return SQLITE_MISUSE;
- }
- unixEnterMutex();
- if( pFile->locktype>SHARED_LOCK ){
-
- if( pFile->locktype==EXCLUSIVE_LOCK ){
- rc = _AFPFSSetLock(pCtx->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
- if( rc==SQLITE_OK && locktype==SHARED_LOCK ){
- /* only re-establish the shared lock if necessary */
- int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
- rc = _AFPFSSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 1);
- }
- }
- if( rc==SQLITE_OK && pFile->locktype>=PENDING_LOCK ){
- rc = _AFPFSSetLock(pCtx->dbPath, pFile, PENDING_BYTE, 1, 0);
- }
- if( rc==SQLITE_OK && pFile->locktype>=RESERVED_LOCK ){
- rc = _AFPFSSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1, 0);
- }
- }else if( locktype==NO_LOCK ){
- /* clear the shared lock */
- int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
- rc = _AFPFSSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 0);
- }
-
- if( rc==SQLITE_OK ){
- if( locktype==NO_LOCK ){
- struct unixOpenCnt *pOpen = pFile->pOpen;
- pOpen->nLock--;
- assert( pOpen->nLock>=0 );
- if( pOpen->nLock==0 && pOpen->nPending>0 ){
- int i;
- for(i=0; i<pOpen->nPending; i++){
- if( pOpen->aPending[i] < 0 ) continue;
- if( close(pOpen->aPending[i]) ){
- pFile->lastErrno = errno;
- rc = SQLITE_IOERR_CLOSE;
- }else{
- pOpen->aPending[i] = -1;
- }
- }
- if( rc==SQLITE_OK ){
- sqlite3_free(pOpen->aPending);
- pOpen->nPending = 0;
- pOpen->aPending = 0;
- }
- }
- }
- }
-end_afpunlock:
- unixLeaveMutex();
- if( rc==SQLITE_OK ) pFile->locktype = locktype;
- return rc;
+ pFile->locktype = NO_LOCK;
+ return SQLITE_OK;
}
/*
-** Close a file & cleanup AFP specific locking context
+** Close a file.
*/
-static int afpClose(sqlite3_file *id) {
+static int dotlockClose(sqlite3_file *id) {
+ int rc;
if( id ){
unixFile *pFile = (unixFile*)id;
- afpUnlock(id, NO_LOCK);
- unixEnterMutex();
- if( pFile->pOpen && pFile->pOpen->nLock ){
- /* If there are outstanding locks, do not actually close the file just
- ** yet because that would clear those locks. Instead, add the file
- ** descriptor to pOpen->aPending. It will be automatically closed when
- ** the last lock is cleared.
- */
- int *aNew;
- struct unixOpenCnt *pOpen = pFile->pOpen;
- aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
- if( aNew==0 ){
- /* If a malloc fails, just leak the file descriptor */
- }else{
- pOpen->aPending = aNew;
- pOpen->aPending[pOpen->nPending] = pFile->h;
- pOpen->nPending++;
- pFile->h = -1;
- }
- }
- releaseOpenCnt(pFile->pOpen);
+ dotlockUnlock(id, NO_LOCK);
sqlite3_free(pFile->lockingContext);
- closeUnixFile(id);
- unixLeaveMutex();
}
- return SQLITE_OK;
+ if( OS_VXWORKS ) unixEnterMutex();
+ rc = closeUnixFile(id);
+ if( OS_VXWORKS ) unixLeaveMutex();
+ return rc;
}
+/****************** End of the dot-file lock implementation *******************
+******************************************************************************/
-
-#pragma mark flock() style locking
+/******************************************************************************
+************************** Begin flock Locking ********************************
+**
+** Use the flock() system call to do file locking.
+**
+** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if
+** compiling for VXWORKS.
+*/
+#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
/*
** The flockLockingContext is not used
return closeUnixFile(id);
}
-#endif /* !OS_VXWORKS */
+#endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS */
-#pragma mark Old-School .lock file based locking
-#define DOTLOCK_SUFFIX ".lock"
+/******************* End of the flock lock implementation *********************
+******************************************************************************/
-/* Dotlock-style reserved lock checking following the behavior of
+/******************************************************************************
+************************ Begin Named Semaphore Locking ************************
+**
+** Named semaphore locking is only supported on VxWorks.
+*/
+#if OS_VXWORKS
+
+/* Namedsem-style reserved lock checking following the behavior of
** unixCheckReservedLock, see the unixCheckReservedLock function comments */
-static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
+static int semCheckReservedLock(sqlite3_file *id, int *pResOut) {
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
/* Otherwise see if some other process holds it. */
if( !reserved ){
- char *zLockFile = (char *)pFile->lockingContext;
+ sem_t *pSem = pFile->pOpen->pSem;
struct stat statBuf;
-
- if( lstat(zLockFile, &statBuf)==0 ){
- /* file exists, someone else has the lock */
- reserved = 1;
- }else{
- /* file does not exist, we could have it if we want it */
+
+ if( sem_trywait(pSem)==-1 ){
int tErrno = errno;
- if( ENOENT != tErrno ){
+ if( EAGAIN != tErrno ){
rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
pFile->lastErrno = tErrno;
+ } else {
+ /* someone else has the lock when we are in NO_LOCK */
+ reserved = (pFile->locktype < SHARED_LOCK);
}
+ }else{
+ /* we could have it if we want it */
+ sem_post(pSem);
}
}
OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
return rc;
}
-static int dotlockLock(sqlite3_file *id, int locktype) {
+static int semLock(sqlite3_file *id, int locktype) {
unixFile *pFile = (unixFile*)id;
int fd;
- char *zLockFile = (char *)pFile->lockingContext;
- int rc=SQLITE_OK;
+ sem_t *pSem = pFile->pOpen->pSem;
+ int rc = SQLITE_OK;
/* if we already have a lock, it is exclusive.
** Just adjust level and punt on outta here. */
if (pFile->locktype > NO_LOCK) {
pFile->locktype = locktype;
-#if !OS_VXWORKS
- /* Always update the timestamp on the old file */
- utimes(zLockFile, NULL);
-#endif
rc = SQLITE_OK;
- goto dotlock_end_lock;
+ goto sem_end_lock;
}
- /* check to see if lock file already exists */
- struct stat statBuf;
- if (lstat(zLockFile,&statBuf) == 0){
- rc = SQLITE_BUSY; /* it does, busy */
- goto dotlock_end_lock;
+ /* lock semaphore now but bail out when already locked. */
+ if( sem_trywait(pSem)==-1 ){
+ rc = SQLITE_BUSY;
+ goto sem_end_lock;
}
-
- /* grab an exclusive lock */
- fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
- if( fd<0 ){
- /* failed to open/create the file, someone else may have stolen the lock */
- int tErrno = errno;
- if( EEXIST == tErrno ){
- rc = SQLITE_BUSY;
- } else {
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
- if( IS_LOCK_ERROR(rc) ){
- pFile->lastErrno = tErrno;
- }
- }
- goto dotlock_end_lock;
- }
- if( close(fd) ){
- pFile->lastErrno = errno;
- rc = SQLITE_IOERR_CLOSE;
- }
-
- /* got it, set the type and return ok */
- pFile->locktype = locktype;
-
- dotlock_end_lock:
- return rc;
-}
-
-static int dotlockUnlock(sqlite3_file *id, int locktype) {
- unixFile *pFile = (unixFile*)id;
- char *zLockFile = (char *)pFile->lockingContext;
-
- assert( pFile );
- OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
- pFile->locktype, getpid());
- assert( locktype<=SHARED_LOCK );
+
+ /* got it, set the type and return ok */
+ pFile->locktype = locktype;
+
+ sem_end_lock:
+ return rc;
+}
+
+static int semUnlock(sqlite3_file *id, int locktype) {
+ unixFile *pFile = (unixFile*)id;
+ sem_t *pSem = pFile->pOpen->pSem;
+
+ assert( pFile );
+ assert( pSem );
+ OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
+ pFile->locktype, getpid());
+ assert( locktype<=SHARED_LOCK );
/* no-op if possible */
if( pFile->locktype==locktype ){
return SQLITE_OK;
}
- /* no, really, unlock. */
- if (unlink(zLockFile) ) {
+ /* no, really unlock. */
+ if ( sem_post(pSem)==-1 ) {
int rc, tErrno = errno;
- if( ENOENT != tErrno ){
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
- }
+ rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
if( IS_LOCK_ERROR(rc) ){
pFile->lastErrno = tErrno;
}
/*
** Close a file.
*/
-static int dotlockClose(sqlite3_file *id) {
- int rc;
+static int semClose(sqlite3_file *id) {
if( id ){
unixFile *pFile = (unixFile*)id;
- dotlockUnlock(id, NO_LOCK);
- sqlite3_free(pFile->lockingContext);
+ semUnlock(id, NO_LOCK);
+ assert( pFile );
+ unixEnterMutex();
+ releaseLockInfo(pFile->pLock);
+ releaseOpenCnt(pFile->pOpen);
+ closeUnixFile(id);
+ unixLeaveMutex();
}
- if( OS_VXWORKS ) unixEnterMutex();
- rc = closeUnixFile(id);
- if( OS_VXWORKS ) unixLeaveMutex();
- return rc;
+ return SQLITE_OK;
}
-#if OS_VXWORKS
+#endif /* OS_VXWORKS */
+/*
+** Named semaphore locking is only available on VxWorks.
+**
+*************** End of the named semaphore lock implementation ****************
+******************************************************************************/
-#pragma mark POSIX/vxWorks named semaphore based locking
-/* Namedsem-style reserved lock checking following the behavior of
+/******************************************************************************
+*************************** Begin AFP Locking *********************************
+**
+** AFP is the Apple Filing Protocol. AFP is a network filesystem found
+** on Apple Macintosh computers - both OS9 and OSX.
+**
+** Third-party implementations of AFP are available. But this code here
+** only works on OSX.
+*/
+
+#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
+/*
+** The afpLockingContext structure contains all afp lock specific state
+*/
+typedef struct afpLockingContext afpLockingContext;
+struct afpLockingContext {
+ unsigned long long sharedByte;
+ const char *dbPath;
+};
+
+struct ByteRangeLockPB2
+{
+ unsigned long long offset; /* offset to first byte to lock */
+ unsigned long long length; /* nbr of bytes to lock */
+ unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
+ unsigned char unLockFlag; /* 1 = unlock, 0 = lock */
+ unsigned char startEndFlag; /* 1=rel to end of fork, 0=rel to start */
+ int fd; /* file desc to assoc this lock with */
+};
+
+#define afpfsByteRangeLock2FSCTL _IOWR('z', 23, struct ByteRangeLockPB2)
+
+/*
+ ** Set or clear a byte-range lock by issuing the afpfsByteRangeLock2FSCTL
+ ** request through fsctl() against "path".
+ **
+ ** offset and length are copied into the request and pFile->h supplies
+ ** the file descriptor. A non-zero setLockFlag asks for a lock
+ ** (unLockFlag is 0); a zero setLockFlag asks for an unlock (unLockFlag
+ ** is 1).
+ **
+ ** Return SQLITE_OK when fsctl() does not return -1. When it does, return
+ ** SQLITE_BUSY if SQLITE_IGNORE_AFP_LOCK_ERRORS is defined; otherwise
+ ** return the value of sqliteErrorFromPosixError() called with errno and
+ ** SQLITE_IOERR_LOCK (locking) or SQLITE_IOERR_UNLOCK (unlocking). errno
+ ** is saved in pFile->lastErrno when IS_LOCK_ERROR(rc) is true.
+ */
+static int _AFPFSSetLock(
+ const char *path,
+ unixFile *pFile,
+ unsigned long long offset,
+ unsigned long long length,
+ int setLockFlag
+){
+ struct ByteRangeLockPB2 pb;
+ int err;
+
+ pb.unLockFlag = setLockFlag ? 0 : 1;
+ pb.startEndFlag = 0;
+ pb.offset = offset;
+ pb.length = length;
+ pb.fd = pFile->h;
+ /* SimulateIOErrorBenign(1); */
+ /* SimulateIOError( pb.fd=(-1) ) */
+ /* SimulateIOErrorBenign(0); */
+
+ OSTRACE6("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n",
+ (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""),
+ offset, length);
+ err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
+ if ( err==-1 ) {
+ int rc;
+ int tErrno = errno; /* capture errno before the trace call below */
+ OSTRACE4("AFPSETLOCK failed to fsctl() '%s' %d %s\n",
+ path, tErrno, strerror(tErrno));
+#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
+ rc = SQLITE_BUSY;
+#else
+ rc = sqliteErrorFromPosixError(tErrno,
+ setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
+#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
+ if( IS_LOCK_ERROR(rc) ){
+ pFile->lastErrno = tErrno;
+ }
+ return rc;
+ } else {
+ return SQLITE_OK;
+ }
+}
+
+/* AFP-style reserved lock checking following the behavior of
** unixCheckReservedLock, see the unixCheckReservedLock function comments */
-static int namedsemCheckReservedLock(sqlite3_file *id, int *pResOut) {
+static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
int rc = SQLITE_OK;
int reserved = 0;
unixFile *pFile = (unixFile*)id;
-
+
SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
assert( pFile );
-
+ afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+
/* Check if a thread in this process holds such a lock */
if( pFile->locktype>SHARED_LOCK ){
reserved = 1;
}
- /* Otherwise see if some other process holds it. */
+ /* Otherwise see if some other process holds it.
+ */
if( !reserved ){
- sem_t *pSem = pFile->pOpen->pSem;
- struct stat statBuf;
-
- if( sem_trywait(pSem)==-1 ){
- int tErrno = errno;
- if( EAGAIN != tErrno ){
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
- pFile->lastErrno = tErrno;
- } else {
- /* someone else has the lock when we are in NO_LOCK */
- reserved = (pFile->locktype < SHARED_LOCK);
- }
- }else{
- /* we could have it if we want it */
- sem_post(pSem);
+ /* lock the RESERVED byte */
+ int lrc = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
+ if( SQLITE_OK==lrc ){
+ /* if we succeeded in taking the reserved lock, unlock it to restore
+ ** the original state */
+ lrc = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
+ } else {
+ /* if we failed to get the lock then someone else must have it */
+ reserved = 1;
+ }
+ if( IS_LOCK_ERROR(lrc) ){
+ rc=lrc;
}
}
+
OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
-
+
*pResOut = reserved;
return rc;
}
-static int namedsemLock(sqlite3_file *id, int locktype) {
- unixFile *pFile = (unixFile*)id;
- int fd;
- sem_t *pSem = pFile->pOpen->pSem;
+/* AFP-style locking following the behavior of unixLock, see the unixLock
+** function comments for details of lock management. */
+static int afpLock(sqlite3_file *id, int locktype){
int rc = SQLITE_OK;
-
- /* if we already have a lock, it is exclusive.
- ** Just adjust level and punt on outta here. */
- if (pFile->locktype > NO_LOCK) {
- pFile->locktype = locktype;
- rc = SQLITE_OK;
- goto namedsem_end_lock;
- }
-
- /* lock semaphore now but bail out when already locked. */
- if( sem_trywait(pSem)==-1 ){
- rc = SQLITE_BUSY;
- goto namedsem_end_lock;
- }
-
- /* got it, set the type and return ok */
- pFile->locktype = locktype;
-
- namedsem_end_lock:
- return rc;
-}
-
-static int namedsemUnlock(sqlite3_file *id, int locktype) {
unixFile *pFile = (unixFile*)id;
- sem_t *pSem = pFile->pOpen->pSem;
-
- assert( pFile );
- assert( pSem );
- OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
- pFile->locktype, getpid());
- assert( locktype<=SHARED_LOCK );
-
- /* no-op if possible */
- if( pFile->locktype==locktype ){
- return SQLITE_OK;
- }
+ afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
- /* shared can just be set because we always have an exclusive */
- if (locktype==SHARED_LOCK) {
- pFile->locktype = locktype;
+ assert( pFile );
+ OSTRACE5("LOCK %d %s was %s pid=%d\n", pFile->h,
+ locktypeName(locktype), locktypeName(pFile->locktype), getpid());
+
+ /* If there is already a lock of this type or more restrictive on the
+ ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
+ ** unixEnterMutex() hasn't been called yet.
+ */
+ if( pFile->locktype>=locktype ){
+ OSTRACE3("LOCK %d %s ok (already held)\n", pFile->h,
+ locktypeName(locktype));
return SQLITE_OK;
}
+
+ /* Make sure the locking sequence is correct
+ */
+ assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
+ assert( locktype!=PENDING_LOCK );
+ assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
- /* no, really unlock. */
- if ( sem_post(pSem)==-1 ) {
- int rc, tErrno = errno;
- rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
- if( IS_LOCK_ERROR(rc) ){
- pFile->lastErrno = tErrno;
- }
- return rc;
- }
- pFile->locktype = NO_LOCK;
- return SQLITE_OK;
-}
+ /* This mutex is needed because pFile->pLock is shared across threads
+ */
+ unixEnterMutex();
-/*
- ** Close a file.
- */
-static int namedsemClose(sqlite3_file *id) {
- if( id ){
- unixFile *pFile = (unixFile*)id;
- namedsemUnlock(id, NO_LOCK);
- assert( pFile );
- unixEnterMutex();
- releaseLockInfo(pFile->pLock);
- releaseOpenCnt(pFile->pOpen);
- closeUnixFile(id);
+ /* Make sure the current thread owns the pFile.
+ */
+ rc = transferOwnership(pFile);
+ if( rc!=SQLITE_OK ){
unixLeaveMutex();
+ return rc;
}
- return SQLITE_OK;
-}
-
-#endif /* OS_VXWORKS */
-
-#pragma mark Proxy locking support
-
-static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
- unixFile *pFile = (unixFile*)id;
- int rc = takeConch(pFile);
+
+ /* A PENDING lock is needed before acquiring a SHARED lock and before
+ ** acquiring an EXCLUSIVE lock. For the SHARED lock, the PENDING will
+ ** be released.
+ */
+ if( locktype==SHARED_LOCK
+ || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
+ ){
+ int failed;
+ failed = _AFPFSSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
+ if (failed) {
+ rc = failed;
+ goto afp_end_lock;
+ }
+ }
+
+ /* If control gets to this point, then actually go ahead and make
+ ** operating system calls for the specified lock.
+ */
+ if( locktype==SHARED_LOCK ){
+ int lk, lrc1, lrc2, lrc1Errno;
+
+ /* Now get the read-lock SHARED_LOCK */
+ /* note that the quality of the randomness doesn't matter that much */
+ lk = random();
+ context->sharedByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
+ lrc1 = _AFPFSSetLock(context->dbPath, pFile,
+ SHARED_FIRST+context->sharedByte, 1, 1);
+ if( IS_LOCK_ERROR(lrc1) ){
+ lrc1Errno = pFile->lastErrno;
+ }
+ /* Drop the temporary PENDING lock */
+ lrc2 = _AFPFSSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
+
+ if( IS_LOCK_ERROR(lrc1) ) {
+ pFile->lastErrno = lrc1Errno;
+ rc = lrc1;
+ goto afp_end_lock;
+ } else if( IS_LOCK_ERROR(lrc2) ){
+ rc = lrc2;
+ goto afp_end_lock;
+ } else if( lrc1 != SQLITE_OK ) {
+ rc = lrc1;
+ } else {
+ pFile->locktype = SHARED_LOCK;
+ pFile->pOpen->nLock++;
+ }
+ }else{
+ /* The request was for a RESERVED or EXCLUSIVE lock. It is
+ ** assumed that there is a SHARED or greater lock on the file
+ ** already.
+ */
+ int failed = 0;
+ assert( 0!=pFile->locktype );
+ if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
+ /* Acquire a RESERVED lock */
+ failed = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
+ }
+ if (!failed && locktype == EXCLUSIVE_LOCK) {
+ /* Acquire an EXCLUSIVE lock */
+
+ /* Remove the shared lock before trying the range. we'll need to
+ ** reestablish the shared lock if we can't get the afpUnlock
+ */
+ if( !(failed = _AFPFSSetLock(context->dbPath, pFile, SHARED_FIRST +
+ context->sharedByte, 1, 0)) ){
+ int failed2 = SQLITE_OK;
+ /* now attemmpt to get the exclusive lock range */
+ failed = _AFPFSSetLock(context->dbPath, pFile, SHARED_FIRST,
+ SHARED_SIZE, 1);
+ if( failed && (failed2 = _AFPFSSetLock(context->dbPath, pFile,
+ SHARED_FIRST + context->sharedByte, 1, 1)) ){
+ /* Can't reestablish the shared lock. Sqlite can't deal, this is
+ ** a critical I/O error
+ */
+ rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 :
+ SQLITE_IOERR_LOCK;
+ goto afp_end_lock;
+ }
+ }else{
+ rc = failed;
+ }
+ }
+ if( failed ){
+ rc = failed;
+ }
+ }
+
if( rc==SQLITE_OK ){
- proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
- unixFile *proxy = pCtx->lockProxy;
- return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);
+ pFile->locktype = locktype;
+ }else if( locktype==EXCLUSIVE_LOCK ){
+ pFile->locktype = PENDING_LOCK;
}
+
+afp_end_lock:
+ unixLeaveMutex();
+ OSTRACE4("LOCK %d %s %s\n", pFile->h, locktypeName(locktype),
+ rc==SQLITE_OK ? "ok" : "failed");
return rc;
}
-static int proxyLock(sqlite3_file *id, int locktype) {
+/*
+** Lower the locking level on file descriptor pFile to locktype. locktype
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+*/
+static int afpUnlock(sqlite3_file *id, int locktype) {
+ int rc = SQLITE_OK;
unixFile *pFile = (unixFile*)id;
- int rc = takeConch(pFile);
- if( rc==SQLITE_OK ){
- proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
- unixFile *proxy = pCtx->lockProxy;
- rc = proxy->pMethod->xLock((sqlite3_file*)proxy, locktype);
- pFile->locktype = proxy->locktype;
+ afpLockingContext *pCtx = (afpLockingContext *) pFile->lockingContext;
+
+ assert( pFile );
+ OSTRACE5("UNLOCK %d %d was %d pid=%d\n", pFile->h, locktype,
+ pFile->locktype, getpid());
+
+ assert( locktype<=SHARED_LOCK );
+ if( pFile->locktype<=locktype ){
+ return SQLITE_OK;
+ }
+ if( CHECK_THREADID(pFile) ){
+ return SQLITE_MISUSE;
+ }
+ unixEnterMutex();
+ if( pFile->locktype>SHARED_LOCK ){
+
+ if( pFile->locktype==EXCLUSIVE_LOCK ){
+ rc = _AFPFSSetLock(pCtx->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
+ if( rc==SQLITE_OK && locktype==SHARED_LOCK ){
+ /* only re-establish the shared lock if necessary */
+ int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
+ rc = _AFPFSSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 1);
+ }
+ }
+ if( rc==SQLITE_OK && pFile->locktype>=PENDING_LOCK ){
+ rc = _AFPFSSetLock(pCtx->dbPath, pFile, PENDING_BYTE, 1, 0);
+ }
+ if( rc==SQLITE_OK && pFile->locktype>=RESERVED_LOCK ){
+ rc = _AFPFSSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1, 0);
+ }
+ }else if( locktype==NO_LOCK ){
+ /* clear the shared lock */
+ int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
+ rc = _AFPFSSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 0);
}
- return rc;
-}
-static int proxyUnlock(sqlite3_file *id, int locktype) {
- unixFile *pFile = (unixFile*)id;
- int rc = takeConch(pFile);
if( rc==SQLITE_OK ){
- proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
- unixFile *proxy = pCtx->lockProxy;
- rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, locktype);
- pFile->locktype = proxy->locktype;
+ if( locktype==NO_LOCK ){
+ struct unixOpenCnt *pOpen = pFile->pOpen;
+ pOpen->nLock--;
+ assert( pOpen->nLock>=0 );
+ if( pOpen->nLock==0 && pOpen->nPending>0 ){
+ int i;
+ for(i=0; i<pOpen->nPending; i++){
+ if( pOpen->aPending[i] < 0 ) continue;
+ if( close(pOpen->aPending[i]) ){
+ pFile->lastErrno = errno;
+ rc = SQLITE_IOERR_CLOSE;
+ }else{
+ pOpen->aPending[i] = -1;
+ }
+ }
+ if( rc==SQLITE_OK ){
+ sqlite3_free(pOpen->aPending);
+ pOpen->nPending = 0;
+ pOpen->aPending = 0;
+ }
+ }
+ }
}
+end_afpunlock:
+ unixLeaveMutex();
+ if( rc==SQLITE_OK ) pFile->locktype = locktype;
return rc;
}
/*
- ** Close a file.
- */
-static int proxyClose(sqlite3_file *id) {
+** Close a file & cleanup AFP specific locking context
+*/
+static int afpClose(sqlite3_file *id) {
if( id ){
unixFile *pFile = (unixFile*)id;
- proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
- unixFile *lockProxy = pCtx->lockProxy;
- unixFile *conchFile = pCtx->conchFile;
- int rc = SQLITE_OK;
-
- if( lockProxy ){
- rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
- if( rc ) return rc;
- rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
- if( rc ) return rc;
- sqlite3_free(lockProxy);
- }
- if( conchFile ){
- if( pCtx->conchHeld ){
- rc = releaseConch(pFile);
- if( rc ) return rc;
+ afpUnlock(id, NO_LOCK);
+ unixEnterMutex();
+ if( pFile->pOpen && pFile->pOpen->nLock ){
+ /* If there are outstanding locks, do not actually close the file just
+ ** yet because that would clear those locks. Instead, add the file
+ ** descriptor to pOpen->aPending. It will be automatically closed when
+ ** the last lock is cleared.
+ */
+ int *aNew;
+ struct unixOpenCnt *pOpen = pFile->pOpen;
+ aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
+ if( aNew==0 ){
+ /* If a malloc fails, just leak the file descriptor */
+ }else{
+ pOpen->aPending = aNew;
+ pOpen->aPending[pOpen->nPending] = pFile->h;
+ pOpen->nPending++;
+ pFile->h = -1;
}
- rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
- if( rc ) return rc;
- sqlite3_free(conchFile);
}
- sqlite3_free(pCtx->lockProxyPath);
- sqlite3_free(pCtx->conchFilePath);
- sqlite3_free(pCtx->dbPath);
- /* restore the original locking context and pMethod then close it */
- pFile->lockingContext = pCtx->oldLockingContext;
- pFile->pMethod = pCtx->pOldMethod;
- sqlite3_free(pCtx);
- return pFile->pMethod->xClose(id);
+ releaseOpenCnt(pFile->pOpen);
+ sqlite3_free(pFile->lockingContext);
+ closeUnixFile(id);
+ unixLeaveMutex();
}
return SQLITE_OK;
}
-/* HOSTIDLEN and CONCHLEN both include space for the string
-** terminating nul
-*/
-#define HOSTIDLEN 128
-#define CONCHLEN (MAXPATHLEN+HOSTIDLEN+1)
-#ifndef HOSTIDPATH
-# define HOSTIDPATH "/Library/Caches/.com.apple.sqliteConchHostId"
-#endif
+#endif /* defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE */
+/*
+** The code above is the AFP lock implementation. The code is specific
+** to MacOSX and does not work on other unix platforms. No alternative
+** is available. If you don't compile for a mac, then the "unix-afp"
+** VFS is not available.
+**
+********************* End of the AFP lock implementation **********************
+******************************************************************************/
-/* basically a copy of unixRandomness with different
-** test behavior built in */
-static int genHostID(char *pHostID){
+/******************************************************************************
+************************** Begin Proxy Locking ********************************
+**
+**
+** The default locking schemes in SQLite use byte-range locks on the
+** database file to coordinate safe, concurrent access by multiple readers
+** and writers [http://sqlite.org/lockingv3.html]. The five file locking
+** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented
+** as POSIX read & write locks over a fixed set of locations. (On AFP and
+** SMB only exclusive byte-range locks are available, via fsctl with
+** _IOWR('z', 23, struct ByteRangeLockPB2), to track the same 5 states.
+** To simulate a F_RDLCK on the shared range on AFP, a randomly selected
+** address in the shared range is taken for a SHARED lock and the entire
+** shared range is taken for an EXCLUSIVE lock.) The locations are:
+**
+** PENDING_BYTE 0x40000000
+** RESERVED_BYTE 0x40000001
+** SHARED_RANGE 0x40000002 -> 0x40000200
+**
+** This works well on the local file system, but shows a nearly 100x
+** slowdown in read performance on AFP because the AFP client disables
+** the read cache when byte-range locks are present. Enabling the read
+** cache exposes a cache coherency problem that is present on all OS X
+** supported network file systems. NFS and AFP both observe the
+** close-to-open semantics for ensuring cache coherency
+** [http://nfs.sourceforge.net/#faq_a8], which does not effectively
+** address the requirements for concurrent database access by multiple
+** readers and writers
+** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].
+**
+** To address the performance and cache coherency issues, proxy file locking
+** changes the way database access is controlled by limiting access to a
+** single host at a time and moving file locks off of the database file
+** and onto a proxy file on the local file system.
+**
+**
+** Using proxy locks
+** -----------------
+**
+** C APIs
+**
+** sqlite3_file_control(db, dbname, SQLITE_SET_LOCKPROXYFILE,
+** <proxy_path> | ":auto:");
+** sqlite3_file_control(db, dbname, SQLITE_GET_LOCKPROXYFILE, &<proxy_path>);
+**
+**
+** SQL pragmas
+**
+** PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
+** PRAGMA [database.]lock_proxy_file
+**
+** Specifying ":auto:" means that if there is a conch file with a matching
+** host ID in it, the proxy path in the conch file will be used, otherwise
+** a proxy path based on the user's temp dir
+** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
+** actual proxy file name is generated from the name and path of the
+** database file. For example:
+**
+** For database path "/Users/me/foo.db"
+** The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:"
+**
+** Once a lock proxy is configured for a database connection, it can not
+** be removed, however it may be switched to a different proxy path via
+** the above APIs (assuming the conch file is not being held by another
+** connection or process).
+**
+**
+** How proxy locking works
+** -----------------------
+**
+** Proxy file locking relies primarily on two new supporting files:
+**
+** * conch file to limit access to the database file to a single host
+** at a time
+**
+** * proxy file to act as a proxy for the advisory locks normally
+** taken on the database
+**
+** The conch file - to use a proxy file, sqlite must first "hold the conch"
+** by taking an sqlite-style shared lock on the conch file, reading the
+** contents and comparing the host's unique host ID (see below) and lock
+** proxy path against the values stored in the conch. The conch file is
+** stored in the same directory as the database file and the file name
+** is patterned after the database file name as ".<databasename>-conch".
+** If the conch file does not exist, or its contents do not match the
+** host ID and/or proxy path, then the lock is escalated to an exclusive
+** lock and the conch file contents are updated with the host ID and proxy
+** path and the lock is downgraded to a shared lock again. If the conch
+** is held by another process (with a shared lock), the exclusive lock
+** will fail and SQLITE_BUSY is returned.
+**
+** The proxy file - a single-byte file used for all advisory file locks
+** normally taken on the database file. This allows for safe sharing
+** of the database file for multiple readers and writers on the same
+** host (the conch ensures that they all use the same local lock file).
+**
+** There is a third file - the host ID file - used as a persistent record
+** of a unique identifier for the host, a 128-byte unique host id file
+** in the path defined by the HOSTIDPATH macro (default value is
+** /Library/Caches/.com.apple.sqliteConchHostId).
+**
+** Requesting the lock proxy does not immediately take the conch, it is
+** only taken when the first request to lock database file is made.
+** This matches the semantics of the traditional locking behavior, where
+** opening a connection to a database file does not take a lock on it.
+** The shared lock and an open file descriptor are maintained until
+** the connection to the database is closed.
+**
+** The proxy file and the lock file are never deleted so they only need
+** to be created the first time they are used.
+**
+** Configuration options
+** ---------------------
+**
+** SQLITE_PREFER_PROXY_LOCKING
+**
+** Database files accessed on non-local file systems are
+** automatically configured for proxy locking, lock files are
+** named automatically using the same logic as
+** PRAGMA lock_proxy_file=":auto:"
+**
+** SQLITE_PROXY_DEBUG
+**
+** Enables the logging of error messages during host id file
+** retrieval and creation
+**
+** HOSTIDPATH
+**
+** Overrides the default host ID file path location
+**
+** LOCKPROXYDIR
+**
+** Overrides the default directory used for lock proxy files that
+** are named automatically via the ":auto:" setting
+**
+** SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
+**
+** Permissions to use when creating a directory for storing the
+** lock proxy files, only used when LOCKPROXYDIR is not set.
+**
+**
+** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
+** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
+** force proxy locking to be used for every database file opened, and 0
+** will force automatic proxy locking to be disabled for all database
+** files (explicitly calling the SQLITE_SET_LOCKPROXYFILE pragma or
+** sqlite3_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
+*/
+
+/*
+** Proxy locking is only available on MacOSX
+*/
+#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
+
+
+static int getDbPathForUnixFile(unixFile *pFile, char *dbPath);
+static int getLockPath(const char *dbPath, char *lPath, size_t maxLen);
+static sqlite3_io_methods *ioMethodForLockingStyle(int style);
+static int createProxyUnixFile(const char *path, unixFile **ppFile);
+static int fillInUnixFile(sqlite3_vfs *pVfs, int h, int dirfd, sqlite3_file *pId, const char *zFilename, int noLock, int isDelete);
+static int takeConch(unixFile *pFile);
+static int releaseConch(unixFile *pFile);
+static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf);
+
+/*
+** Probe file descriptor fd with a byte-range lock query and report which
+** locking style should be used for it.
+**
+** Returns LOCKING_STYLE_POSIX when the fcntl(F_GETLK) query succeeds.
+** Otherwise returns LOCKING_STYLE_NAMEDSEM when OS_VXWORKS is non-zero
+** and LOCKING_STYLE_DOTFILE when it is zero.
+*/
+static int testLockingStyle(int fd){
+  struct flock probe;
+  int queryFailed;
+
+  /* Describe a one-byte read lock at the start of the file.  F_GETLK
+  ** only asks about the lock; if the call itself works we assume the
+  ** file-system supports POSIX style locks.
+  */
+  probe.l_type = F_RDLCK;
+  probe.l_whence = SEEK_SET;
+  probe.l_start = 0;
+  probe.l_len = 1;
+  queryFailed = ( fcntl(fd, F_GETLK, &probe)==-1 );
+
+  if( !queryFailed ){
+    return LOCKING_STYLE_POSIX;
+  }
+
+  /* Testing for flock() can give false positives.  So if the above
+  ** test fails, fall back to dot-file style locking (or named-semaphore
+  ** locking on vxworks).
+  */
+  if( OS_VXWORKS ){
+    return LOCKING_STYLE_NAMEDSEM;
+  }
+  return LOCKING_STYLE_DOTFILE;
+}
+
+
+#ifdef SQLITE_TEST
+/* simulate multiple hosts by creating unique hostid file paths
+**
+** This global exists only in SQLITE_TEST builds and starts out as 0.
+*/
+int sqlite3_hostid_num = 0;
+#endif
+
+/*
+** The proxyLockingContext has the path and file structures for the remote
+** and local proxy files in it
+**
+** One of these is allocated (and zero-filled) by
+** transformUnixFileForLockProxy() and installed as pFile->lockingContext;
+** proxyClose() tears it down again.
+*/
+typedef struct proxyLockingContext proxyLockingContext;
+struct proxyLockingContext {
+ unixFile *conchFile; /* conch file, opened with createProxyUnixFile() */
+ char *conchFilePath; /* path built by getConchPathFromDBPath() */
+ unixFile *lockProxy; /* file whose lock methods receive the forwarded calls */
+ char *lockProxyPath; /* copy of the lock proxy path; 0 if none was given */
+ char *dbPath; /* copy of the path from getDbPathForUnixFile() */
+ int conchHeld; /* non-zero: proxyClose() must call releaseConch() */
+ void *oldLockingContext; /* preserve the original locking context for close */
+ sqlite3_io_methods const *pOldMethod; /* ditto pMethod */
+};
+
+
+/*
+** Reserved-lock check for a file that uses proxy locking.  The conch is
+** taken first; if that fails its error code is returned unchanged.
+** Otherwise the query is forwarded to the lock proxy file and the
+** proxy's answer (return code and *pResOut) is passed straight back.
+*/
+static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
+  unixFile *pDbFile = (unixFile*)id;
+  proxyLockingContext *pProxyCtx;
+  unixFile *pLockFile;
+  int rc;
+
+  rc = takeConch(pDbFile);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+  pProxyCtx = (proxyLockingContext *)pDbFile->lockingContext;
+  pLockFile = pProxyCtx->lockProxy;
+  return pLockFile->pMethod->xCheckReservedLock(
+      (sqlite3_file*)pLockFile, pResOut);
+}
+
+/*
+** Lock method for a file that uses proxy locking.  The conch is taken
+** first; if that fails its error code is returned and nothing else
+** happens.  Otherwise the request is forwarded to the lock proxy file
+** and, whatever the outcome, the proxy's resulting lock level is copied
+** back into pFile->locktype.
+*/
+static int proxyLock(sqlite3_file *id, int locktype) {
+  unixFile *pDbFile = (unixFile*)id;
+  proxyLockingContext *pProxyCtx;
+  unixFile *pLockFile;
+  int rc;
+
+  rc = takeConch(pDbFile);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+  pProxyCtx = (proxyLockingContext *)pDbFile->lockingContext;
+  pLockFile = pProxyCtx->lockProxy;
+  rc = pLockFile->pMethod->xLock((sqlite3_file*)pLockFile, locktype);
+  pDbFile->locktype = pLockFile->locktype;
+  return rc;
+}
+
+/*
+** Unlock method for a file that uses proxy locking.  The conch is taken
+** first; if that fails its error code is returned and nothing else
+** happens.  Otherwise the request is forwarded to the lock proxy file
+** and, whatever the outcome, the proxy's resulting lock level is copied
+** back into pFile->locktype.
+*/
+static int proxyUnlock(sqlite3_file *id, int locktype) {
+  unixFile *pDbFile = (unixFile*)id;
+  proxyLockingContext *pProxyCtx;
+  unixFile *pLockFile;
+  int rc;
+
+  rc = takeConch(pDbFile);
+  if( rc!=SQLITE_OK ){
+    return rc;
+  }
+  pProxyCtx = (proxyLockingContext *)pDbFile->lockingContext;
+  pLockFile = pProxyCtx->lockProxy;
+  rc = pLockFile->pMethod->xUnlock((sqlite3_file*)pLockFile, locktype);
+  pDbFile->locktype = pLockFile->locktype;
+  return rc;
+}
+
+/*
+ ** Close a file.
+ **
+ ** Tears down the proxy locking context of the file and then closes the
+ ** file through the method table that was in place before proxy locking
+ ** was installed. In order: the lock proxy file is unlocked, closed and
+ ** freed; the conch is released if it is held and the conch file is
+ ** closed and freed; the three path strings are freed; the saved
+ ** lockingContext and pMethod are restored; the context is freed; the
+ ** restored xClose method is invoked and its result returned.
+ **
+ ** Returns SQLITE_OK when id is NULL. If any unlock, release or close
+ ** step fails, that error code is returned immediately.
+ **
+ ** NOTE(review): the early returns skip every later cleanup step, so
+ ** whatever has not yet been freed or closed at that point is left as
+ ** is. Confirm that this is intended.
+ */
+static int proxyClose(sqlite3_file *id) {
+ if( id ){
+ unixFile *pFile = (unixFile*)id;
+ proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+ unixFile *lockProxy = pCtx->lockProxy;
+ unixFile *conchFile = pCtx->conchFile;
+ int rc = SQLITE_OK;
+
+ if( lockProxy ){
+ rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
+ if( rc ) return rc;
+ rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
+ if( rc ) return rc;
+ sqlite3_free(lockProxy);
+ }
+ if( conchFile ){
+ if( pCtx->conchHeld ){
+ rc = releaseConch(pFile);
+ if( rc ) return rc;
+ }
+ rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
+ if( rc ) return rc;
+ sqlite3_free(conchFile);
+ }
+ sqlite3_free(pCtx->lockProxyPath);
+ sqlite3_free(pCtx->conchFilePath);
+ sqlite3_free(pCtx->dbPath);
+ /* restore the original locking context and pMethod then close it */
+ pFile->lockingContext = pCtx->oldLockingContext;
+ pFile->pMethod = pCtx->pOldMethod;
+ sqlite3_free(pCtx);
+ return pFile->pMethod->xClose(id); /* this is the restored method */
+ }
+ return SQLITE_OK;
+}
+
+/* HOSTIDLEN and CONCHLEN both include space for the string
+** terminating nul
+**
+** HOSTIDLEN is the number of bytes transferred when the host ID file is
+** read or written. CONCHLEN is MAXPATHLEN+HOSTIDLEN+1; presumably it
+** sizes a buffer holding a host ID followed by a lock proxy path
+** (TODO confirm against the conch reading code).
+**
+** HOSTIDPATH is the default location of the host ID file. It is only
+** defined here if the build has not already defined it.
+*/
+#define HOSTIDLEN 128
+#define CONCHLEN (MAXPATHLEN+HOSTIDLEN+1)
+#ifndef HOSTIDPATH
+# define HOSTIDPATH "/Library/Caches/.com.apple.sqliteConchHostId"
+#endif
+
+/* basically a copy of unixRandomness with different
+** test behavior built in */
+static int genHostID(char *pHostID){
int pid, fd, i, len;
unsigned char *key = (unsigned char *)pHostID;
if( err!=EEXIST ){
#ifdef SQLITE_PROXY_DEBUG /* set the sqlite error message instead */
- fprintf(stderr, "sqlite error creating host ID file %s: %s\n", path, strerror(err));
+ fprintf(stderr, "sqlite error creating host ID file %s: %s\n",
+ path, strerror(err));
#endif
return SQLITE_PERM;
}
if( fd<0 ){
int err = errno;
#ifdef SQLITE_PROXY_DEBUG /* set the sqlite error message instead */
- fprintf(stderr, "sqlite error opening host ID file %s: %s\n", path, strerror(err));
+ fprintf(stderr, "sqlite error opening host ID file %s: %s\n",
+ path, strerror(err));
#endif
return SQLITE_PERM;
}
len = pread(fd, pHostID, HOSTIDLEN, 0);
- if( len<0 ){
- *pError = errno;
- rc = SQLITE_IOERR_READ;
- }else if( len<HOSTIDLEN ){
- *pError = 0;
- rc = SQLITE_IOERR_SHORT_READ;
- }
+ if( len<0 ){
+ *pError = errno;
+ rc = SQLITE_IOERR_READ;
+ }else if( len<HOSTIDLEN ){
+ *pError = 0;
+ rc = SQLITE_IOERR_SHORT_READ;
+ }
close(fd); /* silently leak the fd if it fails */
OSTRACE3("GETHOSTID read %s pid=%d\n", pHostID, getpid());
return rc;
/* we're creating the host ID file (use a random string of bytes) */
genHostID(pHostID);
len = pwrite(fd, pHostID, HOSTIDLEN, 0);
- if( len<0 ){
- *pError = errno;
- rc = SQLITE_IOERR_WRITE;
- }else if( len<HOSTIDLEN ){
- *pError = 0;
- rc = SQLITE_IOERR_WRITE;
- }
+ if( len<0 ){
+ *pError = errno;
+ rc = SQLITE_IOERR_WRITE;
+ }else if( len<HOSTIDLEN ){
+ *pError = 0;
+ rc = SQLITE_IOERR_WRITE;
+ }
close(fd); /* silently leak the fd if it fails */
OSTRACE3("GETHOSTID wrote %s pid=%d\n", pHostID, getpid());
return rc;
if( readRc!=SQLITE_IOERR_SHORT_READ ){
int match = 0;
if( readRc!=SQLITE_OK ){
- if( rc&SQLITE_IOERR==SQLITE_IOERR ){
- pFile->lastErrno = conchFile->lastErrno;
- }
+ if( rc&SQLITE_IOERR==SQLITE_IOERR ){
+ pFile->lastErrno = conchFile->lastErrno;
+ }
rc = readRc;
goto end_takeconch;
}
#else
if( fchmod(conchFile->h, buf.st_mode)!=0 ){
int code = errno;
- fprintf(stderr, "fchmod %o FAILED with %d %s\n",buf.st_mode, code, strerror(code));
+ fprintf(stderr, "fchmod %o FAILED with %d %s\n",
+ buf.st_mode, code, strerror(code));
} else {
fprintf(stderr, "fchmod %o SUCCEDED\n",buf.st_mode);
}
}else{
int code = errno;
- fprintf(stderr, "STAT FAILED[%d] with %d %s\n", err, code, strerror(code));
+ fprintf(stderr, "STAT FAILED[%d] with %d %s\n",
+ err, code, strerror(code));
#endif
}
}
end_takeconch:
OSTRACE2("TRANSPROXY: CLOSE %d\n", pFile->h);
- if( rc==SQLITE_OK && pFile->oflags ){
+ if( rc==SQLITE_OK && pFile->openFlags ){
if( pFile->h>=0 ){
#ifdef STRICT_CLOSE_ERROR
if( close(pFile->h) ){
#endif
}
pFile->h = -1;
- int fd = open(pCtx->dbPath, pFile->oflags, SQLITE_DEFAULT_FILE_PERMISSIONS);
+ int fd = open(pCtx->dbPath, pFile->openFlags,
+ SQLITE_DEFAULT_FILE_PERMISSIONS);
OSTRACE2("TRANSPROXY: OPEN %d\n", fd);
if( fd>=0 ){
pFile->h = fd;
if( tLockPath ){
pCtx->lockProxyPath = sqlite3DbStrDup(0, tLockPath);
- if( pCtx->lockProxy->pMethod == ioMethodForLockingStyle(LOCKING_STYLE_AFP) ){
- ((afpLockingContext *)pCtx->lockProxy->lockingContext)->dbPath = pCtx->lockProxyPath;
+ if( pCtx->lockProxy->pMethod ==
+ ioMethodForLockingStyle(LOCKING_STYLE_AFP) ){
+ ((afpLockingContext *)pCtx->lockProxy->lockingContext)->dbPath =
+ pCtx->lockProxyPath;
}
}
} else {
conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
}
- OSTRACE3("TAKECONCH %d %s\n", conchFile->h, rc==SQLITE_OK ? "ok" : "failed");
+ OSTRACE3("TAKECONCH %d %s\n", conchFile->h, rc==SQLITE_OK?"ok":"failed");
return rc;
}
}
return rc;
}
-static int getConchPathFromDBPath(char *dbPath, char **pConchPath){
- int i;
- int len = strlen(dbPath);
- char *conchPath;
-
- conchPath = (char *)sqlite3_malloc(len + 8);
- if( conchPath==0 ){
- return SQLITE_NOMEM;
+/*
+** Derive the conch file path for the database at dbPath.
+**
+** The conch path is dbPath with a "." inserted in front of the final
+** path component and "-conch" appended, e.g. "/a/b/foo.db" becomes
+** "/a/b/.foo.db-conch".  A dbPath that contains no '/' is treated as a
+** bare file name, so "foo.db" becomes ".foo.db-conch".
+**
+** On success *pConchPath is set to a buffer obtained from
+** sqlite3_malloc() that the caller must release with sqlite3_free(),
+** and SQLITE_OK is returned.  On allocation failure SQLITE_NOMEM is
+** returned and *pConchPath is left untouched.
+*/
+static int getConchPathFromDBPath(char *dbPath, char **pConchPath){
+  int len = strlen(dbPath);
+  int nDir;              /* bytes of dbPath up to and including last '/' */
+  char *conchPath;
+
+  /* one byte for the ".", six for "-conch" and one for the nul */
+  conchPath = (char *)sqlite3_malloc(len + 8);
+  if( conchPath==0 ){
+    return SQLITE_NOMEM;
+  }
+
+  /* Find where the final path component starts.  nDir stops at 0 when
+  ** there is no '/' at all, so the "." is never written in front of the
+  ** buffer.
+  */
+  for( nDir=len; nDir>0 && dbPath[nDir-1]!='/'; nDir-- ){}
+
+  memcpy(conchPath, dbPath, nDir);
+  conchPath[nDir] = '.';
+  memcpy(&conchPath[nDir+1], &dbPath[nDir], len-nDir);
+  memcpy(&conchPath[len+1], "-conch", 7);   /* copies the nul as well */
+
+  *pConchPath = conchPath;
+  return SQLITE_OK;
+}
+
+
+/*
+** Build the path of the automatically named lock proxy file for the
+** database at dbPath and store it, nul-terminated, in lPath, which is a
+** buffer of maxLen bytes.
+**
+** The directory part is LOCKPROXYDIR when that macro is defined.
+** Otherwise, when _CS_DARWIN_USER_TEMP_DIR is available, it is the
+** confstr() user temp directory with "sqliteplocks" appended (the
+** directory is created here if possible).  Failing both it is "/tmp/".
+** The file name is dbPath with every '/' replaced by '_', followed by
+** ":auto:".  A dbPath too long for the buffer is cut short so that the
+** ":auto:" suffix still fits.
+**
+** Returns SQLITE_OK on success.  Returns SQLITE_CANTOPEN, with lPath set
+** to the empty string, when the directory cannot be obtained or when the
+** directory plus the ":auto:" suffix does not fit in maxLen bytes.
+*/
+static int getLockPath(const char *dbPath, char *lPath, size_t maxLen){
+  size_t len;
+  size_t dbLen;
+  size_t i;
+
+#ifdef LOCKPROXYDIR
+  len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
+#else
+# ifdef _CS_DARWIN_USER_TEMP_DIR
+  {
+    /* confstr() returns 0 on failure and a value larger than the buffer
+    ** size when the result had to be truncated.
+    */
+    size_t nTmp = confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen);
+    if( nTmp==0 || nTmp>maxLen ){
+      goto lockpath_unusable;
+    }
+    len = strlcat(lPath, "sqliteplocks", maxLen);
+    if( len>=maxLen ){
+      goto lockpath_unusable;
+    }
+    if( mkdir(lPath, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
+      /* if mkdir fails, handle as lock file creation failure */
+#  ifdef SQLITE_DEBUG
+      int err = errno;
+      if( err!=EEXIST ){
+        fprintf(stderr, "getLockPath: mkdir(%s,0%o) error %d %s\n", lPath,
+                SQLITE_DEFAULT_PROXYDIR_PERMISSIONS, err, strerror(err));
+      }
+#  endif
+    }else{
+      OSTRACE3("GETLOCKPATH mkdir %s pid=%d\n", lPath, getpid());
+    }
+  }
+# else
+  len = strlcpy(lPath, "/tmp/", maxLen);
+# endif
+#endif
+
+  /* strlcpy/strlcat return the length they tried to create, so a value
+  ** of maxLen or more means the directory was truncated.
+  */
+  if( len>=maxLen ){
+    goto lockpath_unusable;
+  }
+  if( len>0 && lPath[len-1]!='/' ){
+    len = strlcat(lPath, "/", maxLen);
+  }
+  /* room is needed for at least ":auto:" and the nul */
+  if( len+7>maxLen ){
+    goto lockpath_unusable;
+  }
+
+  /* transform the db path to a unique cache name */
+  dbLen = strlen(dbPath);
+  for( i=0; i<dbLen && (i+len+7)<maxLen; i++){
+    char c = dbPath[i];
+    lPath[i+len] = (c=='/')?'_':c;
+  }
+  lPath[i+len]='\0';
+  strlcat(lPath, ":auto:", maxLen);
+  return SQLITE_OK;
+
+lockpath_unusable:
+  if( maxLen>0 ){
+    lPath[0] = '\0';
+  }
+  return SQLITE_CANTOPEN;
+}
+
+/* Takes a fully configured proxy locking-style unix file and switches
+** the local lock file path to a private copy of "path".
+**
+** Returns SQLITE_BUSY if pFile currently holds any lock.  Returns
+** SQLITE_OK without changing anything if path is NULL, empty, ":auto:"
+** or equal to the current lock proxy path.  Returns SQLITE_NOMEM, again
+** without changing anything, if the new path cannot be copied.
+**
+** Otherwise the current lock proxy file (if any) is detached from the
+** context and closed, conchHeld is cleared and the new path replaces the
+** old one.  If closing the old proxy fails, that error is returned and
+** the old path stays in place.
+*/
+static int switchLockProxyPath(unixFile *pFile, const char *path) {
+  proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
+  char *oldPath = pCtx->lockProxyPath;
+  char *newPath;
+  unixFile *lockProxy;
+  int rc = SQLITE_OK;
+
+  if( pFile->locktype!=NO_LOCK ){
+    return SQLITE_BUSY;
+  }
+
+  /* nothing to do if the path is NULL, :auto: or matches the existing path */
+  if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
+      (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
+    return SQLITE_OK;
+  }
+
+  /* Copy the new path before touching the context so that an allocation
+  ** failure cannot leave the context without any path.
+  */
+  newPath = sqlite3DbStrDup(0, path);
+  if( newPath==0 ){
+    return SQLITE_NOMEM;
+  }
+
+  lockProxy = pCtx->lockProxy;
+  pCtx->lockProxy = NULL;
+  pCtx->conchHeld = 0;
+  if( lockProxy!=NULL ){
+    rc = lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
+    if( rc ){
+      sqlite3_free(newPath);
+      return rc;
+    }
+    sqlite3_free(lockProxy);
+  }
+  sqlite3_free(oldPath);
+  pCtx->lockProxyPath = newPath;
+
+  return rc;
+}
+
+/*
+** Takes an already filled in unix file and alters it so all file locking
+** will be performed on the local proxy lock file.  The following fields
+** are preserved in the locking context so that they can be restored and
+** the unix structure properly cleaned up at close time:
+**  ->lockingContext
+**  ->pMethod
+**
+** "path" is the lock proxy path to use; NULL, "" or ":auto:" leave the
+** path unset in the new context.
+**
+** Returns SQLITE_BUSY if pFile currently holds any lock, SQLITE_NOMEM if
+** an allocation fails, or the error reported while determining the
+** database path or creating the conch file.  On any failure pFile is
+** left exactly as it was and everything allocated here is released.
+** NOTE(review): this assumes getDbPathForUnixFile() returns SQLITE_OK on
+** success, like the other helpers in this division.
+*/
+static int transformUnixFileForLockProxy(unixFile *pFile, const char *path) {
+  proxyLockingContext *pCtx;
+  char dbPath[MAXPATHLEN];
+  char *lockPath=NULL;
+  int rc = SQLITE_OK;
+
+  if( pFile->locktype!=NO_LOCK ){
+    return SQLITE_BUSY;
+  }
+  rc = getDbPathForUnixFile(pFile, dbPath);
+  if( rc!=SQLITE_OK ){
+    /* dbPath is not usable, so do not go on to build paths from it */
+    return rc;
+  }
+  if( path && path[0]!='\0' && strcmp(path, ":auto:")!=0 ){
+    lockPath=(char *)path;
+  }
+
+  OSTRACE4("TRANSPROXY %d for %s pid=%d\n", pFile->h,
+           (lockPath ? lockPath : ":auto:"), getpid());
+
+  pCtx = sqlite3_malloc( sizeof(*pCtx) );
+  if( pCtx==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCtx, 0, sizeof(*pCtx));
+
+  rc = getConchPathFromDBPath(dbPath, &pCtx->conchFilePath);
+  if( rc==SQLITE_OK ){
+    rc = createProxyUnixFile(pCtx->conchFilePath, &pCtx->conchFile);
+  }
+  if( rc==SQLITE_OK && lockPath ){
+    pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
+    if( pCtx->lockProxyPath==0 ){
+      rc = SQLITE_NOMEM;
+    }
+  }
+  if( rc==SQLITE_OK ){
+    pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
+    if( pCtx->dbPath==0 ){
+      rc = SQLITE_NOMEM;
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    /* all memory is allocated, proxys are created and assigned,
+    ** switch the locking context and pMethod then return.
+    */
+    pCtx->oldLockingContext = pFile->lockingContext;
+    pFile->lockingContext = pCtx;
+    pCtx->pOldMethod = pFile->pMethod;
+    pFile->pMethod = ioMethodForLockingStyle(LOCKING_STYLE_PROXY);
+  }else{
+    if( pCtx->conchFile ){
+      /* Best-effort close.  The error that brought us here is the one
+      ** the caller needs to see, so the result of xClose is not allowed
+      ** to replace rc.
+      */
+      pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
+      sqlite3_free(pCtx->conchFile);
+    }
+    sqlite3_free(pCtx->lockProxyPath);
+    sqlite3_free(pCtx->conchFilePath);
+    sqlite3_free(pCtx->dbPath);
+    sqlite3_free(pCtx);
+  }
+  OSTRACE3("TRANSPROXY %d %s\n", pFile->h,
+           (rc==SQLITE_OK ? "ok" : "failed"));
+  return rc;
+}
+
+/*
+** Open (creating it if necessary) the file at "path" for reading and
+** writing and wrap it in a freshly allocated unixFile.
+**
+** On success *ppFile receives the new unixFile and SQLITE_OK is
+** returned.  SQLITE_CANTOPEN is returned if the file cannot be opened
+** and SQLITE_NOMEM if the unixFile cannot be allocated; any error from
+** fillInUnixFile() is passed through.  On failure *ppFile is not
+** written.
+*/
+static int createProxyUnixFile(const char *path, unixFile **ppFile) {
+  unixFile *pProxy = NULL;
+  int noDirFd = -1;
+  int rc;
+  int hFile = open(path, O_RDWR | O_CREAT, SQLITE_DEFAULT_FILE_PERMISSIONS);
+
+  if( hFile<0 ){
+    return SQLITE_CANTOPEN;
+  }
+
+  pProxy = (unixFile *)sqlite3_malloc(sizeof(unixFile));
+  if( pProxy==NULL ){
+    rc = SQLITE_NOMEM;
+  }else{
+    memset(pProxy, 0, sizeof(unixFile));
+    rc = fillInUnixFile(NULL, hFile, noDirFd, (sqlite3_file*)pProxy,
+                        path, 0, 0);
+    if( rc==SQLITE_OK ){
+      *ppFile = pProxy;
+      return SQLITE_OK;
+    }
+  }
+
+  /* failure: release whatever was acquired above */
+  close(hFile); /* silently leak fd if error, we're already in error */
+  sqlite3_free(pProxy);
+  return rc;
+}
+
+
+#endif /* defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE */
+/*
+** The proxy locking style is intended for use with AFP filesystems.
+** And since AFP is only supported on MacOSX, the proxy locking is also
+** restricted to MacOSX.
+**
+**
+******************* End of the proxy lock implementation **********************
+******************************************************************************/
+
+
+/******************************************************************************
+**************** Non-locking sqlite3_file methods *****************************
+**
+** The next division contains implementations for all methods of the
+** sqlite3_file object other than the locking methods. The locking
+** methods were defined in divisions above (one locking method per
+** division). Those methods that are common to all locking modes
+** are gathered together into this division.
+*/
+
+/*
+** Seek to the offset passed as the second argument, then read cnt
+** bytes into pBuf. Return the number of bytes actually read.
+**
+** NB: If you define USE_PREAD or USE_PREAD64, then it might also
+** be necessary to define _XOPEN_SOURCE to be 500. This varies from
+** one system to another. Since SQLite does not define USE_PREAD
+** in any form by default, we will not attempt to define _XOPEN_SOURCE.
+** See tickets #2741 and #2681.
+**
+** To avoid stomping the errno value on a failed read the lastErrno value
+** is set before returning.
+**
+** A negative return value means failure. When neither USE_PREAD nor
+** USE_PREAD64 is defined and the seek does not land on the requested
+** offset, -1 is returned without attempting the read; lastErrno is then
+** errno if lseek() itself failed and 0 otherwise.
+*/
+static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
+ int got;
+ i64 newOffset;
+ TIMER_START;
+#if defined(USE_PREAD)
+ got = pread(id->h, pBuf, cnt, offset);
+ SimulateIOError( got = -1 );
+#elif defined(USE_PREAD64)
+ got = pread64(id->h, pBuf, cnt, offset);
+ SimulateIOError( got = -1 );
+#else
+ newOffset = lseek(id->h, offset, SEEK_SET);
+ SimulateIOError( newOffset-- ); /* makes the offset check below fail */
+ if( newOffset!=offset ){
+ if( newOffset == -1 ){
+ ((unixFile*)id)->lastErrno = errno;
+ }else{
+ ((unixFile*)id)->lastErrno = 0;
+ }
+ return -1;
+ }
+ got = read(id->h, pBuf, cnt);
+#endif
+ TIMER_END;
+ if( got<0 ){
+ ((unixFile*)id)->lastErrno = errno;
+ }
+ OSTRACE5("READ %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
+ return got;
+}
+
+/*
+** Read amt bytes starting at the given offset of file id into pBuf.
+**
+** Returns SQLITE_OK when exactly amt bytes were read.  Returns
+** SQLITE_IOERR_READ when the underlying read failed (lastErrno has been
+** set by seekAndRead).  Returns SQLITE_IOERR_SHORT_READ when fewer bytes
+** were available; in that case lastErrno is cleared and the tail of
+** pBuf that was not filled is set to zero.
+*/
+static int unixRead(
+  sqlite3_file *id,
+  void *pBuf,
+  int amt,
+  sqlite3_int64 offset
+){
+  unixFile *pFile = (unixFile*)id;
+  int nRead;
+
+  assert( id );
+  nRead = seekAndRead(pFile, offset, pBuf, amt);
+  if( nRead==amt ){
+    return SQLITE_OK;
+  }
+  if( nRead<0 ){
+    /* lastErrno set by seekAndRead */
+    return SQLITE_IOERR_READ;
+  }
+
+  /* Short read: this is not a system error, and the unread part of the
+  ** buffer must be zero-filled.
+  */
+  pFile->lastErrno = 0;
+  memset((char*)pBuf + nRead, 0, amt-nRead);
+  return SQLITE_IOERR_SHORT_READ;
+}
+
+/*
+** Seek to the offset passed as the second argument, then write cnt
+** bytes from pBuf. Return the number of bytes actually written.
+**
+** To avoid stomping the errno value on a failed write the lastErrno value
+** is set before returning.
+**
+** A negative return value means failure. When neither USE_PREAD nor
+** USE_PREAD64 is defined and the seek does not land on the requested
+** offset, -1 is returned without attempting the write; lastErrno is then
+** errno if lseek() itself failed and 0 otherwise.
+*/
+static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
+ int got;
+ i64 newOffset;
+ TIMER_START;
+#if defined(USE_PREAD)
+ got = pwrite(id->h, pBuf, cnt, offset);
+#elif defined(USE_PREAD64)
+ got = pwrite64(id->h, pBuf, cnt, offset);
+#else
+ newOffset = lseek(id->h, offset, SEEK_SET);
+ if( newOffset!=offset ){
+ if( newOffset == -1 ){
+ ((unixFile*)id)->lastErrno = errno;
+ }else{
+ ((unixFile*)id)->lastErrno = 0;
+ }
+ return -1;
+ }
+ got = write(id->h, pBuf, cnt);
+#endif
+ TIMER_END;
+ if( got<0 ){
+ ((unixFile*)id)->lastErrno = errno;
+ }
+
+ OSTRACE5("WRITE %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
+ return got;
+}
+
+
+/*
+** Write data from a buffer into a file. Return SQLITE_OK on success
+** or some other error code on failure.
+**
+** seekAndWrite() is called repeatedly until all amt bytes have been
+** written or it returns zero or a negative value. If bytes remain
+** unwritten, SQLITE_IOERR_WRITE is returned when the last call failed
+** and SQLITE_FULL (with lastErrno cleared) when it wrote nothing.
+*/
+static int unixWrite(
+ sqlite3_file *id,
+ const void *pBuf,
+ int amt,
+ sqlite3_int64 offset
+){
+ int wrote = 0;
+ assert( id );
+ assert( amt>0 );
+ while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
+ amt -= wrote; /* bytes still to be written */
+ offset += wrote; /* continue where the partial write stopped */
+ pBuf = &((char*)pBuf)[wrote];
}
- strlcpy(conchPath, dbPath, len+1);
-
- /* now insert a "." before the last / character */
- for( i=(len-1); i>=0; i-- ){
- if( conchPath[i]=='/' ){
- i++;
- break;
+ SimulateIOError(( wrote=(-1), amt=1 ));
+ SimulateDiskfullError(( wrote=0, amt=1 ));
+ if( amt>0 ){
+ if( wrote<0 ){
+ /* lastErrno set by seekAndWrite */
+ return SQLITE_IOERR_WRITE;
+ }else{
+ ((unixFile*)id)->lastErrno = 0; /* not a system error */
+ return SQLITE_FULL;
}
}
- conchPath[i]='.';
- while ( i<len ){
- conchPath[i+1]=dbPath[i];
- i++;
- }
- conchPath[i+1]='\0';
- strlcat(conchPath, "-conch", len + 8);
- *pConchPath = conchPath;
return SQLITE_OK;
}
-static int getLockPath(const char *dbPath, char *lPath, size_t maxLen){
- int len;
- int dbLen;
- int i;
+#ifdef SQLITE_TEST
+/*
+** Count the number of fullsyncs and normal syncs. This is used to test
+** that syncs and fullsyncs are occurring at the right times.
+*/
+int sqlite3_sync_count = 0;
+int sqlite3_fullsync_count = 0;
+#endif
-#ifdef LOCKPROXYDIR
- len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
+/*
+** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
+** Otherwise use fsync() in its place.
+*/
+#ifndef HAVE_FDATASYNC
+# define fdatasync fsync
+#endif
+
+/*
+** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
+** the F_FULLFSYNC macro is defined. F_FULLFSYNC is currently
+** only available on Mac OS X. But that could change.
+*/
+#ifdef F_FULLFSYNC
+# define HAVE_FULLFSYNC 1
#else
-# ifdef _CS_DARWIN_USER_TEMP_DIR
- {
- char utdir[MAXPATHLEN];
-
- confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen);
- len = strlcat(lPath, "sqliteplocks", maxLen);
- if( mkdir(lPath, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
- /* if mkdir fails, handle as lock file creation failure */
- int err = errno;
-# ifdef SQLITE_DEBUG
- if( err!=EEXIST ){
- fprintf(stderr, "getLockPath: mkdir(%s,0%o) error %d %s\n", lPath,
- SQLITE_DEFAULT_PROXYDIR_PERMISSIONS, err, strerror(err));
- }
-# endif
- }else{
- OSTRACE3("GETLOCKPATH mkdir %s pid=%d\n", lPath, getpid());
- }
-
- }
-# else
- len = strlcpy(lPath, "/tmp/", maxLen);
-# endif
+# define HAVE_FULLFSYNC 0
#endif
- if( lPath[len-1]!='/' ){
- len = strlcat(lPath, "/", maxLen);
- }
-
- /* transform the db path to a unique cache name */
- dbLen = strlen(dbPath);
- for( i=0; i<dbLen && (i+len+7)<maxLen; i++){
- char c = dbPath[i];
- lPath[i+len] = (c=='/')?'_':c;
- }
- lPath[i+len]='\0';
- strlcat(lPath, ":auto:", maxLen);
- return SQLITE_OK;
-}
-/* Takes a fully configured proxy locking-style unix file and switches
-** the local lock file path
+/*
+** The fsync() system call does not work as advertised on many
+** unix systems. The following procedure is an attempt to make
+** it work better.
+**
+** The SQLITE_NO_SYNC macro disables all fsync()s. This is useful
+** for testing when we want to run through the test suite quickly.
+** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
+** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
+** or power failure will likely corrupt the database file.
*/
-static int switchLockProxyPath(unixFile *pFile, const char *path) {
- proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
- char *oldPath = pCtx->lockProxyPath;
- int taken = 0;
- int rc = SQLITE_OK;
+static int full_fsync(int fd, int fullSync, int dataOnly){
+ int rc;
- if( pFile->locktype!=NO_LOCK ){
- return SQLITE_BUSY;
- }
+ /* The following "ifdef/elif/else" block has the same structure as
+ ** the one below. It is replicated here solely to avoid cluttering
+ ** up the real code with the UNUSED_PARAMETER() macros.
+ */
+#ifdef SQLITE_NO_SYNC
+ UNUSED_PARAMETER(fd);
+ UNUSED_PARAMETER(fullSync);
+ UNUSED_PARAMETER(dataOnly);
+#elif HAVE_FULLFSYNC
+ UNUSED_PARAMETER(dataOnly);
+#else
+ UNUSED_PARAMETER(fullSync);
+#endif
- /* nothing to do if the path is NULL, :auto: or matches the existing path */
- if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
- (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
- return SQLITE_OK;
+ /* Record the number of times that we do a normal fsync() and
+ ** FULLSYNC. This is used during testing to verify that this procedure
+ ** gets called with the correct arguments.
+ */
+#ifdef SQLITE_TEST
+ if( fullSync ) sqlite3_fullsync_count++;
+ sqlite3_sync_count++;
+#endif
+
+ /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
+ ** no-op
+ */
+#ifdef SQLITE_NO_SYNC
+ rc = SQLITE_OK;
+#elif HAVE_FULLFSYNC
+ if( fullSync ){
+ rc = fcntl(fd, F_FULLFSYNC, 0);
}else{
- unixFile *lockProxy = pCtx->lockProxy;
- pCtx->lockProxy=NULL;
- pCtx->conchHeld = 0;
- if( lockProxy!=NULL ){
- rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
- if( rc ) return rc;
- sqlite3_free(lockProxy);
+ rc = 1;
+ }
+ /* If the FULLFSYNC failed, fall back to attempting an fsync().
+ * It shouldn't be possible for fullfsync to fail on the local
+ * file system (on OSX), so failure indicates that FULLFSYNC
+ * isn't supported for this file system. So, attempt an fsync
+ * and (for now) ignore the overhead of a superfluous fcntl call.
+ * It'd be better to detect fullfsync support once and avoid
+ * the fcntl call every time sync is called.
+ */
+ if( rc ) rc = fsync(fd);
+
+#else
+ if( dataOnly ){
+ rc = fdatasync(fd);
+ if( OS_VXWORKS && rc==-1 && errno==ENOTSUP ){
+ rc = fsync(fd);
}
- sqlite3_free(oldPath);
- pCtx->lockProxyPath = sqlite3DbStrDup(0, path);
+ }else{
+ rc = fsync(fd);
+ }
+#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
+
+ if( OS_VXWORKS && rc!= -1 ){
+ rc = 0;
}
-
return rc;
}
/*
-** Takes an already filled in unix file and alters it so all file locking
-** will be performed on the local proxy lock file. The following fields
-** are preserved in the locking context so that they can be restored and
-** the unix structure properly cleaned up at close time:
-** ->lockingContext
-** ->pMethod
+** Make sure all writes to a particular file are committed to disk.
+**
+** If dataOnly==0 then both the file itself and its metadata (file
+** size, access time, etc) are synced. If dataOnly!=0 then only the
+** file data is synced.
+**
+** Under Unix, also make sure that the directory entry for the file
+** has been created by fsync-ing the directory that contains the file.
+** If we do not do this and we encounter a power failure, the directory
+** entry for the journal might not exist after we reboot. The next
+** SQLite to access the file will not know that the journal exists (because
+** the directory entry for the journal was never created) and the transaction
+** will not roll back - possibly leading to database corruption.
*/
-static int transformUnixFileForLockProxy(unixFile *pFile, const char *path) {
- proxyLockingContext *pCtx;
- char dbPath[MAXPATHLEN];
- char *lockPath=NULL;
- int rc = SQLITE_OK;
-
- if( pFile->locktype!=NO_LOCK ){
- return SQLITE_BUSY;
- }
- getDbPathForUnixFile(pFile, dbPath);
- if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){
- lockPath=NULL;
- }else{
- lockPath=(char *)path;
- }
-
- OSTRACE4("TRANSPROXY %d for %s pid=%d\n", pFile->h,
- (lockPath ? lockPath : ":auto:"), getpid());
+static int unixSync(sqlite3_file *id, int flags){
+ int rc;
+ unixFile *pFile = (unixFile*)id;
+
+ int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
+ int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
+
+ /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
+ assert((flags&0x0F)==SQLITE_SYNC_NORMAL
+ || (flags&0x0F)==SQLITE_SYNC_FULL
+ );
- pCtx = sqlite3_malloc( sizeof(*pCtx) );
- if( pCtx==0 ){
- return SQLITE_NOMEM;
- }
- memset(pCtx, 0, sizeof(*pCtx));
+ /* Unix cannot, but some systems may return SQLITE_FULL from here. This
+ ** line is to test that doing so does not cause any problems.
+ */
+ SimulateDiskfullError( return SQLITE_FULL );
- rc = getConchPathFromDBPath(dbPath, &pCtx->conchFilePath);
- if( rc==SQLITE_OK ){
- rc = createProxyUnixFile(pCtx->conchFilePath, &pCtx->conchFile);
- }
- if( rc==SQLITE_OK && lockPath ){
- pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
+ assert( pFile );
+ OSTRACE2("SYNC %-3d\n", pFile->h);
+ rc = full_fsync(pFile->h, isFullsync, isDataOnly);
+ SimulateIOError( rc=1 );
+ if( rc ){
+ pFile->lastErrno = errno;
+ return SQLITE_IOERR_FSYNC;
}
-
-end_transform_file:
- if( rc==SQLITE_OK ){
- /* all memory is allocated, proxys are created and assigned,
- ** switch the locking context and pMethod then return.
+ if( pFile->dirfd>=0 ){
+ int err;
+ OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
+ HAVE_FULLFSYNC, isFullsync);
+#ifndef SQLITE_DISABLE_DIRSYNC
+ /* The directory sync is only attempted if full_fsync is
+ ** turned off or unavailable. If a full_fsync occurred above,
+ ** then the directory sync is superfluous.
*/
- pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
- pCtx->oldLockingContext = pFile->lockingContext;
- pFile->lockingContext = pCtx;
- pCtx->pOldMethod = pFile->pMethod;
- pFile->pMethod = ioMethodForLockingStyle(LOCKING_STYLE_PROXY);
- }else{
- if( pCtx->conchFile ){
- rc = pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
- if( rc ) return rc;
- sqlite3_free(pCtx->conchFile);
+ if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
+ /*
+ ** We have received multiple reports of fsync() returning
+ ** errors when applied to directories on certain file systems.
+ ** A failed directory sync is not a big deal. So it seems
+ ** better to ignore the error. Ticket #1657
+ */
+ /* pFile->lastErrno = errno; */
+ /* return SQLITE_IOERR; */
+ }
+#endif
+ err = close(pFile->dirfd); /* Only need to sync once, so close the */
+ if( err==0 ){ /* directory when we are done */
+ pFile->dirfd = -1;
+ }else{
+ pFile->lastErrno = errno;
+ rc = SQLITE_IOERR_DIR_CLOSE;
}
- sqlite3_free(pCtx->conchFilePath);
- sqlite3_free(pCtx);
}
- OSTRACE3("TRANSPROXY %d %s\n", pFile->h,
- (rc==SQLITE_OK ? "ok" : "failed"));
return rc;
-}
-
-static int createProxyUnixFile(const char *path, unixFile **ppFile) {
- int fd;
- int dirfd = -1;
- unixFile *pNew;
- int rc = SQLITE_OK;
+}
- fd = open(path, O_RDWR | O_CREAT, SQLITE_DEFAULT_FILE_PERMISSIONS);
- if( fd<0 ){
- return SQLITE_CANTOPEN;
- }
-
- pNew = (unixFile *)sqlite3_malloc(sizeof(unixFile));
- if( pNew==NULL ){
- rc = SQLITE_NOMEM;
- goto end_create_proxy;
- }
- memset(pNew, 0, sizeof(unixFile));
-
- rc = fillInUnixFile(NULL, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0);
- if( rc==SQLITE_OK ){
- *ppFile = pNew;
+/*
+** Truncate an open file to a specified size
+*/
+static int unixTruncate(sqlite3_file *id, i64 nByte){
+ int rc;
+ assert( id );
+ SimulateIOError( return SQLITE_IOERR_TRUNCATE );
+ rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
+ if( rc ){
+ ((unixFile*)id)->lastErrno = errno;
+ return SQLITE_IOERR_TRUNCATE;
+ }else{
return SQLITE_OK;
}
-end_create_proxy:
- close(fd); /* silently leak fd if error, we're already in error */
- sqlite3_free(pNew);
- return rc;
}
-
-#endif /* SQLITE_ENABLE_LOCKING_STYLE */
-
/*
-** The nolockLockingContext is void
+** Determine the current size of a file in bytes
*/
-typedef void nolockLockingContext;
+static int unixFileSize(sqlite3_file *id, i64 *pSize){
+ int rc;
+ struct stat buf;
+ assert( id );
+ rc = fstat(((unixFile*)id)->h, &buf);
+ SimulateIOError( rc=1 );
+ if( rc!=0 ){
+ ((unixFile*)id)->lastErrno = errno;
+ return SQLITE_IOERR_FSTAT;
+ }
+ *pSize = buf.st_size;
-static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
- UNUSED_PARAMETER(NotUsed);
- *pResOut = 0;
- return SQLITE_OK;
-}
+ /* When opening a zero-size database, the findLockInfo() procedure
+ ** writes a single byte into that file in order to work around a bug
+ ** in the OS-X msdos filesystem. In order to avoid problems with upper
+ ** layers, we need to report this file size as zero even though it is
+ ** really 1. Ticket #3260.
+ */
+ if( *pSize==1 ) *pSize = 0;
-static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
- UNUSED_PARAMETER2(NotUsed, NotUsed2);
- return SQLITE_OK;
-}
-static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
- UNUSED_PARAMETER2(NotUsed, NotUsed2);
return SQLITE_OK;
}
-/*
-** Close a file.
-*/
-static int nolockClose(sqlite3_file *id) {
- int rc;
- if( OS_VXWORKS ) unixEnterMutex();
- rc = closeUnixFile(id);
- if( OS_VXWORKS ) unixLeaveMutex();
- return rc;
-}
-
/*
** Information and control of an open file handle.
return 0;
}
-#define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) { \
-1, /* iVersion */ \
-xClose, /* xClose */ \
-unixRead, /* xRead */ \
-unixWrite, /* xWrite */ \
-unixTruncate, /* xTruncate */ \
-unixSync, /* xSync */ \
-unixFileSize, /* xFileSize */ \
-xLock, /* xLock */ \
-xUnlock, /* xUnlock */ \
-xCheckReservedLock, /* xCheckReservedLock */ \
-unixFileControl, /* xFileControl */ \
-unixSectorSize, /* xSectorSize */ \
-unixDeviceCharacteristics /* xDeviceCapabilities */ \
+/*
+** Here ends the implementation of all sqlite3_file methods.
+**
+********************** End sqlite3_file Methods *******************************
+******************************************************************************/
+
+/*
+** The following constant array describes all of the methods for the
+** sqlite3_file object for each of the various locking modes.
+**
+** The order in which the methods are defined is important and must
+** agree with the numeric values of the method identifier constants.
+** For example, LOCKING_STYLE_UNIX has a numeric value of zero, so
+** it must be the 0-th entry in the array.
+*/
+#define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) { \
+ 1, /* iVersion */ \
+ xClose, /* xClose */ \
+ unixRead, /* xRead */ \
+ unixWrite, /* xWrite */ \
+ unixTruncate, /* xTruncate */ \
+ unixSync, /* xSync */ \
+ unixFileSize, /* xFileSize */ \
+ xLock, /* xLock */ \
+ xUnlock, /* xUnlock */ \
+ xCheckReservedLock, /* xCheckReservedLock */ \
+ unixFileControl, /* xFileControl */ \
+ unixSectorSize, /* xSectorSize */ \
+ unixDeviceCharacteristics /* xDeviceCapabilities */ \
}
static sqlite3_io_methods aIoMethod[] = {
-IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock)
-,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
-#if SQLITE_ENABLE_LOCKING_STYLE
-,IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock)
+ IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock),
+ IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock),
+ IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock),
#if OS_VXWORKS
- ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
- ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
- ,IOMETHODS(namedsemClose, namedsemLock, namedsemUnlock, namedsemCheckReservedLock)
- ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
-#else
- ,IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock)
- ,IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock)
- ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
- ,IOMETHODS(proxyClose, proxyLock, proxyUnlock, proxyCheckReservedLock)
+ IOMETHODS(semClose, semLock, semUnlock, semCheckReservedLock),
+#elif SQLITE_ENABLE_LOCKING_STYLE
+ IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock),
#endif
+#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
+ IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock),
+ IOMETHODS(proxyClose, proxyLock, proxyUnlock, proxyCheckReservedLock),
#endif
-/* The order of the IOMETHODS macros above is important. It must be the
- ** same order as the LOCKING_STYLE numbers
- */
+ /* The order of the IOMETHODS macros above is important. It must be the
+ ** same order as the LOCKING_STYLE numbers
+ */
};
+/****************************************************************************
+**************************** sqlite3_vfs methods ****************************
+**
+** This division contains the implementation of methods on the
+** sqlite3_vfs object.
+*/
+
+/*
+** If SQLITE_ENABLE_LOCKING_STYLE is defined, this function examines the
+** f_fstypename entry in the statfs structure as returned by statfs() for
+** the file system hosting the database file and selects the appropriate
+** locking style based on its value. These values and assignments are
+** based on Darwin/OSX behavior and have not been thoroughly tested on
+** other systems.
+**
+** If SQLITE_ENABLE_LOCKING_STYLE is not defined, this function always
+** returns LOCKING_STYLE_POSIX.
+*/
+#if SQLITE_ENABLE_LOCKING_STYLE
+static int detectLockingStyle(
+ sqlite3_vfs *pVfs,
+ const char *filePath,
+ int fd
+){
+#if OS_VXWORKS
+ if( !filePath ){
+ return LOCKING_STYLE_NONE;
+ }
+ if( pVfs->pAppData ){
+ return SQLITE_PTR_TO_INT(pVfs->pAppData);
+ }
+ if (access(filePath, 0) != -1){
+ return testLockingStyle(fd);
+ }
+#else
+ struct Mapping {
+ const char *zFilesystem;
+ int eLockingStyle;
+ } aMap[] = {
+ { "hfs", LOCKING_STYLE_POSIX },
+ { "ufs", LOCKING_STYLE_POSIX },
+ { "afpfs", LOCKING_STYLE_AFP },
+#ifdef SQLITE_ENABLE_AFP_LOCKING_SMB
+ { "smbfs", LOCKING_STYLE_AFP },
+#else
+ { "smbfs", LOCKING_STYLE_FLOCK },
+#endif
+ { "webdav", LOCKING_STYLE_NONE },
+ { 0, 0 }
+ };
+ int i;
+ struct statfs fsInfo;
+
+ if( !filePath ){
+ return LOCKING_STYLE_NONE;
+ }
+ if( pVfs && pVfs->pAppData ){
+ return SQLITE_PTR_TO_INT(pVfs->pAppData);
+ }
+
+ if( statfs(filePath, &fsInfo) != -1 ){
+ if( fsInfo.f_flags & MNT_RDONLY ){
+ return LOCKING_STYLE_NONE;
+ }
+ for(i=0; aMap[i].zFilesystem; i++){
+ if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
+ return aMap[i].eLockingStyle;
+ }
+ }
+ }
+
+ /* Default case. Handles, amongst others, "nfs". */
+ return testLockingStyle(fd);
+#endif /* if OS_VXWORKS */
+ return LOCKING_STYLE_POSIX;
+}
+#else
+ #define detectLockingStyle(x,y,z) LOCKING_STYLE_POSIX
+#endif /* if SQLITE_ENABLE_LOCKING_STYLE */
+
+
/*
** Initialize the contents of the unixFile structure pointed to by pId.
**
#endif
}
- /* Macro to define the static contents of an sqlite3_io_methods
- ** structure for a unix backend file. Different locking methods
- ** require different functions for the xClose, xLock, xUnlock and
- ** xCheckReservedLock methods.
- */
- assert(LOCKING_STYLE_POSIX==1);
- assert(LOCKING_STYLE_NONE==2);
- assert(LOCKING_STYLE_DOTFILE==3);
- assert(LOCKING_STYLE_FLOCK==4);
- assert(LOCKING_STYLE_AFP==5);
- assert(LOCKING_STYLE_NAMEDSEM==6);
- assert(LOCKING_STYLE_PROXY==7);
-
+
switch( eLockingStyle ){
case LOCKING_STYLE_POSIX: {
#if OS_VXWORKS
case LOCKING_STYLE_NAMEDSEM: {
/* Named semaphore locking uses the file path so it needs to be
- ** included in the namedsemLockingContext
- */
+ ** included in the semLockingContext
+ */
unixEnterMutex();
rc = findLockInfo(pNew, &pNew->pLock, &pNew->pOpen);
if( (rc==SQLITE_OK) && (pNew->pOpen->pSem==NULL) ){
#if SQLITE_ENABLE_LOCKING_STYLE
static sqlite3_io_methods *ioMethodForLockingStyle(int style){
- return &aIoMethod[style-1];
+ return &aIoMethod[style];
}
static int getDbPathForUnixFile(unixFile *pFile, char *dbPath){
if( pFile->pMethod==ioMethodForLockingStyle(LOCKING_STYLE_AFP) ){
- /* afp style keeps a reference to the db path in the filePath field of the struct */
- strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, MAXPATHLEN);
+ /* afp style keeps a reference to the db path in the filePath field
+ ** of the struct */
+ strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath,
+ MAXPATHLEN);
return SQLITE_OK;
}
if( pFile->pMethod==ioMethodForLockingStyle(LOCKING_STYLE_DOTFILE) ){
- /* dot lock style uses the locking context to store the dot lock file path */
+ /* dot lock style uses the locking context to store the dot lock
+ ** file path */
int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX);
strlcpy(dbPath, (char *)pFile->lockingContext, len + 1);
return SQLITE_OK;
){
int fd = 0; /* File descriptor returned by open() */
int dirfd = -1; /* Directory file descriptor */
- int oflags = 0; /* Flags to pass to open() */
+ int openFlags = 0; /* Flags to pass to open() */
int eType = flags&0xFFFFFF00; /* Type of file to open */
int noLock; /* True to omit locking primitives */
int rc = SQLITE_OK;
zName = zTmpname;
}
- if( isReadonly ) oflags |= O_RDONLY;
- if( isReadWrite ) oflags |= O_RDWR;
- if( isCreate ) oflags |= O_CREAT;
- if( isExclusive ) oflags |= (O_EXCL|O_NOFOLLOW);
- oflags |= (O_LARGEFILE|O_BINARY);
+ if( isReadonly ) openFlags |= O_RDONLY;
+ if( isReadWrite ) openFlags |= O_RDWR;
+ if( isCreate ) openFlags |= O_CREAT;
+ if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
+ openFlags |= (O_LARGEFILE|O_BINARY);
- fd = open(zName, oflags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
- OSTRACE4("OPENX %-3d %s 0%o\n", fd, zName, oflags);
+ fd = open(zName, openFlags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
+ OSTRACE4("OPENX %-3d %s 0%o\n", fd, zName, openFlags);
if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
/* Failed to open the file for read/write access. Try read-only. */
flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
}
#if SQLITE_ENABLE_LOCKING_STYLE
else{
- ((unixFile*)pFile)->oflags = oflags;
+ ((unixFile*)pFile)->openFlags = openFlags;
}
#endif
if( pOutFlags ){
return 0;
}
+/*
+************************ End of sqlite3_vfs methods ***************************
+******************************************************************************/
+
/*
** Initialize the operating system interface.
+**
+** This routine registers all VFS implementations for unix-like operating
+** systems. This routine, and the sqlite3_os_end() routine that follows,
+** should be the only routines in this file that are visible from other
+** files.
*/
int sqlite3_os_init(void){
/* Macro to define the static contents of an sqlite3_vfs structure for
unixGetLastError /* xGetLastError */ \
}
- static sqlite3_vfs unixVfs = UNIXVFS("unix", 0);
-#if SQLITE_ENABLE_LOCKING_STYLE
int i;
static sqlite3_vfs aVfs[] = {
+ UNIXVFS("unix", LOCKING_STYLE_AUTOMATIC),
UNIXVFS("unix-posix", LOCKING_STYLE_POSIX),
- UNIXVFS("unix-afp", LOCKING_STYLE_AFP),
- UNIXVFS("unix-flock", LOCKING_STYLE_FLOCK),
- UNIXVFS("unix-dotfile", LOCKING_STYLE_DOTFILE),
UNIXVFS("unix-none", LOCKING_STYLE_NONE),
+ UNIXVFS("unix-dotfile", LOCKING_STYLE_DOTFILE),
+#if OS_VXWORKS
UNIXVFS("unix-namedsem",LOCKING_STYLE_NAMEDSEM),
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE
+ UNIXVFS("unix-flock", LOCKING_STYLE_FLOCK),
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE && defined(__DARWIN__)
+ UNIXVFS("unix-afp", LOCKING_STYLE_AFP),
UNIXVFS("unix-proxy", LOCKING_STYLE_PROXY)
+#endif
};
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
- sqlite3_vfs_register(&aVfs[i], 0);
+ sqlite3_vfs_register(&aVfs[i], i==0);
}
-#endif
- sqlite3_vfs_register(&unixVfs, 1);
return SQLITE_OK;
}