From 734c9864cbba23d984b7c8c2e5ce4e6877de23a4 Mon Sep 17 00:00:00 2001
From: drh <drh@noemail.net>
Date: Fri, 28 Nov 2008 15:37:20 +0000
Subject: [PATCH] First step in refactoring os_unix.c.  This is work in
 progress.  The code compiles and runs on Linux and MacOSX (as long as
 SQLITE_ENABLE_LOCKING_STYLE is turned off), but there are a few test
 failures. (CVS 5965)

FossilOrigin-Name: 7825cd63e5cb390a9c2c05957ebc9b189612f04a
---
 manifest      |   14 +-
 manifest.uuid |    2 +-
 src/os_unix.c | 4940 ++++++++++++++++++++++++++-----------------------
 src/test1.c   |    6 +-
 4 files changed, 2634 insertions(+), 2328 deletions(-)

diff --git a/manifest b/manifest
index d1d00613a8..69345c57bc 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Add\s19\snew\sassert()\sstatements\sin\sbtree.c\sthat\sattempt\sto\sdetect\swriting\sto\na\scache\spage\swhich\sis\snot\swriteable.\s(CVS\s5964)
-D 2008-11-27T02:22:11
+C First\sstep\sin\srefactoring\sos_unix.c.\s\sThis\sis\swork\sin\sprogress.\s\sThe\scode\ncompiles\sand\sruns\son\sLinux\sand\sMacOSX\s(as\slong\sas\sSQLITE_ENABLE_LOCKING_STYLE\nis\sturned\soff),\sbut\sthere\sare\sa\sfew\stest\sfailures.\s(CVS\s5965)
+D 2008-11-28T15:37:20
 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
 F Makefile.in 0aa7bbe3be6acc4045706e3bb3fd0b8f38f4a3b5
 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@@ -137,7 +137,7 @@ F src/os.c 0b411644b87ad689d7250bbfd1834d99b81a3df4
 F src/os.h ef8abeb9afc694b82dbd169a91c9b7e26db3c892
 F src/os_common.h 24525d8b7bce66c374dfc1810a6c9043f3359b60
 F src/os_os2.c 36196e71292a44bf2d393413cd8c86199694b8b4
-F src/os_unix.c add9937ac646b0f5f8dd603053ceb9264d675a60
+F src/os_unix.c 26f4b3dde5bac6ba8c8dc86a547a4053d5f9cfc3
 F src/os_win.c 3dff41670fb9798a869c636626bb7d6d8b6a45bb
 F src/pager.c 2e9182e181bbd3d758436d9ccce2a3910400dd30
 F src/pager.h a02ef8e6cc7e78b54874166e5ce786c9d4c489bf
@@ -159,7 +159,7 @@ F src/sqliteLimit.h f435e728c6b620ef7312814d660a81f9356eb5c8
 F src/status.c 237b193efae0cf6ac3f0817a208de6c6c6ef6d76
 F src/table.c 22744786199c9195720c15a7a42cb97b2e2728d8
 F src/tclsqlite.c 96049bd454f1547abff0a57c45f0dfa57701e076
-F src/test1.c e604b8647aaf1b2cb16ff927e1f4d825f8589e8e
+F src/test1.c f407faafb17ad328057fe86800518a64046e2766
 F src/test2.c 897528183edf2839c2a3c991d415905db56f1240
 F src/test3.c 88a246b56b824275300e6c899634fbac1dc94b14
 F src/test4.c f79ab52d27ff49b784b631a42e2ccd52cfd5c84c
@@ -662,7 +662,7 @@ F tool/speedtest16.c c8a9c793df96db7e4933f0852abb7a03d48f2e81
 F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
 F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
-P 165bc2d7658e14eda8b375329e4286dc4d048f3a
-R 18dffe90de3bb54b15f0c46cfd3b4237
+P f9c7359065829b016d8cd04304c02509c254fe05
+R f18bbe219fdf4c9d3849fc41eab92317
 U drh
-Z 80fd0c6dbff82ddebdc7b82929bca8e0
+Z 31ecaef0fd8fa5ecc70a0ec198476cc9
diff --git a/manifest.uuid b/manifest.uuid
index 2ea8b9c7cd..e17e65748d 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-f9c7359065829b016d8cd04304c02509c254fe05
\ No newline at end of file
+7825cd63e5cb390a9c2c05957ebc9b189612f04a
\ No newline at end of file
diff --git a/src/os_unix.c b/src/os_unix.c
index a22402bad6..22cadf0bf5 100644
--- a/src/os_unix.c
+++ b/src/os_unix.c
@@ -10,24 +10,56 @@
 **
 ******************************************************************************
 **
-** This file contains code that is specific to Unix systems.
+** This file contains the VFS implementation for unix-like operating systems
+** include Linux, MacOSX, *BSD, QNX, VxWorks, AIX, HPUX, and others.
 **
-** $Id: os_unix.c,v 1.221 2008/11/25 12:07:41 danielk1977 Exp $
+** There are actually several different VFS implementations in this file.
+** The differences are in the way that file locking is done.  The default
+** implementation uses Posix Advisory Locks.  Alternative implementations
+** use flock(), dot-files, various proprietary locking schemas, or simply
+** skip locking all together.
+**
+** This source file is group into divisions where the logic for various
+** subfunctions is contained within the appropriate division.  PLEASE
+** KEEP THE STRUCTURE OF THIS FILE INTACT.  New code should be placed
+** in the correct division and should be clearly labeled.
+**
+** The current set of divisions is as follows:
+**
+**   *  General-purpose declarations and utility functions.
+**   *  Unique file ID logic used by VxWorks.
+**   *  Various locking primitive implementations:
+**      + for Posix Advisory Locks
+**      + for no-op locks
+**      + for dot-file locks
+**      + for flock() locking
+**      + for named semaphore locks (VxWorks only)
+**      + for AFP filesystem locks (MacOSX only)
+**      + for proxy locks (MacOSX only)
+**   *  The routine used to detect an appropriate locking style
+**   *  sqlite3_file methods not associated with locking
+**   *  Implementations of sqlite3_os_init() and sqlite3_os_end()
+**
+** $Id: os_unix.c,v 1.222 2008/11/28 15:37:20 drh Exp $
 */
 #include "sqliteInt.h"
 #if SQLITE_OS_UNIX              /* This file is used on unix only */
 
 /*
-** If SQLITE_ENABLE_LOCKING_STYLE is defined and is non-zero, then several
-** alternative locking implementations are provided:
+** This module implements the following locking styles:
 **
-**   * POSIX locking (the default),
-**   * No locking,
-**   * Dot-file locking,
-**   * flock() locking,
-**   * AFP locking (OSX only),
-**   * Named POSIX semaphores (VXWorks only),
-**   * proxy locking.
+**   1. POSIX locking (the default),
+**   2. No locking,
+**   3. Dot-file locking,
+**   4. flock() locking,
+**   5. AFP locking (OSX only),
+**   6. Named POSIX semaphores (VXWorks only),
+**   7. proxy locking. (OSX only)
+**
+** Styles 4, 5, and 7 are only available of SQLITE_ENABLE_LOCKING_STYLE
+** is defined to 1.  The SQLITE_ENABLE_LOCKING_STYLE also enables automatic
+** selection of the appropriate locking style based on the filesystem
+** where the database is located.  
 **
 ** SQLITE_ENABLE_LOCKING_STYLE only works on a Mac. It is turned on by
 ** default on a Mac and disabled on all other posix platforms.
@@ -124,6 +156,55 @@
 */
 #define MAX_PATHNAME 512
 
+/*
+** The locking styles are associated with the different file locking
+** capabilities supported by different file systems.  
+**
+** POSIX     support for shared and exclusive byte-range locks 
+**
+** NONE      no locking will be attempted, this is only used for
+**           read-only file systems currently
+**
+** DOTLOCK   isn't a true locking style, it refers to the use of a special
+**           file named the same as the database file with a '.lock'
+**           extension, this can be used on file systems that do not
+**           offer any reliable file locking
+**
+** FLOCK     only a single file-global exclusive lock  (Not on VxWorks)
+**
+** NAMEDSEM  similar to DOTLOCK but uses a named semaphore instead of an
+**           indicator file.  (VxWorks only)
+**
+** AFP       support exclusive byte-range locks  (MacOSX only)
+**
+** PROXY     uses a second file to represent the lock state of the database
+**           file which is never actually locked, a third file controls
+**           access to the proxy  (MacOSX only)
+**
+** Note that because FLOCK and NAMEDSEM are never used together, they
+** share the same code number (3).  The locking mode numbering is 
+** chosen so that a set of locking modes that are contiguous integers
+** from 1 to N.  On generic unix systems without flock() support,
+** the modes are 1..3.  On generic unix with flock() support, the modes
+** are 1..4.  On VxWorks, the modes are 1..4.  On MacOSX the modes
+** are 1..6.
+*/
+#define LOCKING_STYLE_POSIX        1
+#define LOCKING_STYLE_NONE         2
+#define LOCKING_STYLE_DOTFILE      3
+#define LOCKING_STYLE_FLOCK        4
+#define LOCKING_STYLE_NAMEDSEM     4
+#define LOCKING_STYLE_AFP          5
+#define LOCKING_STYLE_PROXY        6
+
+#define LOCKING_STYLE_AUTOMATIC    0  /* Choose lock style automatically */
+
+/*
+** Only set the lastErrno if the error code is a real error and not 
+** a normal expected return code of SQLITE_BUSY or SQLITE_OK
+*/
+#define IS_LOCK_ERROR(x)  ((x != SQLITE_OK) && (x != SQLITE_BUSY))
+
 
 /*
 ** The unixFile structure is subclass of sqlite3_file specific for the unix
@@ -138,11 +219,9 @@ struct unixFile {
   int dirfd;                       /* File descriptor for the directory */
   unsigned char locktype;          /* The type of lock held on this fd */
   int lastErrno;                   /* The unix errno from the last I/O error */
-#if SQLITE_ENABLE_LOCKING_STYLE
   void *lockingContext;            /* Locking style specific state */
-  int oflags;                      /* The flags specified at open */
-#endif
-#if SQLITE_THREADSAFE
+  int openFlags;                   /* The flags specified at open */
+#if SQLITE_THREADSAFE && defined(__linux__)
   pthread_t tid;                   /* The thread that "owns" this unixFile */
 #endif
 #if OS_VXWORKS
@@ -210,882 +289,584 @@ static void unixLeaveMutex(void){
   sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
 }
 
-/************************************************************************
-*********** Posix Advisory Locking And Thread Interaction ***************
-*************************************************************************
-**
-** Here is the dirt on POSIX advisory locks:  ANSI STD 1003.1 (1996)
-** section 6.5.2.2 lines 483 through 490 specify that when a process
-** sets or clears a lock, that operation overrides any prior locks set
-** by the same process.  It does not explicitly say so, but this implies
-** that it overrides locks set by the same process using a different
-** file descriptor.  Consider this test case:
-**
-**       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
-**       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
-**
-** Suppose ./file1 and ./file2 are really the same file (because
-** one is a hard or symbolic link to the other) then if you set
-** an exclusive lock on fd1, then try to get an exclusive lock
-** on fd2, it works.  I would have expected the second lock to
-** fail since there was already a lock on the file due to fd1.
-** But not so.  Since both locks came from the same process, the
-** second overrides the first, even though they were on different
-** file descriptors opened on different file names.
-**
-** Bummer.  If you ask me, this is broken.  Badly broken.  It means
-** that we cannot use POSIX locks to synchronize file access among
-** competing threads of the same process.  POSIX locks will work fine
-** to synchronize access for threads in separate processes, but not
-** threads within the same process.
-**
-** To work around the problem, SQLite has to manage file locks internally
-** on its own.  Whenever a new database is opened, we have to find the
-** specific inode of the database file (the inode is determined by the
-** st_dev and st_ino fields of the stat structure that fstat() fills in)
-** and check for locks already existing on that inode.  When locks are
-** created or removed, we have to look at our own internal record of the
-** locks to see if another thread has previously set a lock on that same
-** inode.
-**
-** The sqlite3_file structure for POSIX is no longer just an integer file
-** descriptor.  It is now a structure that holds the integer file
-** descriptor and a pointer to a structure that describes the internal
-** locks on the corresponding inode.  There is one locking structure
-** per inode, so if the same inode is opened twice, both unixFile structures
-** point to the same locking structure.  The locking structure keeps
-** a reference count (so we will know when to delete it) and a "cnt"
-** field that tells us its internal lock status.  cnt==0 means the
-** file is unlocked.  cnt==-1 means the file has an exclusive lock.
-** cnt>0 means there are cnt shared locks on the file.
-**
-** Any attempt to lock or unlock a file first checks the locking
-** structure.  The fcntl() system call is only invoked to set a 
-** POSIX lock if the internal lock structure transitions between
-** a locked and an unlocked state.
-**
-** 2004-Jan-11:
-** More recent discoveries about POSIX advisory locks.  (The more
-** I discover, the more I realize the a POSIX advisory locks are
-** an abomination.)
-**
-** If you close a file descriptor that points to a file that has locks,
-** all locks on that file that are owned by the current process are
-** released.  To work around this problem, each unixFile structure contains
-** a pointer to an unixOpenCnt structure.  There is one unixOpenCnt structure
-** per open inode, which means that multiple unixFile can point to a single
-** unixOpenCnt.  When an attempt is made to close an unixFile, if there are
-** other unixFile open on the same inode that are holding locks, the call
-** to close() the file descriptor is deferred until all of the locks clear.
-** The unixOpenCnt structure keeps a list of file descriptors that need to
-** be closed and that list is walked (and cleared) when the last lock
-** clears.
-**
-** First, under Linux threads, because each thread has a separate
-** process ID, lock operations in one thread do not override locks
-** to the same file in other threads.  Linux threads behave like
-** separate processes in this respect.  But, if you close a file
-** descriptor in linux threads, all locks are cleared, even locks
-** on other threads and even though the other threads have different
-** process IDs.  Linux threads is inconsistent in this respect.
-** (I'm beginning to think that linux threads is an abomination too.)
-** The consequence of this all is that the hash table for the unixLockInfo
-** structure has to include the process id as part of its key because
-** locks in different threads are treated as distinct.  But the 
-** unixOpenCnt structure should not include the process id in its
-** key because close() clears lock on all threads, not just the current
-** thread.  Were it not for this goofiness in linux threads, we could
-** combine the unixLockInfo and unixOpenCnt structures into a single structure.
-**
-** 2004-Jun-28:
-** On some versions of linux, threads can override each others locks.
-** On others not.  Sometimes you can change the behavior on the same
-** system by setting the LD_ASSUME_KERNEL environment variable.  The
-** POSIX standard is silent as to which behavior is correct, as far
-** as I can tell, so other versions of unix might show the same
-** inconsistency.  There is no little doubt in my mind that posix
-** advisory locks and linux threads are profoundly broken.
-**
-** To work around the inconsistencies, we have to test at runtime 
-** whether or not threads can override each others locks.  This test
-** is run once, the first time any lock is attempted.  A static 
-** variable is set to record the results of this test for future
-** use.
-*/
-
-/*
-** Set or check the unixFile.tid field.  This field is set when an unixFile
-** is first opened.  All subsequent uses of the unixFile verify that the
-** same thread is operating on the unixFile.  Some operating systems do
-** not allow locks to be overridden by other threads and that restriction
-** means that sqlite3* database handles cannot be moved from one thread
-** to another.  This logic makes sure a user does not try to do that
-** by mistake.
-**
-** Version 3.3.1 (2006-01-15):  unixFile can be moved from one thread to
-** another as long as we are running on a system that supports threads
-** overriding each others locks (which now the most common behavior)
-** or if no locks are held.  But the unixFile.pLock field needs to be
-** recomputed because its key includes the thread-id.  See the 
-** transferOwnership() function below for additional information
-*/
-#if SQLITE_THREADSAFE
-# define SET_THREADID(X)   (X)->tid = pthread_self()
-# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
-                            !pthread_equal((X)->tid, pthread_self()))
-#else
-# define SET_THREADID(X)
-# define CHECK_THREADID(X) 0
-#endif
 
+#ifdef SQLITE_DEBUG
 /*
-** An instance of the following structure serves as the key used
-** to locate a particular unixOpenCnt structure given its inode.  This
-** is the same as the unixLockKey except that the thread ID is omitted.
+** Helper function for printing out trace information from debugging
+** binaries. This returns the string represetation of the supplied
+** integer lock-type.
 */
-struct unixFileId {
-  dev_t dev;                  /* Device number */
-#if OS_VXWORKS
-  struct vxworksFileId *pId;  /* Unique file ID for vxworks. */
-#else
-  ino_t ino;                  /* Inode number */
+static const char *locktypeName(int locktype){
+  switch( locktype ){
+  case NO_LOCK: return "NONE";
+  case SHARED_LOCK: return "SHARED";
+  case RESERVED_LOCK: return "RESERVED";
+  case PENDING_LOCK: return "PENDING";
+  case EXCLUSIVE_LOCK: return "EXCLUSIVE";
+  }
+  return "ERROR";
+}
 #endif
-};
 
+#ifdef SQLITE_LOCK_TRACE
 /*
-** An instance of the following structure serves as the key used
-** to locate a particular unixLockInfo structure given its inode.
+** Print out information about all locking operations.
 **
-** If threads cannot override each others locks, then we set the
-** unixLockKey.tid field to the thread ID.  If threads can override
-** each others locks then tid is always set to zero.  tid is omitted
-** if we compile without threading support.
+** This routine is used for troubleshooting locks on multithreaded
+** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
+** command-line option on the compiler.  This code is normally
+** turned off.
 */
-struct unixLockKey {
-  struct unixFileId fid;  /* Unique identifier for the file */
-#if SQLITE_THREADSAFE
-  pthread_t tid;      /* Thread ID or zero if threads can override each other */
-#endif
-};
+static int lockTrace(int fd, int op, struct flock *p){
+  char *zOpName, *zType;
+  int s;
+  int savedErrno;
+  if( op==F_GETLK ){
+    zOpName = "GETLK";
+  }else if( op==F_SETLK ){
+    zOpName = "SETLK";
+  }else{
+    s = fcntl(fd, op, p);
+    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
+    return s;
+  }
+  if( p->l_type==F_RDLCK ){
+    zType = "RDLCK";
+  }else if( p->l_type==F_WRLCK ){
+    zType = "WRLCK";
+  }else if( p->l_type==F_UNLCK ){
+    zType = "UNLCK";
+  }else{
+    assert( 0 );
+  }
+  assert( p->l_whence==SEEK_SET );
+  s = fcntl(fd, op, p);
+  savedErrno = errno;
+  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
+     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
+     (int)p->l_pid, s);
+  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
+    struct flock l2;
+    l2 = *p;
+    fcntl(fd, F_GETLK, &l2);
+    if( l2.l_type==F_RDLCK ){
+      zType = "RDLCK";
+    }else if( l2.l_type==F_WRLCK ){
+      zType = "WRLCK";
+    }else if( l2.l_type==F_UNLCK ){
+      zType = "UNLCK";
+    }else{
+      assert( 0 );
+    }
+    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
+       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
+  }
+  errno = savedErrno;
+  return s;
+}
+#define fcntl lockTrace
+#endif /* SQLITE_LOCK_TRACE */
 
-/*
-** An instance of the following structure is allocated for each open
-** inode on each thread with a different process ID.  (Threads have
-** different process IDs on linux, but not on most other unixes.)
-**
-** A single inode can have multiple file descriptors, so each unixFile
-** structure contains a pointer to an instance of this object and this
-** object keeps a count of the number of unixFile pointing to it.
-*/
-struct unixLockInfo {
-  struct unixLockKey lockKey;       /* The lookup key */
-  int cnt;                          /* Number of SHARED locks held */
-  int locktype;                     /* One of SHARED_LOCK, RESERVED_LOCK etc. */
-  int nRef;                         /* Number of pointers to this structure */
-  struct unixLockInfo *pNext;       /* List of all unixLockInfo objects */
-  struct unixLockInfo *pPrev;       /*    .... doubly linked */
-};
 
-/*
-** An instance of the following structure is allocated for each open
-** inode.  This structure keeps track of the number of locks on that
-** inode.  If a close is attempted against an inode that is holding
-** locks, the close is deferred until all locks clear by adding the
-** file descriptor to be closed to the pending list.
-*/
-struct unixOpenCnt {
-  struct unixFileId fileId;   /* The lookup key */
-  int nRef;                   /* Number of pointers to this structure */
-  int nLock;                  /* Number of outstanding locks */
-  int nPending;               /* Number of pending close() operations */
-  int *aPending;            /* Malloced space holding fd's awaiting a close() */
-#if OS_VXWORKS
-  sem_t *pSem;                     /* Named POSIX semaphore */
-  char aSemName[MAX_PATHNAME+1];   /* Name of that semaphore */
-#endif
-  struct unixOpenCnt *pNext, *pPrev;   /* List of all unixOpenCnt objects */
-};
 
 /*
-** List of all unixLockInfo and unixOpenCnt objects.  This used to be a hash
-** table.  But the number of objects is rarely more than a dozen and
-** never exceeds a few thousand.  And lookup is not on a critical
-** path so a simple linked list will suffice.
+** This routine translates a standard POSIX errno code into something
+** useful to the clients of the sqlite3 functions.  Specifically, it is
+** intended to translate a variety of "try again" errors into SQLITE_BUSY
+** and a variety of "please close the file descriptor NOW" errors into 
+** SQLITE_IOERR
+** 
+** Errors during initialization of locks, or file system support for locks,
+** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
 */
-static struct unixLockInfo *lockList = 0;
-static struct unixOpenCnt *openList = 0;
+static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
+  switch (posixError) {
+  case 0: 
+    return SQLITE_OK;
+    
+  case EAGAIN:
+  case ETIMEDOUT:
+  case EBUSY:
+  case EINTR:
+  case ENOLCK:  
+    /* random NFS retry error, unless during file system support 
+     * introspection, in which it actually means what it says */
+    return SQLITE_BUSY;
+    
+  case EACCES: 
+    /* EACCES is like EAGAIN during locking operations, but not any other time*/
+    if( (sqliteIOErr == SQLITE_IOERR_LOCK) || 
+	(sqliteIOErr == SQLITE_IOERR_UNLOCK) || 
+	(sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
+	(sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
+      return SQLITE_BUSY;
+    }
+    /* else fall through */
+  case EPERM: 
+    return SQLITE_PERM;
+    
+  case EDEADLK:
+    return SQLITE_IOERR_BLOCKED;
+    
+#if EOPNOTSUPP!=ENOTSUP
+  case EOPNOTSUPP: 
+    /* something went terribly awry, unless during file system support 
+     * introspection, in which it actually means what it says */
+#endif
+#ifdef ENOTSUP
+  case ENOTSUP: 
+    /* invalid fd, unless during file system support introspection, in which 
+     * it actually means what it says */
+#endif
+  case EIO:
+  case EBADF:
+  case EINVAL:
+  case ENOTCONN:
+  case ENODEV:
+  case ENXIO:
+  case ENOENT:
+  case ESTALE:
+  case ENOSYS:
+    /* these should force the client to close the file and reconnect */
+    
+  default: 
+    return sqliteIOErr;
+  }
+}
 
-/*
-** This variable records whether or not threads can override each others
-** locks.
+
+
+/******************************************************************************
+****************** Begin Unique File ID Utility Used By VxWorks ***************
 **
-**    0:  No.  Threads cannot override each others locks.
-**    1:  Yes.  Threads can override each others locks.
-**   -1:  We don't know yet.
+** On most versions of unix, we can get a unique ID for a file by concatenating
+** the device number and the inode number.  But this does not work on VxWorks.
+** On VxWorks, a unique file id must be based on the canonical filename.
 **
-** On some systems, we know at compile-time if threads can override each
-** others locks.  On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
-** will be set appropriately.  On other systems, we have to check at
-** runtime.  On these latter systems, SQLTIE_THREAD_OVERRIDE_LOCK is
-** undefined.
+** A pointer to an instance of the following structure can be used as a
+** unique file ID in VxWorks.  Each instance of this structure contains
+** a copy of the canonical filename.  There is also a reference count.  
+** The structure is reclaimed when the number of pointers to it drops to
+** zero.
 **
-** This variable normally has file scope only.  But during testing, we make
-** it a global so that the test code can change its value in order to verify
-** that the right stuff happens in either case.
+** There are never very many files open at one time and lookups are not
+** a performance-critical path, so it is sufficient to put these
+** structures on a linked list.
 */
-#ifndef SQLITE_THREAD_OVERRIDE_LOCK
-# define SQLITE_THREAD_OVERRIDE_LOCK -1
-#endif
-#ifdef SQLITE_TEST
-int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
-#else
-static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
-#endif
+struct vxworksFileId {
+  struct vxworksFileId *pNext;  /* Next in a list of them all */
+  int nRef;                     /* Number of references to this one */
+  int nName;                    /* Length of the zCanonicalName[] string */
+  char *zCanonicalName;         /* Canonical filename */
+};
 
-/*
-** This structure holds information passed into individual test
-** threads by the testThreadLockingBehavior() routine.
+#if OS_VXWORKS
+/* 
+** All unique filesname are held on a linked list headed by this
+** variable:
 */
-struct threadTestData {
-  int fd;                /* File to be locked */
-  struct flock lock;     /* The locking operation */
-  int result;            /* Result of the locking operation */
-};
+static struct vxworksFileId *vxworksFileList = 0;
 
-#if SQLITE_THREADSAFE && defined(__linux__)
 /*
-** This function is used as the main routine for a thread launched by
-** testThreadLockingBehavior(). It tests whether the shared-lock obtained
-** by the main thread in testThreadLockingBehavior() conflicts with a
-** hypothetical write-lock obtained by this thread on the same file.
+** Simplify a filename into its canonical form
+** by making the following changes:
 **
-** The write-lock is not actually acquired, as this is not possible if 
-** the file is open in read-only mode (see ticket #3472).
-*/ 
-static void *threadLockingTest(void *pArg){
-  struct threadTestData *pData = (struct threadTestData*)pArg;
-  pData->result = fcntl(pData->fd, F_GETLK, &pData->lock);
-  return pArg;
+**  * removing any trailing and duplicate /
+**  * removing /./
+**  * removing /A/../
+**
+** Changes are made in-place.  Return the new name length.
+**
+** The original filename is in z[0..n-1].  Return the number of
+** characters in the simplified name.
+*/
+static int vxworksSimplifyName(char *z, int n){
+  int i, j;
+  while( n>1 && z[n-1]=='/' ){ n--; }
+  for(i=j=0; i<n; i++){
+    if( z[i]=='/' ){
+      if( z[i+1]=='/' ) continue;
+      if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
+        i += 1;
+        continue;
+      }
+      if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
+        while( j>0 && z[j-1]!='/' ){ j--; }
+        if( j>0 ){ j--; }
+        i += 2;
+        continue;
+      }
+    }
+    z[j++] = z[i];
+  }
+  z[j] = 0;
+  return j;
 }
-#endif /* SQLITE_THREADSAFE && defined(__linux__) */
 
-
-#if SQLITE_THREADSAFE && defined(__linux__)
 /*
-** This procedure attempts to determine whether or not threads
-** can override each others locks then sets the 
-** threadsOverrideEachOthersLocks variable appropriately.
+** Find a unique file ID for the given absolute pathname.  Return
+** a pointer to the vxworksFileId object.  This pointer is the unique
+** file ID.
+**
+** The nRef field of the vxworksFileId object is incremented before
+** the object is returned.  A new vxworksFileId object is created
+** and added to the global list if necessary.
+**
+** If a memory allocation error occurs, return NULL.
 */
-static void testThreadLockingBehavior(int fd_orig){
-  int fd;
-  int rc;
-  struct threadTestData d;
-  struct flock l;
-  pthread_t t;
+static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
+  struct vxworksFileId *pNew;         /* search key and new file ID */
+  struct vxworksFileId *pCandidate;   /* For looping over existing file IDs */
+  int n;                              /* Length of zAbsoluteName string */
 
-  fd = dup(fd_orig);
-  if( fd<0 ) return;
-  memset(&l, 0, sizeof(l));
-  l.l_type = F_RDLCK;
-  l.l_len = 1;
-  l.l_start = 0;
-  l.l_whence = SEEK_SET;
-  rc = fcntl(fd_orig, F_SETLK, &l);
-  if( rc!=0 ) return;
-  memset(&d, 0, sizeof(d));
-  d.fd = fd;
-  d.lock = l;
-  d.lock.l_type = F_WRLCK;
-  pthread_create(&t, 0, threadLockingTest, &d);
-  pthread_join(t, 0);
-  close(fd);
-  if( d.result!=0 ) return;
-  threadsOverrideEachOthersLocks = (d.lock.l_type==F_UNLCK);
-}
-#else  /* if !SQLITE_THREADSAFE || !defined(__linux__) */
-/*
-** On anything other than linux, assume threads override each others locks.
-*/
-static void testThreadLockingBehavior(int fd_orig){
-  UNUSED_PARAMETER(fd_orig);
-  threadsOverrideEachOthersLocks = 1;
-}
-#endif /* SQLITE_THERADSAFE && defined(__linux__) */
+  assert( zAbsoluteName[0]=='/' );
+  n = strlen(zAbsoluteName);
+  pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) );
+  if( pNew==0 ) return 0;
+  pNew->zCanonicalName = (char*)&pNew[1];
+  memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
+  n = vxworksSimplifyName(pNew->zCanonicalName, n);
 
-/*
-** Release a unixLockInfo structure previously allocated by findLockInfo().
-*/
-static void releaseLockInfo(struct unixLockInfo *pLock){
-  if( pLock ){
-    pLock->nRef--;
-    if( pLock->nRef==0 ){
-      if( pLock->pPrev ){
-        assert( pLock->pPrev->pNext==pLock );
-        pLock->pPrev->pNext = pLock->pNext;
-      }else{
-        assert( lockList==pLock );
-        lockList = pLock->pNext;
-      }
-      if( pLock->pNext ){
-        assert( pLock->pNext->pPrev==pLock );
-        pLock->pNext->pPrev = pLock->pPrev;
-      }
-      sqlite3_free(pLock);
+  /* Search for an existing entry that matching the canonical name.
+  ** If found, increment the reference count and return a pointer to
+  ** the existing file ID.
+  */
+  unixEnterMutex();
+  for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
+    if( pCandidate->nName==n 
+     && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
+    ){
+       sqlite3_free(pNew);
+       pCandidate->nRef++;
+       unixLeaveMutex();
+       return pCandidate;
     }
   }
-}
 
+  /* No match was found.  We will make a new file ID */
+  pNew->nRef = 1;
+  pNew->nName = n;
+  pNew->pNext = vxworksFileList;
+  vxworksFileList = pNew;
+  unixLeaveMutex();
+  return pNew;
+}
 
 /*
-** Release a unixOpenCnt structure previously allocated by findLockInfo().
+** Decrement the reference count on a vxworksFileId object.  Free
+** the object when the reference count reaches zero.
 */
-static void releaseOpenCnt(struct unixOpenCnt *pOpen){
-  if( pOpen ){
-    pOpen->nRef--;
-    if( pOpen->nRef==0 ){
-      if( pOpen->pPrev ){
-        assert( pOpen->pPrev->pNext==pOpen );
-        pOpen->pPrev->pNext = pOpen->pNext;
-      }else{
-        assert( openList==pOpen );
-        openList = pOpen->pNext;
-      }
-      if( pOpen->pNext ){
-        assert( pOpen->pNext->pPrev==pOpen );
-        pOpen->pNext->pPrev = pOpen->pPrev;
-      }
-      sqlite3_free(pOpen->aPending);
-      sqlite3_free(pOpen);
-    }
+static void vxworksReleaseFileId(struct vxworksFileId *pId){
+  unixEnterMutex();
+  assert( pId->nRef>0 );
+  pId->nRef--;
+  if( pId->nRef==0 ){
+    struct vxworksFileId **pp;
+    for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
+    assert( *pp==pId );
+    *pp = pId->pNext;
+    sqlite3_free(pId);
   }
+  unixLeaveMutex();
 }
+#endif /* OS_VXWORKS */
+/*************** End of Unique File ID Utility Used By VxWorks ****************
+******************************************************************************/
 
 
-/*
-** Given a file descriptor, locate unixLockInfo and unixOpenCnt structures that
-** describes that file descriptor.  Create new ones if necessary.  The
-** return values might be uninitialized if an error occurs.
+/******************************************************************************
+*************************** Posix Advisory Locking ****************************
 **
-** Return an appropriate error code.
+** POSIX advisory locks broken by design.  ANSI STD 1003.1 (1996)
+** section 6.5.2.2 lines 483 through 490 specify that when a process
+** sets or clears a lock, that operation overrides any prior locks set
+** by the same process.  It does not explicitly say so, but this implies
+** that it overrides locks set by the same process using a different
+** file descriptor.  Consider this test case:
+**
+**       int fd1 = open("./file1", O_RDWR|O_CREAT, 0644);
+**       int fd2 = open("./file2", O_RDWR|O_CREAT, 0644);
+**
+** Suppose ./file1 and ./file2 are really the same file (because
+** one is a hard or symbolic link to the other) then if you set
+** an exclusive lock on fd1, then try to get an exclusive lock
+** on fd2, it works.  I would have expected the second lock to
+** fail since there was already a lock on the file due to fd1.
+** But not so.  Since both locks came from the same process, the
+** second overrides the first, even though they were on different
+** file descriptors opened on different file names.
+**
+** This means that we cannot use POSIX locks to synchronize file access
+** among competing threads of the same process.  POSIX locks will work fine
+** to synchronize access for threads in separate processes, but not
+** threads within the same process.
+**
+** To work around the problem, SQLite has to manage file locks internally
+** on its own.  Whenever a new database is opened, we have to find the
+** specific inode of the database file (the inode is determined by the
+** st_dev and st_ino fields of the stat structure that fstat() fills in)
+** and check for locks already existing on that inode.  When locks are
+** created or removed, we have to look at our own internal record of the
+** locks to see if another thread has previously set a lock on that same
+** inode.
+**
+** The sqlite3_file structure for POSIX is no longer just an integer file
+** descriptor.  It is now a structure that holds the integer file
+** descriptor and a pointer to a structure that describes the internal
+** locks on the corresponding inode.  There is one locking structure
+** per inode, so if the same inode is opened twice, both unixFile structures
+** point to the same locking structure.  The locking structure keeps
+** a reference count (so we will know when to delete it) and a "cnt"
+** field that tells us its internal lock status.  cnt==0 means the
+** file is unlocked.  cnt==-1 means the file has an exclusive lock.
+** cnt>0 means there are cnt shared locks on the file.
+**
+** Any attempt to lock or unlock a file first checks the locking
+** structure.  The fcntl() system call is only invoked to set a 
+** POSIX lock if the internal lock structure transitions between
+** a locked and an unlocked state.
+**
+** But wait:  there are yet more problems with POSIX advisory locks.
+**
+** If you close a file descriptor that points to a file that has locks,
+** all locks on that file that are owned by the current process are
+** released.  To work around this problem, each unixFile structure contains
+** a pointer to an unixOpenCnt structure.  There is one unixOpenCnt structure
+** per open inode, which means that multiple unixFile can point to a single
+** unixOpenCnt.  When an attempt is made to close an unixFile, if there are
+** other unixFile open on the same inode that are holding locks, the call
+** to close() the file descriptor is deferred until all of the locks clear.
+** The unixOpenCnt structure keeps a list of file descriptors that need to
+** be closed and that list is walked (and cleared) when the last lock
+** clears.
+**
+** Yet another problem with posix locks and threads:
+**
+** Many older versions of linux us the LinuxThreads library which is
+** not posix compliant.  Under LinuxThreads, a lock created thread
+** A cannot be modified or overridden by a different thread B.
+** Only thread A can modify the lock.  Locking behavior is correct
+** if the appliation uses the newer Native Posix Thread Library (NPTL)
+** on linux - with NPTL a lock created by thread A can override locks
+** in thread B.  But there is no way to know at compile-time which
+** threading library is being used.  So there is no way to know at
+** compile-time whether or not thread A can override locks on thread B.
+** We have to do a run-time check to discover the behavior of the
+** current process.
+**
+** On systems where thread A is unable to modify locks created by
+** thread B, we have to keep track of which thread created each
+** lock.  So there is an extra field in the key to the unixLockInfo
+** structure to record this information.  And on those systems it
+** is illegal to begin a transaction in one thread and finish it
+** in another.  For this latter restriction, there is no work-around.
+** It is a limitation of LinuxThreads.
 */
-static int findLockInfo(
-  unixFile *pFile,               /* Unix file with file desc used in the key */
-  struct unixLockInfo **ppLock,  /* Return the unixLockInfo structure here */
-  struct unixOpenCnt **ppOpen    /* Return the unixOpenCnt structure here */
-){
-  int rc;                        /* System call return code */
-  int fd;                        /* The file descriptor for pFile */
-  struct unixLockKey lockKey;    /* Lookup key for the unixLockInfo structure */
-  struct unixFileId fileId;      /* Lookup key for the unixOpenCnt struct */
-  struct stat statbuf;           /* Low-level file information */
-  struct unixLockInfo *pLock;    /* Candidate unixLockInfo object */
-  struct unixOpenCnt *pOpen;     /* Candidate unixOpenCnt object */
 
-  /* Get low-level information about the file that we can used to
-  ** create a unique name for the file.
-  */
-  fd = pFile->h;
-  rc = fstat(fd, &statbuf);
-  if( rc!=0 ){
-    pFile->lastErrno = errno;
-#ifdef EOVERFLOW
-    if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
+/*
+** Set or check the unixFile.tid field.  This field is set when an unixFile
+** is first opened.  All subsequent uses of the unixFile verify that the
+** same thread is operating on the unixFile.  Some operating systems do
+** not allow locks to be overridden by other threads and that restriction
+** means that sqlite3* database handles cannot be moved from one thread
+** to another while locks are held.
+**
+** Version 3.3.1 (2006-01-15):  unixFile can be moved from one thread to
+** another as long as we are running on a system that supports threads
+** overriding each others locks (which is now the most common behavior)
+** or if no locks are held.  But the unixFile.pLock field needs to be
+** recomputed because its key includes the thread-id.  See the 
+** transferOwnership() function below for additional information
+*/
+#if SQLITE_THREADSAFE && defined(__linux__)
+# define SET_THREADID(X)   (X)->tid = pthread_self()
+# define CHECK_THREADID(X) (threadsOverrideEachOthersLocks==0 && \
+                            !pthread_equal((X)->tid, pthread_self()))
+#else
+# define SET_THREADID(X)
+# define CHECK_THREADID(X) 0
 #endif
-    return SQLITE_IOERR;
-  }
-
-  /* On OS X on an msdos filesystem, the inode number is reported
-  ** incorrectly for zero-size files.  See ticket #3260.  To work
-  ** around this problem (we consider it a bug in OS X, not SQLite)
-  ** we always increase the file size to 1 by writing a single byte
-  ** prior to accessing the inode number.  The one byte written is
-  ** an ASCII 'S' character which also happens to be the first byte
-  ** in the header of every SQLite database.  In this way, if there
-  ** is a race condition such that another thread has already populated
-  ** the first page of the database, no damage is done.
-  */
-  if( statbuf.st_size==0 ){
-    write(fd, "S", 1);
-    rc = fstat(fd, &statbuf);
-    if( rc!=0 ){
-      pFile->lastErrno = errno;
-      return SQLITE_IOERR;
-    }
-  }
 
-  memset(&lockKey, 0, sizeof(lockKey));
-  lockKey.fid.dev = statbuf.st_dev;
+/*
+** An instance of the following structure serves as the key used
+** to locate a particular unixOpenCnt structure given its inode.  This
+** is the same as the unixLockKey except that the thread ID is omitted.
+*/
+struct unixFileId {
+  dev_t dev;                  /* Device number */
 #if OS_VXWORKS
-  lockKey.fid.pId = pFile->pId;
+  struct vxworksFileId *pId;  /* Unique file ID for vxworks. */
 #else
-  lockKey.fid.ino = statbuf.st_ino;
-#endif
-#if SQLITE_THREADSAFE
-  if( threadsOverrideEachOthersLocks<0 ){
-    testThreadLockingBehavior(fd);
-  }
-  lockKey.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
-#endif
-  fileId = lockKey.fid;
-  if( ppLock!=0 ){
-    pLock = lockList;
-    while( pLock && memcmp(&lockKey, &pLock->lockKey, sizeof(lockKey)) ){
-      pLock = pLock->pNext;
-    }
-    if( pLock==0 ){
-      pLock = sqlite3_malloc( sizeof(*pLock) );
-      if( pLock==0 ){
-        rc = SQLITE_NOMEM;
-        goto exit_findlockinfo;
-      }
-      pLock->lockKey = lockKey;
-      pLock->nRef = 1;
-      pLock->cnt = 0;
-      pLock->locktype = 0;
-      pLock->pNext = lockList;
-      pLock->pPrev = 0;
-      if( lockList ) lockList->pPrev = pLock;
-      lockList = pLock;
-    }else{
-      pLock->nRef++;
-    }
-    *ppLock = pLock;
-  }
-  if( ppOpen!=0 ){
-    pOpen = openList;
-    while( pOpen && memcmp(&fileId, &pOpen->fileId, sizeof(fileId)) ){
-      pOpen = pOpen->pNext;
-    }
-    if( pOpen==0 ){
-      pOpen = sqlite3_malloc( sizeof(*pOpen) );
-      if( pOpen==0 ){
-        releaseLockInfo(pLock);
-        rc = SQLITE_NOMEM;
-        goto exit_findlockinfo;
-      }
-      pOpen->fileId = fileId;
-      pOpen->nRef = 1;
-      pOpen->nLock = 0;
-      pOpen->nPending = 0;
-      pOpen->aPending = 0;
-      pOpen->pNext = openList;
-      pOpen->pPrev = 0;
-      if( openList ) openList->pPrev = pOpen;
-      openList = pOpen;
-#if OS_VXWORKS
-      pOpen->pSem = NULL;
-      pOpen->aSemName[0] = '\0';
+  ino_t ino;                  /* Inode number */
 #endif
-    }else{
-      pOpen->nRef++;
-    }
-    *ppOpen = pOpen;
-  }
-
-exit_findlockinfo:
-  return rc;
-}
-/**************************************************************************
-******************** End of the posix lock work-around ********************
-**************************************************************************/
+};
 
-/**************************************************************************
-**************** Begin Unique File ID Utility Used By VxWorks *************
-***************************************************************************
-**
-** The inode numbers of files are meaningless in VxWorks.  Inodes cannot
-** be used to find a unique identifier for a file.   A unique file id
-** must be based on the canonical filename.
-**
-** A pointer to an instance of the following structure can be used as a
-** unique file ID in VxWorks.  Each instance of this structure contains
-** a copy of the canonical filename.  There is also a reference count.  
-** The structure is reclaimed when the number of pointers to it drops to
-** zero.
+/*
+** An instance of the following structure serves as the key used
+** to locate a particular unixLockInfo structure given its inode.
 **
-** There are never very many files open at one time and lookups are not
-** a performance-critical path, so it is sufficient to put these
-** structures on a linked list.
+** If threads cannot override each others locks (LinuxThreads), then we
+** set the unixLockKey.tid field to the thread ID.  If threads can override
+** each others locks (Posix and NPTL) then tid is always set to zero.
+** tid is omitted if we compile without threading support or on an OS
+** other than linux.
 */
-struct vxworksFileId {
-  struct vxworksFileId *pNext;  /* Next in a list of them all */
-  int nRef;                     /* Number of references to this one */
-  int nName;                    /* Length of the zCanonicalName[] string */
-  char *zCanonicalName;         /* Canonical filename */
+struct unixLockKey {
+  struct unixFileId fid;  /* Unique identifier for the file */
+#if SQLITE_THREADSAFE && defined(__linux__)
+  pthread_t tid;  /* Thread ID of lock owner. Zero if not using LinuxThreads */
+#endif
 };
 
-#if OS_VXWORKS
-/* 
-** All unique filesname are held on a linked list headed by this
-** variable:
+/*
+** An instance of the following structure is allocated for each open
+** inode on each thread with a different process ID.  (Threads have
+** different process IDs on some versions of linux, but not on most
+** other unixes.)
+**
+** A single inode can have multiple file descriptors, so each unixFile
+** structure contains a pointer to an instance of this object and this
+** object keeps a count of the number of unixFile pointing to it.
 */
-static struct vxworksFileId *vxworksFileList = 0;
-#endif
+struct unixLockInfo {
+  struct unixLockKey lockKey;     /* The lookup key */
+  int cnt;                        /* Number of SHARED locks held */
+  int locktype;                   /* One of SHARED_LOCK, RESERVED_LOCK etc. */
+  int nRef;                       /* Number of pointers to this structure */
+  struct unixLockInfo *pNext;     /* List of all unixLockInfo objects */
+  struct unixLockInfo *pPrev;     /*    .... doubly linked */
+};
 
+/*
+** An instance of the following structure is allocated for each open
+** inode.  This structure keeps track of the number of locks on that
+** inode.  If a close is attempted against an inode that is holding
+** locks, the close is deferred until all locks clear by adding the
+** file descriptor to be closed to the pending list.
+*/
+struct unixOpenCnt {
+  struct unixFileId fileId;   /* The lookup key */
+  int nRef;                   /* Number of pointers to this structure */
+  int nLock;                  /* Number of outstanding locks */
+  int nPending;               /* Number of pending close() operations */
+  int *aPending;            /* Malloced space holding fd's awaiting a close() */
 #if OS_VXWORKS
+  sem_t *pSem;                     /* Named POSIX semaphore */
+  char aSemName[MAX_PATHNAME+1];   /* Name of that semaphore */
+#endif
+  struct unixOpenCnt *pNext, *pPrev;   /* List of all unixOpenCnt objects */
+};
+
 /*
-** Simplify a filename into its canonical form
-** by making the following changes:
-**
-**  * removing any trailing and duplicate /
-**  * removing /./
-**  * removing /A/../
-**
-** Changes are made in-place.  Return the new name length.
-**
-** The original filename is in z[0..n-1].  Return the number of
-** characters in the simplified name.
+** List of all unixLockInfo and unixOpenCnt objects.  This used to be a hash
+** table.  But the number of objects is rarely more than a dozen and
+** never exceeds a few thousand.  And lookup is not on a critical
+** path so a simple linked list will suffice.
 */
-static int vxworksSimplifyName(char *z, int n){
-  int i, j;
-  while( n>1 && z[n-1]=='/' ){ n--; }
-  for(i=j=0; i<n; i++){
-    if( z[i]=='/' ){
-      if( z[i+1]=='/' ) continue;
-      if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
-        i += 1;
-        continue;
-      }
-      if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
-        while( j>0 && z[j-1]!='/' ){ j--; }
-        if( j>0 ){ j--; }
-        i += 2;
-        continue;
-      }
-    }
-    z[j++] = z[i];
-  }
-  z[j] = 0;
-  return j;
-}
-#endif /* OS_VXWORKS */
+static struct unixLockInfo *lockList = 0;
+static struct unixOpenCnt *openList = 0;
 
-#if OS_VXWORKS
 /*
-** Find a unique file ID for the given absolute pathname.  Return
-** a pointer to the vxworksFileId object.  This pointer is the unique
-** file ID.
+** This variable records whether or not threads can override each others
+** locks.
 **
-** The nRef field of the vxworksFileId object is incremented before
-** the object is returned.  A new vxworksFileId object is created
-** and added to the global list if necessary.
+**    0:  No.  Threads cannot override each others locks.
+**    1:  Yes.  Threads can override each others locks.
+**   -1:  We don't know yet.
 **
-** If a memory allocation error occurs, return NULL.
+** On some systems, we know at compile-time if threads can override each
+** others locks.  On those systems, the SQLITE_THREAD_OVERRIDE_LOCK macro
+** will be set appropriately.  On other systems, we have to check at
+** runtime.  On these latter systems, SQLTIE_THREAD_OVERRIDE_LOCK is
+** undefined.
+**
+** This variable normally has file scope only.  But during testing, we make
+** it a global so that the test code can change its value in order to verify
+** that the right stuff happens in either case.
 */
-static struct vxworksFileId *vxworksFindFileId(const char *zAbsoluteName){
-  struct vxworksFileId *pNew;         /* search key and new file ID */
-  struct vxworksFileId *pCandidate;   /* For looping over existing file IDs */
-  int n;                              /* Length of zAbsoluteName string */
-
-  assert( zAbsoluteName[0]=='/' );
-  n = strlen(zAbsoluteName);
-  pNew = sqlite3_malloc( sizeof(*pNew) + (n+1) );
-  if( pNew==0 ) return 0;
-  pNew->zCanonicalName = (char*)&pNew[1];
-  memcpy(pNew->zCanonicalName, zAbsoluteName, n+1);
-  n = vxworksSimplifyName(pNew->zCanonicalName, n);
-
-  /* Search for an existing entry that matching the canonical name.
-  ** If found, increment the reference count and return a pointer to
-  ** the existing file ID.
-  */
-  unixEnterMutex();
-  for(pCandidate=vxworksFileList; pCandidate; pCandidate=pCandidate->pNext){
-    if( pCandidate->nName==n 
-     && memcmp(pCandidate->zCanonicalName, pNew->zCanonicalName, n)==0
-    ){
-       sqlite3_free(pNew);
-       pCandidate->nRef++;
-       unixLeaveMutex();
-       return pCandidate;
-    }
-  }
-
-  /* No match was found.  We will make a new file ID */
-  pNew->nRef = 1;
-  pNew->nName = n;
-  pNew->pNext = vxworksFileList;
-  vxworksFileList = pNew;
-  unixLeaveMutex();
-  return pNew;
-}
-#endif /* OS_VXWORKS */
+#ifndef SQLITE_THREAD_OVERRIDE_LOCK
+# define SQLITE_THREAD_OVERRIDE_LOCK -1
+#endif
+#ifdef SQLITE_TEST
+int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
+#else
+static int threadsOverrideEachOthersLocks = SQLITE_THREAD_OVERRIDE_LOCK;
+#endif
 
-#if OS_VXWORKS
 /*
-** Decrement the reference count on a vxworksFileId object.  Free
-** the object when the reference count reaches zero.
+** This structure holds information passed into individual test
+** threads by the testThreadLockingBehavior() routine.
 */
-static void vxworksReleaseFileId(struct vxworksFileId *pId){
-  unixEnterMutex();
-  assert( pId->nRef>0 );
-  pId->nRef--;
-  if( pId->nRef==0 ){
-    struct vxworksFileId **pp;
-    for(pp=&vxworksFileList; *pp && *pp!=pId; pp = &((*pp)->pNext)){}
-    assert( *pp==pId );
-    *pp = pId->pNext;
-    sqlite3_free(pId);
-  }
-  unixLeaveMutex();
+struct threadTestData {
+  int fd;                /* File to be locked */
+  struct flock lock;     /* The locking operation */
+  int result;            /* Result of the locking operation */
+};
+
+#if SQLITE_THREADSAFE && defined(__linux__)
+/*
+** This function is used as the main routine for a thread launched by
+** testThreadLockingBehavior(). It tests whether the shared-lock obtained
+** by the main thread in testThreadLockingBehavior() conflicts with a
+** hypothetical write-lock obtained by this thread on the same file.
+**
+** The write-lock is not actually acquired, as this is not possible if 
+** the file is open in read-only mode (see ticket #3472).
+*/ 
+static void *threadLockingTest(void *pArg){
+  struct threadTestData *pData = (struct threadTestData*)pArg;
+  pData->result = fcntl(pData->fd, F_GETLK, &pData->lock);
+  return pArg;
 }
-#endif /* OS_VXWORKS */
-/**************************************************************************
-************** End of Unique File ID Utility Used By VxWorks **************
-**************************************************************************/
+#endif /* SQLITE_THREADSAFE && defined(__linux__) */
 
-#ifdef SQLITE_TEST
-/* simulate multiple hosts by creating unique hostid file paths */
-int sqlite3_hostid_num = 0;
-#endif
 
+#if SQLITE_THREADSAFE && defined(__linux__)
 /*
-** The locking styles are associated with the different file locking
-** capabilities supported by different file systems.  
-**
-** POSIX     support for shared and exclusive byte-range locks 
-**
-** AFP       support exclusive byte-range locks
-**
-** FLOCK     only a single file-global exclusive lock
-**
-** DOTLOCK   isn't a true locking style, it refers to the use of a special
-**           file named the same as the database file with a '.lock'
-**           extension, this can be used on file systems that do not
-**           offer any reliable file locking
-**
-** NONE      no locking will be attempted, this is only used for
-**           read-only file systems currently
-**
-** NAMEDSEM  similar to DOTLOCK but uses a named semaphore instead of an
-**           indicator file.
-**
-** PROXY     uses a second file to represent the lock state of the database
-**           file which is never actually locked, a third file controls
-**           access to the proxy
+** This procedure attempts to determine whether or not threads
+** can override each others locks then sets the 
+** threadsOverrideEachOthersLocks variable appropriately.
 */
-#define LOCKING_STYLE_POSIX        1
-#define LOCKING_STYLE_NONE         2
-#define LOCKING_STYLE_DOTFILE      3
-#define LOCKING_STYLE_FLOCK        4
-#define LOCKING_STYLE_AFP          5
-#define LOCKING_STYLE_NAMEDSEM     6
-#define LOCKING_STYLE_PROXY        7
+static void testThreadLockingBehavior(int fd_orig){
+  int fd;
+  int rc;
+  struct threadTestData d;
+  struct flock l;
+  pthread_t t;
 
+  fd = dup(fd_orig);
+  if( fd<0 ) return;
+  memset(&l, 0, sizeof(l));
+  l.l_type = F_RDLCK;
+  l.l_len = 1;
+  l.l_start = 0;
+  l.l_whence = SEEK_SET;
+  rc = fcntl(fd_orig, F_SETLK, &l);
+  if( rc!=0 ) return;
+  memset(&d, 0, sizeof(d));
+  d.fd = fd;
+  d.lock = l;
+  d.lock.l_type = F_WRLCK;
+  pthread_create(&t, 0, threadLockingTest, &d);
+  pthread_join(t, 0);
+  close(fd);
+  if( d.result!=0 ) return;
+  threadsOverrideEachOthersLocks = (d.lock.l_type==F_UNLCK);
+}
+#elif SQLITE_THREADSAFE
 /*
-** Only set the lastErrno if the error code is a real error and not 
-** a normal expected return code of SQLITE_BUSY or SQLITE_OK
+** On anything other than linux, assume threads override each others locks.
 */
-#define IS_LOCK_ERROR(x)  ((x != SQLITE_OK) && (x != SQLITE_BUSY))
+static void testThreadLockingBehavior(int fd_orig){
+  UNUSED_PARAMETER(fd_orig);
+  threadsOverrideEachOthersLocks = 1;
+}
+#endif /* SQLITE_THERADSAFE && defined(__linux__) */
 
-#ifdef SQLITE_LOCK_TRACE
 /*
-** Print out information about all locking operations.
-**
-** This routine is used for troubleshooting locks on multithreaded
-** platforms.  Enable by compiling with the -DSQLITE_LOCK_TRACE
-** command-line option on the compiler.  This code is normally
-** turned off.
-*/
-static int lockTrace(int fd, int op, struct flock *p){
-  char *zOpName, *zType;
-  int s;
-  int savedErrno;
-  if( op==F_GETLK ){
-    zOpName = "GETLK";
-  }else if( op==F_SETLK ){
-    zOpName = "SETLK";
-  }else{
-    s = fcntl(fd, op, p);
-    sqlite3DebugPrintf("fcntl unknown %d %d %d\n", fd, op, s);
-    return s;
-  }
-  if( p->l_type==F_RDLCK ){
-    zType = "RDLCK";
-  }else if( p->l_type==F_WRLCK ){
-    zType = "WRLCK";
-  }else if( p->l_type==F_UNLCK ){
-    zType = "UNLCK";
-  }else{
-    assert( 0 );
-  }
-  assert( p->l_whence==SEEK_SET );
-  s = fcntl(fd, op, p);
-  savedErrno = errno;
-  sqlite3DebugPrintf("fcntl %d %d %s %s %d %d %d %d\n",
-     threadid, fd, zOpName, zType, (int)p->l_start, (int)p->l_len,
-     (int)p->l_pid, s);
-  if( s==(-1) && op==F_SETLK && (p->l_type==F_RDLCK || p->l_type==F_WRLCK) ){
-    struct flock l2;
-    l2 = *p;
-    fcntl(fd, F_GETLK, &l2);
-    if( l2.l_type==F_RDLCK ){
-      zType = "RDLCK";
-    }else if( l2.l_type==F_WRLCK ){
-      zType = "WRLCK";
-    }else if( l2.l_type==F_UNLCK ){
-      zType = "UNLCK";
-    }else{
-      assert( 0 );
-    }
-    sqlite3DebugPrintf("fcntl-failure-reason: %s %d %d %d\n",
-       zType, (int)l2.l_start, (int)l2.l_len, (int)l2.l_pid);
-  }
-  errno = savedErrno;
-  return s;
-}
-#define fcntl lockTrace
-#endif /* SQLITE_LOCK_TRACE */
-
-
-#if SQLITE_ENABLE_LOCKING_STYLE
-/*
-** The proxyLockingContext has the path and file structures for the remote 
-** and local proxy files in it
-*/
-typedef struct proxyLockingContext proxyLockingContext;
-struct proxyLockingContext {
-  unixFile *conchFile;
-  char *conchFilePath;
-  unixFile *lockProxy;
-  char *lockProxyPath;
-  char *dbPath;
-  int conchHeld;
-  void *oldLockingContext; /* preserve the original locking context for close */
-  sqlite3_io_methods const *pOldMethod; /* ditto pMethod */
-};
-
-static int getDbPathForUnixFile(unixFile *pFile, char *dbPath);
-static int getLockPath(const char *dbPath, char *lPath, size_t maxLen);
-static sqlite3_io_methods *ioMethodForLockingStyle(int style);
-static int createProxyUnixFile(const char *path, unixFile **ppFile);
-static int fillInUnixFile(sqlite3_vfs *pVfs, int h, int dirfd, sqlite3_file *pId, const char *zFilename, int noLock, int isDelete);
-static int takeConch(unixFile *pFile);
-static int releaseConch(unixFile *pFile);
-static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf);
-
-/*
-** Tests a byte-range locking query to see if byte range locks are 
-** supported, if not we fall back to dotlockLockingStyle.
-** On vxWorks we fall back to namedsemLockingStyle.
-*/
-static int testLockingStyle(int fd){
-  struct flock lockInfo;
-
-  /* Test byte-range lock using fcntl(). If the call succeeds, 
-  ** assume that the file-system supports POSIX style locks. 
-  */
-  lockInfo.l_len = 1;
-  lockInfo.l_start = 0;
-  lockInfo.l_whence = SEEK_SET;
-  lockInfo.l_type = F_RDLCK;
-  if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) {
-    return LOCKING_STYLE_POSIX;
-  }
-  
-  /* Testing for flock() can give false positives.  So if if the above 
-  ** test fails, then we fall back to using dot-file style locking (or
-  ** named-semaphore locking on vxworks).
-  */
-  return (OS_VXWORKS ? LOCKING_STYLE_NAMEDSEM : LOCKING_STYLE_DOTFILE);
-}
-#endif
-
-/* 
-** If SQLITE_ENABLE_LOCKING_STYLE is defined, this function Examines the 
-** f_fstypename entry in the statfs structure as returned by stat() for 
-** the file system hosting the database file and selects  the appropriate
-** locking style based on its value.  These values and assignments are 
-** based on Darwin/OSX behavior and have not been thoroughly tested on 
-** other systems.
-**
-** If SQLITE_ENABLE_LOCKING_STYLE is not defined, this function always
-** returns LOCKING_STYLE_POSIX.
-*/
-#if SQLITE_ENABLE_LOCKING_STYLE
-static int detectLockingStyle(
-  sqlite3_vfs *pVfs,
-  const char *filePath, 
-  int fd
-){
-#if OS_VXWORKS
-  if( !filePath ){
-    return LOCKING_STYLE_NONE;
-  }
-  if( pVfs->pAppData ){
-    return SQLITE_PTR_TO_INT(pVfs->pAppData);
-  }
-  if (access(filePath, 0) != -1){
-    return testLockingStyle(fd);
-  }
-#else
-  struct Mapping {
-    const char *zFilesystem;
-    int eLockingStyle;
-  } aMap[] = {
-    { "hfs",    LOCKING_STYLE_POSIX },
-    { "ufs",    LOCKING_STYLE_POSIX },
-    { "afpfs",  LOCKING_STYLE_AFP },
-#ifdef SQLITE_ENABLE_AFP_LOCKING_SMB
-    { "smbfs",  LOCKING_STYLE_AFP },
-#else
-    { "smbfs",  LOCKING_STYLE_FLOCK },
-#endif
-    { "webdav", LOCKING_STYLE_NONE },
-    { 0, 0 }
-  };
-  int i;
-  struct statfs fsInfo;
-
-  if( !filePath ){
-    return LOCKING_STYLE_NONE;
-  }
-  if( pVfs && pVfs->pAppData ){
-    return SQLITE_PTR_TO_INT(pVfs->pAppData);
-  }
-
-  if( statfs(filePath, &fsInfo) != -1 ){
-    if( fsInfo.f_flags & MNT_RDONLY ){
-      return LOCKING_STYLE_NONE;
-    }
-    for(i=0; aMap[i].zFilesystem; i++){
-      if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
-        return aMap[i].eLockingStyle;
-      }
-    }
-  }
-
-  /* Default case. Handles, amongst others, "nfs". */
-  return testLockingStyle(fd);  
-#endif /* if OS_VXWORKS */
-  return LOCKING_STYLE_POSIX;
-}
-#else
-  #define detectLockingStyle(x,y,z) LOCKING_STYLE_POSIX
-#endif /* if SQLITE_ENABLE_LOCKING_STYLE */
-
-#ifdef SQLITE_DEBUG
-/*
-** Helper function for printing out trace information from debugging
-** binaries. This returns the string represetation of the supplied
-** integer lock-type.
-*/
-static const char *locktypeName(int locktype){
-  switch( locktype ){
-  case NO_LOCK: return "NONE";
-  case SHARED_LOCK: return "SHARED";
-  case RESERVED_LOCK: return "RESERVED";
-  case PENDING_LOCK: return "PENDING";
-  case EXCLUSIVE_LOCK: return "EXCLUSIVE";
-  }
-  return "ERROR";
-}
-#endif
-
-/*
-** If we are currently in a different thread than the thread that the
-** unixFile argument belongs to, then transfer ownership of the unixFile
-** over to the current thread.
+** If we are currently in a different thread than the thread that the
+** unixFile argument belongs to, then transfer ownership of the unixFile
+** over to the current thread.
 **
 ** A unixFile is only owned by a thread on systems where one thread is
 ** unable to override locks created by a different thread.  RedHat9 is
@@ -1095,7 +876,7 @@ static const char *locktypeName(int locktype){
 ** If the unixFile is locked and an ownership is wrong, then return
 ** SQLITE_MISUSE.  SQLITE_OK is returned if everything works.
 */
-#if SQLITE_THREADSAFE
+#if SQLITE_THREADSAFE && defined(__linux__)
 static int transferOwnership(unixFile *pFile){
   int rc;
   pthread_t hSelf;
@@ -1134,736 +915,487 @@ static int transferOwnership(unixFile *pFile){
 
 
 /*
-** Seek to the offset passed as the second argument, then read cnt 
-** bytes into pBuf. Return the number of bytes actually read.
-**
-** NB:  If you define USE_PREAD or USE_PREAD64, then it might also
-** be necessary to define _XOPEN_SOURCE to be 500.  This varies from
-** one system to another.  Since SQLite does not define USE_PREAD
-** any any form by default, we will not attempt to define _XOPEN_SOURCE.
-** See tickets #2741 and #2681.
-**
-** To avoid stomping the errno value on a failed read the lastErrno value
-** is set before returning.
+** Release a unixLockInfo structure previously allocated by findLockInfo().
 */
-static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
-  int got;
-  i64 newOffset;
-  TIMER_START;
-#if defined(USE_PREAD)
-  got = pread(id->h, pBuf, cnt, offset);
-  SimulateIOError( got = -1 );
-#elif defined(USE_PREAD64)
-  got = pread64(id->h, pBuf, cnt, offset);
-  SimulateIOError( got = -1 );
-#else
-  newOffset = lseek(id->h, offset, SEEK_SET);
-  SimulateIOError( newOffset-- );
-  if( newOffset!=offset ){
-    if( newOffset == -1 ){
-      ((unixFile*)id)->lastErrno = errno;
-    }else{
-      ((unixFile*)id)->lastErrno = 0;			
+static void releaseLockInfo(struct unixLockInfo *pLock){
+  if( pLock ){
+    pLock->nRef--;
+    if( pLock->nRef==0 ){
+      if( pLock->pPrev ){
+        assert( pLock->pPrev->pNext==pLock );
+        pLock->pPrev->pNext = pLock->pNext;
+      }else{
+        assert( lockList==pLock );
+        lockList = pLock->pNext;
+      }
+      if( pLock->pNext ){
+        assert( pLock->pNext->pPrev==pLock );
+        pLock->pNext->pPrev = pLock->pPrev;
+      }
+      sqlite3_free(pLock);
     }
-    return -1;
   }
-  got = read(id->h, pBuf, cnt);
-#endif
-  TIMER_END;
-  if( got<0 ){
-    ((unixFile*)id)->lastErrno = errno;
-  }
-  OSTRACE5("READ    %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
-  return got;
 }
 
 /*
-** Read data from a file into a buffer.  Return SQLITE_OK if all
-** bytes were read successfully and SQLITE_IOERR if anything goes
-** wrong.
+** Release a unixOpenCnt structure previously allocated by findLockInfo().
 */
-static int unixRead(
-  sqlite3_file *id, 
-  void *pBuf, 
-  int amt,
-  sqlite3_int64 offset
-){
-  int got;
-  assert( id );
-  got = seekAndRead((unixFile*)id, offset, pBuf, amt);
-  if( got==amt ){
-    return SQLITE_OK;
-  }else if( got<0 ){
-    /* lastErrno set by seekAndRead */
-    return SQLITE_IOERR_READ;
-  }else{
-    ((unixFile*)id)->lastErrno = 0; /* not a system error */
-    /* Unread parts of the buffer must be zero-filled */
-    memset(&((char*)pBuf)[got], 0, amt-got);
-    return SQLITE_IOERR_SHORT_READ;
+static void releaseOpenCnt(struct unixOpenCnt *pOpen){
+  if( pOpen ){
+    pOpen->nRef--;
+    if( pOpen->nRef==0 ){
+      if( pOpen->pPrev ){
+        assert( pOpen->pPrev->pNext==pOpen );
+        pOpen->pPrev->pNext = pOpen->pNext;
+      }else{
+        assert( openList==pOpen );
+        openList = pOpen->pNext;
+      }
+      if( pOpen->pNext ){
+        assert( pOpen->pNext->pPrev==pOpen );
+        pOpen->pNext->pPrev = pOpen->pPrev;
+      }
+      sqlite3_free(pOpen->aPending);
+      sqlite3_free(pOpen);
+    }
   }
 }
 
+
 /*
-** Seek to the offset in id->offset then read cnt bytes into pBuf.
-** Return the number of bytes actually read.  Update the offset.
+** Given a file descriptor, locate unixLockInfo and unixOpenCnt structures that
+** describes that file descriptor.  Create new ones if necessary.  The
+** return values might be uninitialized if an error occurs.
 **
-** To avoid stomping the errno value on a failed write the lastErrno value
-** is set before returning.
+** Return an appropriate error code.
 */
-static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
-  int got;
-  i64 newOffset;
-  TIMER_START;
-#if defined(USE_PREAD)
-  got = pwrite(id->h, pBuf, cnt, offset);
-#elif defined(USE_PREAD64)
-  got = pwrite64(id->h, pBuf, cnt, offset);
+static int findLockInfo(
+  unixFile *pFile,               /* Unix file with file desc used in the key */
+  struct unixLockInfo **ppLock,  /* Return the unixLockInfo structure here */
+  struct unixOpenCnt **ppOpen    /* Return the unixOpenCnt structure here */
+){
+  int rc;                        /* System call return code */
+  int fd;                        /* The file descriptor for pFile */
+  struct unixLockKey lockKey;    /* Lookup key for the unixLockInfo structure */
+  struct unixFileId fileId;      /* Lookup key for the unixOpenCnt struct */
+  struct stat statbuf;           /* Low-level file information */
+  struct unixLockInfo *pLock;    /* Candidate unixLockInfo object */
+  struct unixOpenCnt *pOpen;     /* Candidate unixOpenCnt object */
+
+  /* Get low-level information about the file that we can used to
+  ** create a unique name for the file.
+  */
+  fd = pFile->h;
+  rc = fstat(fd, &statbuf);
+  if( rc!=0 ){
+    pFile->lastErrno = errno;
+#ifdef EOVERFLOW
+    if( pFile->lastErrno==EOVERFLOW ) return SQLITE_NOLFS;
+#endif
+    return SQLITE_IOERR;
+  }
+
+  /* On OS X on an msdos filesystem, the inode number is reported
+  ** incorrectly for zero-size files.  See ticket #3260.  To work
+  ** around this problem (we consider it a bug in OS X, not SQLite)
+  ** we always increase the file size to 1 by writing a single byte
+  ** prior to accessing the inode number.  The one byte written is
+  ** an ASCII 'S' character which also happens to be the first byte
+  ** in the header of every SQLite database.  In this way, if there
+  ** is a race condition such that another thread has already populated
+  ** the first page of the database, no damage is done.
+  */
+  if( statbuf.st_size==0 ){
+    write(fd, "S", 1);
+    rc = fstat(fd, &statbuf);
+    if( rc!=0 ){
+      pFile->lastErrno = errno;
+      return SQLITE_IOERR;
+    }
+  }
+
+  memset(&lockKey, 0, sizeof(lockKey));
+  lockKey.fid.dev = statbuf.st_dev;
+#if OS_VXWORKS
+  lockKey.fid.pId = pFile->pId;
 #else
-  newOffset = lseek(id->h, offset, SEEK_SET);
-  if( newOffset!=offset ){
-    if( newOffset == -1 ){
-      ((unixFile*)id)->lastErrno = errno;
+  lockKey.fid.ino = statbuf.st_ino;
+#endif
+#if SQLITE_THREADSAFE && defined(__linux__)
+  if( threadsOverrideEachOthersLocks<0 ){
+    testThreadLockingBehavior(fd);
+  }
+  lockKey.tid = threadsOverrideEachOthersLocks ? 0 : pthread_self();
+#endif
+  fileId = lockKey.fid;
+  if( ppLock!=0 ){
+    pLock = lockList;
+    while( pLock && memcmp(&lockKey, &pLock->lockKey, sizeof(lockKey)) ){
+      pLock = pLock->pNext;
+    }
+    if( pLock==0 ){
+      pLock = sqlite3_malloc( sizeof(*pLock) );
+      if( pLock==0 ){
+        rc = SQLITE_NOMEM;
+        goto exit_findlockinfo;
+      }
+      pLock->lockKey = lockKey;
+      pLock->nRef = 1;
+      pLock->cnt = 0;
+      pLock->locktype = 0;
+      pLock->pNext = lockList;
+      pLock->pPrev = 0;
+      if( lockList ) lockList->pPrev = pLock;
+      lockList = pLock;
     }else{
-      ((unixFile*)id)->lastErrno = 0;			
+      pLock->nRef++;
     }
-    return -1;
+    *ppLock = pLock;
   }
-  got = write(id->h, pBuf, cnt);
+  if( ppOpen!=0 ){
+    pOpen = openList;
+    while( pOpen && memcmp(&fileId, &pOpen->fileId, sizeof(fileId)) ){
+      pOpen = pOpen->pNext;
+    }
+    if( pOpen==0 ){
+      pOpen = sqlite3_malloc( sizeof(*pOpen) );
+      if( pOpen==0 ){
+        releaseLockInfo(pLock);
+        rc = SQLITE_NOMEM;
+        goto exit_findlockinfo;
+      }
+      pOpen->fileId = fileId;
+      pOpen->nRef = 1;
+      pOpen->nLock = 0;
+      pOpen->nPending = 0;
+      pOpen->aPending = 0;
+      pOpen->pNext = openList;
+      pOpen->pPrev = 0;
+      if( openList ) openList->pPrev = pOpen;
+      openList = pOpen;
+#if OS_VXWORKS
+      pOpen->pSem = NULL;
+      pOpen->aSemName[0] = '\0';
 #endif
-  TIMER_END;
-  if( got<0 ){
-    ((unixFile*)id)->lastErrno = errno;
+    }else{
+      pOpen->nRef++;
+    }
+    *ppOpen = pOpen;
   }
 
-  OSTRACE5("WRITE   %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
-  return got;
+exit_findlockinfo:
+  return rc;
 }
 
 
 /*
-** Write data from a buffer into a file.  Return SQLITE_OK on success
-** or some other error code on failure.
+** This routine checks if there is a RESERVED lock held on the specified
+** file by this or any other process. If such a lock is held, set *pResOut
+** to a non-zero value otherwise *pResOut is set to zero.  The return value
+** is set to SQLITE_OK unless an I/O error occurs during lock checking.
 */
-static int unixWrite(
-  sqlite3_file *id, 
-  const void *pBuf, 
-  int amt,
-  sqlite3_int64 offset 
-){
-  int wrote = 0;
-  assert( id );
-  assert( amt>0 );
-  while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
-    amt -= wrote;
-    offset += wrote;
-    pBuf = &((char*)pBuf)[wrote];
+static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
+  int rc = SQLITE_OK;
+  int reserved = 0;
+  unixFile *pFile = (unixFile*)id;
+
+  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+
+  assert( pFile );
+  unixEnterMutex(); /* Because pFile->pLock is shared across threads */
+
+  /* Check if a thread in this process holds such a lock */
+  if( pFile->pLock->locktype>SHARED_LOCK ){
+    reserved = 1;
   }
-  SimulateIOError(( wrote=(-1), amt=1 ));
-  SimulateDiskfullError(( wrote=0, amt=1 ));
-  if( amt>0 ){
-    if( wrote<0 ){
-      /* lastErrno set by seekAndWrite */
-      return SQLITE_IOERR_WRITE;
-    }else{
-      ((unixFile*)id)->lastErrno = 0; /* not a system error */
-      return SQLITE_FULL;
+
+  /* Otherwise see if some other process holds it.
+  */
+  if( !reserved ){
+    struct flock lock;
+    lock.l_whence = SEEK_SET;
+    lock.l_start = RESERVED_BYTE;
+    lock.l_len = 1;
+    lock.l_type = F_WRLCK;
+    if (-1 == fcntl(pFile->h, F_GETLK, &lock)) {
+      int tErrno = errno;
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
+      pFile->lastErrno = tErrno;
+    } else if( lock.l_type!=F_UNLCK ){
+      reserved = 1;
     }
   }
-  return SQLITE_OK;
-}
+  
+  unixLeaveMutex();
+  OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
 
-#ifdef SQLITE_TEST
-/*
-** Count the number of fullsyncs and normal syncs.  This is used to test
-** that syncs and fullsyncs are occuring at the right times.
-*/
-int sqlite3_sync_count = 0;
-int sqlite3_fullsync_count = 0;
-#endif
+  *pResOut = reserved;
+  return rc;
+}
 
 /*
-** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
-** Otherwise use fsync() in its place.
-*/
-#ifndef HAVE_FDATASYNC
-# define fdatasync fsync
-#endif
-
-/*
-** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
-** the F_FULLFSYNC macro is defined.  F_FULLFSYNC is currently
-** only available on Mac OS X.  But that could change.
+** Lock the file with the lock specified by parameter locktype - one
+** of the following:
+**
+**     (1) SHARED_LOCK
+**     (2) RESERVED_LOCK
+**     (3) PENDING_LOCK
+**     (4) EXCLUSIVE_LOCK
+**
+** Sometimes when requesting one lock state, additional lock states
+** are inserted in between.  The locking might fail on one of the later
+** transitions leaving the lock state different from what it started but
+** still short of its goal.  The following chart shows the allowed
+** transitions and the inserted intermediate states:
+**
+**    UNLOCKED -> SHARED
+**    SHARED -> RESERVED
+**    SHARED -> (PENDING) -> EXCLUSIVE
+**    RESERVED -> (PENDING) -> EXCLUSIVE
+**    PENDING -> EXCLUSIVE
+**
+** This routine will only increase a lock.  Use the sqlite3OsUnlock()
+** routine to lower a locking level.
 */
-#ifdef F_FULLFSYNC
-# define HAVE_FULLFSYNC 1
-#else
-# define HAVE_FULLFSYNC 0
-#endif
+static int unixLock(sqlite3_file *id, int locktype){
+  /* The following describes the implementation of the various locks and
+  ** lock transitions in terms of the POSIX advisory shared and exclusive
+  ** lock primitives (called read-locks and write-locks below, to avoid
+  ** confusion with SQLite lock names). The algorithms are complicated
+  ** slightly in order to be compatible with windows systems simultaneously
+  ** accessing the same database file, in case that is ever required.
+  **
+  ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
+  ** byte', each single bytes at well known offsets, and the 'shared byte
+  ** range', a range of 510 bytes at a well known offset.
+  **
+  ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
+  ** byte'.  If this is successful, a random byte from the 'shared byte
+  ** range' is read-locked and the lock on the 'pending byte' released.
+  **
+  ** A process may only obtain a RESERVED lock after it has a SHARED lock.
+  ** A RESERVED lock is implemented by grabbing a write-lock on the
+  ** 'reserved byte'. 
+  **
+  ** A process may only obtain a PENDING lock after it has obtained a
+  ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
+  ** on the 'pending byte'. This ensures that no new SHARED locks can be
+  ** obtained, but existing SHARED locks are allowed to persist. A process
+  ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
+  ** This property is used by the algorithm for rolling back a journal file
+  ** after a crash.
+  **
+  ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
+  ** implemented by obtaining a write-lock on the entire 'shared byte
+  ** range'. Since all other locks require a read-lock on one of the bytes
+  ** within this range, this ensures that no other locks are held on the
+  ** database. 
+  **
+  ** The reason a single byte cannot be used instead of the 'shared byte
+  ** range' is that some versions of windows do not support read-locks. By
+  ** locking a random byte from a range, concurrent SHARED locks may exist
+  ** even if the locking primitive used is always a write-lock.
+  */
+  int rc = SQLITE_OK;
+  unixFile *pFile = (unixFile*)id;
+  struct unixLockInfo *pLock = pFile->pLock;
+  struct flock lock;
+  int s;
 
+  assert( pFile );
+  OSTRACE7("LOCK    %d %s was %s(%s,%d) pid=%d\n", pFile->h,
+      locktypeName(locktype), locktypeName(pFile->locktype),
+      locktypeName(pLock->locktype), pLock->cnt , getpid());
 
-/*
-** The fsync() system call does not work as advertised on many
-** unix systems.  The following procedure is an attempt to make
-** it work better.
-**
-** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
-** for testing when we want to run through the test suite quickly.
-** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
-** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
-** or power failure will likely corrupt the database file.
-*/
-static int full_fsync(int fd, int fullSync, int dataOnly){
-  int rc;
+  /* If there is already a lock of this type or more restrictive on the
+  ** unixFile, do nothing. Don't use the end_lock: exit path, as
+  ** unixEnterMutex() hasn't been called yet.
+  */
+  if( pFile->locktype>=locktype ){
+    OSTRACE3("LOCK    %d %s ok (already held)\n", pFile->h,
+            locktypeName(locktype));
+    return SQLITE_OK;
+  }
 
-  /* The following "ifdef/elif/else/" block has the same structure as
-  ** the one below. It is replicated here solely to avoid cluttering 
-  ** up the real code with the UNUSED_PARAMETER() macros.
+  /* Make sure the locking sequence is correct
   */
-#ifdef SQLITE_NO_SYNC
-  UNUSED_PARAMETER(fd);
-  UNUSED_PARAMETER(fullSync);
-  UNUSED_PARAMETER(dataOnly);
-#elif HAVE_FULLFSYNC
-  UNUSED_PARAMETER(dataOnly);
-#else
-  UNUSED_PARAMETER(fullSync);
-#endif
+  assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
+  assert( locktype!=PENDING_LOCK );
+  assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
 
-  /* Record the number of times that we do a normal fsync() and 
-  ** FULLSYNC.  This is used during testing to verify that this procedure
-  ** gets called with the correct arguments.
+  /* This mutex is needed because pFile->pLock is shared across threads
   */
-#ifdef SQLITE_TEST
-  if( fullSync ) sqlite3_fullsync_count++;
-  sqlite3_sync_count++;
-#endif
+  unixEnterMutex();
 
-  /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
-  ** no-op
+  /* Make sure the current thread owns the pFile.
   */
-#ifdef SQLITE_NO_SYNC
-  rc = SQLITE_OK;
-#elif HAVE_FULLFSYNC
-  if( fullSync ){
-    rc = fcntl(fd, F_FULLFSYNC, 0);
-  }else{
-    rc = 1;
+  rc = transferOwnership(pFile);
+  if( rc!=SQLITE_OK ){
+    unixLeaveMutex();
+    return rc;
   }
-  /* If the FULLFSYNC failed, fall back to attempting an fsync().
-   * It shouldn't be possible for fullfsync to fail on the local 
-   * file system (on OSX), so failure indicates that FULLFSYNC
-   * isn't supported for this file system. So, attempt an fsync 
-   * and (for now) ignore the overhead of a superfluous fcntl call.  
-   * It'd be better to detect fullfsync support once and avoid 
-   * the fcntl call every time sync is called.
-   */
-  if( rc ) rc = fsync(fd);
+  pLock = pFile->pLock;
 
-#else 
-  if( dataOnly ){
-    rc = fdatasync(fd);
-    if( OS_VXWORKS && rc==-1 && errno==ENOTSUP ){
-      rc = fsync(fd);
-    }
-  }else{
-    rc = fsync(fd);
+  /* If some thread using this PID has a lock via a different unixFile*
+  ** handle that precludes the requested lock, return BUSY.
+  */
+  if( (pFile->locktype!=pLock->locktype && 
+          (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
+  ){
+    rc = SQLITE_BUSY;
+    goto end_lock;
   }
-#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
 
-  if( OS_VXWORKS && rc!= -1 ){
-    rc = 0;
+  /* If a SHARED lock is requested, and some thread using this PID already
+  ** has a SHARED or RESERVED lock, then increment reference counts and
+  ** return SQLITE_OK.
+  */
+  if( locktype==SHARED_LOCK && 
+      (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
+    assert( locktype==SHARED_LOCK );
+    assert( pFile->locktype==0 );
+    assert( pLock->cnt>0 );
+    pFile->locktype = SHARED_LOCK;
+    pLock->cnt++;
+    pFile->pOpen->nLock++;
+    goto end_lock;
   }
-  return rc;
-}
 
-/*
-** Make sure all writes to a particular file are committed to disk.
-**
-** If dataOnly==0 then both the file itself and its metadata (file
-** size, access time, etc) are synced.  If dataOnly!=0 then only the
-** file data is synced.
-**
-** Under Unix, also make sure that the directory entry for the file
-** has been created by fsync-ing the directory that contains the file.
-** If we do not do this and we encounter a power failure, the directory
-** entry for the journal might not exist after we reboot.  The next
-** SQLite to access the file will not know that the journal exists (because
-** the directory entry for the journal was never created) and the transaction
-** will not roll back - possibly leading to database corruption.
-*/
-static int unixSync(sqlite3_file *id, int flags){
-  int rc;
-  unixFile *pFile = (unixFile*)id;
+  lock.l_len = 1L;
 
-  int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
-  int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
+  lock.l_whence = SEEK_SET;
 
-  /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
-  assert((flags&0x0F)==SQLITE_SYNC_NORMAL
-      || (flags&0x0F)==SQLITE_SYNC_FULL
-  );
+  /* A PENDING lock is needed before acquiring a SHARED lock and before
+  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
+  ** be released.
+  */
+  if( locktype==SHARED_LOCK 
+      || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
+  ){
+    lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
+    lock.l_start = PENDING_BYTE;
+    s = fcntl(pFile->h, F_SETLK, &lock);
+    if( s==(-1) ){
+      int tErrno = errno;
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+      if( IS_LOCK_ERROR(rc) ){
+        pFile->lastErrno = tErrno;
+      }
+      goto end_lock;
+    }
+  }
 
-  /* Unix cannot, but some systems may return SQLITE_FULL from here. This
-  ** line is to test that doing so does not cause any problems.
+
+  /* If control gets to this point, then actually go ahead and make
+  ** operating system calls for the specified lock.
   */
-  SimulateDiskfullError( return SQLITE_FULL );
+  if( locktype==SHARED_LOCK ){
+    int tErrno = 0;
+    assert( pLock->cnt==0 );
+    assert( pLock->locktype==0 );
 
-  assert( pFile );
-  OSTRACE2("SYNC    %-3d\n", pFile->h);
-  rc = full_fsync(pFile->h, isFullsync, isDataOnly);
-  SimulateIOError( rc=1 );
-  if( rc ){
-    pFile->lastErrno = errno;
-    return SQLITE_IOERR_FSYNC;
-  }
-  if( pFile->dirfd>=0 ){
-    int err;
-    OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
-            HAVE_FULLFSYNC, isFullsync);
-#ifndef SQLITE_DISABLE_DIRSYNC
-    /* The directory sync is only attempted if full_fsync is
-    ** turned off or unavailable.  If a full_fsync occurred above,
-    ** then the directory sync is superfluous.
-    */
-    if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
-       /*
-       ** We have received multiple reports of fsync() returning
-       ** errors when applied to directories on certain file systems.
-       ** A failed directory sync is not a big deal.  So it seems
-       ** better to ignore the error.  Ticket #1657
-       */
-       /* pFile->lastErrno = errno; */
-       /* return SQLITE_IOERR; */
+    /* Now get the read-lock */
+    lock.l_start = SHARED_FIRST;
+    lock.l_len = SHARED_SIZE;
+    if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
+      tErrno = errno;
     }
-#endif
-    err = close(pFile->dirfd); /* Only need to sync once, so close the */
-    if( err==0 ){              /* directory when we are done */
-      pFile->dirfd = -1;
+    /* Drop the temporary PENDING lock */
+    lock.l_start = PENDING_BYTE;
+    lock.l_len = 1L;
+    lock.l_type = F_UNLCK;
+    if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
+      if( s != -1 ){
+        /* This could happen with a network mount */
+        tErrno = errno; 
+        rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 
+        if( IS_LOCK_ERROR(rc) ){
+          pFile->lastErrno = tErrno;
+        }
+        goto end_lock;
+      }
+    }
+    if( s==(-1) ){
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+      if( IS_LOCK_ERROR(rc) ){
+        pFile->lastErrno = tErrno;
+      }
     }else{
-      pFile->lastErrno = errno;
-      rc = SQLITE_IOERR_DIR_CLOSE;
+      pFile->locktype = SHARED_LOCK;
+      pFile->pOpen->nLock++;
+      pLock->cnt = 1;
     }
-  }
-  return rc;
-}
-
-/*
-** Truncate an open file to a specified size
-*/
-static int unixTruncate(sqlite3_file *id, i64 nByte){
-  int rc;
-  assert( id );
-  SimulateIOError( return SQLITE_IOERR_TRUNCATE );
-  rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
-  if( rc ){
-    ((unixFile*)id)->lastErrno = errno;
-    return SQLITE_IOERR_TRUNCATE;
+  }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
+    /* We are trying for an exclusive lock but another thread in this
+    ** same process is still holding a shared lock. */
+    rc = SQLITE_BUSY;
   }else{
-    return SQLITE_OK;
+    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
+    ** assumed that there is a SHARED or greater lock on the file
+    ** already.
+    */
+    assert( 0!=pFile->locktype );
+    lock.l_type = F_WRLCK;
+    switch( locktype ){
+      case RESERVED_LOCK:
+        lock.l_start = RESERVED_BYTE;
+        break;
+      case EXCLUSIVE_LOCK:
+        lock.l_start = SHARED_FIRST;
+        lock.l_len = SHARED_SIZE;
+        break;
+      default:
+        assert(0);
+    }
+    s = fcntl(pFile->h, F_SETLK, &lock);
+    if( s==(-1) ){
+      int tErrno = errno;
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+      if( IS_LOCK_ERROR(rc) ){
+        pFile->lastErrno = tErrno;
+      }
+    }
   }
-}
-
-/*
-** Determine the current size of a file in bytes
-*/
-static int unixFileSize(sqlite3_file *id, i64 *pSize){
-  int rc;
-  struct stat buf;
-  assert( id );
-  rc = fstat(((unixFile*)id)->h, &buf);
-  SimulateIOError( rc=1 );
-  if( rc!=0 ){
-    ((unixFile*)id)->lastErrno = errno;
-    return SQLITE_IOERR_FSTAT;
+  
+  if( rc==SQLITE_OK ){
+    pFile->locktype = locktype;
+    pLock->locktype = locktype;
+  }else if( locktype==EXCLUSIVE_LOCK ){
+    pFile->locktype = PENDING_LOCK;
+    pLock->locktype = PENDING_LOCK;
   }
-  *pSize = buf.st_size;
 
-  /* When opening a zero-size database, the findLockInfo() procedure
-  ** writes a single byte into that file in order to work around a bug
-  ** in the OS-X msdos filesystem.  In order to avoid problems with upper
-  ** layers, we need to report this file size as zero even though it is
-  ** really 1.   Ticket #3260.
-  */
-  if( *pSize==1 ) *pSize = 0;
-
-
-  return SQLITE_OK;
-}
-
-/*
-** This routine translates a standard POSIX errno code into something
-** useful to the clients of the sqlite3 functions.  Specifically, it is
-** intended to translate a variety of "try again" errors into SQLITE_BUSY
-** and a variety of "please close the file descriptor NOW" errors into 
-** SQLITE_IOERR
-** 
-** Errors during initialization of locks, or file system support for locks,
-** should handle ENOLCK, ENOTSUP, EOPNOTSUPP separately.
-*/
-static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
-  switch (posixError) {
-  case 0: 
-    return SQLITE_OK;
-    
-  case EAGAIN:
-  case ETIMEDOUT:
-  case EBUSY:
-  case EINTR:
-  case ENOLCK:  
-    /* random NFS retry error, unless during file system support 
-     * introspection, in which it actually means what it says */
-    return SQLITE_BUSY;
-    
-  case EACCES: 
-    /* EACCES is like EAGAIN during locking operations, but not any other time*/
-    if( (sqliteIOErr == SQLITE_IOERR_LOCK) || 
-	(sqliteIOErr == SQLITE_IOERR_UNLOCK) || 
-	(sqliteIOErr == SQLITE_IOERR_RDLOCK) ||
-	(sqliteIOErr == SQLITE_IOERR_CHECKRESERVEDLOCK) ){
-      return SQLITE_BUSY;
-    }
-    /* else fall through */
-  case EPERM: 
-    return SQLITE_PERM;
-    
-  case EDEADLK:
-    return SQLITE_IOERR_BLOCKED;
-    
-#if EOPNOTSUPP!=ENOTSUP
-  case EOPNOTSUPP: 
-    /* something went terribly awry, unless during file system support 
-     * introspection, in which it actually means what it says */
-#endif
-#ifdef ENOTSUP
-  case ENOTSUP: 
-    /* invalid fd, unless during file system support introspection, in which 
-     * it actually means what it says */
-#endif
-  case EIO:
-  case EBADF:
-  case EINVAL:
-  case ENOTCONN:
-  case ENODEV:
-  case ENXIO:
-  case ENOENT:
-  case ESTALE:
-  case ENOSYS:
-    /* these should force the client to close the file and reconnect */
-    
-  default: 
-    return sqliteIOErr;
-  }
+end_lock:
+  unixLeaveMutex();
+  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
+      rc==SQLITE_OK ? "ok" : "failed");
+  return rc;
 }
 
 /*
-** This routine checks if there is a RESERVED lock held on the specified
-** file by this or any other process. If such a lock is held, set *pResOut
-** to a non-zero value otherwise *pResOut is set to zero.  The return value
-** is set to SQLITE_OK unless an I/O error occurs during lock checking.
+** Lower the locking level on file descriptor pFile to locktype.  locktype
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
 */
-static int unixCheckReservedLock(sqlite3_file *id, int *pResOut){
+static int unixUnlock(sqlite3_file *id, int locktype){
+  struct unixLockInfo *pLock;
+  struct flock lock;
   int rc = SQLITE_OK;
-  int reserved = 0;
   unixFile *pFile = (unixFile*)id;
-
-  SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
+  int h;
 
   assert( pFile );
-  unixEnterMutex(); /* Because pFile->pLock is shared across threads */
+  OSTRACE7("UNLOCK  %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
+      pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
 
-  /* Check if a thread in this process holds such a lock */
-  if( pFile->pLock->locktype>SHARED_LOCK ){
-    reserved = 1;
+  assert( locktype<=SHARED_LOCK );
+  if( pFile->locktype<=locktype ){
+    return SQLITE_OK;
   }
-
-  /* Otherwise see if some other process holds it.
-  */
-  if( !reserved ){
-    struct flock lock;
-    lock.l_whence = SEEK_SET;
-    lock.l_start = RESERVED_BYTE;
-    lock.l_len = 1;
-    lock.l_type = F_WRLCK;
-    if (-1 == fcntl(pFile->h, F_GETLK, &lock)) {
-      int tErrno = errno;
-      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
-      pFile->lastErrno = tErrno;
-    } else if( lock.l_type!=F_UNLCK ){
-      reserved = 1;
-    }
-  }
-  
-  unixLeaveMutex();
-  OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
-
-  *pResOut = reserved;
-  return rc;
-}
-
-/*
-** Lock the file with the lock specified by parameter locktype - one
-** of the following:
-**
-**     (1) SHARED_LOCK
-**     (2) RESERVED_LOCK
-**     (3) PENDING_LOCK
-**     (4) EXCLUSIVE_LOCK
-**
-** Sometimes when requesting one lock state, additional lock states
-** are inserted in between.  The locking might fail on one of the later
-** transitions leaving the lock state different from what it started but
-** still short of its goal.  The following chart shows the allowed
-** transitions and the inserted intermediate states:
-**
-**    UNLOCKED -> SHARED
-**    SHARED -> RESERVED
-**    SHARED -> (PENDING) -> EXCLUSIVE
-**    RESERVED -> (PENDING) -> EXCLUSIVE
-**    PENDING -> EXCLUSIVE
-**
-** This routine will only increase a lock.  Use the sqlite3OsUnlock()
-** routine to lower a locking level.
-*/
-static int unixLock(sqlite3_file *id, int locktype){
-  /* The following describes the implementation of the various locks and
-  ** lock transitions in terms of the POSIX advisory shared and exclusive
-  ** lock primitives (called read-locks and write-locks below, to avoid
-  ** confusion with SQLite lock names). The algorithms are complicated
-  ** slightly in order to be compatible with windows systems simultaneously
-  ** accessing the same database file, in case that is ever required.
-  **
-  ** Symbols defined in os.h indentify the 'pending byte' and the 'reserved
-  ** byte', each single bytes at well known offsets, and the 'shared byte
-  ** range', a range of 510 bytes at a well known offset.
-  **
-  ** To obtain a SHARED lock, a read-lock is obtained on the 'pending
-  ** byte'.  If this is successful, a random byte from the 'shared byte
-  ** range' is read-locked and the lock on the 'pending byte' released.
-  **
-  ** A process may only obtain a RESERVED lock after it has a SHARED lock.
-  ** A RESERVED lock is implemented by grabbing a write-lock on the
-  ** 'reserved byte'. 
-  **
-  ** A process may only obtain a PENDING lock after it has obtained a
-  ** SHARED lock. A PENDING lock is implemented by obtaining a write-lock
-  ** on the 'pending byte'. This ensures that no new SHARED locks can be
-  ** obtained, but existing SHARED locks are allowed to persist. A process
-  ** does not have to obtain a RESERVED lock on the way to a PENDING lock.
-  ** This property is used by the algorithm for rolling back a journal file
-  ** after a crash.
-  **
-  ** An EXCLUSIVE lock, obtained after a PENDING lock is held, is
-  ** implemented by obtaining a write-lock on the entire 'shared byte
-  ** range'. Since all other locks require a read-lock on one of the bytes
-  ** within this range, this ensures that no other locks are held on the
-  ** database. 
-  **
-  ** The reason a single byte cannot be used instead of the 'shared byte
-  ** range' is that some versions of windows do not support read-locks. By
-  ** locking a random byte from a range, concurrent SHARED locks may exist
-  ** even if the locking primitive used is always a write-lock.
-  */
-  int rc = SQLITE_OK;
-  unixFile *pFile = (unixFile*)id;
-  struct unixLockInfo *pLock = pFile->pLock;
-  struct flock lock;
-  int s;
-
-  assert( pFile );
-  OSTRACE7("LOCK    %d %s was %s(%s,%d) pid=%d\n", pFile->h,
-      locktypeName(locktype), locktypeName(pFile->locktype),
-      locktypeName(pLock->locktype), pLock->cnt , getpid());
-
-  /* If there is already a lock of this type or more restrictive on the
-  ** unixFile, do nothing. Don't use the end_lock: exit path, as
-  ** unixEnterMutex() hasn't been called yet.
-  */
-  if( pFile->locktype>=locktype ){
-    OSTRACE3("LOCK    %d %s ok (already held)\n", pFile->h,
-            locktypeName(locktype));
-    return SQLITE_OK;
-  }
-
-  /* Make sure the locking sequence is correct
-  */
-  assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
-  assert( locktype!=PENDING_LOCK );
-  assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
-
-  /* This mutex is needed because pFile->pLock is shared across threads
-  */
-  unixEnterMutex();
-
-  /* Make sure the current thread owns the pFile.
-  */
-  rc = transferOwnership(pFile);
-  if( rc!=SQLITE_OK ){
-    unixLeaveMutex();
-    return rc;
-  }
-  pLock = pFile->pLock;
-
-  /* If some thread using this PID has a lock via a different unixFile*
-  ** handle that precludes the requested lock, return BUSY.
-  */
-  if( (pFile->locktype!=pLock->locktype && 
-          (pLock->locktype>=PENDING_LOCK || locktype>SHARED_LOCK))
-  ){
-    rc = SQLITE_BUSY;
-    goto end_lock;
-  }
-
-  /* If a SHARED lock is requested, and some thread using this PID already
-  ** has a SHARED or RESERVED lock, then increment reference counts and
-  ** return SQLITE_OK.
-  */
-  if( locktype==SHARED_LOCK && 
-      (pLock->locktype==SHARED_LOCK || pLock->locktype==RESERVED_LOCK) ){
-    assert( locktype==SHARED_LOCK );
-    assert( pFile->locktype==0 );
-    assert( pLock->cnt>0 );
-    pFile->locktype = SHARED_LOCK;
-    pLock->cnt++;
-    pFile->pOpen->nLock++;
-    goto end_lock;
-  }
-
-  lock.l_len = 1L;
-
-  lock.l_whence = SEEK_SET;
-
-  /* A PENDING lock is needed before acquiring a SHARED lock and before
-  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
-  ** be released.
-  */
-  if( locktype==SHARED_LOCK 
-      || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
-  ){
-    lock.l_type = (locktype==SHARED_LOCK?F_RDLCK:F_WRLCK);
-    lock.l_start = PENDING_BYTE;
-    s = fcntl(pFile->h, F_SETLK, &lock);
-    if( s==(-1) ){
-      int tErrno = errno;
-      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
-      if( IS_LOCK_ERROR(rc) ){
-        pFile->lastErrno = tErrno;
-      }
-      goto end_lock;
-    }
-  }
-
-
-  /* If control gets to this point, then actually go ahead and make
-  ** operating system calls for the specified lock.
-  */
-  if( locktype==SHARED_LOCK ){
-    int tErrno = 0;
-    assert( pLock->cnt==0 );
-    assert( pLock->locktype==0 );
-
-    /* Now get the read-lock */
-    lock.l_start = SHARED_FIRST;
-    lock.l_len = SHARED_SIZE;
-    if( (s = fcntl(pFile->h, F_SETLK, &lock))==(-1) ){
-      tErrno = errno;
-    }
-    /* Drop the temporary PENDING lock */
-    lock.l_start = PENDING_BYTE;
-    lock.l_len = 1L;
-    lock.l_type = F_UNLCK;
-    if( fcntl(pFile->h, F_SETLK, &lock)!=0 ){
-      if( s != -1 ){
-        /* This could happen with a network mount */
-        tErrno = errno; 
-        rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK); 
-        if( IS_LOCK_ERROR(rc) ){
-          pFile->lastErrno = tErrno;
-        }
-        goto end_lock;
-      }
-    }
-    if( s==(-1) ){
-      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
-      if( IS_LOCK_ERROR(rc) ){
-        pFile->lastErrno = tErrno;
-      }
-    }else{
-      pFile->locktype = SHARED_LOCK;
-      pFile->pOpen->nLock++;
-      pLock->cnt = 1;
-    }
-  }else if( locktype==EXCLUSIVE_LOCK && pLock->cnt>1 ){
-    /* We are trying for an exclusive lock but another thread in this
-    ** same process is still holding a shared lock. */
-    rc = SQLITE_BUSY;
-  }else{
-    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
-    ** assumed that there is a SHARED or greater lock on the file
-    ** already.
-    */
-    assert( 0!=pFile->locktype );
-    lock.l_type = F_WRLCK;
-    switch( locktype ){
-      case RESERVED_LOCK:
-        lock.l_start = RESERVED_BYTE;
-        break;
-      case EXCLUSIVE_LOCK:
-        lock.l_start = SHARED_FIRST;
-        lock.l_len = SHARED_SIZE;
-        break;
-      default:
-        assert(0);
-    }
-    s = fcntl(pFile->h, F_SETLK, &lock);
-    if( s==(-1) ){
-      int tErrno = errno;
-      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
-      if( IS_LOCK_ERROR(rc) ){
-        pFile->lastErrno = tErrno;
-      }
-    }
-  }
-  
-  if( rc==SQLITE_OK ){
-    pFile->locktype = locktype;
-    pLock->locktype = locktype;
-  }else if( locktype==EXCLUSIVE_LOCK ){
-    pFile->locktype = PENDING_LOCK;
-    pLock->locktype = PENDING_LOCK;
-  }
-
-end_lock:
-  unixLeaveMutex();
-  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
-      rc==SQLITE_OK ? "ok" : "failed");
-  return rc;
-}
-
-/*
-** Lower the locking level on file descriptor pFile to locktype.  locktype
-** must be either NO_LOCK or SHARED_LOCK.
-**
-** If the locking level of the file descriptor is already at or below
-** the requested locking level, this routine is a no-op.
-*/
-static int unixUnlock(sqlite3_file *id, int locktype){
-  struct unixLockInfo *pLock;
-  struct flock lock;
-  int rc = SQLITE_OK;
-  unixFile *pFile = (unixFile*)id;
-  int h;
-
-  assert( pFile );
-  OSTRACE7("UNLOCK  %d %d was %d(%d,%d) pid=%d\n", pFile->h, locktype,
-      pFile->locktype, pFile->pLock->locktype, pFile->pLock->cnt, getpid());
-
-  assert( locktype<=SHARED_LOCK );
-  if( pFile->locktype<=locktype ){
-    return SQLITE_OK;
-  }
-  if( CHECK_THREADID(pFile) ){
-    return SQLITE_MISUSE;
+  if( CHECK_THREADID(pFile) ){
+    return SQLITE_MISUSE;
   }
   unixEnterMutex();
   h = pFile->h;
@@ -2044,367 +1576,237 @@ static int unixClose(sqlite3_file *id){
   return rc;
 }
 
+/************** End of the posix advisory lock implementation *****************
+******************************************************************************/
 
-#if SQLITE_ENABLE_LOCKING_STYLE
-#if !OS_VXWORKS
-#pragma mark AFP support
-
-/*
- ** The afpLockingContext structure contains all afp lock specific state
- */
-typedef struct afpLockingContext afpLockingContext;
-struct afpLockingContext {
-  unsigned long long sharedByte;
-  const char *dbPath;
-};
+/******************************************************************************
+****************************** No-op Locking **********************************
+**
+** Of the various locking implementations available, this is by far the
+** simplest:  locking is ignored.  No attempt is made to lock the database
+** file for reading or writing.
+**
+** This locking mode is appropriate for use on read-only databases
+** (ex: databases that are burned into CD-ROM, for example.)  It can
+** also be used if the application employs some external mechanism to
+** prevent simultaneous access of the same database by two or more
+** database connections.  But there is a serious risk of database
+** corruption if this locking mode is used in situations where multiple
+** database connections are accessing the same database file at the same
+** time and one or more of those connections are writing.
+*/
 
-struct ByteRangeLockPB2
-{
-  unsigned long long offset;        /* offset to first byte to lock */
-  unsigned long long length;        /* nbr of bytes to lock */
-  unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
-  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
-  unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
-  int fd;                           /* file desc to assoc this lock with */
-};
+/*
+** The nolockLockingContext is void
+*/
+typedef void nolockLockingContext;
 
-#define afpfsByteRangeLock2FSCTL        _IOWR('z', 23, struct ByteRangeLockPB2)
+static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
+  UNUSED_PARAMETER(NotUsed);
+  *pResOut = 0;
+  return SQLITE_OK;
+}
 
-/* 
- ** Return SQLITE_OK on success, SQLITE_BUSY on failure.
- */
-static int _AFPFSSetLock(
-  const char *path, 
-  unixFile *pFile, 
-  unsigned long long offset, 
-  unsigned long long length, 
-  int setLockFlag
-){
-  struct ByteRangeLockPB2       pb;
-  int                     err;
-  
-  pb.unLockFlag = setLockFlag ? 0 : 1;
-  pb.startEndFlag = 0;
-  pb.offset = offset;
-  pb.length = length; 
-  pb.fd = pFile->h;
-  //SimulateIOErrorBenign(1);
-  //SimulateIOError( pb.fd=(-1) )
-  //SimulateIOErrorBenign(0);
-  
-  OSTRACE6("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 
-    (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""), offset, length);
-  err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
-  if ( err==-1 ) {
-    int rc;
-    int tErrno = errno;
-    OSTRACE4("AFPSETLOCK failed to fsctl() '%s' %d %s\n", path, tErrno, strerror(tErrno));
-#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
-    rc = SQLITE_BUSY;
-#else
-    rc = sqliteErrorFromPosixError(tErrno, setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
-#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
-    if( IS_LOCK_ERROR(rc) ){
-      pFile->lastErrno = tErrno;
-    }
-    return rc;
-  } else {
-    return SQLITE_OK;
-  }
+static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  return SQLITE_OK;
 }
 
-/* AFP-style reserved lock checking following the behavior of 
+static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
+  UNUSED_PARAMETER2(NotUsed, NotUsed2);
+  return SQLITE_OK;
+}
+
+/*
+** Close a file.
+*/
+static int nolockClose(sqlite3_file *id) {
+  int rc;
+  if( OS_VXWORKS ) unixEnterMutex();
+  rc = closeUnixFile(id);
+  if( OS_VXWORKS ) unixLeaveMutex();
+  return rc;
+}
+
+/******************* End of the no-op lock implementation *********************
+******************************************************************************/
+
+/******************************************************************************
+************************* Begin dot-file Locking ******************************
+**
+** The dotfile locking implementation uses the existing of separate lock
+** files in order to control access to the database.  This works on just
+** about every filesystem imaginable.  But there are serious downsides:
+**
+**    (1)  There is zero concurrency.  A single reader blocks all other
+**         connections from reading or writing the database.
+**
+**    (2)  An application crash or power loss can leave stale lock files
+**         sitting around that need to be cleared manually.
+**
+** Nevertheless, a dotlock is an appropriate locking mode for use if no
+** other locking strategy is available.
+*/
+
+/*
+** The file suffix added to the data base filename in order to create the
+** lock file.
+*/
+#define DOTLOCK_SUFFIX ".lock"
+
+/* Dotlock-style reserved lock checking following the behavior of 
 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
-static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
+static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
   int rc = SQLITE_OK;
   int reserved = 0;
   unixFile *pFile = (unixFile*)id;
-  
+
   SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
   
   assert( pFile );
-  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
-  
+
   /* Check if a thread in this process holds such a lock */
   if( pFile->locktype>SHARED_LOCK ){
     reserved = 1;
   }
   
-  /* Otherwise see if some other process holds it.
-   */
+  /* Otherwise see if some other process holds it. */
   if( !reserved ){
-    /* lock the RESERVED byte */
-    int lrc = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);  
-    if( SQLITE_OK==lrc ){
-      /* if we succeeded in taking the reserved lock, unlock it to restore
-      ** the original state */
-      lrc = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
-    } else {
-      /* if we failed to get the lock then someone else must have it */
+    char *zLockFile = (char *)pFile->lockingContext;
+    struct stat statBuf;
+    
+    if( lstat(zLockFile, &statBuf)==0 ){
+      /* file exists, someone else has the lock */
       reserved = 1;
-    }
-    if( IS_LOCK_ERROR(lrc) ){
-      rc=lrc;
+    }else{
+      /* file does not exist, we could have it if we want it */
+      int tErrno = errno;
+      if( ENOENT != tErrno ){
+        rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
+        pFile->lastErrno = tErrno;
+      }
     }
   }
-  
   OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
-  
+
   *pResOut = reserved;
   return rc;
 }
 
-/* AFP-style locking following the behavior of unixLock, see the unixLock 
-** function comments for details of lock management. */
-static int afpLock(sqlite3_file *id, int locktype){
-  int rc = SQLITE_OK;
+static int dotlockLock(sqlite3_file *id, int locktype) {
   unixFile *pFile = (unixFile*)id;
-  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+  int fd;
+  char *zLockFile = (char *)pFile->lockingContext;
+  int rc=SQLITE_OK;
+
+  /* if we already have a lock, it is exclusive.  
+  ** Just adjust level and punt on outta here. */
+  if (pFile->locktype > NO_LOCK) {
+    pFile->locktype = locktype;
+#if !OS_VXWORKS
+    /* Always update the timestamp on the old file */
+    utimes(zLockFile, NULL);
+#endif
+    rc = SQLITE_OK;
+    goto dotlock_end_lock;
+  }
   
-  assert( pFile );
-  OSTRACE5("LOCK    %d %s was %s pid=%d\n", pFile->h,
-         locktypeName(locktype), locktypeName(pFile->locktype), getpid());
+  /* check to see if lock file already exists */
+  struct stat statBuf;
+  if (lstat(zLockFile,&statBuf) == 0){
+    rc = SQLITE_BUSY; /* it does, busy */
+    goto dotlock_end_lock;
+  }
+  
+  /* grab an exclusive lock */
+  fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
+  if( fd<0 ){
+    /* failed to open/create the file, someone else may have stolen the lock */
+    int tErrno = errno;
+    if( EEXIST == tErrno ){
+      rc = SQLITE_BUSY;
+    } else {
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
+      if( IS_LOCK_ERROR(rc) ){
+        pFile->lastErrno = tErrno;
+      }
+    }
+    goto dotlock_end_lock;
+  } 
+  if( close(fd) ){
+    pFile->lastErrno = errno;
+    rc = SQLITE_IOERR_CLOSE;
+  }
+  
+  /* got it, set the type and return ok */
+  pFile->locktype = locktype;
 
-  /* If there is already a lock of this type or more restrictive on the
-  ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
-  ** unixEnterMutex() hasn't been called yet.
-  */
-  if( pFile->locktype>=locktype ){
-    OSTRACE3("LOCK    %d %s ok (already held)\n", pFile->h,
-           locktypeName(locktype));
+ dotlock_end_lock:
+  return rc;
+}
+
+static int dotlockUnlock(sqlite3_file *id, int locktype) {
+  unixFile *pFile = (unixFile*)id;
+  char *zLockFile = (char *)pFile->lockingContext;
+
+  assert( pFile );
+  OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
+	   pFile->locktype, getpid());
+  assert( locktype<=SHARED_LOCK );
+  
+  /* no-op if possible */
+  if( pFile->locktype==locktype ){
     return SQLITE_OK;
   }
-
-  /* Make sure the locking sequence is correct
-  */
-  assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
-  assert( locktype!=PENDING_LOCK );
-  assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
   
-  /* This mutex is needed because pFile->pLock is shared across threads
-  */
-  unixEnterMutex();
-
-  /* Make sure the current thread owns the pFile.
-  */
-  rc = transferOwnership(pFile);
-  if( rc!=SQLITE_OK ){
-    unixLeaveMutex();
-    return rc;
+  /* shared can just be set because we always have an exclusive */
+  if (locktype==SHARED_LOCK) {
+    pFile->locktype = locktype;
+    return SQLITE_OK;
   }
-    
-  /* A PENDING lock is needed before acquiring a SHARED lock and before
-  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
-  ** be released.
-  */
-  if( locktype==SHARED_LOCK 
-      || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
-  ){
-    int failed;
-    failed = _AFPFSSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
-    if (failed) {
-      rc = failed;
-      goto afp_end_lock;
+  
+  /* no, really, unlock. */
+  if (unlink(zLockFile) ) {
+    int rc, tErrno = errno;
+    if( ENOENT != tErrno ){
+      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
     }
+    if( IS_LOCK_ERROR(rc) ){
+      pFile->lastErrno = tErrno;
+    }
+    return rc; 
   }
-  
-  /* If control gets to this point, then actually go ahead and make
-  ** operating system calls for the specified lock.
-  */
-  if( locktype==SHARED_LOCK ){
-    int lk, lrc1, lrc2, lrc1Errno;
-    
-    /* Now get the read-lock SHARED_LOCK */
-    /* note that the quality of the randomness doesn't matter that much */
-    lk = random(); 
-    context->sharedByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
-    lrc1 = _AFPFSSetLock(context->dbPath, pFile, 
-          SHARED_FIRST+context->sharedByte, 1, 1);
-    if( IS_LOCK_ERROR(lrc1) ){
-      lrc1Errno = pFile->lastErrno;
-    }
-    /* Drop the temporary PENDING lock */
-    lrc2 = _AFPFSSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
-    
-    if( IS_LOCK_ERROR(lrc1) ) {
-      pFile->lastErrno = lrc1Errno;
-      rc = lrc1;
-      goto afp_end_lock;
-    } else if( IS_LOCK_ERROR(lrc2) ){
-      rc = lrc2;
-      goto afp_end_lock;
-    } else if( lrc1 != SQLITE_OK ) {
-      rc = lrc1;
-    } else {
-      pFile->locktype = SHARED_LOCK;
-      pFile->pOpen->nLock++;
-    }
-  }else{
-    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
-    ** assumed that there is a SHARED or greater lock on the file
-    ** already.
-    */
-    int failed = 0;
-    assert( 0!=pFile->locktype );
-    if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
-        /* Acquire a RESERVED lock */
-        failed = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
-    }
-    if (!failed && locktype == EXCLUSIVE_LOCK) {
-      /* Acquire an EXCLUSIVE lock */
-        
-      /* Remove the shared lock before trying the range.  we'll need to 
-      ** reestablish the shared lock if we can't get the  afpUnlock
-      */
-      if( !(failed = _AFPFSSetLock(context->dbPath, pFile, SHARED_FIRST +
-                         context->sharedByte, 1, 0)) ){
-        int failed2 = SQLITE_OK;
-        /* now attemmpt to get the exclusive lock range */
-        failed = _AFPFSSetLock(context->dbPath, pFile, SHARED_FIRST, 
-                               SHARED_SIZE, 1);
-        if( failed && (failed2 = _AFPFSSetLock(context->dbPath, pFile, 
-                       SHARED_FIRST + context->sharedByte, 1, 1)) ){
-          /* Can't reestablish the shared lock.  Sqlite can't deal, this is
-          ** a critical I/O error
-          */
-          rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 : 
-               SQLITE_IOERR_LOCK;
-          goto afp_end_lock;
-        } 
-      }else{
-        rc = failed; 
-      }
-    }
-    if( failed ){
-      rc = failed;
-    }
-  }
-  
-  if( rc==SQLITE_OK ){
-    pFile->locktype = locktype;
-  }else if( locktype==EXCLUSIVE_LOCK ){
-    pFile->locktype = PENDING_LOCK;
-  }
-  
-afp_end_lock:
-  unixLeaveMutex();
-  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
-         rc==SQLITE_OK ? "ok" : "failed");
-  return rc;
-}
-
-/*
-** Lower the locking level on file descriptor pFile to locktype.  locktype
-** must be either NO_LOCK or SHARED_LOCK.
-**
-** If the locking level of the file descriptor is already at or below
-** the requested locking level, this routine is a no-op.
-*/
-static int afpUnlock(sqlite3_file *id, int locktype) {
-  int rc = SQLITE_OK;
-  unixFile *pFile = (unixFile*)id;
-  afpLockingContext *pCtx = (afpLockingContext *) pFile->lockingContext;
-
-  assert( pFile );
-  OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
-         pFile->locktype, getpid());
-
-  assert( locktype<=SHARED_LOCK );
-  if( pFile->locktype<=locktype ){
-    return SQLITE_OK;
-  }
-  if( CHECK_THREADID(pFile) ){
-    return SQLITE_MISUSE;
-  }
-  unixEnterMutex();
-  if( pFile->locktype>SHARED_LOCK ){
-    
-    if( pFile->locktype==EXCLUSIVE_LOCK ){
-      rc = _AFPFSSetLock(pCtx->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
-      if( rc==SQLITE_OK && locktype==SHARED_LOCK ){
-        /* only re-establish the shared lock if necessary */
-        int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
-        rc = _AFPFSSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 1);
-      }
-    }
-    if( rc==SQLITE_OK && pFile->locktype>=PENDING_LOCK ){
-      rc = _AFPFSSetLock(pCtx->dbPath, pFile, PENDING_BYTE, 1, 0);
-    } 
-    if( rc==SQLITE_OK && pFile->locktype>=RESERVED_LOCK ){
-      rc = _AFPFSSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1, 0);
-    }
-  }else if( locktype==NO_LOCK ){
-    /* clear the shared lock */
-    int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
-    rc = _AFPFSSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 0);
-  }
-
-  if( rc==SQLITE_OK ){
-    if( locktype==NO_LOCK ){
-      struct unixOpenCnt *pOpen = pFile->pOpen;
-      pOpen->nLock--;
-      assert( pOpen->nLock>=0 );
-      if( pOpen->nLock==0 && pOpen->nPending>0 ){
-        int i;
-        for(i=0; i<pOpen->nPending; i++){
-          if( pOpen->aPending[i] < 0 ) continue;
-          if( close(pOpen->aPending[i]) ){
-            pFile->lastErrno = errno;
-            rc = SQLITE_IOERR_CLOSE;
-          }else{
-            pOpen->aPending[i] = -1;
-          }
-        }
-        if( rc==SQLITE_OK ){
-          sqlite3_free(pOpen->aPending);
-          pOpen->nPending = 0;
-          pOpen->aPending = 0;
-        }
-      }
-    }
-  }
-end_afpunlock:
-  unixLeaveMutex();
-  if( rc==SQLITE_OK ) pFile->locktype = locktype;
-  return rc;
+  pFile->locktype = NO_LOCK;
+  return SQLITE_OK;
 }
 
 /*
-** Close a file & cleanup AFP specific locking context 
+** Close a file.
 */
-static int afpClose(sqlite3_file *id) {
+static int dotlockClose(sqlite3_file *id) {
+  int rc;
   if( id ){
     unixFile *pFile = (unixFile*)id;
-    afpUnlock(id, NO_LOCK);
-    unixEnterMutex();
-    if( pFile->pOpen && pFile->pOpen->nLock ){
-      /* If there are outstanding locks, do not actually close the file just
-       ** yet because that would clear those locks.  Instead, add the file
-       ** descriptor to pOpen->aPending.  It will be automatically closed when
-       ** the last lock is cleared.
-       */
-      int *aNew;
-      struct unixOpenCnt *pOpen = pFile->pOpen;
-      aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
-      if( aNew==0 ){
-        /* If a malloc fails, just leak the file descriptor */
-      }else{
-        pOpen->aPending = aNew;
-        pOpen->aPending[pOpen->nPending] = pFile->h;
-        pOpen->nPending++;
-        pFile->h = -1;
-      }
-    }
-    releaseOpenCnt(pFile->pOpen);
+    dotlockUnlock(id, NO_LOCK);
     sqlite3_free(pFile->lockingContext);
-    closeUnixFile(id);
-    unixLeaveMutex();
   }
-  return SQLITE_OK;
+  if( OS_VXWORKS ) unixEnterMutex();
+  rc = closeUnixFile(id);
+  if( OS_VXWORKS ) unixLeaveMutex();
+  return rc;
 }
+/****************** End of the dot-file lock implementation *******************
+******************************************************************************/
 
-
-#pragma mark flock() style locking
+/******************************************************************************
+************************** Begin flock Locking ********************************
+**
+** Use the flock() system call to do file locking.
+**
+** Omit this section if SQLITE_ENABLE_LOCKING_STYLE is turned off or if
+** compiling for VXWORKS.
+*/
+#if SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORKS
 
 /*
 ** The flockLockingContext is not used
@@ -2553,14 +1955,21 @@ static int flockClose(sqlite3_file *id) {
   return closeUnixFile(id);
 }
 
-#endif /* !OS_VXWORKS */
+#endif /* SQLITE_ENABLE_LOCKING_STYLE && !OS_VXWORK */
 
-#pragma mark Old-School .lock file based locking
-#define DOTLOCK_SUFFIX ".lock"
+/******************* End of the flock lock implementation *********************
+******************************************************************************/
 
-/* Dotlock-style reserved lock checking following the behavior of 
+/******************************************************************************
+************************ Begin Named Semaphore Locking ************************
+**
+** Named semaphore locking is only supported on VxWorks.
+*/
+#if OS_VXWORKS
+
+/* Namedsem-style reserved lock checking following the behavior of 
 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
-static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
+static int semCheckReservedLock(sqlite3_file *id, int *pResOut) {
   int rc = SQLITE_OK;
   int reserved = 0;
   unixFile *pFile = (unixFile*)id;
@@ -2576,19 +1985,21 @@ static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
   
   /* Otherwise see if some other process holds it. */
   if( !reserved ){
-    char *zLockFile = (char *)pFile->lockingContext;
+    sem_t *pSem = pFile->pOpen->pSem;
     struct stat statBuf;
-    
-    if( lstat(zLockFile, &statBuf)==0 ){
-      /* file exists, someone else has the lock */
-      reserved = 1;
-    }else{
-      /* file does not exist, we could have it if we want it */
+
+    if( sem_trywait(pSem)==-1 ){
       int tErrno = errno;
-      if( ENOENT != tErrno ){
+      if( EAGAIN != tErrno ){
         rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
         pFile->lastErrno = tErrno;
+      } else {
+        /* someone else has the lock when we are in NO_LOCK */
+        reserved = (pFile->locktype < SHARED_LOCK);
       }
+    }else{
+      /* we could have it if we want it */
+      sem_post(pSem);
     }
   }
   OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
@@ -2597,66 +2008,42 @@ static int dotlockCheckReservedLock(sqlite3_file *id, int *pResOut) {
   return rc;
 }
 
-static int dotlockLock(sqlite3_file *id, int locktype) {
+static int semLock(sqlite3_file *id, int locktype) {
   unixFile *pFile = (unixFile*)id;
   int fd;
-  char *zLockFile = (char *)pFile->lockingContext;
-  int rc=SQLITE_OK;
+  sem_t *pSem = pFile->pOpen->pSem;
+  int rc = SQLITE_OK;
 
   /* if we already have a lock, it is exclusive.  
   ** Just adjust level and punt on outta here. */
   if (pFile->locktype > NO_LOCK) {
     pFile->locktype = locktype;
-#if !OS_VXWORKS
-    /* Always update the timestamp on the old file */
-    utimes(zLockFile, NULL);
-#endif
     rc = SQLITE_OK;
-    goto dotlock_end_lock;
+    goto sem_end_lock;
   }
   
-  /* check to see if lock file already exists */
-  struct stat statBuf;
-  if (lstat(zLockFile,&statBuf) == 0){
-    rc = SQLITE_BUSY; /* it does, busy */
-    goto dotlock_end_lock;
+  /* lock semaphore now but bail out when already locked. */
+  if( sem_trywait(pSem)==-1 ){
+    rc = SQLITE_BUSY;
+    goto sem_end_lock;
   }
-  
-  /* grab an exclusive lock */
-  fd = open(zLockFile,O_RDONLY|O_CREAT|O_EXCL,0600);
-  if( fd<0 ){
-    /* failed to open/create the file, someone else may have stolen the lock */
-    int tErrno = errno;
-    if( EEXIST == tErrno ){
-      rc = SQLITE_BUSY;
-    } else {
-      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK);
-      if( IS_LOCK_ERROR(rc) ){
-        pFile->lastErrno = tErrno;
-      }
-    }
-    goto dotlock_end_lock;
-  } 
-  if( close(fd) ){
-    pFile->lastErrno = errno;
-    rc = SQLITE_IOERR_CLOSE;
-  }
-  
-  /* got it, set the type and return ok */
-  pFile->locktype = locktype;
-
- dotlock_end_lock:
-  return rc;
-}
-
-static int dotlockUnlock(sqlite3_file *id, int locktype) {
-  unixFile *pFile = (unixFile*)id;
-  char *zLockFile = (char *)pFile->lockingContext;
-
-  assert( pFile );
-  OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
-	   pFile->locktype, getpid());
-  assert( locktype<=SHARED_LOCK );
+
+  /* got it, set the type and return ok */
+  pFile->locktype = locktype;
+
+ sem_end_lock:
+  return rc;
+}
+
+static int semUnlock(sqlite3_file *id, int locktype) {
+  unixFile *pFile = (unixFile*)id;
+  sem_t *pSem = pFile->pOpen->pSem;
+
+  assert( pFile );
+  assert( pSem );
+  OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
+	   pFile->locktype, getpid());
+  assert( locktype<=SHARED_LOCK );
   
   /* no-op if possible */
   if( pFile->locktype==locktype ){
@@ -2669,12 +2056,10 @@ static int dotlockUnlock(sqlite3_file *id, int locktype) {
     return SQLITE_OK;
   }
   
-  /* no, really, unlock. */
-  if (unlink(zLockFile) ) {
+  /* no, really unlock. */
+  if ( sem_post(pSem)==-1 ) {
     int rc, tErrno = errno;
-    if( ENOENT != tErrno ){
-      rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
-    }
+    rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
     if( IS_LOCK_ERROR(rc) ){
       pFile->lastErrno = tErrno;
     }
@@ -2687,232 +2072,705 @@ static int dotlockUnlock(sqlite3_file *id, int locktype) {
 /*
  ** Close a file.
  */
-static int dotlockClose(sqlite3_file *id) {
-  int rc;
+static int semClose(sqlite3_file *id) {
   if( id ){
     unixFile *pFile = (unixFile*)id;
-    dotlockUnlock(id, NO_LOCK);
-    sqlite3_free(pFile->lockingContext);
+    semUnlock(id, NO_LOCK);
+    assert( pFile );
+    unixEnterMutex();
+    releaseLockInfo(pFile->pLock);
+    releaseOpenCnt(pFile->pOpen);
+    closeUnixFile(id);
+    unixLeaveMutex();
   }
-  if( OS_VXWORKS ) unixEnterMutex();
-  rc = closeUnixFile(id);
-  if( OS_VXWORKS ) unixLeaveMutex();
-  return rc;
+  return SQLITE_OK;
 }
 
-#if OS_VXWORKS
+#endif /* OS_VXWORKS */
+/*
+** Named semaphore locking is only available on VxWorks.
+**
+*************** End of the named semaphore lock implementation ****************
+******************************************************************************/
 
-#pragma mark POSIX/vxWorks named semaphore based locking
 
-/* Namedsem-style reserved lock checking following the behavior of 
+/******************************************************************************
+*************************** Begin AFP Locking *********************************
+**
+** AFP is the Apple Filing Protocol.  AFP is a network filesystem found
+** on Apple Macintosh computers - both OS9 and OSX.
+**
+** Third-party implementations of AFP are available.  But this code here
+** only works on OSX.
+*/
+
+#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
+/*
+** The afpLockingContext structure contains all afp lock specific state
+*/
+typedef struct afpLockingContext afpLockingContext;
+struct afpLockingContext {
+  unsigned long long sharedByte;
+  const char *dbPath;
+};
+
+struct ByteRangeLockPB2
+{
+  unsigned long long offset;        /* offset to first byte to lock */
+  unsigned long long length;        /* nbr of bytes to lock */
+  unsigned long long retRangeStart; /* nbr of 1st byte locked if successful */
+  unsigned char unLockFlag;         /* 1 = unlock, 0 = lock */
+  unsigned char startEndFlag;       /* 1=rel to end of fork, 0=rel to start */
+  int fd;                           /* file desc to assoc this lock with */
+};
+
+#define afpfsByteRangeLock2FSCTL        _IOWR('z', 23, struct ByteRangeLockPB2)
+
+/* 
+ ** Return SQLITE_OK on success, SQLITE_BUSY on failure.
+ */
+static int _AFPFSSetLock(
+  const char *path, 
+  unixFile *pFile, 
+  unsigned long long offset, 
+  unsigned long long length, 
+  int setLockFlag
+){
+  struct ByteRangeLockPB2       pb;
+  int                     err;
+  
+  pb.unLockFlag = setLockFlag ? 0 : 1;
+  pb.startEndFlag = 0;
+  pb.offset = offset;
+  pb.length = length; 
+  pb.fd = pFile->h;
+  //SimulateIOErrorBenign(1);
+  //SimulateIOError( pb.fd=(-1) )
+  //SimulateIOErrorBenign(0);
+  
+  OSTRACE6("AFPSETLOCK [%s] for %d%s in range %llx:%llx\n", 
+    (setLockFlag?"ON":"OFF"), pFile->h, (pb.fd==-1?"[testval-1]":""),
+    offset, length);
+  err = fsctl(path, afpfsByteRangeLock2FSCTL, &pb, 0);
+  if ( err==-1 ) {
+    int rc;
+    int tErrno = errno;
+    OSTRACE4("AFPSETLOCK failed to fsctl() '%s' %d %s\n",
+             path, tErrno, strerror(tErrno));
+#ifdef SQLITE_IGNORE_AFP_LOCK_ERRORS
+    rc = SQLITE_BUSY;
+#else
+    rc = sqliteErrorFromPosixError(tErrno,
+                    setLockFlag ? SQLITE_IOERR_LOCK : SQLITE_IOERR_UNLOCK);
+#endif /* SQLITE_IGNORE_AFP_LOCK_ERRORS */
+    if( IS_LOCK_ERROR(rc) ){
+      pFile->lastErrno = tErrno;
+    }
+    return rc;
+  } else {
+    return SQLITE_OK;
+  }
+}
+
+/* AFP-style reserved lock checking following the behavior of 
 ** unixCheckReservedLock, see the unixCheckReservedLock function comments */
-static int namedsemCheckReservedLock(sqlite3_file *id, int *pResOut) {
+static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
   int rc = SQLITE_OK;
   int reserved = 0;
   unixFile *pFile = (unixFile*)id;
-
+  
   SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
   
   assert( pFile );
-
+  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+  
   /* Check if a thread in this process holds such a lock */
   if( pFile->locktype>SHARED_LOCK ){
     reserved = 1;
   }
   
-  /* Otherwise see if some other process holds it. */
+  /* Otherwise see if some other process holds it.
+   */
   if( !reserved ){
-    sem_t *pSem = pFile->pOpen->pSem;
-    struct stat statBuf;
-
-    if( sem_trywait(pSem)==-1 ){
-      int tErrno = errno;
-      if( EAGAIN != tErrno ){
-        rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_CHECKRESERVEDLOCK);
-        pFile->lastErrno = tErrno;
-      } else {
-        /* someone else has the lock when we are in NO_LOCK */
-        reserved = (pFile->locktype < SHARED_LOCK);
-      }
-    }else{
-      /* we could have it if we want it */
-      sem_post(pSem);
+    /* lock the RESERVED byte */
+    int lrc = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);  
+    if( SQLITE_OK==lrc ){
+      /* if we succeeded in taking the reserved lock, unlock it to restore
+      ** the original state */
+      lrc = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1, 0);
+    } else {
+      /* if we failed to get the lock then someone else must have it */
+      reserved = 1;
+    }
+    if( IS_LOCK_ERROR(lrc) ){
+      rc=lrc;
     }
   }
+  
   OSTRACE4("TEST WR-LOCK %d %d %d\n", pFile->h, rc, reserved);
-
+  
   *pResOut = reserved;
   return rc;
 }
 
-static int namedsemLock(sqlite3_file *id, int locktype) {
-  unixFile *pFile = (unixFile*)id;
-  int fd;
-  sem_t *pSem = pFile->pOpen->pSem;
+/* AFP-style locking following the behavior of unixLock, see the unixLock 
+** function comments for details of lock management. */
+static int afpLock(sqlite3_file *id, int locktype){
   int rc = SQLITE_OK;
-
-  /* if we already have a lock, it is exclusive.  
-  ** Just adjust level and punt on outta here. */
-  if (pFile->locktype > NO_LOCK) {
-    pFile->locktype = locktype;
-    rc = SQLITE_OK;
-    goto namedsem_end_lock;
-  }
-  
-  /* lock semaphore now but bail out when already locked. */
-  if( sem_trywait(pSem)==-1 ){
-    rc = SQLITE_BUSY;
-    goto namedsem_end_lock;
-  }
-
-  /* got it, set the type and return ok */
-  pFile->locktype = locktype;
-
- namedsem_end_lock:
-  return rc;
-}
-
-static int namedsemUnlock(sqlite3_file *id, int locktype) {
   unixFile *pFile = (unixFile*)id;
-  sem_t *pSem = pFile->pOpen->pSem;
-
-  assert( pFile );
-  assert( pSem );
-  OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
-	   pFile->locktype, getpid());
-  assert( locktype<=SHARED_LOCK );
-  
-  /* no-op if possible */
-  if( pFile->locktype==locktype ){
-    return SQLITE_OK;
-  }
+  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
   
-  /* shared can just be set because we always have an exclusive */
-  if (locktype==SHARED_LOCK) {
-    pFile->locktype = locktype;
+  assert( pFile );
+  OSTRACE5("LOCK    %d %s was %s pid=%d\n", pFile->h,
+         locktypeName(locktype), locktypeName(pFile->locktype), getpid());
+
+  /* If there is already a lock of this type or more restrictive on the
+  ** unixFile, do nothing. Don't use the afp_end_lock: exit path, as
+  ** unixEnterMutex() hasn't been called yet.
+  */
+  if( pFile->locktype>=locktype ){
+    OSTRACE3("LOCK    %d %s ok (already held)\n", pFile->h,
+           locktypeName(locktype));
     return SQLITE_OK;
   }
+
+  /* Make sure the locking sequence is correct
+  */
+  assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK );
+  assert( locktype!=PENDING_LOCK );
+  assert( locktype!=RESERVED_LOCK || pFile->locktype==SHARED_LOCK );
   
-  /* no, really unlock. */
-  if ( sem_post(pSem)==-1 ) {
-    int rc, tErrno = errno;
-    rc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_UNLOCK);
-    if( IS_LOCK_ERROR(rc) ){
-      pFile->lastErrno = tErrno;
-    }
-    return rc; 
-  }
-  pFile->locktype = NO_LOCK;
-  return SQLITE_OK;
-}
+  /* This mutex is needed because pFile->pLock is shared across threads
+  */
+  unixEnterMutex();
 
-/*
- ** Close a file.
- */
-static int namedsemClose(sqlite3_file *id) {
-  if( id ){
-    unixFile *pFile = (unixFile*)id;
-    namedsemUnlock(id, NO_LOCK);
-    assert( pFile );
-    unixEnterMutex();
-    releaseLockInfo(pFile->pLock);
-    releaseOpenCnt(pFile->pOpen);
-    closeUnixFile(id);
+  /* Make sure the current thread owns the pFile.
+  */
+  rc = transferOwnership(pFile);
+  if( rc!=SQLITE_OK ){
     unixLeaveMutex();
+    return rc;
   }
-  return SQLITE_OK;
-}
-
-#endif /* OS_VXWORKS */
-
-#pragma mark Proxy locking support
-
-static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
-  unixFile *pFile = (unixFile*)id;
-  int rc = takeConch(pFile);
+    
+  /* A PENDING lock is needed before acquiring a SHARED lock and before
+  ** acquiring an EXCLUSIVE lock.  For the SHARED lock, the PENDING will
+  ** be released.
+  */
+  if( locktype==SHARED_LOCK 
+      || (locktype==EXCLUSIVE_LOCK && pFile->locktype<PENDING_LOCK)
+  ){
+    int failed;
+    failed = _AFPFSSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 1);
+    if (failed) {
+      rc = failed;
+      goto afp_end_lock;
+    }
+  }
+  
+  /* If control gets to this point, then actually go ahead and make
+  ** operating system calls for the specified lock.
+  */
+  if( locktype==SHARED_LOCK ){
+    int lk, lrc1, lrc2, lrc1Errno;
+    
+    /* Now get the read-lock SHARED_LOCK */
+    /* note that the quality of the randomness doesn't matter that much */
+    lk = random(); 
+    context->sharedByte = (lk & 0x7fffffff)%(SHARED_SIZE - 1);
+    lrc1 = _AFPFSSetLock(context->dbPath, pFile, 
+          SHARED_FIRST+context->sharedByte, 1, 1);
+    if( IS_LOCK_ERROR(lrc1) ){
+      lrc1Errno = pFile->lastErrno;
+    }
+    /* Drop the temporary PENDING lock */
+    lrc2 = _AFPFSSetLock(context->dbPath, pFile, PENDING_BYTE, 1, 0);
+    
+    if( IS_LOCK_ERROR(lrc1) ) {
+      pFile->lastErrno = lrc1Errno;
+      rc = lrc1;
+      goto afp_end_lock;
+    } else if( IS_LOCK_ERROR(lrc2) ){
+      rc = lrc2;
+      goto afp_end_lock;
+    } else if( lrc1 != SQLITE_OK ) {
+      rc = lrc1;
+    } else {
+      pFile->locktype = SHARED_LOCK;
+      pFile->pOpen->nLock++;
+    }
+  }else{
+    /* The request was for a RESERVED or EXCLUSIVE lock.  It is
+    ** assumed that there is a SHARED or greater lock on the file
+    ** already.
+    */
+    int failed = 0;
+    assert( 0!=pFile->locktype );
+    if (locktype >= RESERVED_LOCK && pFile->locktype < RESERVED_LOCK) {
+        /* Acquire a RESERVED lock */
+        failed = _AFPFSSetLock(context->dbPath, pFile, RESERVED_BYTE, 1,1);
+    }
+    if (!failed && locktype == EXCLUSIVE_LOCK) {
+      /* Acquire an EXCLUSIVE lock */
+        
+      /* Remove the shared lock before trying the range.  we'll need to 
+      ** reestablish the shared lock if we can't get the  afpUnlock
+      */
+      if( !(failed = _AFPFSSetLock(context->dbPath, pFile, SHARED_FIRST +
+                         context->sharedByte, 1, 0)) ){
+        int failed2 = SQLITE_OK;
+        /* now attemmpt to get the exclusive lock range */
+        failed = _AFPFSSetLock(context->dbPath, pFile, SHARED_FIRST, 
+                               SHARED_SIZE, 1);
+        if( failed && (failed2 = _AFPFSSetLock(context->dbPath, pFile, 
+                       SHARED_FIRST + context->sharedByte, 1, 1)) ){
+          /* Can't reestablish the shared lock.  Sqlite can't deal, this is
+          ** a critical I/O error
+          */
+          rc = ((failed & SQLITE_IOERR) == SQLITE_IOERR) ? failed2 : 
+               SQLITE_IOERR_LOCK;
+          goto afp_end_lock;
+        } 
+      }else{
+        rc = failed; 
+      }
+    }
+    if( failed ){
+      rc = failed;
+    }
+  }
+  
   if( rc==SQLITE_OK ){
-    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
-    unixFile *proxy = pCtx->lockProxy;
-    return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);
+    pFile->locktype = locktype;
+  }else if( locktype==EXCLUSIVE_LOCK ){
+    pFile->locktype = PENDING_LOCK;
   }
+  
+afp_end_lock:
+  unixLeaveMutex();
+  OSTRACE4("LOCK    %d %s %s\n", pFile->h, locktypeName(locktype), 
+         rc==SQLITE_OK ? "ok" : "failed");
   return rc;
 }
 
-static int proxyLock(sqlite3_file *id, int locktype) {
+/*
+** Lower the locking level on file descriptor pFile to locktype.  locktype
+** must be either NO_LOCK or SHARED_LOCK.
+**
+** If the locking level of the file descriptor is already at or below
+** the requested locking level, this routine is a no-op.
+*/
+static int afpUnlock(sqlite3_file *id, int locktype) {
+  int rc = SQLITE_OK;
   unixFile *pFile = (unixFile*)id;
-  int rc = takeConch(pFile);
-  if( rc==SQLITE_OK ){
-    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
-    unixFile *proxy = pCtx->lockProxy;
-    rc = proxy->pMethod->xLock((sqlite3_file*)proxy, locktype);
-    pFile->locktype = proxy->locktype;
+  afpLockingContext *pCtx = (afpLockingContext *) pFile->lockingContext;
+
+  assert( pFile );
+  OSTRACE5("UNLOCK  %d %d was %d pid=%d\n", pFile->h, locktype,
+         pFile->locktype, getpid());
+
+  assert( locktype<=SHARED_LOCK );
+  if( pFile->locktype<=locktype ){
+    return SQLITE_OK;
+  }
+  if( CHECK_THREADID(pFile) ){
+    return SQLITE_MISUSE;
+  }
+  unixEnterMutex();
+  if( pFile->locktype>SHARED_LOCK ){
+    
+    if( pFile->locktype==EXCLUSIVE_LOCK ){
+      rc = _AFPFSSetLock(pCtx->dbPath, pFile, SHARED_FIRST, SHARED_SIZE, 0);
+      if( rc==SQLITE_OK && locktype==SHARED_LOCK ){
+        /* only re-establish the shared lock if necessary */
+        int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
+        rc = _AFPFSSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 1);
+      }
+    }
+    if( rc==SQLITE_OK && pFile->locktype>=PENDING_LOCK ){
+      rc = _AFPFSSetLock(pCtx->dbPath, pFile, PENDING_BYTE, 1, 0);
+    } 
+    if( rc==SQLITE_OK && pFile->locktype>=RESERVED_LOCK ){
+      rc = _AFPFSSetLock(pCtx->dbPath, pFile, RESERVED_BYTE, 1, 0);
+    }
+  }else if( locktype==NO_LOCK ){
+    /* clear the shared lock */
+    int sharedLockByte = SHARED_FIRST+pCtx->sharedByte;
+    rc = _AFPFSSetLock(pCtx->dbPath, pFile, sharedLockByte, 1, 0);
   }
-  return rc;
-}
 
-static int proxyUnlock(sqlite3_file *id, int locktype) {
-  unixFile *pFile = (unixFile*)id;
-  int rc = takeConch(pFile);
   if( rc==SQLITE_OK ){
-    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
-    unixFile *proxy = pCtx->lockProxy;
-    rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, locktype);
-    pFile->locktype = proxy->locktype;
+    if( locktype==NO_LOCK ){
+      struct unixOpenCnt *pOpen = pFile->pOpen;
+      pOpen->nLock--;
+      assert( pOpen->nLock>=0 );
+      if( pOpen->nLock==0 && pOpen->nPending>0 ){
+        int i;
+        for(i=0; i<pOpen->nPending; i++){
+          if( pOpen->aPending[i] < 0 ) continue;
+          if( close(pOpen->aPending[i]) ){
+            pFile->lastErrno = errno;
+            rc = SQLITE_IOERR_CLOSE;
+          }else{
+            pOpen->aPending[i] = -1;
+          }
+        }
+        if( rc==SQLITE_OK ){
+          sqlite3_free(pOpen->aPending);
+          pOpen->nPending = 0;
+          pOpen->aPending = 0;
+        }
+      }
+    }
   }
+end_afpunlock:
+  unixLeaveMutex();
+  if( rc==SQLITE_OK ) pFile->locktype = locktype;
   return rc;
 }
 
 /*
- ** Close a file.
- */
-static int proxyClose(sqlite3_file *id) {
+** Close a file & cleanup AFP specific locking context 
+*/
+static int afpClose(sqlite3_file *id) {
   if( id ){
     unixFile *pFile = (unixFile*)id;
-    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
-    unixFile *lockProxy = pCtx->lockProxy;
-    unixFile *conchFile = pCtx->conchFile;
-    int rc = SQLITE_OK;
-    
-    if( lockProxy ){
-      rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
-      if( rc ) return rc;
-      rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
-      if( rc ) return rc;
-      sqlite3_free(lockProxy);
-    }
-    if( conchFile ){
-      if( pCtx->conchHeld ){
-        rc = releaseConch(pFile);
-        if( rc ) return rc;
+    afpUnlock(id, NO_LOCK);
+    unixEnterMutex();
+    if( pFile->pOpen && pFile->pOpen->nLock ){
+      /* If there are outstanding locks, do not actually close the file just
+      ** yet because that would clear those locks.  Instead, add the file
+      ** descriptor to pOpen->aPending.  It will be automatically closed when
+      ** the last lock is cleared.
+      */
+      int *aNew;
+      struct unixOpenCnt *pOpen = pFile->pOpen;
+      aNew = sqlite3_realloc(pOpen->aPending, (pOpen->nPending+1)*sizeof(int) );
+      if( aNew==0 ){
+        /* If a malloc fails, just leak the file descriptor */
+      }else{
+        pOpen->aPending = aNew;
+        pOpen->aPending[pOpen->nPending] = pFile->h;
+        pOpen->nPending++;
+        pFile->h = -1;
       }
-      rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
-      if( rc ) return rc;
-      sqlite3_free(conchFile);
     }
-    sqlite3_free(pCtx->lockProxyPath);
-    sqlite3_free(pCtx->conchFilePath);
-    sqlite3_free(pCtx->dbPath);
-    /* restore the original locking context and pMethod then close it */
-    pFile->lockingContext = pCtx->oldLockingContext;
-    pFile->pMethod = pCtx->pOldMethod;
-    sqlite3_free(pCtx);
-    return pFile->pMethod->xClose(id);
+    releaseOpenCnt(pFile->pOpen);
+    sqlite3_free(pFile->lockingContext);
+    closeUnixFile(id);
+    unixLeaveMutex();
   }
   return SQLITE_OK;
 }
 
-/* HOSTIDLEN and CONCHLEN both include space for the string 
-** terminating nul 
-*/
-#define HOSTIDLEN         128
-#define CONCHLEN          (MAXPATHLEN+HOSTIDLEN+1)
-#ifndef HOSTIDPATH
-# define HOSTIDPATH       "/Library/Caches/.com.apple.sqliteConchHostId"
-#endif
+#endif /* defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE */
+/*
+** The code above is the AFP lock implementation.  The code is specific
+** to MacOSX and does not work on other unix platforms.  No alternative
+** is available.  If you don't compile for a mac, then the "unix-afp"
+** VFS is not available.
+**
+********************* End of the AFP lock implementation **********************
+******************************************************************************/
 
-/* basically a copy of unixRandomness with different
-** test behavior built in */
-static int genHostID(char *pHostID){
+/******************************************************************************
+************************** Begin Proxy Locking ********************************
+**
+**
+** The default locking schemes in SQLite use byte-range locks on the
+** database file to coordinate safe, concurrent access by multiple readers
+** and writers [http://sqlite.org/lockingv3.html].  The five file locking
+** states (UNLOCKED, PENDING, SHARED, RESERVED, EXCLUSIVE) are implemented
+** as POSIX read & write locks over fixed set of locations (via fsctl),
+** on AFP and SMB only exclusive byte-range locks are available via fsctl
+** with _IOWR('z', 23, struct ByteRangeLockPB2) to track the same 5 states.
+** To simulate a F_RDLCK on the shared range, on AFP a randomly selected
+** address in the shared range is taken for a SHARED lock, the entire
+** shared range is taken for an EXCLUSIVE lock):
+**
+**      PENDING_BYTE        0x40000000		   	
+**      RESERVED_BYTE       0x40000001
+**      SHARED_RANGE        0x40000002 -> 0x40000200
+**
+** This works well on the local file system, but shows a nearly 100x
+** slowdown in read performance on AFP because the AFP client disables
+** the read cache when byte-range locks are present.  Enabling the read
+** cache exposes a cache coherency problem that is present on all OS X
+** supported network file systems.  NFS and AFP both observe the
+** close-to-open semantics for ensuring cache coherency
+** [http://nfs.sourceforge.net/#faq_a8], which does not effectively
+** address the requirements for concurrent database access by multiple
+** readers and writers
+** [http://www.nabble.com/SQLite-on-NFS-cache-coherency-td15655701.html].
+**
+** To address the performance and cache coherency issues, proxy file locking
+** changes the way database access is controlled by limiting access to a
+** single host at a time and moving file locks off of the database file
+** and onto a proxy file on the local file system.  
+**
+**
+** Using proxy locks
+** -----------------
+**
+** C APIs
+**
+**  sqlite3_file_control(db, dbname, SQLITE_SET_LOCKPROXYFILE,
+**                       <proxy_path> | ":auto:");
+**  sqlite3_file_control(db, dbname, SQLITE_GET_LOCKPROXYFILE, &<proxy_path>);
+**
+**
+** SQL pragmas
+**
+**  PRAGMA [database.]lock_proxy_file=<proxy_path> | :auto:
+**  PRAGMA [database.]lock_proxy_file
+**
+** Specifying ":auto:" means that if there is a conch file with a matching
+** host ID in it, the proxy path in the conch file will be used, otherwise
+** a proxy path based on the user's temp dir
+** (via confstr(_CS_DARWIN_USER_TEMP_DIR,...)) will be used and the
+** actual proxy file name is generated from the name and path of the
+** database file.  For example:
+**
+**       For database path "/Users/me/foo.db" 
+**       The lock path will be "<tmpdir>/sqliteplocks/_Users_me_foo.db:auto:")
+**
+** Once a lock proxy is configured for a database connection, it can not
+** be removed, however it may be switched to a different proxy path via
+** the above APIs (assuming the conch file is not being held by another
+** connection or process). 
+**
+**
+** How proxy locking works
+** -----------------------
+**
+** Proxy file locking relies primarily on two new supporting files: 
+**
+**   *  conch file to limit access to the database file to a single host
+**      at a time
+**
+**   *  proxy file to act as a proxy for the advisory locks normally
+**      taken on the database
+**
+** The conch file - to use a proxy file, sqlite must first "hold the conch"
+** by taking an sqlite-style shared lock on the conch file, reading the
+** contents and comparing the host's unique host ID (see below) and lock
+** proxy path against the values stored in the conch.  The conch file is
+** stored in the same directory as the database file and the file name
+** is patterned after the database file name as ".<databasename>-conch".
+** If the conch file does not exist, or it's contents do not match the
+** host ID and/or proxy path, then the lock is escalated to an exclusive
+** lock and the conch file contents is updated with the host ID and proxy
+** path and the lock is downgraded to a shared lock again.  If the conch
+** is held by another process (with a shared lock), the exclusive lock
+** will fail and SQLITE_BUSY is returned.
+**
+** The proxy file - a single-byte file used for all advisory file locks
+** normally taken on the database file.   This allows for safe sharing
+** of the database file for multiple readers and writers on the same
+** host (the conch ensures that they all use the same local lock file).
+**
+** There is a third file - the host ID file - used as a persistent record
+** of a unique identifier for the host, a 128-byte unique host id file
+** in the path defined by the HOSTIDPATH macro (default value is
+** /Library/Caches/.com.apple.sqliteConchHostId).
+**
+** Requesting the lock proxy does not immediately take the conch, it is
+** only taken when the first request to lock database file is made.  
+** This matches the semantics of the traditional locking behavior, where
+** opening a connection to a database file does not take a lock on it.
+** The shared lock and an open file descriptor are maintained until 
+** the connection to the database is closed. 
+**
+** The proxy file and the lock file are never deleted so they only need
+** to be created the first time they are used.
+**
+** Configuration options
+** ---------------------
+**
+**  SQLITE_PREFER_PROXY_LOCKING
+**
+**       Database files accessed on non-local file systems are
+**       automatically configured for proxy locking, lock files are
+**       named automatically using the same logic as
+**       PRAGMA lock_proxy_file=":auto:"
+**    
+**  SQLITE_PROXY_DEBUG
+**
+**       Enables the logging of error messages during host id file
+**       retrieval and creation
+**
+**  HOSTIDPATH
+**
+**       Overrides the default host ID file path location
+**
+**  LOCKPROXYDIR
+**
+**       Overrides the default directory used for lock proxy files that
+**       are named automatically via the ":auto:" setting
+**
+**  SQLITE_DEFAULT_PROXYDIR_PERMISSIONS
+**
+**       Permissions to use when creating a directory for storing the
+**       lock proxy files, only used when LOCKPROXYDIR is not set.
+**    
+**    
+** As mentioned above, when compiled with SQLITE_PREFER_PROXY_LOCKING,
+** setting the environment variable SQLITE_FORCE_PROXY_LOCKING to 1 will
+** force proxy locking to be used for every database file opened, and 0
+** will force automatic proxy locking to be disabled for all database
+** files (explicity calling the SQLITE_SET_LOCKPROXYFILE pragma or
+** sqlite_file_control API is not affected by SQLITE_FORCE_PROXY_LOCKING).
+*/
+
+/*
+** Proxy locking is only available on MacOSX 
+*/
+#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
+
+
+static int getDbPathForUnixFile(unixFile *pFile, char *dbPath);
+static int getLockPath(const char *dbPath, char *lPath, size_t maxLen);
+static sqlite3_io_methods *ioMethodForLockingStyle(int style);
+static int createProxyUnixFile(const char *path, unixFile **ppFile);
+static int fillInUnixFile(sqlite3_vfs *pVfs, int h, int dirfd, sqlite3_file *pId, const char *zFilename, int noLock, int isDelete);
+static int takeConch(unixFile *pFile);
+static int releaseConch(unixFile *pFile);
+static int unixRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf);
+
+/*
+** Tests a byte-range locking query to see if byte range locks are 
+** supported, if not we fall back to dotlockLockingStyle.
+** On vxWorks we fall back to semLockingStyle.
+*/
+static int testLockingStyle(int fd){
+  struct flock lockInfo;
+
+  /* Test byte-range lock using fcntl(). If the call succeeds, 
+  ** assume that the file-system supports POSIX style locks. 
+  */
+  lockInfo.l_len = 1;
+  lockInfo.l_start = 0;
+  lockInfo.l_whence = SEEK_SET;
+  lockInfo.l_type = F_RDLCK;
+  if( fcntl(fd, F_GETLK, &lockInfo)!=-1 ) {
+    return LOCKING_STYLE_POSIX;
+  }
+  
+  /* Testing for flock() can give false positives.  So if if the above 
+  ** test fails, then we fall back to using dot-file style locking (or
+  ** named-semaphore locking on vxworks).
+  */
+  return (OS_VXWORKS ? LOCKING_STYLE_NAMEDSEM : LOCKING_STYLE_DOTFILE);
+}
+
+
+#ifdef SQLITE_TEST
+/* simulate multiple hosts by creating unique hostid file paths */
+int sqlite3_hostid_num = 0;
+#endif
+
+/*
+** The proxyLockingContext has the path and file structures for the remote 
+** and local proxy files in it
+*/
+typedef struct proxyLockingContext proxyLockingContext;
+struct proxyLockingContext {
+  unixFile *conchFile;
+  char *conchFilePath;
+  unixFile *lockProxy;
+  char *lockProxyPath;
+  char *dbPath;
+  int conchHeld;
+  void *oldLockingContext; /* preserve the original locking context for close */
+  sqlite3_io_methods const *pOldMethod; /* ditto pMethod */
+};
+
+
+static int proxyCheckReservedLock(sqlite3_file *id, int *pResOut) {
+  unixFile *pFile = (unixFile*)id;
+  int rc = takeConch(pFile);
+  if( rc==SQLITE_OK ){
+    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+    unixFile *proxy = pCtx->lockProxy;
+    return proxy->pMethod->xCheckReservedLock((sqlite3_file*)proxy, pResOut);
+  }
+  return rc;
+}
+
+static int proxyLock(sqlite3_file *id, int locktype) {
+  unixFile *pFile = (unixFile*)id;
+  int rc = takeConch(pFile);
+  if( rc==SQLITE_OK ){
+    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+    unixFile *proxy = pCtx->lockProxy;
+    rc = proxy->pMethod->xLock((sqlite3_file*)proxy, locktype);
+    pFile->locktype = proxy->locktype;
+  }
+  return rc;
+}
+
+static int proxyUnlock(sqlite3_file *id, int locktype) {
+  unixFile *pFile = (unixFile*)id;
+  int rc = takeConch(pFile);
+  if( rc==SQLITE_OK ){
+    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+    unixFile *proxy = pCtx->lockProxy;
+    rc = proxy->pMethod->xUnlock((sqlite3_file*)proxy, locktype);
+    pFile->locktype = proxy->locktype;
+  }
+  return rc;
+}
+
+/*
+ ** Close a file.
+ */
+static int proxyClose(sqlite3_file *id) {
+  if( id ){
+    unixFile *pFile = (unixFile*)id;
+    proxyLockingContext *pCtx = (proxyLockingContext *)pFile->lockingContext;
+    unixFile *lockProxy = pCtx->lockProxy;
+    unixFile *conchFile = pCtx->conchFile;
+    int rc = SQLITE_OK;
+    
+    if( lockProxy ){
+      rc = lockProxy->pMethod->xUnlock((sqlite3_file*)lockProxy, NO_LOCK);
+      if( rc ) return rc;
+      rc = lockProxy->pMethod->xClose((sqlite3_file*)lockProxy);
+      if( rc ) return rc;
+      sqlite3_free(lockProxy);
+    }
+    if( conchFile ){
+      if( pCtx->conchHeld ){
+        rc = releaseConch(pFile);
+        if( rc ) return rc;
+      }
+      rc = conchFile->pMethod->xClose((sqlite3_file*)conchFile);
+      if( rc ) return rc;
+      sqlite3_free(conchFile);
+    }
+    sqlite3_free(pCtx->lockProxyPath);
+    sqlite3_free(pCtx->conchFilePath);
+    sqlite3_free(pCtx->dbPath);
+    /* restore the original locking context and pMethod then close it */
+    pFile->lockingContext = pCtx->oldLockingContext;
+    pFile->pMethod = pCtx->pOldMethod;
+    sqlite3_free(pCtx);
+    return pFile->pMethod->xClose(id);
+  }
+  return SQLITE_OK;
+}
+
+/* HOSTIDLEN and CONCHLEN both include space for the string 
+** terminating nul 
+*/
+#define HOSTIDLEN         128
+#define CONCHLEN          (MAXPATHLEN+HOSTIDLEN+1)
+#ifndef HOSTIDPATH
+# define HOSTIDPATH       "/Library/Caches/.com.apple.sqliteConchHostId"
+#endif
+
+/* basically a copy of unixRandomness with different
+** test behavior built in */
+static int genHostID(char *pHostID){
   int pid, fd, i, len;
   unsigned char *key = (unsigned char *)pHostID;
   
@@ -2977,7 +2835,8 @@ static int getHostID(char *pHostID, int *pError){
 		
     if( err!=EEXIST ){
 #ifdef SQLITE_PROXY_DEBUG /* set the sqlite error message instead */
-      fprintf(stderr, "sqlite error creating host ID file %s: %s\n", path, strerror(err));
+      fprintf(stderr, "sqlite error creating host ID file %s: %s\n",
+              path, strerror(err));
 #endif
       return SQLITE_PERM;
     }
@@ -2986,18 +2845,19 @@ static int getHostID(char *pHostID, int *pError){
     if( fd<0 ){
       int err = errno;
 #ifdef SQLITE_PROXY_DEBUG /* set the sqlite error message instead */
-      fprintf(stderr, "sqlite error opening host ID file %s: %s\n", path, strerror(err));
+      fprintf(stderr, "sqlite error opening host ID file %s: %s\n",
+              path, strerror(err));
 #endif
       return SQLITE_PERM;
     }
     len = pread(fd, pHostID, HOSTIDLEN, 0);
-		if( len<0 ){
-			*pError = errno;
-			rc = SQLITE_IOERR_READ;
-		}else if( len<HOSTIDLEN ){
-			*pError = 0;
-			rc = SQLITE_IOERR_SHORT_READ;
-		}
+    if( len<0 ){
+      *pError = errno;
+      rc = SQLITE_IOERR_READ;
+    }else if( len<HOSTIDLEN ){
+      *pError = 0;
+      rc = SQLITE_IOERR_SHORT_READ;
+    }
     close(fd); /* silently leak the fd if it fails */
     OSTRACE3("GETHOSTID  read %s pid=%d\n", pHostID, getpid());
     return rc;
@@ -3006,13 +2866,13 @@ static int getHostID(char *pHostID, int *pError){
     /* we're creating the host ID file (use a random string of bytes) */
     genHostID(pHostID);
     len = pwrite(fd, pHostID, HOSTIDLEN, 0);
-		if( len<0 ){
-			*pError = errno;
-			rc = SQLITE_IOERR_WRITE;
-		}else if( len<HOSTIDLEN ){
-			*pError = 0;
-			rc = SQLITE_IOERR_WRITE;
-		}
+    if( len<0 ){
+      *pError = errno;
+      rc = SQLITE_IOERR_WRITE;
+    }else if( len<HOSTIDLEN ){
+      *pError = 0;
+      rc = SQLITE_IOERR_WRITE;
+    }
     close(fd); /* silently leak the fd if it fails */
     OSTRACE3("GETHOSTID  wrote %s pid=%d\n", pHostID, getpid());
     return rc;
@@ -3063,9 +2923,9 @@ static int takeConch(unixFile *pFile){
     if( readRc!=SQLITE_IOERR_SHORT_READ ){
       int match = 0;
       if( readRc!=SQLITE_OK ){
-				if( rc&SQLITE_IOERR==SQLITE_IOERR ){
-					pFile->lastErrno = conchFile->lastErrno;
-				}
+        if( rc&SQLITE_IOERR==SQLITE_IOERR ){
+          pFile->lastErrno = conchFile->lastErrno;
+        }
         rc = readRc;
         goto end_takeconch;
       }
@@ -3114,13 +2974,15 @@ static int takeConch(unixFile *pFile){
 #else
           if( fchmod(conchFile->h, buf.st_mode)!=0 ){
             int code = errno;
-            fprintf(stderr, "fchmod %o FAILED with %d %s\n",buf.st_mode, code, strerror(code));
+            fprintf(stderr, "fchmod %o FAILED with %d %s\n",
+                             buf.st_mode, code, strerror(code));
           } else {
             fprintf(stderr, "fchmod %o SUCCEDED\n",buf.st_mode);
           }
         }else{
           int code = errno;
-          fprintf(stderr, "STAT FAILED[%d] with %d %s\n", err, code, strerror(code));
+          fprintf(stderr, "STAT FAILED[%d] with %d %s\n", 
+                          err, code, strerror(code));
 #endif
         }
       }
@@ -3129,7 +2991,7 @@ static int takeConch(unixFile *pFile){
   
 end_takeconch:
     OSTRACE2("TRANSPROXY: CLOSE  %d\n", pFile->h);
-    if( rc==SQLITE_OK && pFile->oflags ){
+    if( rc==SQLITE_OK && pFile->openFlags ){
       if( pFile->h>=0 ){
 #ifdef STRICT_CLOSE_ERROR
         if( close(pFile->h) ){
@@ -3141,7 +3003,8 @@ end_takeconch:
 #endif
       }
       pFile->h = -1;
-      int fd = open(pCtx->dbPath, pFile->oflags, SQLITE_DEFAULT_FILE_PERMISSIONS);
+      int fd = open(pCtx->dbPath, pFile->openFlags,
+                    SQLITE_DEFAULT_FILE_PERMISSIONS);
       OSTRACE2("TRANSPROXY: OPEN  %d\n", fd);
       if( fd>=0 ){
         pFile->h = fd;
@@ -3159,14 +3022,16 @@ end_takeconch:
 
       if( tLockPath ){
         pCtx->lockProxyPath = sqlite3DbStrDup(0, tLockPath);
-        if( pCtx->lockProxy->pMethod == ioMethodForLockingStyle(LOCKING_STYLE_AFP) ){
-          ((afpLockingContext *)pCtx->lockProxy->lockingContext)->dbPath = pCtx->lockProxyPath;
+        if( pCtx->lockProxy->pMethod ==
+                    ioMethodForLockingStyle(LOCKING_STYLE_AFP) ){
+          ((afpLockingContext *)pCtx->lockProxy->lockingContext)->dbPath =
+                     pCtx->lockProxyPath;
         }
       }
     } else {
       conchFile->pMethod->xUnlock((sqlite3_file*)conchFile, NO_LOCK);
     }
-    OSTRACE3("TAKECONCH  %d %s\n", conchFile->h, rc==SQLITE_OK ? "ok" : "failed");
+    OSTRACE3("TAKECONCH  %d %s\n", conchFile->h, rc==SQLITE_OK?"ok":"failed");
     return rc;
   }
 }
@@ -3186,245 +3051,591 @@ static int releaseConch(unixFile *pFile){
   return rc;
 }
 
-static int getConchPathFromDBPath(char *dbPath, char **pConchPath){
-  int i;
-  int len = strlen(dbPath);
-  char *conchPath;
-  
-  conchPath = (char *)sqlite3_malloc(len + 8);
-  if( conchPath==0 ){
-    return SQLITE_NOMEM;
+static int getConchPathFromDBPath(char *dbPath, char **pConchPath){
+  int i;
+  int len = strlen(dbPath);
+  char *conchPath;
+  
+  conchPath = (char *)sqlite3_malloc(len + 8);
+  if( conchPath==0 ){
+    return SQLITE_NOMEM;
+  }
+  strlcpy(conchPath, dbPath, len+1);
+  
+  /* now insert a "." before the last / character */
+  for( i=(len-1); i>=0; i-- ){
+    if( conchPath[i]=='/' ){
+      i++;
+      break;
+    }
+  }
+  conchPath[i]='.';
+  while ( i<len ){
+    conchPath[i+1]=dbPath[i];
+    i++;
+  }
+  conchPath[i+1]='\0';
+  strlcat(conchPath, "-conch", len + 8);
+  *pConchPath = conchPath;
+  return SQLITE_OK;
+}
+
+
+static int getLockPath(const char *dbPath, char *lPath, size_t maxLen){
+  int len;
+  int dbLen;
+  int i;
+
+#ifdef LOCKPROXYDIR
+  len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
+#else
+# ifdef _CS_DARWIN_USER_TEMP_DIR
+  {
+    char utdir[MAXPATHLEN];
+    
+    confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen);
+    len = strlcat(lPath, "sqliteplocks", maxLen);
+    if( mkdir(lPath, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
+      /* if mkdir fails, handle as lock file creation failure */
+      int err = errno;
+#  ifdef SQLITE_DEBUG
+      if( err!=EEXIST ){
+        fprintf(stderr, "getLockPath: mkdir(%s,0%o) error %d %s\n", lPath,
+                SQLITE_DEFAULT_PROXYDIR_PERMISSIONS, err, strerror(err));
+      }
+#  endif
+    }else{
+      OSTRACE3("GETLOCKPATH  mkdir %s pid=%d\n", lPath, getpid());
+    }
+    
+  }
+# else
+  len = strlcpy(lPath, "/tmp/", maxLen);
+# endif
+#endif
+
+  if( lPath[len-1]!='/' ){
+    len = strlcat(lPath, "/", maxLen);
+  }
+  
+  /* transform the db path to a unique cache name */
+  dbLen = strlen(dbPath);
+  for( i=0; i<dbLen && (i+len+7)<maxLen; i++){
+    char c = dbPath[i];
+    lPath[i+len] = (c=='/')?'_':c;
+  }
+  lPath[i+len]='\0';
+  strlcat(lPath, ":auto:", maxLen);
+  return SQLITE_OK;
+}
+
+/* Takes a fully configured proxy locking-style unix file and switches
+** the local lock file path 
+*/
+static int switchLockProxyPath(unixFile *pFile, const char *path) {
+  proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
+  char *oldPath = pCtx->lockProxyPath;
+  int taken = 0;
+  int rc = SQLITE_OK;
+
+  if( pFile->locktype!=NO_LOCK ){
+    return SQLITE_BUSY;
+  }  
+
+  /* nothing to do if the path is NULL, :auto: or matches the existing path */
+  if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
+    (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
+    return SQLITE_OK;
+  }else{
+    unixFile *lockProxy = pCtx->lockProxy;
+    pCtx->lockProxy=NULL;
+    pCtx->conchHeld = 0;
+    if( lockProxy!=NULL ){
+      rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
+      if( rc ) return rc;
+      sqlite3_free(lockProxy);
+    }
+    sqlite3_free(oldPath);
+    pCtx->lockProxyPath = sqlite3DbStrDup(0, path);
+  }
+  
+  return rc;
+}
+
+/*
+** Takes an already filled in unix file and alters it so all file locking 
+** will be performed on the local proxy lock file.  The following fields
+** are preserved in the locking context so that they can be restored and 
+** the unix structure properly cleaned up at close time:
+**  ->lockingContext
+**  ->pMethod
+*/
+static int transformUnixFileForLockProxy(unixFile *pFile, const char *path) {
+  proxyLockingContext *pCtx;
+  char dbPath[MAXPATHLEN];
+  char *lockPath=NULL;
+  int rc = SQLITE_OK;
+  
+  if( pFile->locktype!=NO_LOCK ){
+    return SQLITE_BUSY;
+  }
+  getDbPathForUnixFile(pFile, dbPath);
+  if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){
+    lockPath=NULL;
+  }else{
+    lockPath=(char *)path;
+  }
+  
+  OSTRACE4("TRANSPROXY  %d for %s pid=%d\n", pFile->h,
+           (lockPath ? lockPath : ":auto:"), getpid());
+
+  pCtx = sqlite3_malloc( sizeof(*pCtx) );
+  if( pCtx==0 ){
+    return SQLITE_NOMEM;
+  }
+  memset(pCtx, 0, sizeof(*pCtx));
+
+  rc = getConchPathFromDBPath(dbPath, &pCtx->conchFilePath);
+  if( rc==SQLITE_OK ){
+    rc = createProxyUnixFile(pCtx->conchFilePath, &pCtx->conchFile);
+  }  
+  if( rc==SQLITE_OK && lockPath ){
+    pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
+  }
+
+end_transform_file:
+  if( rc==SQLITE_OK ){
+    /* all memory is allocated, proxys are created and assigned, 
+    ** switch the locking context and pMethod then return.
+    */
+    pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
+    pCtx->oldLockingContext = pFile->lockingContext;
+    pFile->lockingContext = pCtx;
+    pCtx->pOldMethod = pFile->pMethod;
+    pFile->pMethod = ioMethodForLockingStyle(LOCKING_STYLE_PROXY);
+  }else{
+    if( pCtx->conchFile ){ 
+      rc = pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
+      if( rc ) return rc;
+      sqlite3_free(pCtx->conchFile);
+    }
+    sqlite3_free(pCtx->conchFilePath); 
+    sqlite3_free(pCtx);
+  }
+  OSTRACE3("TRANSPROXY  %d %s\n", pFile->h,
+           (rc==SQLITE_OK ? "ok" : "failed"));
+  return rc;
+} 
+
+static int createProxyUnixFile(const char *path, unixFile **ppFile) {
+  int fd;
+  int dirfd = -1;
+  unixFile *pNew;
+  int rc = SQLITE_OK;
+
+  fd = open(path, O_RDWR | O_CREAT, SQLITE_DEFAULT_FILE_PERMISSIONS);
+  if( fd<0 ){
+    return SQLITE_CANTOPEN;
+  }
+  
+  pNew = (unixFile *)sqlite3_malloc(sizeof(unixFile));
+  if( pNew==NULL ){
+    rc = SQLITE_NOMEM;
+    goto end_create_proxy;
+  }
+  memset(pNew, 0, sizeof(unixFile));
+  
+  rc = fillInUnixFile(NULL, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0);
+  if( rc==SQLITE_OK ){
+    *ppFile = pNew;
+    return SQLITE_OK;
+  }
+end_create_proxy:    
+  close(fd); /* silently leak fd if error, we're already in error */
+  sqlite3_free(pNew);
+  return rc;
+}
+
+
+#endif /* defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE */
+/*
+** The proxy locking style is intended for use with AFP filesystems.
+** And since AFP is only supported on MacOSX, the proxy locking is also
+** restricted to MacOSX.
+** 
+**
+******************* End of the proxy lock implementation **********************
+******************************************************************************/
+
+
+/******************************************************************************
+**************** Non-locking sqlite3_file methods *****************************
+**
+** The next division contains implementations for all methods of the 
+** sqlite3_file object other than the locking methods.  The locking
+** methods were defined in divisions above (one locking method per
+** division).  Those methods that are common to all locking modes
+** are gather together into this division.
+*/
+
+/*
+** Seek to the offset passed as the second argument, then read cnt 
+** bytes into pBuf. Return the number of bytes actually read.
+**
+** NB:  If you define USE_PREAD or USE_PREAD64, then it might also
+** be necessary to define _XOPEN_SOURCE to be 500.  This varies from
+** one system to another.  Since SQLite does not define USE_PREAD
+** any any form by default, we will not attempt to define _XOPEN_SOURCE.
+** See tickets #2741 and #2681.
+**
+** To avoid stomping the errno value on a failed read the lastErrno value
+** is set before returning.
+*/
+static int seekAndRead(unixFile *id, sqlite3_int64 offset, void *pBuf, int cnt){
+  int got;
+  i64 newOffset;
+  TIMER_START;
+#if defined(USE_PREAD)
+  got = pread(id->h, pBuf, cnt, offset);
+  SimulateIOError( got = -1 );
+#elif defined(USE_PREAD64)
+  got = pread64(id->h, pBuf, cnt, offset);
+  SimulateIOError( got = -1 );
+#else
+  newOffset = lseek(id->h, offset, SEEK_SET);
+  SimulateIOError( newOffset-- );
+  if( newOffset!=offset ){
+    if( newOffset == -1 ){
+      ((unixFile*)id)->lastErrno = errno;
+    }else{
+      ((unixFile*)id)->lastErrno = 0;			
+    }
+    return -1;
+  }
+  got = read(id->h, pBuf, cnt);
+#endif
+  TIMER_END;
+  if( got<0 ){
+    ((unixFile*)id)->lastErrno = errno;
+  }
+  OSTRACE5("READ    %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
+  return got;
+}
+
+/*
+** Read data from a file into a buffer.  Return SQLITE_OK if all
+** bytes were read successfully and SQLITE_IOERR if anything goes
+** wrong.
+*/
+static int unixRead(
+  sqlite3_file *id, 
+  void *pBuf, 
+  int amt,
+  sqlite3_int64 offset
+){
+  int got;
+  assert( id );
+  got = seekAndRead((unixFile*)id, offset, pBuf, amt);
+  if( got==amt ){
+    return SQLITE_OK;
+  }else if( got<0 ){
+    /* lastErrno set by seekAndRead */
+    return SQLITE_IOERR_READ;
+  }else{
+    ((unixFile*)id)->lastErrno = 0; /* not a system error */
+    /* Unread parts of the buffer must be zero-filled */
+    memset(&((char*)pBuf)[got], 0, amt-got);
+    return SQLITE_IOERR_SHORT_READ;
+  }
+}
+
+/*
+** Seek to the offset in id->offset then read cnt bytes into pBuf.
+** Return the number of bytes actually read.  Update the offset.
+**
+** To avoid stomping the errno value on a failed write the lastErrno value
+** is set before returning.
+*/
+static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
+  int got;
+  i64 newOffset;
+  TIMER_START;
+#if defined(USE_PREAD)
+  got = pwrite(id->h, pBuf, cnt, offset);
+#elif defined(USE_PREAD64)
+  got = pwrite64(id->h, pBuf, cnt, offset);
+#else
+  newOffset = lseek(id->h, offset, SEEK_SET);
+  if( newOffset!=offset ){
+    if( newOffset == -1 ){
+      ((unixFile*)id)->lastErrno = errno;
+    }else{
+      ((unixFile*)id)->lastErrno = 0;			
+    }
+    return -1;
+  }
+  got = write(id->h, pBuf, cnt);
+#endif
+  TIMER_END;
+  if( got<0 ){
+    ((unixFile*)id)->lastErrno = errno;
+  }
+
+  OSTRACE5("WRITE   %-3d %5d %7lld %llu\n", id->h, got, offset, TIMER_ELAPSED);
+  return got;
+}
+
+
+/*
+** Write data from a buffer into a file.  Return SQLITE_OK on success
+** or some other error code on failure.
+*/
+static int unixWrite(
+  sqlite3_file *id, 
+  const void *pBuf, 
+  int amt,
+  sqlite3_int64 offset 
+){
+  int wrote = 0;
+  assert( id );
+  assert( amt>0 );
+  while( amt>0 && (wrote = seekAndWrite((unixFile*)id, offset, pBuf, amt))>0 ){
+    amt -= wrote;
+    offset += wrote;
+    pBuf = &((char*)pBuf)[wrote];
   }
-  strlcpy(conchPath, dbPath, len+1);
-  
-  /* now insert a "." before the last / character */
-  for( i=(len-1); i>=0; i-- ){
-    if( conchPath[i]=='/' ){
-      i++;
-      break;
+  SimulateIOError(( wrote=(-1), amt=1 ));
+  SimulateDiskfullError(( wrote=0, amt=1 ));
+  if( amt>0 ){
+    if( wrote<0 ){
+      /* lastErrno set by seekAndWrite */
+      return SQLITE_IOERR_WRITE;
+    }else{
+      ((unixFile*)id)->lastErrno = 0; /* not a system error */
+      return SQLITE_FULL;
     }
   }
-  conchPath[i]='.';
-  while ( i<len ){
-    conchPath[i+1]=dbPath[i];
-    i++;
-  }
-  conchPath[i+1]='\0';
-  strlcat(conchPath, "-conch", len + 8);
-  *pConchPath = conchPath;
   return SQLITE_OK;
 }
 
-static int getLockPath(const char *dbPath, char *lPath, size_t maxLen){
-  int len;
-  int dbLen;
-  int i;
+#ifdef SQLITE_TEST
+/*
+** Count the number of fullsyncs and normal syncs.  This is used to test
+** that syncs and fullsyncs are occuring at the right times.
+*/
+int sqlite3_sync_count = 0;
+int sqlite3_fullsync_count = 0;
+#endif
 
-#ifdef LOCKPROXYDIR
-  len = strlcpy(lPath, LOCKPROXYDIR, maxLen);
+/*
+** Use the fdatasync() API only if the HAVE_FDATASYNC macro is defined.
+** Otherwise use fsync() in its place.
+*/
+#ifndef HAVE_FDATASYNC
+# define fdatasync fsync
+#endif
+
+/*
+** Define HAVE_FULLFSYNC to 0 or 1 depending on whether or not
+** the F_FULLFSYNC macro is defined.  F_FULLFSYNC is currently
+** only available on Mac OS X.  But that could change.
+*/
+#ifdef F_FULLFSYNC
+# define HAVE_FULLFSYNC 1
 #else
-# ifdef _CS_DARWIN_USER_TEMP_DIR
-  {
-    char utdir[MAXPATHLEN];
-    
-    confstr(_CS_DARWIN_USER_TEMP_DIR, lPath, maxLen);
-    len = strlcat(lPath, "sqliteplocks", maxLen);
-    if( mkdir(lPath, SQLITE_DEFAULT_PROXYDIR_PERMISSIONS) ){
-      /* if mkdir fails, handle as lock file creation failure */
-      int err = errno;
-#  ifdef SQLITE_DEBUG
-      if( err!=EEXIST ){
-        fprintf(stderr, "getLockPath: mkdir(%s,0%o) error %d %s\n", lPath,
-                SQLITE_DEFAULT_PROXYDIR_PERMISSIONS, err, strerror(err));
-      }
-#  endif
-    }else{
-      OSTRACE3("GETLOCKPATH  mkdir %s pid=%d\n", lPath, getpid());
-    }
-    
-  }
-# else
-  len = strlcpy(lPath, "/tmp/", maxLen);
-# endif
+# define HAVE_FULLFSYNC 0
 #endif
 
-  if( lPath[len-1]!='/' ){
-    len = strlcat(lPath, "/", maxLen);
-  }
-  
-  /* transform the db path to a unique cache name */
-  dbLen = strlen(dbPath);
-  for( i=0; i<dbLen && (i+len+7)<maxLen; i++){
-    char c = dbPath[i];
-    lPath[i+len] = (c=='/')?'_':c;
-  }
-  lPath[i+len]='\0';
-  strlcat(lPath, ":auto:", maxLen);
-  return SQLITE_OK;
-}
 
-/* Takes a fully configured proxy locking-style unix file and switches
-** the local lock file path 
+/*
+** The fsync() system call does not work as advertised on many
+** unix systems.  The following procedure is an attempt to make
+** it work better.
+**
+** The SQLITE_NO_SYNC macro disables all fsync()s.  This is useful
+** for testing when we want to run through the test suite quickly.
+** You are strongly advised *not* to deploy with SQLITE_NO_SYNC
+** enabled, however, since with SQLITE_NO_SYNC enabled, an OS crash
+** or power failure will likely corrupt the database file.
 */
-static int switchLockProxyPath(unixFile *pFile, const char *path) {
-  proxyLockingContext *pCtx = (proxyLockingContext*)pFile->lockingContext;
-  char *oldPath = pCtx->lockProxyPath;
-  int taken = 0;
-  int rc = SQLITE_OK;
+static int full_fsync(int fd, int fullSync, int dataOnly){
+  int rc;
 
-  if( pFile->locktype!=NO_LOCK ){
-    return SQLITE_BUSY;
-  }  
+  /* The following "ifdef/elif/else/" block has the same structure as
+  ** the one below. It is replicated here solely to avoid cluttering 
+  ** up the real code with the UNUSED_PARAMETER() macros.
+  */
+#ifdef SQLITE_NO_SYNC
+  UNUSED_PARAMETER(fd);
+  UNUSED_PARAMETER(fullSync);
+  UNUSED_PARAMETER(dataOnly);
+#elif HAVE_FULLFSYNC
+  UNUSED_PARAMETER(dataOnly);
+#else
+  UNUSED_PARAMETER(fullSync);
+#endif
 
-  /* nothing to do if the path is NULL, :auto: or matches the existing path */
-  if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ||
-    (oldPath && !strncmp(oldPath, path, MAXPATHLEN)) ){
-    return SQLITE_OK;
+  /* Record the number of times that we do a normal fsync() and 
+  ** FULLSYNC.  This is used during testing to verify that this procedure
+  ** gets called with the correct arguments.
+  */
+#ifdef SQLITE_TEST
+  if( fullSync ) sqlite3_fullsync_count++;
+  sqlite3_sync_count++;
+#endif
+
+  /* If we compiled with the SQLITE_NO_SYNC flag, then syncing is a
+  ** no-op
+  */
+#ifdef SQLITE_NO_SYNC
+  rc = SQLITE_OK;
+#elif HAVE_FULLFSYNC
+  if( fullSync ){
+    rc = fcntl(fd, F_FULLFSYNC, 0);
   }else{
-    unixFile *lockProxy = pCtx->lockProxy;
-    pCtx->lockProxy=NULL;
-    pCtx->conchHeld = 0;
-    if( lockProxy!=NULL ){
-      rc=lockProxy->pMethod->xClose((sqlite3_file *)lockProxy);
-      if( rc ) return rc;
-      sqlite3_free(lockProxy);
+    rc = 1;
+  }
+  /* If the FULLFSYNC failed, fall back to attempting an fsync().
+   * It shouldn't be possible for fullfsync to fail on the local 
+   * file system (on OSX), so failure indicates that FULLFSYNC
+   * isn't supported for this file system. So, attempt an fsync 
+   * and (for now) ignore the overhead of a superfluous fcntl call.  
+   * It'd be better to detect fullfsync support once and avoid 
+   * the fcntl call every time sync is called.
+   */
+  if( rc ) rc = fsync(fd);
+
+#else 
+  if( dataOnly ){
+    rc = fdatasync(fd);
+    if( OS_VXWORKS && rc==-1 && errno==ENOTSUP ){
+      rc = fsync(fd);
     }
-    sqlite3_free(oldPath);
-    pCtx->lockProxyPath = sqlite3DbStrDup(0, path);
+  }else{
+    rc = fsync(fd);
+  }
+#endif /* ifdef SQLITE_NO_SYNC elif HAVE_FULLFSYNC */
+
+  if( OS_VXWORKS && rc!= -1 ){
+    rc = 0;
   }
-  
   return rc;
 }
 
 /*
-** Takes an already filled in unix file and alters it so all file locking 
-** will be performed on the local proxy lock file.  The following fields
-** are preserved in the locking context so that they can be restored and 
-** the unix structure properly cleaned up at close time:
-**  ->lockingContext
-**  ->pMethod
+** Make sure all writes to a particular file are committed to disk.
+**
+** If dataOnly==0 then both the file itself and its metadata (file
+** size, access time, etc) are synced.  If dataOnly!=0 then only the
+** file data is synced.
+**
+** Under Unix, also make sure that the directory entry for the file
+** has been created by fsync-ing the directory that contains the file.
+** If we do not do this and we encounter a power failure, the directory
+** entry for the journal might not exist after we reboot.  The next
+** SQLite to access the file will not know that the journal exists (because
+** the directory entry for the journal was never created) and the transaction
+** will not roll back - possibly leading to database corruption.
 */
-static int transformUnixFileForLockProxy(unixFile *pFile, const char *path) {
-  proxyLockingContext *pCtx;
-  char dbPath[MAXPATHLEN];
-  char *lockPath=NULL;
-  int rc = SQLITE_OK;
-  
-  if( pFile->locktype!=NO_LOCK ){
-    return SQLITE_BUSY;
-  }
-  getDbPathForUnixFile(pFile, dbPath);
-  if( !path || path[0]=='\0' || !strcmp(path, ":auto:") ){
-    lockPath=NULL;
-  }else{
-    lockPath=(char *)path;
-  }
-  
-  OSTRACE4("TRANSPROXY  %d for %s pid=%d\n", pFile->h,
-           (lockPath ? lockPath : ":auto:"), getpid());
+static int unixSync(sqlite3_file *id, int flags){
+  int rc;
+  unixFile *pFile = (unixFile*)id;
+
+  int isDataOnly = (flags&SQLITE_SYNC_DATAONLY);
+  int isFullsync = (flags&0x0F)==SQLITE_SYNC_FULL;
+
+  /* Check that one of SQLITE_SYNC_NORMAL or FULL was passed */
+  assert((flags&0x0F)==SQLITE_SYNC_NORMAL
+      || (flags&0x0F)==SQLITE_SYNC_FULL
+  );
 
-  pCtx = sqlite3_malloc( sizeof(*pCtx) );
-  if( pCtx==0 ){
-    return SQLITE_NOMEM;
-  }
-  memset(pCtx, 0, sizeof(*pCtx));
+  /* Unix cannot, but some systems may return SQLITE_FULL from here. This
+  ** line is to test that doing so does not cause any problems.
+  */
+  SimulateDiskfullError( return SQLITE_FULL );
 
-  rc = getConchPathFromDBPath(dbPath, &pCtx->conchFilePath);
-  if( rc==SQLITE_OK ){
-    rc = createProxyUnixFile(pCtx->conchFilePath, &pCtx->conchFile);
-  }  
-  if( rc==SQLITE_OK && lockPath ){
-    pCtx->lockProxyPath = sqlite3DbStrDup(0, lockPath);
+  assert( pFile );
+  OSTRACE2("SYNC    %-3d\n", pFile->h);
+  rc = full_fsync(pFile->h, isFullsync, isDataOnly);
+  SimulateIOError( rc=1 );
+  if( rc ){
+    pFile->lastErrno = errno;
+    return SQLITE_IOERR_FSYNC;
   }
-
-end_transform_file:
-  if( rc==SQLITE_OK ){
-    /* all memory is allocated, proxys are created and assigned, 
-    ** switch the locking context and pMethod then return.
+  if( pFile->dirfd>=0 ){
+    int err;
+    OSTRACE4("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
+            HAVE_FULLFSYNC, isFullsync);
+#ifndef SQLITE_DISABLE_DIRSYNC
+    /* The directory sync is only attempted if full_fsync is
+    ** turned off or unavailable.  If a full_fsync occurred above,
+    ** then the directory sync is superfluous.
     */
-    pCtx->dbPath = sqlite3DbStrDup(0, dbPath);
-    pCtx->oldLockingContext = pFile->lockingContext;
-    pFile->lockingContext = pCtx;
-    pCtx->pOldMethod = pFile->pMethod;
-    pFile->pMethod = ioMethodForLockingStyle(LOCKING_STYLE_PROXY);
-  }else{
-    if( pCtx->conchFile ){ 
-      rc = pCtx->conchFile->pMethod->xClose((sqlite3_file *)pCtx->conchFile);
-      if( rc ) return rc;
-      sqlite3_free(pCtx->conchFile);
+    if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
+       /*
+       ** We have received multiple reports of fsync() returning
+       ** errors when applied to directories on certain file systems.
+       ** A failed directory sync is not a big deal.  So it seems
+       ** better to ignore the error.  Ticket #1657
+       */
+       /* pFile->lastErrno = errno; */
+       /* return SQLITE_IOERR; */
+    }
+#endif
+    err = close(pFile->dirfd); /* Only need to sync once, so close the */
+    if( err==0 ){              /* directory when we are done */
+      pFile->dirfd = -1;
+    }else{
+      pFile->lastErrno = errno;
+      rc = SQLITE_IOERR_DIR_CLOSE;
     }
-    sqlite3_free(pCtx->conchFilePath); 
-    sqlite3_free(pCtx);
   }
-  OSTRACE3("TRANSPROXY  %d %s\n", pFile->h,
-           (rc==SQLITE_OK ? "ok" : "failed"));
   return rc;
-} 
-
-static int createProxyUnixFile(const char *path, unixFile **ppFile) {
-  int fd;
-  int dirfd = -1;
-  unixFile *pNew;
-  int rc = SQLITE_OK;
+}
 
-  fd = open(path, O_RDWR | O_CREAT, SQLITE_DEFAULT_FILE_PERMISSIONS);
-  if( fd<0 ){
-    return SQLITE_CANTOPEN;
-  }
-  
-  pNew = (unixFile *)sqlite3_malloc(sizeof(unixFile));
-  if( pNew==NULL ){
-    rc = SQLITE_NOMEM;
-    goto end_create_proxy;
-  }
-  memset(pNew, 0, sizeof(unixFile));
-  
-  rc = fillInUnixFile(NULL, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0);
-  if( rc==SQLITE_OK ){
-    *ppFile = pNew;
+/*
+** Truncate an open file to a specified size
+*/
+static int unixTruncate(sqlite3_file *id, i64 nByte){
+  int rc;
+  assert( id );
+  SimulateIOError( return SQLITE_IOERR_TRUNCATE );
+  rc = ftruncate(((unixFile*)id)->h, (off_t)nByte);
+  if( rc ){
+    ((unixFile*)id)->lastErrno = errno;
+    return SQLITE_IOERR_TRUNCATE;
+  }else{
     return SQLITE_OK;
   }
-end_create_proxy:    
-  close(fd); /* silently leak fd if error, we're already in error */
-  sqlite3_free(pNew);
-  return rc;
 }
 
-
-#endif /* SQLITE_ENABLE_LOCKING_STYLE */
-
 /*
-** The nolockLockingContext is void
+** Determine the current size of a file in bytes
 */
-typedef void nolockLockingContext;
+static int unixFileSize(sqlite3_file *id, i64 *pSize){
+  int rc;
+  struct stat buf;
+  assert( id );
+  rc = fstat(((unixFile*)id)->h, &buf);
+  SimulateIOError( rc=1 );
+  if( rc!=0 ){
+    ((unixFile*)id)->lastErrno = errno;
+    return SQLITE_IOERR_FSTAT;
+  }
+  *pSize = buf.st_size;
 
-static int nolockCheckReservedLock(sqlite3_file *NotUsed, int *pResOut){
-  UNUSED_PARAMETER(NotUsed);
-  *pResOut = 0;
-  return SQLITE_OK;
-}
+  /* When opening a zero-size database, the findLockInfo() procedure
+  ** writes a single byte into that file in order to work around a bug
+  ** in the OS-X msdos filesystem.  In order to avoid problems with upper
+  ** layers, we need to report this file size as zero even though it is
+  ** really 1.   Ticket #3260.
+  */
+  if( *pSize==1 ) *pSize = 0;
 
-static int nolockLock(sqlite3_file *NotUsed, int NotUsed2){
-  UNUSED_PARAMETER2(NotUsed, NotUsed2);
-  return SQLITE_OK;
-}
 
-static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){
-  UNUSED_PARAMETER2(NotUsed, NotUsed2);
   return SQLITE_OK;
 }
 
-/*
-** Close a file.
-*/
-static int nolockClose(sqlite3_file *id) {
-  int rc;
-  if( OS_VXWORKS ) unixEnterMutex();
-  rc = closeUnixFile(id);
-  if( OS_VXWORKS ) unixLeaveMutex();
-  return rc;
-}
-
 
 /*
 ** Information and control of an open file handle.
@@ -3519,43 +3730,135 @@ static int unixDeviceCharacteristics(sqlite3_file *NotUsed){
   return 0;
 }
 
-#define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) {    \
-1,                          /* iVersion */                           \
-xClose,                     /* xClose */                             \
-unixRead,                   /* xRead */                              \
-unixWrite,                  /* xWrite */                             \
-unixTruncate,               /* xTruncate */                          \
-unixSync,                   /* xSync */                              \
-unixFileSize,               /* xFileSize */                          \
-xLock,                      /* xLock */                              \
-xUnlock,                    /* xUnlock */                            \
-xCheckReservedLock,         /* xCheckReservedLock */                 \
-unixFileControl,            /* xFileControl */                       \
-unixSectorSize,             /* xSectorSize */                        \
-unixDeviceCharacteristics   /* xDeviceCapabilities */                \
+/*
+** Here ends the implementation of all sqlite3_file methods.
+**
+********************** End sqlite3_file Methods *******************************
+******************************************************************************/
+
+/*
+** The following constant array describes all of the methods for the
+** sqlite3_file object for each of the various locking modes.
+**
+** The order in which the methods are defined is important and must
+** agree with the numeric values of the method identifier constants.
+** For example, LOCKING_STYLE_UNIX has a numeric value of zero, so
+** it must be the 0-th entry in the array.
+*/
+#define IOMETHODS(xClose, xLock, xUnlock, xCheckReservedLock) {  \
+   1,                          /* iVersion */                    \
+   xClose,                     /* xClose */                      \
+   unixRead,                   /* xRead */                       \
+   unixWrite,                  /* xWrite */                      \
+   unixTruncate,               /* xTruncate */                   \
+   unixSync,                   /* xSync */                       \
+   unixFileSize,               /* xFileSize */                   \
+   xLock,                      /* xLock */                       \
+   xUnlock,                    /* xUnlock */                     \
+   xCheckReservedLock,         /* xCheckReservedLock */          \
+   unixFileControl,            /* xFileControl */                \
+   unixSectorSize,             /* xSectorSize */                 \
+   unixDeviceCharacteristics   /* xDeviceCapabilities */         \
 }
 static sqlite3_io_methods aIoMethod[] = {
-IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock) 
-,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
-#if SQLITE_ENABLE_LOCKING_STYLE
-,IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock)
+  IOMETHODS(unixClose, unixLock, unixUnlock, unixCheckReservedLock),
+  IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock),
+  IOMETHODS(dotlockClose, dotlockLock, dotlockUnlock,dotlockCheckReservedLock),
 #if OS_VXWORKS
-  ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
-  ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
-  ,IOMETHODS(namedsemClose, namedsemLock, namedsemUnlock, namedsemCheckReservedLock)
-  ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
-#else
-  ,IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock)
-  ,IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock)
-  ,IOMETHODS(nolockClose, nolockLock, nolockUnlock, nolockCheckReservedLock)
-  ,IOMETHODS(proxyClose, proxyLock, proxyUnlock, proxyCheckReservedLock)
+  IOMETHODS(semClose, semLock, semUnlock, semCheckReservedLock),
+#elif SQLITE_ENABLE_LOCKING_STYLE
+  IOMETHODS(flockClose, flockLock, flockUnlock, flockCheckReservedLock),
 #endif
+#if defined(__DARWIN__) && SQLITE_ENABLE_LOCKING_STYLE
+  IOMETHODS(afpClose, afpLock, afpUnlock, afpCheckReservedLock),
+  IOMETHODS(proxyClose, proxyLock, proxyUnlock, proxyCheckReservedLock),
 #endif
-/* The order of the IOMETHODS macros above is important.  It must be the
- ** same order as the LOCKING_STYLE numbers
- */
+  /* The order of the IOMETHODS macros above is important.  It must be the
+  ** same order as the LOCKING_STYLE numbers
+  */
 };
 
+/****************************************************************************
+**************************** sqlite3_vfs methods ****************************
+**
+** This division contains the implementation of methods on the
+** sqlite3_vfs object.
+*/
+
+/* 
+** If SQLITE_ENABLE_LOCKING_STYLE is defined, this function Examines the 
+** f_fstypename entry in the statfs structure as returned by stat() for 
+** the file system hosting the database file and selects  the appropriate
+** locking style based on its value.  These values and assignments are 
+** based on Darwin/OSX behavior and have not been thoroughly tested on 
+** other systems.
+**
+** If SQLITE_ENABLE_LOCKING_STYLE is not defined, this function always
+** returns LOCKING_STYLE_POSIX.
+*/
+#if SQLITE_ENABLE_LOCKING_STYLE
+static int detectLockingStyle(
+  sqlite3_vfs *pVfs,
+  const char *filePath, 
+  int fd
+){
+#if OS_VXWORKS
+  if( !filePath ){
+    return LOCKING_STYLE_NONE;
+  }
+  if( pVfs->pAppData ){
+    return SQLITE_PTR_TO_INT(pVfs->pAppData);
+  }
+  if (access(filePath, 0) != -1){
+    return testLockingStyle(fd);
+  }
+#else
+  struct Mapping {
+    const char *zFilesystem;
+    int eLockingStyle;
+  } aMap[] = {
+    { "hfs",    LOCKING_STYLE_POSIX },
+    { "ufs",    LOCKING_STYLE_POSIX },
+    { "afpfs",  LOCKING_STYLE_AFP },
+#ifdef SQLITE_ENABLE_AFP_LOCKING_SMB
+    { "smbfs",  LOCKING_STYLE_AFP },
+#else
+    { "smbfs",  LOCKING_STYLE_FLOCK },
+#endif
+    { "webdav", LOCKING_STYLE_NONE },
+    { 0, 0 }
+  };
+  int i;
+  struct statfs fsInfo;
+
+  if( !filePath ){
+    return LOCKING_STYLE_NONE;
+  }
+  if( pVfs && pVfs->pAppData ){
+    return SQLITE_PTR_TO_INT(pVfs->pAppData);
+  }
+
+  if( statfs(filePath, &fsInfo) != -1 ){
+    if( fsInfo.f_flags & MNT_RDONLY ){
+      return LOCKING_STYLE_NONE;
+    }
+    for(i=0; aMap[i].zFilesystem; i++){
+      if( strcmp(fsInfo.f_fstypename, aMap[i].zFilesystem)==0 ){
+        return aMap[i].eLockingStyle;
+      }
+    }
+  }
+
+  /* Default case. Handles, amongst others, "nfs". */
+  return testLockingStyle(fd);  
+#endif /* if OS_VXWORKS */
+  return LOCKING_STYLE_POSIX;
+}
+#else
+  #define detectLockingStyle(x,y,z) LOCKING_STYLE_POSIX
+#endif /* if SQLITE_ENABLE_LOCKING_STYLE */
+
+
 /*
 ** Initialize the contents of the unixFile structure pointed to by pId.
 **
@@ -3613,19 +3916,7 @@ static int fillInUnixFile(
 #endif
   }
 
-  /* Macro to define the static contents of an sqlite3_io_methods 
-  ** structure for a unix backend file. Different locking methods
-  ** require different functions for the xClose, xLock, xUnlock and
-  ** xCheckReservedLock methods.
-  */
-  assert(LOCKING_STYLE_POSIX==1);
-  assert(LOCKING_STYLE_NONE==2);
-  assert(LOCKING_STYLE_DOTFILE==3);
-  assert(LOCKING_STYLE_FLOCK==4);
-  assert(LOCKING_STYLE_AFP==5);
-  assert(LOCKING_STYLE_NAMEDSEM==6);
-  assert(LOCKING_STYLE_PROXY==7);
-    
+   
   switch( eLockingStyle ){
 
     case LOCKING_STYLE_POSIX: {
@@ -3680,8 +3971,8 @@ static int fillInUnixFile(
 #if OS_VXWORKS
     case LOCKING_STYLE_NAMEDSEM: {
       /* Named semaphore locking uses the file path so it needs to be
-       ** included in the namedsemLockingContext
-       */
+      ** included in the semLockingContext
+      */
       unixEnterMutex();
       rc = findLockInfo(pNew, &pNew->pLock, &pNew->pOpen);
       if( (rc==SQLITE_OK) && (pNew->pOpen->pSem==NULL) ){
@@ -3728,17 +4019,20 @@ static int fillInUnixFile(
 
 #if SQLITE_ENABLE_LOCKING_STYLE
 static sqlite3_io_methods *ioMethodForLockingStyle(int style){
-  return &aIoMethod[style-1];
+  return &aIoMethod[style];
 }
 
 static int getDbPathForUnixFile(unixFile *pFile, char *dbPath){
   if( pFile->pMethod==ioMethodForLockingStyle(LOCKING_STYLE_AFP) ){
-    /* afp style keeps a reference to the db path in the filePath field of the struct */
-    strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, MAXPATHLEN);
+    /* afp style keeps a reference to the db path in the filePath field 
+    ** of the struct */
+    strlcpy(dbPath, ((afpLockingContext *)pFile->lockingContext)->dbPath, 
+            MAXPATHLEN);
     return SQLITE_OK;
   }
   if( pFile->pMethod==ioMethodForLockingStyle(LOCKING_STYLE_DOTFILE) ){
-    /* dot lock style uses the locking context to store the dot lock file path */
+    /* dot lock style uses the locking context to store the dot lock
+    ** file path */
     int len = strlen((char *)pFile->lockingContext) - strlen(DOTLOCK_SUFFIX);
     strlcpy(dbPath, (char *)pFile->lockingContext, len + 1);
     return SQLITE_OK;
@@ -3873,7 +4167,7 @@ static int unixOpen(
 ){
   int fd = 0;                    /* File descriptor returned by open() */
   int dirfd = -1;                /* Directory file descriptor */
-  int oflags = 0;                /* Flags to pass to open() */
+  int openFlags = 0;                /* Flags to pass to open() */
   int eType = flags&0xFFFFFF00;  /* Type of file to open */
   int noLock;                    /* True to omit locking primitives */
   int rc = SQLITE_OK;
@@ -3935,14 +4229,14 @@ static int unixOpen(
     zName = zTmpname;
   }
 
-  if( isReadonly )  oflags |= O_RDONLY;
-  if( isReadWrite ) oflags |= O_RDWR;
-  if( isCreate )    oflags |= O_CREAT;
-  if( isExclusive ) oflags |= (O_EXCL|O_NOFOLLOW);
-  oflags |= (O_LARGEFILE|O_BINARY);
+  if( isReadonly )  openFlags |= O_RDONLY;
+  if( isReadWrite ) openFlags |= O_RDWR;
+  if( isCreate )    openFlags |= O_CREAT;
+  if( isExclusive ) openFlags |= (O_EXCL|O_NOFOLLOW);
+  openFlags |= (O_LARGEFILE|O_BINARY);
 
-  fd = open(zName, oflags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
-  OSTRACE4("OPENX   %-3d %s 0%o\n", fd, zName, oflags);
+  fd = open(zName, openFlags, isDelete?0600:SQLITE_DEFAULT_FILE_PERMISSIONS);
+  OSTRACE4("OPENX   %-3d %s 0%o\n", fd, zName, openFlags);
   if( fd<0 && errno!=EISDIR && isReadWrite && !isExclusive ){
     /* Failed to open the file for read/write access. Try read-only. */
     flags &= ~(SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE);
@@ -3961,7 +4255,7 @@ static int unixOpen(
   }
 #if SQLITE_ENABLE_LOCKING_STYLE
   else{
-    ((unixFile*)pFile)->oflags = oflags;
+    ((unixFile*)pFile)->openFlags = openFlags;
   }
 #endif
   if( pOutFlags ){
@@ -4283,8 +4577,17 @@ static int unixGetLastError(sqlite3_vfs *NotUsed, int NotUsed2, char *NotUsed3){
   return 0;
 }
 
+/*
+************************ End of sqlite3_vfs methods ***************************
+******************************************************************************/
+
 /*
 ** Initialize the operating system interface.
+**
+** This routine registers all VFS implementations for unix-like operating
+** systems.  This routine, and the sqlite3_os_end() routine that follows,
+** should be the only routines in this file that are visible from other
+** files.
 */
 int sqlite3_os_init(void){ 
   /* Macro to define the static contents of an sqlite3_vfs structure for
@@ -4313,23 +4616,26 @@ int sqlite3_os_init(void){
     unixGetLastError      /* xGetLastError */               \
   }
 
-  static sqlite3_vfs unixVfs = UNIXVFS("unix", 0);
-#if SQLITE_ENABLE_LOCKING_STYLE
   int i;
   static sqlite3_vfs aVfs[] = {
+    UNIXVFS("unix",         LOCKING_STYLE_AUTOMATIC), 
     UNIXVFS("unix-posix",   LOCKING_STYLE_POSIX), 
-    UNIXVFS("unix-afp",     LOCKING_STYLE_AFP), 
-    UNIXVFS("unix-flock",   LOCKING_STYLE_FLOCK), 
-    UNIXVFS("unix-dotfile", LOCKING_STYLE_DOTFILE), 
     UNIXVFS("unix-none",    LOCKING_STYLE_NONE),
+    UNIXVFS("unix-dotfile", LOCKING_STYLE_DOTFILE),
+#if OS_VXWORKS
     UNIXVFS("unix-namedsem",LOCKING_STYLE_NAMEDSEM),
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE
+    UNIXVFS("unix-flock",   LOCKING_STYLE_FLOCK), 
+#endif
+#if SQLITE_ENABLE_LOCKING_STYLE && defined(__DARWIN__)
+    UNIXVFS("unix-afp",     LOCKING_STYLE_AFP), 
     UNIXVFS("unix-proxy",   LOCKING_STYLE_PROXY)
+#endif
   };
   for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
-    sqlite3_vfs_register(&aVfs[i], 0);
+    sqlite3_vfs_register(&aVfs[i], i==0);
   }
-#endif
-  sqlite3_vfs_register(&unixVfs, 1);
   return SQLITE_OK; 
 }
 
diff --git a/src/test1.c b/src/test1.c
index e555a3392c..4ebc280bf9 100644
--- a/src/test1.c
+++ b/src/test1.c
@@ -13,7 +13,7 @@
 ** is not included in the SQLite library.  It is used for automated
 ** testing of the SQLite library.
 **
-** $Id: test1.c,v 1.331 2008/11/21 23:35:03 pweilbacher Exp $
+** $Id: test1.c,v 1.332 2008/11/28 15:37:20 drh Exp $
 */
 #include "sqliteInt.h"
 #include "tcl.h"
@@ -4718,7 +4718,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
   extern int sqlite3_open_file_count;
   extern int sqlite3_sort_count;
   extern int sqlite3_current_time;
-#if SQLITE_OS_UNIX
+#if SQLITE_OS_UNIX && defined(__DARWIN__)
   extern int sqlite3_hostid_num;
 #endif
   extern int sqlite3_max_blobsize;
@@ -4938,7 +4938,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){
       (char*)&sqlite3_open_file_count, TCL_LINK_INT);
   Tcl_LinkVar(interp, "sqlite_current_time", 
       (char*)&sqlite3_current_time, TCL_LINK_INT);
-#if SQLITE_OS_UNIX
+#if SQLITE_OS_UNIX && defined(__DARWIN__)
   Tcl_LinkVar(interp, "sqlite_hostid_num", 
       (char*)&sqlite3_hostid_num, TCL_LINK_INT);
 #endif
-- 
2.47.3