]> git.ipfire.org Git - thirdparty/sqlite.git/commitdiff
Add logical (range-based) conflict detection to BEGIN CONCURRENT transactions execute... bedrock-lcd
authordan <Dan Kennedy>
Mon, 16 Feb 2026 20:20:02 +0000 (20:20 +0000)
committerdan <Dan Kennedy>
Mon, 16 Feb 2026 20:20:02 +0000 (20:20 +0000)
FossilOrigin-Name: 3e1cbc04c5d28183a39d82ef281274e2fc4ebacf935639add88267a539d852ac

16 files changed:
main.mk
manifest
manifest.tags
manifest.uuid
src/bcrecord.c [new file with mode: 0644]
src/btree.c
src/btreeInt.h
src/global.c
src/main.c
src/pager.c
src/pager.h
src/sqliteInt.h
src/wal.c
src/wal.h
test/concurrentA.test [new file with mode: 0644]
tool/mksqlite3c.tcl

diff --git a/main.mk b/main.mk
index e7091e0b0de90e399f3645fb8e59fe039ecd2c67..143cd769828fc60c549f544cb7bdbfc0f3508ec5 100644 (file)
--- a/main.mk
+++ b/main.mk
@@ -534,7 +534,7 @@ clean: clean-sanity-check
 # Object files for the SQLite library (non-amalgamation).
 #
 LIBOBJS0 = alter.o analyze.o attach.o auth.o \
-         backup.o bitvec.o btmutex.o btree.o build.o \
+         backup.o bcrecord.o bitvec.o btmutex.o btree.o build.o \
          callback.o carray.o complete.o ctime.o \
          date.o dbpage.o dbstat.o delete.o \
          expr.o fault.o fkey.o \
@@ -581,6 +581,7 @@ SRC = \
   $(TOP)/src/attach.c \
   $(TOP)/src/auth.c \
   $(TOP)/src/backup.c \
+  $(TOP)/src/bcrecord.c \
   $(TOP)/src/bitvec.c \
   $(TOP)/src/btmutex.c \
   $(TOP)/src/btree.c \
@@ -1177,6 +1178,9 @@ auth.o:   $(TOP)/src/auth.c $(DEPS_OBJ_COMMON)
 backup.o:      $(TOP)/src/backup.c $(DEPS_OBJ_COMMON)
        $(T.cc.sqlite) -c $(TOP)/src/backup.c
 
+bcrecord.o:    $(TOP)/src/bcrecord.c $(DEPS_OBJ_COMMON)
+       $(T.cc.sqlite) -c $(TOP)/src/bcrecord.c
+
 bitvec.o:      $(TOP)/src/bitvec.c $(DEPS_OBJ_COMMON)
        $(T.cc.sqlite) -c $(TOP)/src/bitvec.c
 
index 0708ded1e450772f457439b655ce66f974cba623..bd33050ab050a394e2b2e15f832b0c1754d47ca4 100644 (file)
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Merge\sall\sthe\slatest\strunk\senhancements\sinto\sthe\sbedrock\sbranch.
-D 2026-01-12T17:18:58.486
+C Add\slogical\s(range-based)\sconflict\sdetection\sto\sBEGIN\sCONCURRENT\stransactions\sexecuted\swithin\sa\ssingle\sprocess.
+D 2026-02-16T20:20:02.122
 F .fossil-settings/binary-glob 61195414528fb3ea9693577e1980230d78a1f8b0a54c78cf1b9b24d0a409ed6a x
 F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1
 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea
@@ -667,7 +667,7 @@ F ext/wasm/tests/opfs/sahpool/index.html be736567fd92d3ecb9754c145755037cbbd2bca
 F ext/wasm/tests/opfs/sahpool/sahpool-pausing.js f264925cfc82155de38cecb3d204c36e0f6991460fff0cb7c15079454679a4e2
 F ext/wasm/tests/opfs/sahpool/sahpool-worker.js bd25a43fc2ab2d1bafd8f2854ad3943ef673f7c3be03e95ecf1612ff6e8e2a61
 F magic.txt 5ade0bc977aa135e79e3faaea894d5671b26107cc91e70783aa7dc83f22f3ba0
-F main.mk 3e58a98a5ac521d949d076be7b009464609e79e5de898dc95f893388b1ae2ffa
+F main.mk 4434b06210de4c79b9875f16c21934480c61193ed405317f6948ad99184dc653
 F make.bat a136fd0b1c93e89854a86d5f4edcf0386d211e5d5ec2434480f6eea436c7420c
 F mptest/config01.test 3c6adcbc50b991866855f1977ff172eb6d901271
 F mptest/config02.test 4415dfe36c48785f751e16e32c20b077c28ae504
@@ -683,11 +683,12 @@ F src/analyze.c ebfe8c9c55e44c5ca2bb885f84b133cde7a37e99ea5e3bf1c2782fb7e34b0882
 F src/attach.c 7cf07d4fa42b9fc8662237c60c40b730326c30aa90ae5fffc0b18b2d726ebf61
 F src/auth.c 54ab9c6c5803b47c0d45b76ce27eff22a03b4b1f767c5945a3a4eb13aa4c78dc
 F src/backup.c 5c97e8023aab1ce14a42387eb3ae00ba5a0644569e3476f38661fa6f824c3523
+F src/bcrecord.c da6b9952749e0ec8a2e5a5caaeb172250e0ec889e60fc4e7ee3e204ef0bc4d9d
 F src/bitvec.c 1a048c91b2fbccbbc32dcefa757f93216c0f518ed2bc0b4db39947264479ad06
 F src/btmutex.c 30dada73a819a1ef5b7583786370dce1842e12e1ad941e4d05ac29695528daea
-F src/btree.c 0c0710584f65ffbc73e478dfb84babec1ffe7cf17682489c26c026c0f7404b6f
+F src/btree.c a6a625f11206ade0682f071a21699ebef18c2dbd313d22b3d1bdb673bb1a4ab7
 F src/btree.h dcda10d5a5d29690dc2739b8d4d416d136760ab1dbe88a2dd417855e1775e37a
-F src/btreeInt.h c3b8750a8dd8bc9a38840be463e7d23899b9da08276fa442e2bb8efd1052bc48
+F src/btreeInt.h bebab6bb22dc3026488772a2b2584d872ac5c705cf34e09a91ece6490dba6e02
 F src/build.c 323ce9b3de09f4578f7f4f725239ac15b3182ccb7ce3dcb79293730148365569
 F src/callback.c 6987b99f738d9f68fe95ff482cdc10a9bf3df2051319d3d418b7cd22e725825e
 F src/carray.c ff6081a31878fc34df8fa1052a9cbf17ddc22652544dcb3e2326886ed1053b55
@@ -700,7 +701,7 @@ F src/expr.c 252e62742f5bb01517377c93057b6040ab954034ec3dde4d6fc583565d859a9c
 F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007
 F src/fkey.c c065da737307a29e4d240ac727758dbf4102cb3218a1f651eb689b6a6fa12531
 F src/func.c 13ab81ae9e23657de777837b1a909a6913d28283615132be88144fc8c1830bbb
-F src/global.c a19e4b1ca1335f560e9560e590fc13081e21f670643367f99cb9e8f9dc7d615b
+F src/global.c dfd0d69cd7170865c1718a7cc7b88cb78732b958cb0ba0bba07d4765cc62037c
 F src/hash.c 03c8c0f4be9e8bcb6de65aa26d34a61d48a9430747084a69f9469fbb00ea52ca
 F src/hash.h 46b92795a95bfefb210f52f0c316e9d7cdbcdd7e7fcfb0d8be796d3a5767cddf
 F src/hwtime.h 21c2cf1f736e7b97502c3674d0c386db3f06870d6f10d0cf8174e2a4b8cb726e
@@ -709,7 +710,7 @@ F src/insert.c dfd311b0ac2d4f6359e62013db67799757f4d2cc56cca5c10f4888acfbbfa3fd
 F src/json.c fb031340edee159c07ad37dbe668ffe945ed86f525b0eb3822e4a67cbc498a72
 F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa
 F src/loadext.c d6559d2b39c9bde6b104b83adeafbe5db3a514aae4d3d40afc58de522a03043b
-F src/main.c ce34cbd2bd5beccdf98aa127179bc6ecea3716d86f9f6b00724aa2034fdae5c5
+F src/main.c 0f27559d78061d241a930c0a52b158b7e65e4729a29f99393fbbbbf9dbffa0b9
 F src/malloc.c 410e570b30c26cc36e3372577df50f7a96ee3eed5b2b161c6b6b48773c650c5e
 F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645
 F src/mem1.c 3bb59158c38e05f6270e761a9f435bf19827a264c13d1631c58b84bdc96d73b2
@@ -733,8 +734,8 @@ F src/os_setup.h 8efc64eda6a6c2f221387eefc2e7e45fd5a3d5c8337a7a83519ba4fbd2957ae
 F src/os_unix.c 54ab9a0a780e73c740fd01cdd13b2cba192d7b2b50d5c6820ee661e02e6f488e
 F src/os_win.c 0d553b6e8b92c8eb85e7f1b4a8036fe8638c8b32c9ad8d9d72a861c10f81b4c5
 F src/os_win.h 5e168adf482484327195d10f9c3bce3520f598e04e07ffe62c9c5a8067c1037b
-F src/pager.c 93e9cf4fe32606a29ed446ef5577eaed7e7e7802b01773d96312742bbfce45ed
-F src/pager.h dd6ade22dd303a8ca6c34f1ff0f299add7191c1bff65f0289b7fd7c3460f9551
+F src/pager.c 7d10d8fcfa75baa22ca026e8da287dcc7075e7e8db3c40e977aeefe2d7211e9c
+F src/pager.h aec16889c22f11ea29b8fd44edad67ca03fbf14f60884ff0e3a3ad9b4ca5b8fa
 F src/parse.y aa52bf080906d3214e01343a67feabbc330f9c7e7f1db6126a3b722cbf58c01d
 F src/pcache.c 588cc3c5ccaaadde689ed35ce5c5c891a1f7b1f4d1f56f6cf0143b74d8ee6484
 F src/pcache.h 1497ce1b823cf00094bb0cf3bac37b345937e6f910890c626b16512316d3abf5
@@ -750,7 +751,7 @@ F src/shell.c.in 3bc4c6aa962bdf950dc144c2138eb4bc734bf7e305f2ac42dbda5a83c4a4c66
 F src/sqlite.h.in 91b7a209629dd57f534585fa08752bf154b53b63ee596c8e41275b8c636bb300
 F src/sqlite3.rc 015537e6ac1eec6c7050e17b616c2ffe6f70fca241835a84a4f0d5937383c479
 F src/sqlite3ext.h f590cd8cb4c36fc727632c9b5fbbafc85f7efe2c6890f9958d7e711dc26ec01e
-F src/sqliteInt.h fc560d28bf9e287cece657914685b8d82679268a6a92b20b6a013300b7526b81
+F src/sqliteInt.h 7916d6284f6bb28f60c3e0b444f81e8c701fcaabd7cfc0865b19d90a5b7e6199
 F src/sqliteLimit.h 7e705474d59912388832cc5465edbc0dbb552872e23452812846e90d280987f3
 F src/status.c 7565d63a79aa2f326339a24a0461a60096d0bd2bce711fefb50b5c89335f3592
 F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1
@@ -824,8 +825,8 @@ F src/vdbetrace.c 49e689f751505839742f4a243a1a566e57d5c9eaf0d33bbaa26e2de3febf7b
 F src/vdbevtab.c fc46b9cbd759dc013f0b3724549cc0d71379183c667df3a5988f7e2f1bd485f3
 F src/vtab.c 5437ce986db2f70e639ce8a3fe68dcdfe64b0f1abb14eaebecdabd5e0766cc68
 F src/vxworks.h 9d18819c5235b49c2340a8a4d48195ec5d5afb637b152406de95a9436beeaeab
-F src/wal.c 8f0f8aed7bafc4f0fa9b0e1e5d70d4f12022b46509936b3f671f41246e6ad728
-F src/wal.h 8d02ab8c2a93a941f5898eb3345bf711c1d3f8f86f4be8d5428fb6c074962d8a
+F src/wal.c 2bcd6d2578856c2c8a66781304ad1403c0599f1d918323a8af855aaa49b46c1b
+F src/wal.h 44c780f3a22fc4c6c1438dd07a60e3e04be30f50b54470e4beaeb5e01c48c1b4
 F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014
 F src/where.c 0079b6ba463ae806b99b20cb335729dcce5f3e496b81cccf6441dc11f8c5bf92
 F src/whereInt.h 8d94cb116c9e06205c3d5ac87af065fc044f8cf08bfdccd94b6ea1c1308e65da
@@ -1003,6 +1004,7 @@ F test/concurrent6.test a7860e9ca13bb5fb76bcf41c5524fbfa9c37e6e258ecf84ffb5748a2
 F test/concurrent7.test b96fa5c4cfdf8d5c0bc66b6934214500bad0260884a736f054ccc76e81aae85d
 F test/concurrent8.test b93937e74a8efb8b84f2fea7595b53418c5f29777bbe9cbdb5dc219b3dd72a7d
 F test/concurrent9.test 4b59e327c524d09c992f94b5cc7202cfed124ecbb85823c30308b5e1c7e16dca
+F test/concurrentA.test 1c03f23c30f3400e57bb7a5c02d270b7ceac336820e03abdd621b12b3549b0c8
 F test/conflict.test 3307ffdf988e04b01c4e942d8aa369a977f085bf629f43a627c9a77f39d65926
 F test/conflict2.test 5557909ce683b1073982f5d1b61dfb1d41e369533bfdaf003180c5bc87282dd1
 F test/conflict3.test 81865d9599609aca394fb3b9cd5f561d4729ea5b176bece3644f6ecb540f88ac
@@ -1466,7 +1468,7 @@ F test/mmap4.test 2e2b4e32555b58da15176e6fe750f17c9dcf7f93
 F test/mmapcorrupt.test 470fb44fe92e99c1d23701d156f8c17865f5b027063c9119dcfdb842791f4465
 F test/mmapfault.test d4c9eff9cd8c2dc14bc43e71e042f175b0a26fe3
 F test/mmapwarm.test 2272005969cd17a910077bd5082f70bc1fefad9a875afec7fc9af483898ecaf3
-F test/modeA.sql 3f2b5a7ce7074a52b2b7ec07b07dc1a08edba19e40bce9b4d65d3965413bbea3 w test/modeA.clitest
+F test/modeA.sql 3f2b5a7ce7074a52b2b7ec07b07dc1a08edba19e40bce9b4d65d3965413bbea3
 F test/multiplex.test d74c034e52805f6de8cc5432cef8c9eb774bb64ec29b83a22effc8ca4dac1f08
 F test/multiplex2.test 580ca5817c7edbe4cc68fa150609c9473393003a
 F test/multiplex3.test fac575e0b1b852025575a6a8357701d80933e98b5d2fe6d35ddaa68f92f6a1f7
@@ -2179,7 +2181,7 @@ F tool/mkpragmatab.tcl 10694206dfe9d1f6e24d5876d52bb56e67706ef20bc479002bb88b67b
 F tool/mkshellc.tcl da6918b128e928a8f0d663519e14829153e59465bd5eb596442e99fa10a411b7
 F tool/mksourceid.c 36aa8020014aed0836fd13c51d6dc9219b0df1761d6b5f58ff5b616211b079b9
 F tool/mksqlite3c-noext.tcl 351c55256213154cabb051a3c870ef9f4487de905015141ae50dc7578a901b84
-F tool/mksqlite3c.tcl 7a268139158e5deef27a370bc2f8db6ccf100c1ad7ac5e5b23743c0fd354f609
+F tool/mksqlite3c.tcl 11d4d76e80f2fc0a6a2fa8c2a5824dc3d654a7f0317f17386f927cd7cb73638c
 F tool/mksqlite3h.tcl ef6831c97e6e638d2324863e8125306baea239b23defd75da77edffa3b620e81
 F tool/mksqlite3internalh.tcl 46ef6ed6ccd3c36e23051109dd25085d8edef3887635cea25afa81c4adf4d4db
 F tool/mksrczip.tcl 81efd9974dbb36005383f2cd655520057a2ae5aa85ac2441a80c7c28f803ac52
@@ -2228,8 +2230,8 @@ F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee
 F tool/warnings.sh d924598cf2f55a4ecbc2aeb055c10bd5f48114793e7ba25f9585435da29e7e98
 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
 F tool/winmain.c 00c8fb88e365c9017db14c73d3c78af62194d9644feaf60e220ab0f411f3604c
-P f273f6b8245c5dcaf5642a83afaa015efd5b6115d099ddd1325db53c51338f63 b57a8215f4259a0aae188b7ee5060f8ff48919303179aae80b58b43ed3b991f5
-R b94b8b995e69f821ef662acbd14c9693
-U drh
-Z 4983067014ca9c3566faa2245d09fda6
+P d577b2a2b2b04ff7b7b07c0fce6c9e3a910ae0a2a46d3524e688c1530155e3c2
+R 396f22c620cb2dfb0ceaaeebf35e550a
+U dan
+Z 169a5a4214c793756ba9b66babdcc793
 # Remove this line to create a well-formed Fossil manifest.
index d2f128659288415b5efec6b14f86b2d6c0e0d87d..2a8fc1c89d0da15dd061b60631423db563cb851f 100644 (file)
@@ -1,2 +1,2 @@
-branch bedrock
-tag bedrock
+branch bedrock-lcd
+tag bedrock-lcd
index d205fc03548d4f847b88b4fb5e130d96048bfc27..3c3d53c12e63c1e494e1112adb54123f1992a47a 100644 (file)
@@ -1 +1 @@
-d577b2a2b2b04ff7b7b07c0fce6c9e3a910ae0a2a46d3524e688c1530155e3c2
+3e1cbc04c5d28183a39d82ef281274e2fc4ebacf935639add88267a539d852ac
diff --git a/src/bcrecord.c b/src/bcrecord.c
new file mode 100644 (file)
index 0000000..a36311d
--- /dev/null
@@ -0,0 +1,597 @@
+/*
+** 2026 February 13
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+*/
+
+#include "sqliteInt.h"
+#include "vdbeInt.h"
+#include "btreeInt.h"
+
+#include <string.h>
+#include <assert.h>
+
+/*
+** Write the serialized data blob for the value stored in pMem into 
+** buf. It is assumed that the caller has allocated sufficient space.
+** Return the number of bytes written.
+**
+** nBuf is the amount of space left in buf[].  The caller is responsible
+** for allocating enough space to buf[] to hold the entire field, exclusive
+** of the pMem->u.nZero bytes for a MEM_Zero value.
+**
+** Return the number of bytes actually written into buf[].  The number
+** of bytes in the zero-filled tail is included in the return value only
+** if those bytes were zeroed in buf[].
+*/ 
+static u32 bcRecordSerialPut(u8 *buf, Mem *pMem, u32 serial_type){
+  u32 len;
+
+  /* Integer and Real */
+  if( serial_type<=7 && serial_type>0 ){
+    u64 v;
+    u32 i;
+    if( serial_type==7 ){
+      assert( sizeof(v)==sizeof(pMem->u.r) );
+      memcpy(&v, &pMem->u.r, sizeof(v));
+      swapMixedEndianFloat(v);
+    }else{
+      v = pMem->u.i;
+    }
+    len = i = sqlite3SmallTypeSizes[serial_type];
+    assert( i>0 );
+    do{
+      buf[--i] = (u8)(v&0xFF);
+      v >>= 8;
+    }while( i );
+    return len;
+  }
+
+  /* String or blob */
+  if( serial_type>=12 ){
+    assert( pMem->n + ((pMem->flags & MEM_Zero)?pMem->u.nZero:0)
+             == (int)sqlite3VdbeSerialTypeLen(serial_type) );
+    len = pMem->n;
+    if( len>0 ) memcpy(buf, pMem->z, len);
+    return len;
+  }
+
+  /* NULL or constants 0 or 1 */
+  return 0;
+}
+
+/*
+** Return the serial-type for the value stored in pMem.
+**
+** This routine might convert a large MEM_IntReal value into MEM_Real.
+*/
+static u32 bcRecordSerialType(Mem *pMem, u32 *pLen){
+  int flags = pMem->flags;
+  u32 n;
+
+  assert( pLen!=0 );
+  if( flags&MEM_Null ){
+    *pLen = 0;
+    return 0;
+  }
+  if( flags&(MEM_Int|MEM_IntReal) ){
+    /* Figure out whether to use 1, 2, 4, 6 or 8 bytes. */
+#   define MAX_6BYTE ((((i64)0x00008000)<<32)-1)
+    i64 i = pMem->u.i;
+    u64 u;
+    testcase( flags & MEM_Int );
+    testcase( flags & MEM_IntReal );
+    if( i<0 ){
+      u = ~i;
+    }else{
+      u = i;
+    }
+    if( u<=127 ){
+      if( (i&1)==i ){
+        *pLen = 0;
+        return 8+(u32)u;
+      }else{
+        *pLen = 1;
+        return 1;
+      }
+    }
+    if( u<=32767 ){ *pLen = 2; return 2; }
+    if( u<=8388607 ){ *pLen = 3; return 3; }
+    if( u<=2147483647 ){ *pLen = 4; return 4; }
+    if( u<=MAX_6BYTE ){ *pLen = 6; return 5; }
+    *pLen = 8;
+    if( flags&MEM_IntReal ){
+      /* If the value is IntReal and is going to take up 8 bytes to store
+      ** as an integer, then we might as well make it an 8-byte floating
+      ** point value */
+      pMem->u.r = (double)pMem->u.i;
+      pMem->flags &= ~MEM_IntReal;
+      pMem->flags |= MEM_Real;
+      return 7;
+    }
+    return 6;
+  }
+  if( flags&MEM_Real ){
+    *pLen = 8;
+    return 7;
+  }
+  assert( pMem->db->mallocFailed || flags&(MEM_Str|MEM_Blob) );
+  assert( pMem->n>=0 );
+  n = (u32)pMem->n;
+  if( flags & MEM_Zero ){
+    n += pMem->u.nZero;
+  }
+  *pLen = n;
+  return ((n*2) + 12 + ((flags&MEM_Str)!=0));
+}
+
+
+/*
+**
+*/
+int sqlite3BcSerializeRecord(
+  UnpackedRecord *pRec,           /* Record to serialize */
+  u8 **ppRec,                     /* OUT: buffer containing serialization */
+  int *pnRec                      /* OUT: size of (*ppRec) in bytes */
+){
+  int ii;
+  int nData = 0;
+  int nHdr = 0;
+  u8 *pOut = 0;
+  int iOffHdr = 0;
+  int iOffData = 0;
+
+  for(ii=0; ii<pRec->nField; ii++){
+    u32 n;
+    u32 stype = bcRecordSerialType(&pRec->aMem[ii], &n);
+    nData += n;
+    nHdr += sqlite3VarintLen(stype);
+    pRec->aMem[ii].uTemp = stype;
+  }
+
+  if( nHdr<=126 ){
+    /* The common case */
+    nHdr += 1;
+  }else{
+    /* Rare case of a really large header */
+    int nVarint = sqlite3VarintLen(nHdr);
+    nHdr += nVarint;
+    if( nVarint<sqlite3VarintLen(nHdr) ) nHdr++;
+  }
+
+  pOut = (u8*)sqlite3_malloc(nData+nHdr);
+  if( pOut==0 ){
+    return SQLITE_NOMEM_BKPT;
+  }
+
+  iOffData = nHdr;
+  iOffHdr = putVarint32(pOut, nHdr);
+  for(ii=0; ii<pRec->nField; ii++){
+    u32 stype = pRec->aMem[ii].uTemp;
+    iOffHdr += putVarint32(&pOut[iOffHdr], stype);
+    iOffData += bcRecordSerialPut(&pOut[iOffData], &pRec->aMem[ii], stype);
+  }
+  assert( iOffData==(nHdr+nData) );
+
+  *ppRec = pOut;
+  *pnRec = iOffData;
+
+  return SQLITE_OK;
+}
+
+static char *hex_encode(const u8 *aIn, int nIn){
+  char *zRet = sqlite3_malloc(nIn*2+1);
+  static const char aDigit[] = "0123456789ABCDEF";
+  int i;
+  for(i=0; i<nIn; i++){
+    zRet[i*2] = aDigit[ (aIn[i] >> 4) ];
+    zRet[i*2+1] = aDigit[ (aIn[i] & 0xF) ];
+  }
+  return zRet;
+}
+
+
+static char *bcRecordToText(const u8 *aRec, int nRec, int delta){
+  char *zRet = 0;
+  const char *zSep = "";
+  const u8 *pEndHdr;              /* Points to one byte past record header */
+  const u8 *pHdr;                 /* Current point in record header */
+  const u8 *pBody;                /* Current point in record data */
+  u64 nHdr;                       /* Bytes in record header */
+  const char *zDelta = 0;
+
+  if( nRec==0 ){
+    return sqlite3_mprintf("()");
+  }
+
+  pHdr = aRec + sqlite3GetVarint(aRec, &nHdr);
+  pBody = pEndHdr = &aRec[nHdr];
+  while( pHdr<pEndHdr ){
+    u64 iSerialType = 0;
+    int nByte = 0;
+
+    pHdr += sqlite3GetVarint(pHdr, &iSerialType);
+    nByte = sqlite3VdbeSerialTypeLen((u32)iSerialType);
+
+    switch( iSerialType ){
+      case 0: {  /* Null */
+        zRet = sqlite3_mprintf("%z%sNULL", zRet, zSep);
+        break;
+      }
+      case 1: case 2: case 3: case 4: case 5: case 6: {
+        i64 iVal = 0;
+
+        switch( iSerialType ){
+          case 1: 
+            iVal = (i64)pBody[0];
+            break;
+          case 2: 
+            iVal = ((i64)pBody[0] << 8) + (i64)pBody[1];
+            break;
+          case 3: 
+            iVal = ((i64)pBody[0] << 16) + ((i64)pBody[1] << 8) + (i64)pBody[2];
+            break;
+          case 4: 
+            iVal = ((i64)pBody[0] << 24) + ((i64)pBody[1] << 16) 
+                 + ((i64)pBody[2] << 8) + (i64)pBody[3];
+            break;
+          case 5: 
+            iVal = ((i64)pBody[0] << 40) + ((i64)pBody[1] << 32) 
+                 + ((i64)pBody[2] << 24) + ((i64)pBody[3] << 16) 
+                 + ((i64)pBody[4] << 8) + (i64)pBody[5];
+            break;
+          case 6: 
+            iVal = ((i64)pBody[0] << 56) + ((i64)pBody[1] << 48) 
+                 + ((i64)pBody[2] << 40) + ((i64)pBody[3] << 32) 
+                 + ((i64)pBody[4] << 24) + ((i64)pBody[5] << 16) 
+                 + ((i64)pBody[6] << 8) + (i64)pBody[7];
+            break;
+        }
+
+        zRet = sqlite3_mprintf("%z%s%lld", zRet, zSep, iVal);
+        break;
+      }
+      case 7: {
+        double d;
+        u64 i = ((u64)pBody[0] << 56) + ((u64)pBody[1] << 48) 
+            + ((u64)pBody[2] << 40) + ((u64)pBody[3] << 32) 
+            + ((u64)pBody[4] << 24) + ((u64)pBody[5] << 16) 
+            + ((u64)pBody[6] << 8) + (u64)pBody[7];
+        memcpy(&d, &i, 8);
+        zRet = sqlite3_mprintf("%z%s%f", zRet, zSep, d);
+        break;
+      }
+
+      case 8: {  /* 0 */
+        zRet = sqlite3_mprintf("%z%s0", zRet, zSep);
+        break;
+      }
+      case 9: {  /* 1 */
+        zRet = sqlite3_mprintf("%z%s1", zRet, zSep);
+        break;
+      }
+
+      default: {
+        if( (iSerialType % 2) ){
+          /* A text value */
+          zRet = sqlite3_mprintf("%z%s%.*Q", zRet, zSep, nByte, pBody);
+        }else{  
+          /* A blob value */
+          char *zHex = hex_encode(pBody, nByte);
+          zRet = sqlite3_mprintf("%z%sX'%z'", zRet, zSep, zHex);
+        }
+        break;
+      }
+    }
+    pBody += nByte;
+    zSep = ",";
+  }
+
+  zDelta = "";
+  if( delta<0 ) zDelta = "+";
+  if( delta>0 ) zDelta = "-";
+  return sqlite3_mprintf("(%z)%s", zRet, zDelta);
+}
+
+typedef struct ConcTable ConcTable;
+typedef struct ConcCursor ConcCursor;
+typedef struct ConcRow ConcRow;
+
+struct ConcRow {
+  Pgno root;
+  const char *zOp;
+  char *zK1;
+  char *zK2;
+  ConcRow *pRowNext;
+};
+
+
+struct ConcCursor {
+  sqlite3_vtab_cursor base;       /* Base class.  Must be first */
+  ConcRow *pRow;
+};
+
+struct ConcTable {
+  sqlite3_vtab base;              /* Base class.  Must be first */
+  sqlite3 *db;                    /* The database */
+};
+
+#define CONC_SCHEMA "CREATE TABLE x(root, op, k1, k2)"
+
+/* Columns */
+#define DBPAGE_COLUMN_PGNO    0
+#define DBPAGE_COLUMN_DATA    1
+#define DBPAGE_COLUMN_SCHEMA  2
+
+/*
+** Connect to or create a concvfs virtual table.
+*/
+static int concConnect(
+  sqlite3 *db,
+  void *pAux,
+  int argc, const char *const*argv,
+  sqlite3_vtab **ppVtab,
+  char **pzErr
+){
+  ConcTable *pTab = 0;
+  int rc = SQLITE_OK;
+  (void)pAux;
+  (void)argc;
+  (void)argv;
+  (void)pzErr;
+
+  sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
+  sqlite3_vtab_config(db, SQLITE_VTAB_USES_ALL_SCHEMAS);
+  rc = sqlite3_declare_vtab(db, CONC_SCHEMA);
+  if( rc==SQLITE_OK ){
+    pTab = (ConcTable *)sqlite3_malloc64(sizeof(ConcTable));
+    if( pTab==0 ) rc = SQLITE_NOMEM_BKPT;
+  }
+
+  assert( rc==SQLITE_OK || pTab==0 );
+  if( rc==SQLITE_OK ){
+    memset(pTab, 0, sizeof(ConcTable));
+    pTab->db = db;
+  }
+
+  *ppVtab = (sqlite3_vtab*)pTab;
+  return rc;
+}
+
+/*
+** Disconnect from or destroy a concvfs virtual table.
+*/
+static int concDisconnect(sqlite3_vtab *pVtab){
+  sqlite3_free(pVtab);
+  return SQLITE_OK;
+}
+
+/*
+** idxNum:
+**
+**     0     schema=main, full table scan
+**     1     schema=main, pgno=?1
+**     2     schema=?1, full table scan
+**     3     schema=?1, pgno=?2
+*/
+static int concBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
+  return SQLITE_OK;
+}
+
+
+/*
+** Open a new concvfs cursor.
+*/
+static int concOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
+  ConcCursor *pCsr;
+
+  pCsr = (ConcCursor *)sqlite3_malloc64(sizeof(ConcCursor));
+  if( pCsr==0 ){
+    return SQLITE_NOMEM_BKPT;
+  }else{
+    memset(pCsr, 0, sizeof(ConcCursor));
+  }
+
+  *ppCursor = (sqlite3_vtab_cursor *)pCsr;
+  return SQLITE_OK;
+}
+
+/*
+** Close a concvfs cursor.
+*/
+static int concClose(sqlite3_vtab_cursor *pCursor){
+  ConcCursor *pCsr = (ConcCursor *)pCursor;
+  ConcRow *pRow = 0;
+  ConcRow *pNext = 0;
+  for(pRow=pCsr->pRow; pRow; pRow=pNext){
+    pNext = pRow->pRowNext;
+    sqlite3_free(pRow->zK1);
+    sqlite3_free(pRow->zK2);
+    sqlite3_free(pRow);
+  }
+  sqlite3_free(pCsr);
+  return SQLITE_OK;
+}
+
+/*
+** Move a concvfs cursor to the next entry in the file.
+*/
+static int concNext(sqlite3_vtab_cursor *pCursor){
+  ConcCursor *pCsr = (ConcCursor *)pCursor;
+  ConcRow *pFree = pCsr->pRow;
+  if( pFree ){
+    pCsr->pRow = pFree->pRowNext;
+    sqlite3_free(pFree->zK1);
+    sqlite3_free(pFree->zK2);
+    sqlite3_free(pFree);
+  }
+  return SQLITE_OK;
+}
+
+static int concEof(sqlite3_vtab_cursor *pCursor){
+  ConcCursor *pCsr = (ConcCursor *)pCursor;
+  return pCsr->pRow==0;
+}
+
+/*
+** idxNum:
+**
+**     0     schema=main, full table scan
+**     1     schema=main, pgno=?1
+**     2     schema=?1, full table scan
+**     3     schema=?1, pgno=?2
+**
+** idxStr is not used
+*/
+static int concFilter(
+  sqlite3_vtab_cursor *pCursor,
+  int idxNum, const char *idxStr,
+  int argc, sqlite3_value **argv
+){
+  ConcCursor *pCsr = (ConcCursor *)pCursor;
+  ConcTable *pTab = (ConcTable *)pCursor->pVtab;
+  sqlite3 *db = pTab->db;
+  BtConcurrent *pConc = &db->aDb[0].pBt->pBt->conc;
+  int rc = SQLITE_OK;
+
+  if( 1 ){
+    int ii;
+
+    for(ii=0; rc==SQLITE_OK && ii<pConc->nWrite; ii++){
+      BtWrite *pWrite = &pConc->aWrite[ii];
+      ConcRow *pRow = (ConcRow*)sqlite3MallocZero(sizeof(ConcRow));
+      if( pRow==0 ){
+        rc = SQLITE_NOMEM_BKPT;
+      }else{
+        pRow->root = pWrite->iRoot;
+        pRow->zOp = pWrite->bDel ? "delete" : "insert";
+        if( pWrite->pKeyInfo ){
+          pRow->zK1 = bcRecordToText(pWrite->aRec, pWrite->nRec, 0);
+          if( pRow->zK1==0 ) rc = SQLITE_NOMEM_BKPT;
+        }else{
+          pRow->zK1 = sqlite3_mprintf("%lld", pWrite->iKey);
+          if( pRow->zK1==0 ){
+            rc = SQLITE_NOMEM_BKPT;
+          }else if( pWrite->bDel==0 ){
+            pRow->zK2 = bcRecordToText(pWrite->aRec, pWrite->nRec, 0);
+            if( pRow->zK2==0 ) rc = SQLITE_NOMEM_BKPT;
+          }
+        }
+        pRow->pRowNext = pCsr->pRow;
+        pCsr->pRow = pRow;
+      }
+    }
+
+    for(ii=0; rc==SQLITE_OK && ii<pConc->nReadIndex; ii++){
+      BtReadIndex *p = &pConc->aReadIndex[ii];
+      ConcRow *pRow = (ConcRow*)sqlite3MallocZero(sizeof(ConcRow));
+      if( pRow==0 ){
+        rc = SQLITE_NOMEM_BKPT;
+      }else{
+        pRow->root = p->iRoot;
+        pRow->zOp = "read";
+        pRow->zK1 = bcRecordToText(p->aRecMin, p->nRecMin, p->drc_min);
+        pRow->zK2 = bcRecordToText(p->aRecMax, p->nRecMax, p->drc_max);
+        pRow->pRowNext = pCsr->pRow;
+        pCsr->pRow = pRow;
+        if( pRow->zK1==0 || pRow->zK2==0 ){
+          rc = SQLITE_NOMEM_BKPT;
+        }
+      }
+    }
+
+    for(ii=0; rc==SQLITE_OK && ii<pConc->nReadIntkey; ii++){
+      BtReadIntkey *p = &pConc->aReadIntkey[ii];
+      ConcRow *pRow = (ConcRow*)sqlite3MallocZero(sizeof(ConcRow));
+      if( pRow==0 ){
+        rc = SQLITE_NOMEM_BKPT;
+      }else{
+        pRow->root = p->iRoot;
+        pRow->zOp = "read";
+        pRow->zK1 = sqlite3_mprintf("%lld", p->iMin);
+        pRow->zK2 = sqlite3_mprintf("%lld", p->iMax);
+        pRow->pRowNext = pCsr->pRow;
+        pCsr->pRow = pRow;
+        if( pRow->zK1==0 || pRow->zK2==0 ){
+          rc = SQLITE_NOMEM_BKPT;
+        }
+      }
+    }
+  }
+
+  return rc;
+}
+
+static int concColumn(
+  sqlite3_vtab_cursor *pCursor,
+  sqlite3_context *ctx,
+  int i
+){
+  ConcCursor *pCsr = (ConcCursor *)pCursor;
+  int rc = SQLITE_OK;
+  ConcRow *pRow = pCsr->pRow;
+  assert( pRow );
+  switch( i ){
+    case 0: {           /* root */
+      sqlite3_result_int64(ctx, (sqlite3_int64)pRow->root);
+      break;
+    }
+    case 1: {           /* op */
+      sqlite3_result_text(ctx, pRow->zOp, -1, SQLITE_TRANSIENT);
+      break;
+    }
+    case 2: {           /* k1 */
+      sqlite3_result_text(ctx, pRow->zK1, -1, SQLITE_TRANSIENT);
+      break;
+    }
+    case 3: {           /* k2 */
+      sqlite3_result_text(ctx, pRow->zK2, -1, SQLITE_TRANSIENT);
+      break;
+    }
+  }
+  return rc;
+}
+
+static int concRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
+  *pRowid = 0;
+  return SQLITE_OK;
+}
+
+int sqlite3ConcurrentRegister(sqlite3 *db){
+  static sqlite3_module conc_module = {
+    2,                            /* iVersion */
+    concConnect,                  /* xCreate */
+    concConnect,                  /* xConnect */
+    concBestIndex,                /* xBestIndex */
+    concDisconnect,               /* xDisconnect */
+    concDisconnect,               /* xDestroy */
+    concOpen,                     /* xOpen - open a cursor */
+    concClose,                    /* xClose - close a cursor */
+    concFilter,                   /* xFilter - configure scan constraints */
+    concNext,                     /* xNext - advance a cursor */
+    concEof,                      /* xEof - check for end of scan */
+    concColumn,                   /* xColumn - read data */
+    concRowid,                    /* xRowid - read data */
+    0,                            /* xUpdate */
+    0,                            /* xBegin */
+    0,                            /* xSync */
+    0,                            /* xCommit */
+    0,                            /* xRollback */
+    0,                            /* xFindMethod */
+    0,                            /* xRename */
+    0,                            /* xSavepoint */
+    0,                            /* xRelease */
+    0,                            /* xRollbackTo */
+    0,                            /* xShadowName */
+    0                             /* xIntegrity */
+  };
+  return sqlite3_create_module(db, "sqlite_conc", &conc_module, 0);
+}
+
index c507457037672053fd974c9aa8faf0a29714bcb6..9d03c6d782b8fed51a93a0317caa5d20ab5c51d9 100644 (file)
@@ -623,10 +623,66 @@ static int btreePtrmapStore(
 
 /* !defined(SQLITE_OMIT_CONCURRENT)
 **
-** Open savepoint iSavepoint, if it is not already open.
+** Free the contents of the array of BtWrite objects (but not the
+** array itself).
+*/
+static void btreeBcFreeWriteArray(BtWrite *aWrite, int nWrite){
+  int ii;
+  for(ii=0; ii<nWrite; ii++){
+    sqlite3KeyInfoUnref(aWrite[ii].pKeyInfo);
+    sqlite3_free(aWrite[ii].aRec);
+  }
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** If this is a BEGIN CONCURRENT transactin, update the BtShared.conc 
+** object to reflect the fact that nSvpt savepoints are now open.
+*/
+static void btreeBcSavepointBegin(BtShared *pBt, int nSvpt){
+  if( nSvpt>=BTCONC_MAX_SAVEPOINT ){
+    /* More than 8 nested savepoints. No logical OCC this transaction. */
+    pBt->conc.eState = BTCONC_STATE_RETIRED;
+  }else if( nSvpt>pBt->conc.nSvpt ){
+    int ii;
+    for(ii=pBt->conc.nSvpt; ii<nSvpt; ii++){
+      pBt->conc.aSvpt[ii] = pBt->conc.nWrite;
+    }
+    pBt->conc.nSvpt = nSvpt;
+  }
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** If this is a BEGIN CONCURRENT transaction, update the BtShared.conc 
+** object to reflect the fact that savepoint iSvpt has just been 
+** released (if op==SAVEPOINT_RELEASE) or rolled back (if
+** op==SAVEPOINT_ROLLBACK).
+*/
+static void btreeBcSavepointEnd(BtShared *pBt, int op, int iSvpt){
+  assert( op==SAVEPOINT_ROLLBACK || op==SAVEPOINT_RELEASE );
+  assert( iSvpt>=0 || (iSvpt==-1 && op==SAVEPOINT_ROLLBACK) );
+  assert( pBt->conc.nSvpt>=0 );
+
+  if( iSvpt<pBt->conc.nSvpt ){
+    if( op==SAVEPOINT_RELEASE ){
+      pBt->conc.nSvpt = iSvpt;
+    }else{
+      int nNew = (iSvpt<0) ? 0 : pBt->conc.aSvpt[iSvpt];
+      btreeBcFreeWriteArray(&pBt->conc.aWrite[nNew], pBt->conc.nWrite - nNew);
+      pBt->conc.nWrite = nNew;
+      pBt->conc.nSvpt = iSvpt+1;
+    }
+  }
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Open savepoint iSvpt, if it is not already open.
 */
 static int btreePtrmapBegin(BtShared *pBt, int nSvpt){
   BtreePtrmap *pMap = pBt->pMap;
+  btreeBcSavepointBegin(pBt, nSvpt);
   if( pMap && nSvpt>pMap->nSvpt ){
     int i;
     if( nSvpt>=pMap->nSvptAlloc ){
@@ -638,127 +694,1392 @@ static int btreePtrmapBegin(BtShared *pBt, int nSvpt){
         pMap->aSvpt = aNew;
         pMap->nSvptAlloc = nNew;
       }
-    }
+    }
+
+    for(i=pMap->nSvpt; i<nSvpt; i++){
+      pMap->aSvpt[i] = pMap->nRollback;
+    }
+    pMap->nSvpt = nSvpt;
+  }
+
+  return SQLITE_OK;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Rollback (if op==SAVEPOINT_ROLLBACK) or release (if op==SAVEPOINT_RELEASE)
+** savepoint iSvpt.
+*/
+static void btreePtrmapEnd(BtShared *pBt, int op, int iSvpt){
+  BtreePtrmap *pMap = pBt->pMap;
+  btreeBcSavepointEnd(pBt, op, iSvpt);
+  if( pMap ){
+    assert( op==SAVEPOINT_ROLLBACK || op==SAVEPOINT_RELEASE );
+    assert( iSvpt>=0 || (iSvpt==-1 && op==SAVEPOINT_ROLLBACK) );
+    if( iSvpt<0 ){
+      pMap->nSvpt = 0;
+      pMap->nRollback = 0;
+      memset(pMap->aPtr, 0, sizeof(Pgno) * pMap->nPtrAlloc);
+    }else if( iSvpt<pMap->nSvpt ){
+      if( op==SAVEPOINT_ROLLBACK ){
+        int ii;
+        for(ii=pMap->nRollback-1; ii>=pMap->aSvpt[iSvpt]; ii--){
+          RollbackEntry *p = &pMap->aRollback[ii];
+          PtrmapEntry *pEntry = &pMap->aPtr[p->pgno - pMap->iFirst];
+          pEntry->parent = p->parent;
+          pEntry->eType = p->eType;
+        }
+      }
+      pMap->nSvpt = iSvpt + (op==SAVEPOINT_ROLLBACK);
+      pMap->nRollback = pMap->aSvpt[iSvpt];
+    }
+  }
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** This function is called after an CONCURRENT transaction is opened on the
+** database. It allocates the BtreePtrmap structure used to track pointers
+** to allocated pages and zeroes the nFree/iTrunk fields in the database 
+** header on page 1.
+*/
+static int btreePtrmapAllocate(BtShared *pBt){
+  int rc = SQLITE_OK;
+  if( pBt->pMap==0 ){
+    BtreePtrmap *pMap = sqlite3_malloc(sizeof(BtreePtrmap));
+    if( pMap==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      memset(&pBt->pPage1->aData[32], 0, sizeof(u32)*2);
+      memset(pMap, 0, sizeof(BtreePtrmap));
+      pMap->iFirst = pBt->nPage + 1;
+      pBt->pMap = pMap;
+    }
+  }
+  return rc;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Free any BtreePtrmap structure allocated by an earlier call to
+** btreePtrmapAllocate().
+*/
+static void btreePtrmapDelete(BtShared *pBt){
+  BtreePtrmap *pMap = pBt->pMap;
+  if( pMap ){
+    sqlite3_free(pMap->aRollback);
+    sqlite3_free(pMap->aPtr);
+    sqlite3_free(pMap->aSvpt);
+    sqlite3_free(pMap);
+    pBt->pMap = 0;
+  }
+}
+
+/*
+** Check that the pointer-map does not contain any entries with a parent
+** page of 0. Call sqlite3_log() multiple times to output the entire
+** data structure if it does.
+*/
+static void btreePtrmapCheck(BtShared *pBt, Pgno nPage){
+  Pgno i;
+  int bProblem = 0;
+  BtreePtrmap *p = pBt->pMap;
+
+  for(i=p->iFirst; i<=nPage; i++){
+    PtrmapEntry *pEntry = &p->aPtr[i-p->iFirst];
+    if( pEntry->eType==PTRMAP_OVERFLOW1
+     || pEntry->eType==PTRMAP_OVERFLOW2
+     || pEntry->eType==PTRMAP_BTREE
+    ){
+      if( pEntry->parent==0 ){
+        bProblem = 1;
+        break;
+      }
+    }
+  }
+
+  if( bProblem ){
+    for(i=p->iFirst; i<=nPage; i++){
+      PtrmapEntry *pEntry = &p->aPtr[i-p->iFirst];
+      sqlite3_log(SQLITE_CORRUPT, 
+          "btreePtrmapCheck: pgno=%d eType=%d parent=%d", 
+          (int)i, (int)pEntry->eType, (int)pEntry->parent
+      );
+    }
+    abort();
+  }
+}
+
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** (*ppArray) points to an array of elements size szElem. (*pnArrayAlloc)
+** is the current allocated size of the array, (*pnArray) is the currently
+** used size. Ensure there is enough space to append an element. Return
+** SQLITE_OK if successful, or SQLITE_NOMEM if an OOM occurs.
+*/
+static int btreeBcGrowArray(
+  void **ppArray,
+  int *pnArray,
+  int *pnArrayAlloc,
+  int szElem
+){
+  if( (*pnArray)>=(*pnArrayAlloc) ){
+    i64 nNew = (*pnArray)==0 ? 100 : ((*pnArray) * 2);
+    void *pNew = sqlite3_realloc64(*ppArray, nNew * szElem);
+
+    if( pNew ){
+      *ppArray = pNew;
+      *pnArrayAlloc = (int)nNew;
+    }else{
+      return SQLITE_NOMEM_BKPT;
+    }
+  }
+  return SQLITE_OK;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** A new entry is to be appended to the write-array of the BtConcurrent
+** object passed as the only argument. Ensure sufficient space is available.
+** Return SQLITE_NOMEM if an OOM occurs, or SQLITE_OK otherwise.
+*/
+static int btreeBcGrowWriteArray(BtConcurrent *pBtConc){
+  return btreeBcGrowArray(
+      (void**)&pBtConc->aWrite,
+      &pBtConc->nWrite,
+      &pBtConc->nWriteAlloc,
+      sizeof(BtWrite)
+  );
+}
+
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Global linked list of all shared-logs in the process. Protected by
+** mutex SQLITE_MUTEX_STATIC_MAIN.
+*/
+static BtSharedLog *pGlobalBtSharedLog = 0;
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Start using BtShared.conc, if it is not already in use.
+*/
+static int btreeBcBeginConcurrent(BtShared *pBt){
+  int rc = SQLITE_OK;
+  assert( pBt->conc.eState==BTCONC_STATE_NONE
+       || pBt->conc.eState==BTCONC_STATE_INUSE
+       || pBt->conc.eState==BTCONC_STATE_RETIRED
+  );
+  assert( pBt->conc.eState==BTCONC_STATE_NONE || pBt->conc.pBtLog!=0 );
+  if( pBt->conc.eState==BTCONC_STATE_NONE ){
+
+    /* If this BtShared does not yet have a connection to the global
+    ** BtSharedLog object for this database, establish one now. Creating
+    ** the BtSharedLog if it does not already exist.  */
+    if( pBt->conc.pBtLog==0 ){
+      const char *zFull = sqlite3PagerFilename(pBt->pPager, 0);
+      BtSharedLog *p = 0;
+      sqlite3_mutex_enter( sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MAIN) );
+      for(p=pGlobalBtSharedLog; p; p=p->pSharedNext){
+        if( 0==strcmp(zFull, p->zFullname) ) break;
+      }
+      if( p ){
+        p->nRef++;
+      }else{
+        int nFull = sqlite3Strlen30(zFull) + 1;
+        p = (BtSharedLog*)sqlite3MallocZero(sizeof(BtSharedLog) + nFull);
+        if( p==0 ){
+          rc = SQLITE_NOMEM_BKPT;
+        }else{
+          p->zFullname = (char*)&p[1];
+          memcpy(p->zFullname, zFull, nFull);
+          p->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST);
+          if( p->mutex==0 ){
+            sqlite3_free(p);
+            p = 0;
+            rc = SQLITE_NOMEM_BKPT;
+          }else{
+            p->nRef = 1;
+            p->pSharedNext = pGlobalBtSharedLog;
+            pGlobalBtSharedLog = p;
+          }
+        }
+      }
+      sqlite3_mutex_leave( sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MAIN) );
+
+      pBt->conc.pBtLog = p;
+    }
+
+    pBt->conc.eState = BTCONC_STATE_INUSE;
+    pBt->conc.iBase = sqlite3PagerWalCommitId(pBt->pPager);
+    pBt->conc.nSvpt = 0;
+  }
+
+  return rc;
+}
+
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Remove and free the oldest entry in the BtSharedLog indicated by the
+** only argument.
+*/
+static void btreeBcRemoveOldest(BtSharedLog *pBtLog){
+  BtSharedLogEntry *pFree = pBtLog->pFirst;
+  int ii;
+
+  pBtLog->pFirst = pFree->pLogNext;
+  if( pBtLog->pFirst==0 ){
+    assert( pBtLog->pLast==pFree );
+    pBtLog->pLast = 0;
+  }
+
+  for(ii=0; ii<pFree->nIndex; ii++){
+    BtWriteIndex *p = &pFree->aIndex[ii];
+    sqlite3_free(p->aRec);
+  }
+  sqlite3_free(pFree);
+
+  pBtLog->nEntry--;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Macro to implement an in-place sort of an array of objects. Arguments:
+**
+**    Type:          The type of the array. e.g. BtReadIntkey
+**    aObj:          The array.
+**    nObj:          The size of the array in objects.
+**    compare_pA_pB: An expression that evaluates to true if (*pA)<=(*pB)
+**                   when evaluated, where pA and pB are pointers to
+**                   members of the aObj array, type (Type*).
+*/
+#define BT_MERGESORT_BODY(Type, aObj, nObj, compare_pA_pB) {    \
+  if( nObj>1 ){                                                 \
+    Type *aTemp;                                                \
+    Type *pSrc;                                                 \
+    Type *pDst;                                                 \
+    int width;                                                  \
+    int i;                                                      \
+    aTemp = sqlite3Malloc(sizeof(Type) * nObj);                 \
+    if( aTemp==0 ){                                             \
+      return SQLITE_NOMEM;                                      \
+    }                                                           \
+    pSrc = aObj;                                                \
+    pDst = aTemp;                                               \
+    for(width=1; width<nObj; width*=2){                         \
+      for(i=0; i<nObj; i += 2*width){                           \
+        int left = i;                                           \
+        int mid = i + width;                                    \
+        int right = i + 2*width;                                \
+        int p, q, k;                                            \
+        if( mid>nObj ) mid = nObj;                              \
+        if( right>nObj ) right = nObj;                          \
+        p = left;                                               \
+        q = mid;                                                \
+        k = left;                                               \
+        while( p<mid && q<right ){                              \
+          Type *pA = &pSrc[p];                                  \
+          Type *pB = &pSrc[q];                                  \
+          if( (compare_pA_pB) ){                                \
+            pDst[k++] = pSrc[p++];                              \
+          }else{                                                \
+            pDst[k++] = pSrc[q++];                              \
+          }                                                     \
+        }                                                       \
+        while( p<mid ){                                         \
+          pDst[k++] = pSrc[p++];                                \
+        }                                                       \
+        while( q<right ){                                       \
+          pDst[k++] = pSrc[q++];                                \
+        }                                                       \
+      }                                                         \
+      SWAP(Type*, pSrc, pDst);                                  \
+    }                                                           \
+    if( pSrc!=aObj ){                                           \
+      memcpy(aObj, pSrc, sizeof(Type)*nObj);                    \
+    }                                                           \
+    sqlite3_free(aTemp);                                        \
+  }                                                             \
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Sort array aRead[], first by root page and then by minimum key value.
+** Then merge any overlapping ranges. Update (*pnRead) with the new
+** size of the array before returning.
+**
+** Return SQLITE_NOMEM if an OOM error occurs, or SQLITE_OK otherwise.
+*/
+static int btreeBcReadIntkeySort(BtReadIntkey *aRead, int *pnRead){
+  int nRead = *pnRead;
+
+  BT_MERGESORT_BODY(BtReadIntkey, aRead, nRead, (
+        pA->iRoot<pB->iRoot 
+    || (pA->iRoot==pB->iRoot && pA->iMin<=pB->iMin)
+  ));
+
+  /* Merge any overlapping ranges. */
+  {
+    int iIn;
+    int iOut = 0;
+
+    for(iIn=0; iIn<nRead; iIn++){
+      /* assert() that the merge-sort worked */
+      assert( iIn==nRead-1 || aRead[iIn].iRoot<aRead[iIn+1].iRoot || (
+              aRead[iIn].iRoot==aRead[iIn+1].iRoot 
+           && aRead[iIn].iMin<=aRead[iIn+1].iMin
+      ));
+      if( iOut>0
+       && aRead[iIn].iRoot==aRead[iOut-1].iRoot
+       && aRead[iIn].iMin <= aRead[iOut-1].iMax + 1
+      ){
+        /* Merge into existing range */
+        if( aRead[iIn].iMax > aRead[iOut-1].iMax ){
+          aRead[iOut-1].iMax = aRead[iIn].iMax;
+        }
+      }else{
+        /* Start new range */
+        aRead[iOut++] = aRead[iIn];
+      }
+    }
+
+    *pnRead = iOut;
+  }
+
+  return SQLITE_OK;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Return the result of comparing the two records:
+**
+**    (aLeft, nLeft, drc_left) - (aRight, nRight, drc_right)
+**
+** Argument pUnpacked is guaranteed to point to an UnpackedRecord structure
+** large enough to decode one of the records. However, the values of 
+** UnpackedRecord.pKeyInfo and UnpackedRecord.nField must be set manually
+** before it is used.
+*/
+static int btreeBcRecordCompare(
+  UnpackedRecord *pUnpacked,
+  KeyInfo *pKeyInfo,
+  const u8 *aLeft, int nLeft, int drc_left,
+  const u8 *aRight, int nRight, int drc_right
+){
+  int res = 0;
+  pUnpacked->pKeyInfo = pKeyInfo;
+  pUnpacked->nField = pKeyInfo->nKeyField + 1;
+  sqlite3VdbeRecordUnpack(nRight, aRight, pUnpacked);
+  res = sqlite3VdbeRecordCompare(nLeft, aLeft, pUnpacked);
+  if( res==0 ){
+    res = (drc_right - drc_left);
+  }
+  if( res<0 ){
+    res = -1;
+  }else if( res>0 ){
+    res = +1;
+  }
+  assert( res>=-1 && res<=+1 );
+  return res;
+}
+
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Return the result of comparing the two BtReadIndex arguments:
+**
+**    (*pA) - (*pB)
+**
+** Argument pUnpacked is guaranteed to point to an UnpackedRecord structure
+** large enough to use with btreeBcRecordCompare().
+*/
+static int btreeBcReadIndexCmp(
+  UnpackedRecord *pUnpacked,
+  BtReadIndex *pA,
+  BtReadIndex *pB
+){
+  if( pA->iRoot<pB->iRoot ) return -1;
+  if( pA->iRoot>pB->iRoot ) return +1;
+
+  return btreeBcRecordCompare(pUnpacked, pA->pKeyInfo, 
+      pA->aRecMin, pA->nRecMin, pA->drc_min,
+      pB->aRecMin, pB->nRecMin, pB->drc_min
+  );
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Sort the BtConcurrent.aReadIndex[] array by root page number, then
+** by minimum record value. Then merge overlapping ranges.
+** Updates *pnRead.
+**
+** Return SQLITE_NOMEM if an OOM is encountered, or SQLITE_OK otherwise.
+*/
+int btreeBcReadIndexSort(BtConcurrent *pBtConc){
+  int rc = SQLITE_OK;
+  int nRead = pBtConc->nReadIndex;
+  BtReadIndex *aRead = pBtConc->aReadIndex;
+
+  BT_MERGESORT_BODY(BtReadIndex, aRead, nRead, (
+    btreeBcReadIndexCmp(pBtConc->pUnpacked, pA, pB)<=0
+  ));
+
+  /* Merge overlapping ranges */
+  if( rc==SQLITE_OK && nRead>1 ){
+    int iIn;
+    int iOut = 1;
+
+    for(iIn=1; iIn<nRead; iIn++){
+      if( aRead[iIn].iRoot==aRead[iOut-1].iRoot ){
+
+        /* Compare aRecMin of this range to the aRecMax of the previous. */
+        int res = btreeBcRecordCompare(
+          pBtConc->pUnpacked,
+          aRead[iIn].pKeyInfo,
+          aRead[iIn].aRecMin, aRead[iIn].nRecMin, aRead[iIn].drc_min,
+          aRead[iOut-1].aRecMax, aRead[iOut-1].nRecMax, aRead[iOut-1].drc_max
+        );
+
+        if( res<=0 ){
+          /* This range overlaps with the previous one */
+          res = btreeBcRecordCompare(
+            pBtConc->pUnpacked,
+            aRead[iIn].pKeyInfo,
+            aRead[iIn].aRecMax, aRead[iIn].nRecMax, aRead[iIn].drc_max,
+            aRead[iOut-1].aRecMax, aRead[iOut-1].nRecMax, aRead[iOut-1].drc_max
+          );
+
+          if( res>0 ){
+            aRead[iOut-1].aRecMax  = aRead[iIn].aRecMax;
+            aRead[iOut-1].nRecMax  = aRead[iIn].nRecMax;
+            aRead[iOut-1].drc_max  = aRead[iIn].drc_max;
+          }
+
+          continue;
+        }
+      }
+
+      /* Start new range */
+      aRead[iOut++] = aRead[iIn];
+    }
+
+    pBtConc->nReadIndex = iOut;
+  }
+
+  return SQLITE_OK;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Sort the aWrite[] array by root page and key value.
+**
+** Return SQLITE_NOMEM if an OOM is encountered, or SQLITE_OK otherwise.
+*/
+static int btreeBcWriteIntkeySort(
+  BtWriteIntkey *aWrite,
+  int nWrite
+){
+  BT_MERGESORT_BODY(BtWriteIntkey, aWrite, nWrite, (
+        pA->iRoot<pB->iRoot
+    || (pA->iRoot==pB->iRoot && pA->iKey <= pB->iKey)
+  ));
+  return SQLITE_OK;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Helper function for btreeBcWriteIndexSort().
+*/
+static int btreeBcWriteIndexCmp(
+  UnpackedRecord *pUnpacked,
+  BtWrite *aWrite,
+  BtWriteIndex *pA,
+  BtWriteIndex *pB
+){
+  BtWrite *pWA = &aWrite[pA->iRoot];
+  BtWrite *pWB = &aWrite[pB->iRoot];
+  int res;
+
+  if( pWA->iRoot<pWB->iRoot ) return -1;
+  if( pWA->iRoot>pWB->iRoot ) return +1;
+
+  return btreeBcRecordCompare(
+      pUnpacked, pWA->pKeyInfo, pA->aRec, pA->nRec, 0, pB->aRec, pB->nRec, 0
+  );
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Sort the aWriteIndex[] array, first in order of root page, then record.
+** At this point the BtWriteIndex.iRoot values are actually indexes
+** into the BtConcurrent.aWrite[] array. When sorting, this index is 
+** used to find the actual root page number and the required KeyInfo
+** struct.
+**
+** After sorting, replace the BtWriteIndex.iRoot values with the actual
+** value from the aWrite[] array.
+**
+** Return SQLITE_NOMEM if an OOM is encountered, or SQLITE_OK otherwise.
+*/
+int btreeBcWriteIndexSort(
+  BtWriteIndex *aWriteIndex,
+  int nWriteIndex,
+  BtConcurrent *pBtConc
+){
+  int ii;
+
+  BT_MERGESORT_BODY(BtWriteIndex, aWriteIndex, nWriteIndex, (
+    btreeBcWriteIndexCmp(pBtConc->pUnpacked, pBtConc->aWrite, pA, pB)<=0
+  ));
+
+  /* Replace the aWriteIndex[ii].iRoot values with actual root page numbers */
+  for(ii=0; ii<nWriteIndex; ii++){
+    aWriteIndex[ii].iRoot = pBtConc->aWrite[ aWriteIndex[ii].iRoot ].iRoot;
+  }
+
+  return SQLITE_OK;
+}
+
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Check if any of the reads accumulated in pBtConc conflict with the
+** writes in the aWrite[] array. If so, return SQLITE_BUSY_SNAPSHOT.
+** Otherwise, if no conflicts are found, return SQLITE_OK.
+*/
+int btreeBcDetectIntkeyConflict(
+  BtConcurrent *pBtConc,
+  BtWriteIntkey *aWrite,
+  int nWrite
+){
+  BtReadIntkey *aRead = pBtConc->aReadIntkey;
+  int nRead = pBtConc->nReadIntkey;
+  int iRead = 0;
+  int iWrite = 0;
+
+  while( iRead<nRead && iWrite<nWrite ){
+    Pgno iRootRead = aRead[iRead].iRoot;
+    Pgno iRootWrite = aWrite[iWrite].iRoot;
+
+    if( iRootWrite < iRootRead ){
+      iWrite++;
+    }
+    else if( iRootWrite > iRootRead ){
+      iRead++;
+    }
+    else{
+      /* Same root page */
+      i64 iKey = aWrite[iWrite].iKey;
+
+      if( iKey < aRead[iRead].iMin ){
+        iWrite++;
+      }
+      else if( iKey > aRead[iRead].iMax ){
+        iRead++;
+      }
+      else{
+        /* iMin <= iKey <= iMax */
+        return SQLITE_BUSY_SNAPSHOT;  /* Conflict */
+      }
+    }
+  }
+
+  return SQLITE_OK;  /* No conflict */
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Test for conflicts between the index ranges accumulated in pBtConc
+** and the write-keys in the aWriteIndex[] array. Return 
+** SQLITE_BUSY_SNAPSHOT if there is a conflict, or SQLITE_OK otherwise.
+*/
+int btreeBcDetectIndexConflict(
+  BtConcurrent *pBtConc,
+  BtWriteIndex *aWriteIndex,
+  int nWriteIndex
+){
+  BtReadIndex *aReadIndex = pBtConc->aReadIndex;
+  int nReadIndex = pBtConc->nReadIndex;
+  int iRead = 0;
+  int iWrite = 0;
+
+  while( iRead<nReadIndex && iWrite<nWriteIndex ){
+    BtReadIndex *pRead = &aReadIndex[iRead];
+    BtWriteIndex *pWrite = &aWriteIndex[iWrite];
+
+    if( pWrite->iRoot < pRead->iRoot ){
+      iWrite++;
+    }else if( pWrite->iRoot > pRead->iRoot ){
+      iRead++;
+    }else{
+      int cmp = btreeBcRecordCompare(
+          pBtConc->pUnpacked,
+          pRead->pKeyInfo,
+          pWrite->aRec, pWrite->nRec, 0,
+          pRead->aRecMin, pRead->nRecMin, pRead->drc_min
+      );
+      if( cmp < 0 ){
+        /* Write key is less than aRecMin */
+        iWrite++;
+      }else{
+        cmp = btreeBcRecordCompare(
+            pBtConc->pUnpacked,
+            pRead->pKeyInfo,
+            pWrite->aRec, pWrite->nRec, 0,
+            pRead->aRecMax, pRead->nRecMax, pRead->drc_max
+        );
+        if( cmp<=0 ){
+          /* Write key is between aRecMin and aRecMax - conflict! */
+          return SQLITE_BUSY_SNAPSHOT;
+        }
+        /* Write key is greater than aRecMax */
+        iRead++;
+      }
+    }
+  }
+
+  return SQLITE_OK;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+*/
+static int btreeBcSharedLogEntry(BtShared *pBt, BtSharedLogEntry **ppOut){
+  BtSharedLogEntry *pRet = 0;
+  int ii;
+  int nIntkey = 0;
+  int nIndex = 0;
+  int nByte = 0;
+  BtWrite *aWrite = pBt->conc.aWrite;
+  int rc = SQLITE_OK;
+
+  /* Count the two different types of writes. */
+  for(ii=0; ii<pBt->conc.nWrite; ii++){
+    if( aWrite[ii].pKeyInfo ) nIndex++;
+  }
+  nIntkey = pBt->conc.nWrite - nIndex;
+
+  /* Allocate for the BtSharedLogEntry, and the two arrays. */
+  nByte = sizeof(BtSharedLogEntry) 
+        + nIntkey * sizeof(BtWriteIntkey)
+        + nIndex * sizeof(BtWriteIndex);
+
+  pRet = (BtSharedLogEntry*)sqlite3MallocZero(nByte);
+  if( pRet==0 ){
+    rc = SQLITE_NOMEM_BKPT;
+  }else{
+
+    /* Populate the aIntkey[] and aIndex[] arrays */
+    pRet->aIntkey = (BtWriteIntkey*)&pRet[1];
+    pRet->aIndex = (BtWriteIndex*)&pRet->aIntkey[nIntkey];
+    for(ii=0; ii<pBt->conc.nWrite; ii++){
+      if( aWrite[ii].pKeyInfo ){
+        BtWriteIndex *p = &pRet->aIndex[pRet->nIndex++];
+
+        p->iRoot = ii;
+        p->nRec = aWrite[ii].nRec;
+        p->aRec = aWrite[ii].aRec;
+
+        aWrite[ii].aRec = 0;
+
+      }else{
+        BtWriteIntkey *p = &pRet->aIntkey[pRet->nIntkey++];
+        p->iRoot = aWrite[ii].iRoot;
+        p->iKey = aWrite[ii].iKey;
+      }
+    }
+
+    /* Sort the aIntkey[] array */
+    rc = btreeBcWriteIntkeySort(pRet->aIntkey, pRet->nIntkey);
+
+    /* Sort the aIndex[] array */
+    if( rc==SQLITE_OK ){
+      rc = btreeBcWriteIndexSort(pRet->aIndex, pRet->nIndex, &pBt->conc);
+    }
+  }
+
+  *ppOut = pRet;
+  return rc;
+}
+
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** A transaction has just been committed. If BtConcurrent info was collected
+** for the transaction, add it to the BtSharedLog object.
+*/
+static int btreeBcUpdateSharedLog(
+  BtShared *pBt, 
+  u64 iBaseId, 
+  u32 iFirstFrame,
+  u32 nFrame
+){
+  int rc = SQLITE_OK;
+  if( pBt->conc.eState==BTCONC_STATE_INUSE ){
+    BtSharedLogEntry *pNew = 0;
+    BtSharedLog *pBtLog = pBt->conc.pBtLog;
+
+    rc = btreeBcSharedLogEntry(pBt, &pNew);
+    if( rc==SQLITE_OK ){
+      BtSharedLogEntry *pLast = 0;
+
+      /* Set the two versions in the new log-entry object. */
+      pNew->iBaseId = iBaseId;
+      pNew->iThisId = sqlite3PagerWalCommitId(pBt->pPager);
+      pNew->iFirstFrame = iFirstFrame;
+      pNew->nFrame = nFrame;
+
+      /* Take the BtSharedLog mutex */
+      sqlite3_mutex_enter(pBtLog->mutex);
+
+      pLast = pBtLog->pLast;
+      if( pLast && pLast->iThisId!=iBaseId ){
+        /* This new log entry does not immediately follow the previous
+        ** entry in the BtSharedLog object. This means that an external
+        ** process wrote to the db, or some connection in this process
+        ** wrote to the db but did not update the BtSharedLog for some
+        ** reason. Either way, discard the entire contents of the 
+        ** BtSharedLog before adding pNew as the first and only entry. */
+        while( pBtLog->pFirst ){
+          btreeBcRemoveOldest(pBtLog);
+        }
+      }
+
+      if( (iFirstFrame & 0x7FFFFFFF)==1 ){
+        /* This was the first entry written into a wal file. *-wal if the
+        ** 0x80000000 bit of iFirst is clear, or *-wal2 if it is set.
+        ** Either way, remove all entries corresponding to that wal file
+        ** from the BtSharedLog. The initial snapshot belonging to each of
+        ** these entries is no longer available, so there can be no chance
+        ** of it being required.  */
+        const u32 m = 0x80000000;
+        u32 v = (iFirstFrame & m);
+        while( pBtLog->pFirst && (pBtLog->pFirst->iFirstFrame & m)==v ){
+          btreeBcRemoveOldest(pBtLog);
+        }
+      }
+
+      /* Link the new entry into the BtSharedLog object. */
+      if( pBtLog->pLast ){
+        pBtLog->pLast->pLogNext = pNew;
+      }else{
+        pBtLog->pFirst = pNew;
+      }
+      pBtLog->pLast = pNew;
+      pBtLog->nEntry++;
+
+      /* Free any old log-entries no longer required. TODO: Could do
+      ** this by querying wal.c to see what snapshots are still available
+      ** or in use. Maybe we can even lock the required BtSharedLogEntry
+      ** objects in memory when each concurrent transaction is opened. */
+      while( pBtLog->nEntry>sqlite3GlobalConfig.nMaxSharedLogEntry ){
+        btreeBcRemoveOldest(pBtLog);
+      }
+
+      /* Release BtSharedLog mutex */
+      sqlite3_mutex_leave(pBtLog->mutex);
+    }
+  }
+
+  return rc;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Clear the state of pBt->conc.
+*/
+static void btreeBcEndTransaction(sqlite3 *db, BtShared *pBt){
+  if( pBt->conc.eState!=BTCONC_STATE_NONE ){
+
+    {
+      BtReadIndex *aRead = pBt->conc.aReadIndex;
+      int ii;
+      for(ii=0; ii<pBt->conc.nReadIndex; ii++){
+        sqlite3_free(aRead[ii].aRecMin);
+        sqlite3_free(aRead[ii].aRecMax);
+        sqlite3KeyInfoUnref(aRead[ii].pKeyInfo);
+      }
+      sqlite3_free(aRead);
+
+      sqlite3_free(pBt->conc.aReadIntkey);
+
+      pBt->conc.aReadIndex = 0;
+      pBt->conc.nReadIndex = 0;
+      pBt->conc.nReadIndexAlloc = 0;
+      pBt->conc.aReadIntkey = 0;
+      pBt->conc.nReadIntkey = 0;
+      pBt->conc.nReadIntkeyAlloc = 0;
+    }
+
+    {
+      btreeBcFreeWriteArray(pBt->conc.aWrite, pBt->conc.nWrite);
+      sqlite3_free(pBt->conc.aWrite);
+      pBt->conc.aWrite = 0;
+      pBt->conc.nWrite = 0;
+      pBt->conc.nWriteAlloc = 0;
+    }
+
+    sqlite3DbFree(db, pBt->conc.pUnpacked);
+    pBt->conc.pUnpacked = 0;
+    pBt->conc.nUnpackedField = 0;
+    pBt->conc.eState = BTCONC_STATE_NONE;
+  }
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+*/
+static void btreeBcDisconnect(BtShared *pBt){
+  if( pBt->conc.pBtLog ){
+    BtSharedLog *pFree = pBt->conc.pBtLog;
+    pBt->conc.pBtLog = 0;
+
+    sqlite3_mutex_enter( sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MAIN) );
+    pFree->nRef--;
+    if( pFree->nRef==0 ){
+      BtSharedLog **pp = &pGlobalBtSharedLog;
+      while( *pp!=pFree ){ pp = &(*pp)->pSharedNext; }
+      *pp = (*pp)->pSharedNext;
+    }else{
+      pFree = 0;
+    }
+    sqlite3_mutex_leave( sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MAIN) );
+
+    if( pFree ){
+      /* TODO: Free existing log entries */
+      while( pFree->pFirst ){
+        btreeBcRemoveOldest(pFree);
+      }
+
+      sqlite3_mutex_free(pFree->mutex);
+      sqlite3_free(pFree);
+    }
+  }
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Finish a scan.
+*/
+static int btreeBcScanFinish(BtCursor *pCsr){
+  int rc = SQLITE_OK;
+  BtConcurrent *pBtConc = &pCsr->pBt->conc;
+  if( pBtConc->eState==BTCONC_STATE_INUSE && pCsr->iScanIndex>0 ){
+    int bIsValid = sqlite3BtreeCursorIsValidNN(pCsr);
+
+    if( pCsr->pKeyInfo ){
+      BtReadIndex *p = &pBtConc->aReadIndex[pCsr->iScanIndex-1];
+      u8 *aRec = 0;
+      int nRec = 0;
+
+      if( sqlite3BtreeCursorIsValidNN(pCsr) ){
+        nRec = sqlite3BtreePayloadSize(pCsr);
+        aRec = sqlite3_malloc(nRec + 8+9);
+        if( !aRec ){
+          rc = SQLITE_NOMEM_BKPT;
+        }else{
+          rc = sqlite3BtreePayload(pCsr, 0, nRec, aRec);
+        }
+        if( rc!=SQLITE_OK ){
+          sqlite3_free(aRec);
+          return rc;
+        }
+      }
+
+      assert( pCsr->eScanType==BTCONC_READ_MOVETO||(!p->aRecMin&&!p->nRecMin));
+      assert( p->aRecMax==0 && p->nRecMax==0 );
+
+      switch( pCsr->eScanType ){
+        case BTCONC_READ_FIRST: {
+          p->aRecMax = aRec;
+          p->nRecMax = nRec;
+          break;
+        }
+
+        case BTCONC_READ_LAST: {
+          p->aRecMin = aRec;
+          p->nRecMin = nRec;
+          break;
+        }
+
+        case BTCONC_READ_MOVETO: {
+          /* The value used in the seek op is currently stored in p->aRecMin */
+
+          if( pCsr->iScanDir==0 ){
+            if( aRec==0 ){
+              sqlite3_free(p->aRecMin);
+              p->aRecMin = 0;
+              p->nRecMin = 0;
+              p->drc_min = 0;
+            }else{
+              pCsr->iScanDir = btreeBcRecordCompare(
+                  pBtConc->pUnpacked, p->pKeyInfo, 
+                  aRec, nRec, 0,
+                  p->aRecMin, p->nRecMin, p->drc_min
+              );
+            }
+          }
+
+          if( pCsr->iScanDir<0 ){
+            p->aRecMax = p->aRecMin;
+            p->nRecMax = p->nRecMin;
+            p->drc_max = p->drc_min;
+            p->aRecMin = aRec;
+            p->nRecMin = nRec;
+            p->drc_min = 0;
+          }else{
+            p->aRecMax = aRec;
+            p->nRecMax = nRec;
+          }
+
+          break;
+        }
+      }
 
-    for(i=pMap->nSvpt; i<nSvpt; i++){
-      pMap->aSvpt[i] = pMap->nRollback;
+    }else{
+      i64 iKey = bIsValid ? sqlite3BtreeIntegerKey(pCsr) : 0;
+      BtReadIntkey *p = &pBtConc->aReadIntkey[pCsr->iScanIndex-1];
+      switch( pCsr->eScanType ){
+        case BTCONC_READ_FIRST: {
+          p->iMin = SMALLEST_INT64;
+          p->iMax = bIsValid ? iKey : LARGEST_INT64;
+          break;
+        }
+
+        case BTCONC_READ_LAST: {
+          p->iMax = LARGEST_INT64;
+          if( bIsValid ){
+            p->iMin = iKey;
+          }else{
+            p->iMin = SMALLEST_INT64;
+          }
+          break;
+        }
+
+        case BTCONC_READ_MOVETO: {
+          /* The value used in the seek op is currently stored in p->iMin */
+          if( bIsValid ){
+            p->iMax = iKey;
+          }else{
+            if( pCsr->iScanDir==0 ){
+              p->iMin = SMALLEST_INT64;
+              p->iMax = LARGEST_INT64;
+            }else if( pCsr->iScanDir>0 ){
+              p->iMax = LARGEST_INT64;
+            }else{
+              p->iMax = p->iMin;
+              p->iMin = SMALLEST_INT64;
+            }
+          }
+          if( p->iMin>p->iMax ){
+            SWAP(i64, p->iMin, p->iMax);
+          }
+          break;
+        }
+      }
     }
-    pMap->nSvpt = nSvpt;
+    pCsr->iScanIndex = 0;
+    pCsr->iScanDir = 0;
+  }
+  return rc;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Allocate and return a BtReadIntkey object.
+*/
+static BtReadIntkey *btreeBcIntkeyRead(
+  BtConcurrent *pBtConc,
+  int *piScanIndex,
+  int *pRc
+){
+  int rc = btreeBcGrowArray(
+      (void**)&pBtConc->aReadIntkey, 
+      &pBtConc->nReadIntkey, 
+      &pBtConc->nReadIntkeyAlloc, 
+      sizeof(BtReadIntkey)
+  );
+  if( rc==SQLITE_OK ){
+    pBtConc->nReadIntkey++;
+    *piScanIndex = pBtConc->nReadIntkey;
+    return &pBtConc->aReadIntkey[pBtConc->nReadIntkey-1];
+  }
+  return 0;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Allocate and return a BtReadIndex object.
+*/
+static BtReadIndex *btreeBcIndexRead(
+  BtConcurrent *pBtConc,
+  int *piScanIndex,
+  int *pRc
+){
+  int rc = btreeBcGrowArray(
+      (void**)&pBtConc->aReadIndex, 
+      &pBtConc->nReadIndex, 
+      &pBtConc->nReadIndexAlloc, 
+      sizeof(BtReadIndex)
+  );
+  if( rc==SQLITE_OK ){
+    pBtConc->nReadIndex++;
+    *piScanIndex = pBtConc->nReadIndex;
+    return &pBtConc->aReadIndex[pBtConc->nReadIndex-1];
   }
+  return 0;
+}
 
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Ensure the UnpackedRecord structure at BtConcurrent.pUnpacked is
+** large enough to unpack keys that are compared using pKeyInfo.
+** Return SQLITE_NOMEM if an OOM error is encountered, or SQLITE_OK
+** otherwise.
+*/
+static int btreeBcUpdateUnpacked(BtConcurrent *pBtConc, KeyInfo *pKeyInfo){
+  if( pKeyInfo->nKeyField>pBtConc->nUnpackedField ){
+    sqlite3 *db = pKeyInfo->db;
+    sqlite3DbFree(db, pBtConc->pUnpacked );
+    pBtConc->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pKeyInfo);
+    pBtConc->nUnpackedField = pKeyInfo->nKeyField;
+    return db->mallocFailed ? SQLITE_NOMEM : SQLITE_OK;
+  }
   return SQLITE_OK;
 }
 
 /* !defined(SQLITE_OMIT_CONCURRENT)
 **
-** Rollback (if op==SAVEPOINT_ROLLBACK) or release (if op==SAVEPOINT_RELEASE)
-** savepoint iSvpt.
+** Start a scan.
 */
-static void btreePtrmapEnd(BtShared *pBt, int op, int iSvpt){
-  BtreePtrmap *pMap = pBt->pMap;
-  if( pMap ){
-    assert( op==SAVEPOINT_ROLLBACK || op==SAVEPOINT_RELEASE );
-    assert( iSvpt>=0 || (iSvpt==-1 && op==SAVEPOINT_ROLLBACK) );
-    if( iSvpt<0 ){
-      pMap->nSvpt = 0;
-      pMap->nRollback = 0;
-      memset(pMap->aPtr, 0, sizeof(Pgno) * pMap->nPtrAlloc);
-    }else if( iSvpt<pMap->nSvpt ){
-      if( op==SAVEPOINT_ROLLBACK ){
-        int ii;
-        for(ii=pMap->nRollback-1; ii>=pMap->aSvpt[iSvpt]; ii--){
-          RollbackEntry *p = &pMap->aRollback[ii];
-          PtrmapEntry *pEntry = &pMap->aPtr[p->pgno - pMap->iFirst];
-          pEntry->parent = p->parent;
-          pEntry->eType = p->eType;
-        }
+static int btreeBcScanStart(
+  BtCursor *pCsr,                 /* Cursor to start scan on */
+  int eRead,                      /* BTCONC_READ_XXX value */
+  i64 iKey, UnpackedRecord *pKey  /* Key value for BTCONC_READ_MOVETO */
+){
+  int rc = SQLITE_OK;
+  BtConcurrent *pBtConc = &pCsr->pBt->conc;
+
+  rc = btreeBcScanFinish(pCsr);
+  if( rc==SQLITE_OK && pBtConc->eState==BTCONC_STATE_INUSE ){
+    pCsr->iScanDir = 0;
+    pCsr->eScanType = eRead;
+    if( pCsr->pKeyInfo ){
+      BtReadIndex *pRead = btreeBcIndexRead(pBtConc, &pCsr->iScanIndex, &rc);
+      if( pRead==0 ) return rc;
+      memset(pRead, 0, sizeof(*pRead));
+      pRead->iRoot = pCsr->pgnoRoot;
+      pRead->pKeyInfo = sqlite3KeyInfoRef(pCsr->pKeyInfo);
+      rc = btreeBcUpdateUnpacked(pBtConc, pRead->pKeyInfo);
+      if( rc==SQLITE_OK && pKey ){
+        int drc = 0;
+        assert( eRead==BTCONC_READ_MOVETO );
+        rc = sqlite3BcSerializeRecord(pKey, &pRead->aRecMin, &pRead->nRecMin);
+        assert( pKey->default_rc>=-1 && pKey->default_rc<=+1 );
+        assert( pCsr->pKeyInfo->nAllField>=pKey->nField );
+        drc = pKey->default_rc * (pCsr->pKeyInfo->nAllField+1 - pKey->nField);
+        pRead->drc_min = (i16)drc;
+      }
+    }else{
+      BtReadIntkey *pRead = btreeBcIntkeyRead(pBtConc, &pCsr->iScanIndex, &rc);
+      if( pRead==0 ) return rc;
+      pRead->iRoot = pCsr->pgnoRoot;
+
+      pRead->iMin = iKey;
+      pRead->iMax = 0;
+      if( eRead==BTCONC_READ_COUNT ){
+        pRead->iMax = LARGEST_INT64;
+        pRead->iMin = SMALLEST_INT64;
       }
-      pMap->nSvpt = iSvpt + (op==SAVEPOINT_ROLLBACK);
-      pMap->nRollback = pMap->aSvpt[iSvpt];
+    }
+
+    if( eRead==BTCONC_READ_COUNT ){
+      pCsr->iScanIndex = 0;
+      pCsr->iScanDir = 0;
     }
   }
+
+  return rc;
 }
 
 /* !defined(SQLITE_OMIT_CONCURRENT)
 **
-** This function is called after an CONCURRENT transaction is opened on the
-** database. It allocates the BtreePtrmap structure used to track pointers
-** to allocated pages and zeroes the nFree/iTrunk fields in the database 
-** header on page 1.
+** Append an insert to the BtConcurrent object, if it is in use. Return
+** SQLITE_OK if successful, or SQLITE_NOMEM if an OOM occurs.
 */
-static int btreePtrmapAllocate(BtShared *pBt){
+static int btreeBcInsert(
+  BtCursor *pCsr,                 /* Cursor to write to */
+  const BtreePayload *pPay        /* Payload to write */
+){
+  BtConcurrent *pBtConc = &pCsr->pBt->conc;
   int rc = SQLITE_OK;
-  if( pBt->pMap==0 ){
-    BtreePtrmap *pMap = sqlite3_malloc(sizeof(BtreePtrmap));
-    if( pMap==0 ){
-      rc = SQLITE_NOMEM;
+
+  if( SQLITE_OK==(rc = btreeBcScanFinish(pCsr))
+   && pBtConc->eState==BTCONC_STATE_INUSE 
+   && SQLITE_OK==(rc = btreeBcGrowWriteArray(pBtConc))
+  ){
+    int nByte = (pPay->pKey? pPay->nKey : pPay->nData);
+    BtWrite *p = &pBtConc->aWrite[pBtConc->nWrite];
+    memset(p, 0, sizeof(BtWrite));
+    pBtConc->nWrite++;
+    p->iRoot = pCsr->pgnoRoot;
+    p->aRec = sqlite3_malloc(nByte);
+    if( p->aRec==0 ){
+      rc = SQLITE_NOMEM_BKPT;
+    }else if( pCsr->pKeyInfo ){
+      p->pKeyInfo = sqlite3KeyInfoRef(pCsr->pKeyInfo);
+      p->nRec = pPay->nKey;
+      memcpy(p->aRec, pPay->pKey, p->nRec);
+      rc = btreeBcUpdateUnpacked(pBtConc, p->pKeyInfo);
     }else{
-      memset(&pBt->pPage1->aData[32], 0, sizeof(u32)*2);
-      memset(pMap, 0, sizeof(BtreePtrmap));
-      pMap->iFirst = pBt->nPage + 1;
-      pBt->pMap = pMap;
+      p->iKey = pPay->nKey;
+      p->nRec = pPay->nData;
+      memcpy(p->aRec, pPay->pData, p->nRec);
     }
   }
+
   return rc;
 }
 
 /* !defined(SQLITE_OMIT_CONCURRENT)
 **
-** Free any BtreePtrmap structure allocated by an earlier call to
-** btreePtrmapAllocate().
+** The entry under pCur is to be deleted. If this is a BEGIN CONCURRENT
+** transaction, add an entry for the delete to the BtConcurrent object.
+** Return SQLITE_OK if successful, or an SQLite error code (SQLITE_NOMEM)
+** if something goes wrong.
 */
-static void btreePtrmapDelete(BtShared *pBt){
-  BtreePtrmap *pMap = pBt->pMap;
-  if( pMap ){
-    sqlite3_free(pMap->aRollback);
-    sqlite3_free(pMap->aPtr);
-    sqlite3_free(pMap->aSvpt);
-    sqlite3_free(pMap);
-    pBt->pMap = 0;
+static int btreeBcDelete(BtCursor *pCsr){
+  BtConcurrent *pBtConc = &pCsr->pBt->conc;
+  int rc;
+
+  if( SQLITE_OK==(rc = btreeBcScanFinish(pCsr))
+   && pBtConc->eState==BTCONC_STATE_INUSE 
+   && SQLITE_OK==(rc = btreeBcGrowWriteArray(pBtConc))
+  ){
+    BtWrite *p = &pBtConc->aWrite[pBtConc->nWrite];
+    int nRec = 0;
+
+    pBtConc->nWrite++;
+    memset(p, 0, sizeof(BtWrite));
+    p->bDel = 1;
+    p->iRoot = pCsr->pgnoRoot;
+    if( pCsr->pKeyInfo ){
+      p->pKeyInfo = sqlite3KeyInfoRef(pCsr->pKeyInfo);
+      p->nRec = sqlite3BtreePayloadSize(pCsr);
+      p->aRec = sqlite3_malloc(p->nRec + 8+9);
+      if( p->aRec==0 ){
+        rc = SQLITE_NOMEM_BKPT;
+      }else{
+        rc = sqlite3BtreePayload(pCsr, 0, p->nRec, p->aRec);
+      }
+      if( rc==SQLITE_OK ){
+        rc = btreeBcUpdateUnpacked(pBtConc, p->pKeyInfo);
+      }
+    }else{
+      p->iKey = sqlite3BtreeIntegerKey(pCsr);
+    }
   }
+
+  return rc;
 }
 
-/*
-** Check that the pointer-map does not contain any entries with a parent
-** page of 0. Call sqlite3_log() multiple times to output the entire
-** data structure if it does.
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Close all cursors open on the b-tree object. It is the responsibility
+** of the caller to ensure none of the cursors will be used after
+** this call.
 */
-static void btreePtrmapCheck(BtShared *pBt, Pgno nPage){
-  Pgno i;
-  int bProblem = 0;
-  BtreePtrmap *p = pBt->pMap;
+static void btreeCloseAllCursors(BtShared *pBt){
+  while( pBt->pCursor ){
+    BtCursor *pCsr = pBt->pCursor;
+    sqlite3BtreeCloseCursor(pCsr);
+    sqlite3_free(pCsr);
+  }
+}
 
-  for(i=p->iFirst; i<=nPage; i++){
-    PtrmapEntry *pEntry = &p->aPtr[i-p->iFirst];
-    if( pEntry->eType==PTRMAP_OVERFLOW1
-     || pEntry->eType==PTRMAP_OVERFLOW2
-     || pEntry->eType==PTRMAP_BTREE
-    ){
-      if( pEntry->parent==0 ){
-        bProblem = 1;
-        break;
-      }
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Allocate memory for and open a cursor.
+*/
+static int btreeCursorOpen(
+  Btree *p, 
+  Pgno iRoot, 
+  int flags, 
+  KeyInfo *pKeyInfo,
+  BtCursor **ppCsr
+){
+  BtCursor *pCsr = 0;
+  int rc = SQLITE_OK;
+
+  pCsr = (BtCursor*)sqlite3MallocZero(sqlite3BtreeCursorSize());
+  if( pCsr==0 ){
+    rc = SQLITE_NOMEM_BKPT;
+  }else{
+    rc = sqlite3BtreeCursor(p, iRoot, flags, pKeyInfo, pCsr);
+    if( rc!=SQLITE_OK ){
+      sqlite3_free(pCsr);
+      pCsr = 0;
     }
   }
+  *ppCsr = pCsr;
+  return rc;
+}
 
-  if( bProblem ){
-    for(i=p->iFirst; i<=nPage; i++){
-      PtrmapEntry *pEntry = &p->aPtr[i-p->iFirst];
-      sqlite3_log(SQLITE_CORRUPT, 
-          "btreePtrmapCheck: pgno=%d eType=%d parent=%d", 
-          (int)i, (int)pEntry->eType, (int)pEntry->parent
+/* Used by btreeBcTryLogicalCommit() */
+static int btreeMoveto(BtCursor*, const void*, i64, int, int*);
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Attempt to validate and write the transaction in BtShared.conc to the
+** page cache. Return SQLITE_OK if successful. SQLITE_BUSY_SNAPSHOT if 
+** the transaction cannot be written because validation failed, or an
+** SQLite error code if some other error occurred.
+*/
+static int btreeBcTryLogicalCommit(Btree *p){
+  int rc = SQLITE_OK;
+  BtShared *pBt = p->pBt;
+
+  u64 cksum1 = 0;
+  u64 cksum2 = 0;
+
+  BtConcurrent *pBtConc = &pBt->conc;
+  assert( pBtConc->eState==BTCONC_STATE_INUSE );
+  pBtConc->eState = BTCONC_STATE_RETIRED;
+
+  /* If the read arrays have not been pre-sorted, sort them now. */
+  if( pBtConc->bReadSorted==0 ){
+    rc = btreeBcReadIntkeySort(pBtConc->aReadIntkey, &pBtConc->nReadIntkey);
+    if( rc==SQLITE_OK ){
+      rc = btreeBcReadIndexSort(pBtConc);
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    BtSharedLog *pBtLog = pBt->conc.pBtLog;
+    BtSharedLogEntry *pEntry;
+    u64 iLiveId = sqlite3PagerWalLiveId(pBt->pPager);
+
+    sqlite3_mutex_enter(pBtLog->mutex);
+
+    /* Skip past log entries corresponding to transactions that were 
+    ** committed before the snapshot on which this transaction is based
+    ** was created. */
+    for(pEntry=pBtLog->pFirst; pEntry; pEntry=pEntry->pLogNext){
+      if( pEntry->iBaseId==pBt->conc.iBase ) break;
+    }
+
+    if( pEntry==0 || pBtLog->pLast->iThisId!=iLiveId ){
+      /* The shared-log does not contain all required entries. Either it
+      ** does not have entries back as far as the snapshot that this
+      ** snapshot was prepared against (pEntry==0), or the last entry
+      ** is older than the current live head of the wal file. Either
+      ** way, logical validation cannot run.  */
+      rc = SQLITE_BUSY_SNAPSHOT;
+    }else{
+      /* We know the frame that page-level validation failed at. Since
+      ** page-level validation scans transactions in the order committed,
+      ** skip over BtSharedLogEntry structures until we come to the
+      ** one that wrote the conflicting frame. Everything before this
+      ** in the list is guaranteed not to conflict.  */
+      u32 iConf = p->db->aCommit[SQLITE_COMMIT_CONFLICT_FRAME];
+      while( iConf<pEntry->iFirstFrame 
+          || iConf>=(pEntry->iFirstFrame+pEntry->nFrame) 
+      ){
+        pEntry = pEntry->pLogNext;
+        assert( pEntry );
+      }
+    }
+
+    /* Loop through all remaining shared-log entries checking for
+    ** conflicts. If none are found, then the transaction may be
+    ** committed. This block sets rc to SQLITE_BUSY_SNAPSHOT if conflicts
+    ** are found.  */
+    for(; pEntry && rc==SQLITE_OK; pEntry=pEntry->pLogNext){
+      rc = btreeBcDetectIntkeyConflict(
+          pBtConc, pEntry->aIntkey, pEntry->nIntkey
       );
+      if( rc==SQLITE_OK ){
+        rc = btreeBcDetectIndexConflict(pBtConc,pEntry->aIndex,pEntry->nIndex);
+      }
     }
-    abort();
+    sqlite3_mutex_leave(pBtLog->mutex);
+  }
+
+  if( rc==SQLITE_OK ){
+    /* Update the snapshot to the head of the wal file. Drop the contents
+    ** of the page-cache at the same time. Then ensure that the database
+    ** size is set correctly at both the btree and pager level.  */
+    btreePtrmapDelete(pBt);
+    rc = sqlite3PagerUpgradeSnapshot(pBt->pPager, pBt->pPage1->pDbPage, 1);
+    if( rc==SQLITE_OK ){
+      const u8 *aPg1 = (const u8*)pBt->pPage1->pDbPage->pData;
+      u32 dbSize = get4byte(&aPg1[28]);
+      sqlite3PagerSetDbsize(pBt->pPager, dbSize);
+      pBt->nPage = dbSize;
+    }
+  }
+
+  /* If everything still looks ok, proceed with the commit. */
+  if( rc==SQLITE_OK ){
+    int ii;
+
+    for(ii=0; ii<pBtConc->nWrite; ii++){
+      BtWrite *pWrite = &pBtConc->aWrite[ii];
+      BtCursor *pCsr = 0;
+      for(pCsr=pBt->pCursor; pCsr; pCsr=pCsr->pNext){
+        if( pCsr->pgnoRoot==pWrite->iRoot ) break;
+      }
+
+      if( pCsr==0 ){
+        rc = btreeCursorOpen(p, pWrite->iRoot, 
+            BTREE_WRCSR, pWrite->pKeyInfo, &pCsr
+        );
+      }
+
+      if( pCsr ){
+        if( pWrite->bDel ){
+          int res = 0;
+          if( pWrite->pKeyInfo ){
+            rc = btreeMoveto(pCsr, pWrite->aRec, pWrite->nRec, 0, &res);
+          }else{
+            rc = btreeMoveto(pCsr, 0, pWrite->iKey, 0, &res);
+          }
+          if( rc==SQLITE_OK && res==0 ){
+            sqlite3BtreeDelete(pCsr, 0);
+          }
+        }else{
+          BtreePayload pay;
+          memset(&pay, 0, sizeof(pay));
+          if( pWrite->pKeyInfo ){
+            pay.pKey = (const void*)pWrite->aRec;
+            pay.nKey = pWrite->nRec;
+          }else{
+            pay.nKey = pWrite->iKey;
+            pay.pData = (const void*)pWrite->aRec;
+            pay.nData = pWrite->nRec;
+          }
+          rc = sqlite3BtreeInsert(pCsr, &pay, 0, 0);
+        }
+      }
+    }
+
+    btreeCloseAllCursors(pBt);
   }
+
+  pBtConc->eState = BTCONC_STATE_INUSE;
+  return rc;
 }
 
+
 #else  /* SQLITE_OMIT_CONCURRENT */
 # define btreePtrmapAllocate(x) SQLITE_OK
 # define btreePtrmapDelete(x) 
 # define btreePtrmapBegin(x,y)  SQLITE_OK
 # define btreePtrmapEnd(x,y,z) 
 # define btreePtrmapCheck(y,z) 
+
+# define btreeBcEndTransaction(db, pBt)
 #endif /* SQLITE_OMIT_CONCURRENT */
 
 static void releasePage(MemPage *pPage);  /* Forward reference */
@@ -3204,6 +4525,7 @@ int sqlite3BtreeClose(Btree *p){
     ** Clean out and delete the BtShared object.
     */
     assert( !pBt->pCursor );
+    btreeBcDisconnect(pBt);
     sqlite3PagerClose(pBt->pPager, p->db);
     if( pBt->xFreeSchema && pBt->pSchema ){
       pBt->xFreeSchema(pBt->pSchema);
@@ -4036,7 +5358,10 @@ static SQLITE_NOINLINE int btreeBeginTrans(
 trans_begun:
 #ifndef SQLITE_OMIT_CONCURRENT
   if( bConcurrent && rc==SQLITE_OK && sqlite3PagerIsWal(pBt->pPager) ){
-    rc = sqlite3PagerBeginConcurrent(pBt->pPager);
+    rc = btreeBcBeginConcurrent(pBt);
+    if( rc==SQLITE_OK ){
+      rc = sqlite3PagerBeginConcurrent(pBt->pPager);
+    }
     if( rc==SQLITE_OK && wrflag ){
       rc = btreePtrmapAllocate(pBt);
     }
@@ -4662,7 +5987,7 @@ static int btreeFixUnlocked(Btree *p){
   u32 nFree = get4byte(&p1[36]);
 
   assert( pBt->pMap );
-  rc = sqlite3PagerUpgradeSnapshot(pPager, pPage1->pDbPage);
+  rc = sqlite3PagerUpgradeSnapshot(pPager, pPage1->pDbPage, 0);
   assert( p1==pPage1->aData );
 
   if( rc==SQLITE_OK ){
@@ -4767,10 +6092,12 @@ static int btreeFixUnlocked(Btree *p){
 int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){
   int rc = SQLITE_OK;
   if( p->inTrans==TRANS_WRITE ){
+    u64 iBaseId = 0;
     BtShared *pBt = p->pBt;
     sqlite3BtreeEnter(p);
 
 #ifndef SQLITE_OMIT_CONCURRENT
+    iBaseId = sqlite3PagerWalLiveId(pBt->pPager);
     memset(p->aCommit, 0, sizeof(p->aCommit));
 #endif
 #ifndef SQLITE_OMIT_AUTOVACUUM
@@ -4803,6 +6130,11 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){
 
       p->aCommit[SQLITE_COMMIT_FIRSTFRAME] = iPrev+1;
       p->aCommit[SQLITE_COMMIT_NFRAME] = iCurrent-iPrev;
+
+      rc = btreeBcUpdateSharedLog(pBt, iBaseId, 
+          p->aCommit[SQLITE_COMMIT_FIRSTFRAME],
+          p->aCommit[SQLITE_COMMIT_NFRAME]
+      );
     }
 #endif
     sqlite3BtreeLeave(p);
@@ -4851,6 +6183,8 @@ static void btreeEndTransaction(Btree *p){
   ** Also call PagerEndConcurrent() to ensure that the pager has discarded
   ** the record of all pages read within the transaction.  */
   btreePtrmapDelete(pBt);
+  btreeBcEndTransaction(db, pBt);
+  pBt->conc.eState = BTCONC_STATE_NONE;
   sqlite3PagerEndConcurrent(pBt->pPager);
   btreeIntegrity(p);
 }
@@ -5319,6 +6653,15 @@ int sqlite3BtreeCloseCursor(BtCursor *pCur){
     BtShared *pBt = pCur->pBt;
     sqlite3BtreeEnter(pBtree);
     assert( pBt->pCursor!=0 );
+
+    if( SQLITE_OK!=btreeBcScanFinish(pCur) ){
+      /* Allocation failed in btreeBcScanFinish(), but we have no way
+      ** to return the error to the user. So just disable the BtConcurrent
+      ** object.  */
+      assert( pBt->conc.eState==BTCONC_STATE_INUSE );
+      pBt->conc.eState = BTCONC_STATE_RETIRED;
+    }
+
     if( pBt->pCursor==pCur ){
       pBt->pCursor = pCur->pNext;
     }else{
@@ -6176,6 +7519,10 @@ int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
 
   assert( cursorOwnsBtShared(pCur) );
   assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
+
+  rc = btreeBcScanStart(pCur, BTCONC_READ_FIRST, 0, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
   rc = moveToRoot(pCur);
   if( rc==SQLITE_OK ){
     assert( pCur->pPage->nCell>0 );
@@ -6256,6 +7603,13 @@ int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
   assert( cursorOwnsBtShared(pCur) );
   assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
 
+#ifndef SQLITE_OMIT_CONCURRENT
+  {
+    int rc = btreeBcScanStart(pCur, BTCONC_READ_LAST, 0, 0);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+#endif
+
   /* If the cursor already points to the last entry, this is a no-op. */
   if( CURSOR_VALID==pCur->eState && (pCur->curFlags & BTCF_AtLast)!=0 ){
     assert( cursorIsAtLastEntry(pCur) || CORRUPT_DB );
@@ -6302,6 +7656,9 @@ int sqlite3BtreeTableMoveto(
   assert( pCur->pKeyInfo==0 );
   assert( pCur->eState!=CURSOR_VALID || pCur->curIntKey!=0 );
 
+  rc = btreeBcScanStart(pCur, BTCONC_READ_MOVETO, intKey, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
   /* If the cursor is already positioned at the point we are trying
   ** to move to, then just return without doing any work */
   if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 ){
@@ -6532,6 +7889,9 @@ int sqlite3BtreeIndexMoveto(
   assert( pRes );
   assert( pCur->pKeyInfo!=0 );
 
+  rc = btreeBcScanStart(pCur, BTCONC_READ_MOVETO, 0, pIdxKey);
+  if( rc!=SQLITE_OK ) return rc;
+
 #ifdef SQLITE_DEBUG
   pCur->pBtree->nSeek++;   /* Performance measurement during testing */
 #endif
@@ -6870,6 +8230,9 @@ int sqlite3BtreeNext(BtCursor *pCur, int flags){
   UNUSED_PARAMETER( flags );  /* Used in COMDB2 but not native SQLite */
   assert( cursorOwnsBtShared(pCur) );
   assert( flags==0 || flags==1 );
+#ifndef SQLITE_OMIT_CONCURRENT
+  pCur->iScanDir = +1;
+#endif
   pCur->info.nSize = 0;
   pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl);
   if( pCur->eState!=CURSOR_VALID ) return btreeNext(pCur);
@@ -6961,6 +8324,9 @@ int sqlite3BtreePrevious(BtCursor *pCur, int flags){
   assert( cursorOwnsBtShared(pCur) );
   assert( flags==0 || flags==1 );
   UNUSED_PARAMETER( flags );  /* Used in COMDB2 but not native SQLite */
+#ifndef SQLITE_OMIT_CONCURRENT
+  pCur->iScanDir = -1;
+#endif
   pCur->curFlags &= ~(BTCF_AtLast|BTCF_ValidOvfl|BTCF_ValidNKey);
   pCur->info.nSize = 0;
   if( pCur->eState!=CURSOR_VALID
@@ -9871,6 +11237,22 @@ static int btreeOverwriteCell(BtCursor *pCur, const BtreePayload *pX){
   }
 }
 
+#ifndef SQLITE_OMIT_CONCURRENT
+# define BTCONC_DISABLE(pBtConc)            \
+    int eConcStateSave = pBtConc->eState;   \
+    pBtConc->eState = BTCONC_STATE_RETIRED
+
+# define BTCONC_RESTORE(pBtConc) pBtConc->eState = eConcStateSave
+
+# define SAVE_BTCONC \
+  int eConcStateSave = pCur->pBt->conc.eState;   \
+  pCur->pBt->conc.eState = BTCONC_STATE_RETIRED
+
+# define RESTORE_BTCONC pCur->pBt->conc.eState = eConcStateSave
+#else
+# define SAVE_BTCONC
+# define RESTORE_BTCONC
+#endif
 
 /*
 ** Insert a new record into the BTree.  The content of the new record
@@ -9920,6 +11302,11 @@ int sqlite3BtreeInsert(
   assert( (flags & (BTREE_SAVEPOSITION|BTREE_APPEND|BTREE_PREFORMAT))==flags );
   assert( (flags & BTREE_PREFORMAT)==0 || seekResult || pCur->pKeyInfo==0 );
 
+#ifndef SQLITE_OMIT_CONCURRENT
+  rc = btreeBcInsert(pCur, pX);
+  if( rc!=SQLITE_OK ) return rc;
+#endif
+
   /* Save the positions of any other cursors open on this table.
   **
   ** In some cases, the call to btreeMoveto() below is a no-op. For
@@ -9968,6 +11355,8 @@ int sqlite3BtreeInsert(
   assert( (flags & BTREE_PREFORMAT) || (pX->pKey==0)==(pCur->pKeyInfo==0) );
 
   if( pCur->pKeyInfo==0 ){
+    SAVE_BTCONC;
+
     assert( pX->pKey==0 );
     /* If this is an insert into a table b-tree, invalidate any incrblob
     ** cursors open on the row being replaced */
@@ -9998,6 +11387,7 @@ int sqlite3BtreeInsert(
        && pCur->info.nPayload==(u32)pX->nData+pX->nZero
       ){
         /* New entry is the same size as the old.  Do an overwrite */
+        RESTORE_BTCONC;
         return btreeOverwriteCell(pCur, pX);
       }
       assert( loc==0 );
@@ -10008,9 +11398,12 @@ int sqlite3BtreeInsert(
       */
       rc = sqlite3BtreeTableMoveto(pCur, pX->nKey,
                (flags & BTREE_APPEND)!=0, &loc);
+      RESTORE_BTCONC;
       if( rc ) return rc;
     }
+    RESTORE_BTCONC;
   }else{
+    SAVE_BTCONC;
     /* This is an index or a WITHOUT ROWID table */
 
     /* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
@@ -10036,8 +11429,10 @@ int sqlite3BtreeInsert(
         rc = btreeMoveto(pCur, pX->pKey, pX->nKey,
                     (flags & BTREE_APPEND)!=0, &loc);
       }
+      RESTORE_BTCONC;
       if( rc ) return rc;
     }
+    RESTORE_BTCONC;
 
     /* If the cursor is currently pointing to an entry to be overwritten
     ** and the new content is the same as as the old, then use the
@@ -10367,6 +11762,11 @@ int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
   }
   assert( pCur->eState==CURSOR_VALID );
 
+#ifndef SQLITE_OMIT_CONCURRENT
+  rc = btreeBcDelete(pCur);
+  if( rc!=SQLITE_OK ) return rc;
+#endif
+
   iCellDepth = pCur->iPage;
   iCellIdx = pCur->ix;
   pPage = pCur->pPage;
@@ -11009,6 +12409,9 @@ int sqlite3BtreeCount(sqlite3 *db, BtCursor *pCur, i64 *pnEntry){
   i64 nEntry = 0;                      /* Value to return in *pnEntry */
   int rc;                              /* Return code */
 
+  rc = btreeBcScanStart(pCur, BTCONC_READ_COUNT, 0, 0);
+  if( rc!=SQLITE_OK ) return rc;
+
   rc = moveToRoot(pCur);
   if( rc==SQLITE_EMPTY ){
     *pnEntry = 0;
@@ -12090,6 +13493,18 @@ int sqlite3BtreeExclusiveLock(Btree *p){
 #ifdef SQLITE_OMIT_CONCURRENT
   assert( db->aCommit[SQLITE_COMMIT_CONFLICT_PGNO]==0 );
 #else
+
+  if( rc==SQLITE_BUSY_SNAPSHOT 
+   && db->aCommit[SQLITE_COMMIT_CONFLICT_PGNO]>1
+   && pBt->conc.eState==BTCONC_STATE_INUSE 
+   && pBt->pCursor==0 
+  ){
+    /* Page-level locking has detected a conflict. But it is not a 
+    ** schema conflict (SQLITE_COMMIT_CONFLICT_PGNO>1) and the BtConcurrent
+    ** object is populated. So attempt a logical commit. */
+    rc = btreeBcTryLogicalCommit(p);
+  }
+
   if( (rc==SQLITE_BUSY_SNAPSHOT)
    && (pgno = db->aCommit[SQLITE_COMMIT_CONFLICT_PGNO]) 
   ){
index 431b1e2c09698cd701e839b96023b0a866acd648..2f1badacd78fdb3832b018c1016a59786f35d8eb 100644 (file)
@@ -234,6 +234,15 @@ typedef struct BtLock BtLock;
 typedef struct CellInfo CellInfo;
 typedef struct BtreePtrmap BtreePtrmap;
 
+typedef struct BtConcurrent BtConcurrent;
+typedef struct BtReadIndex BtReadIndex;
+typedef struct BtReadIntkey BtReadIntkey;
+typedef struct BtSharedLog BtSharedLog;
+typedef struct BtSharedLogEntry BtSharedLogEntry;
+typedef struct BtWrite BtWrite;
+typedef struct BtWriteIndex BtWriteIndex;
+typedef struct BtWriteIntkey BtWriteIntkey;
+
 /*
 ** This is a magic string that appears at the beginning of every
 ** SQLite database in order to identify the file as a real database.
@@ -325,6 +334,186 @@ struct BtLock {
 #define READ_LOCK     1
 #define WRITE_LOCK    2
 
+/*
+** All connections to a single database file that use BEGIN CONCURRENT within 
+** the process share an instance of this object. Its purpose is to store
+** a list of BtSharedLogEntry objects associated with the database file.
+*/
+struct BtSharedLog {
+  sqlite3_mutex *mutex;           /* Mutex protecting this object */
+  int nRef;                       /* Number of users of this struct */
+  char *zFullname;                /* Path identifying this database */
+
+  BtSharedLogEntry *pFirst;
+  BtSharedLogEntry *pLast;
+  int nEntry;                     /* Current number of entries */
+
+  /* Linked list protected by SQLITE_MUTEX_STATIC_MAIN */
+  BtSharedLog *pSharedNext;  /* Next shared log in process */
+};
+
+/*
+** Each time a BEGIN CONCURRENT transaction is committed, an instance of
+** the following object is added to the list maintained by the database
+** file's BtSharedLog object. 
+**
+** iBaseId:
+**   The sqlite3WalCommitId() value at the head of the wal file when this
+**   transaction was written.
+**
+** iThisId:
+**   The sqlite3WalCommitId() value at the head of the wal file after this
+**   transaction was written. i.e. the snapshot created by this write.
+**
+** iFirstFrame:
+**   First wal frame written by this transaction. If in a *-wal2 file, the
+**   0x80000000 bit is set.
+**
+** nFrame:
+**   Total number of frames written by this transaction.
+**
+** aIntkey/nIntkey:
+**   Array of rowids modified by this transaction.
+**
+** aIndex/nIndex:
+**   Array of index keys modified by this transaction.
+*/
+struct BtSharedLogEntry {
+  u64 iBaseId;                    /* Snapshot this transaction was based on */
+  u64 iThisId;                    /* Snapshot this transaction created */
+  u32 iFirstFrame;                /* First frame written by this transaction */
+  u32 nFrame;                     /* Number of frames written by transaction */
+
+  BtWriteIntkey *aIntkey;         /* Writes to intkeys */
+  int nIntkey;                    /* Size of aIntkey[] */
+  BtWriteIndex *aIndex;           /* Writes to indexes */
+  int nIndex;                     /* Size of aIndex[] in bytes */
+
+  BtSharedLogEntry *pLogNext;     /* Transaction commited after this one */
+};
+
+/*
+** Any more savepoints than this and the BtConcurrent object is retired.
+*/
+#define BTCONC_MAX_SAVEPOINT 8
+
+/*
+** An single object of this type is permanently part of each BtShared
+** object. It stores things related to BEGIN CONCURRENT transactions
+** and the in-process shared-log.
+*/
+struct BtConcurrent {
+  BtSharedLog *pBtLog;
+  int eState;                     /* One of the BTCONC_STATE_XXX values */
+  u64 iBase;                      /* WalCommitId() transaction is prepared on */
+
+  /* Reads performed by this transaction */
+  BtReadIntkey *aReadIntkey;
+  BtReadIndex *aReadIndex;
+  int nReadIntkey;
+  int nReadIndex;
+  int nReadIntkeyAlloc;
+  int nReadIndexAlloc;
+  int bReadSorted;                /* True if the aRead[] arrays are sorted */
+
+  /* Changes made by this transaction. */
+  BtWrite *aWrite;
+  int nWrite;
+  int nWriteAlloc;
+
+  /* An UnpackedRecord structure created pKeyInfo->nKeyField==nUnpackedField */
+  UnpackedRecord *pUnpacked;
+  int nUnpackedField;
+
+  int aSvpt[BTCONC_MAX_SAVEPOINT];/* Set nWrite=aSvpt[i] when savepoint i rb. */
+  int nSvpt;                      /* Current number of open savepoints */
+
+  /* Total size of all allocations managed by this object. */
+  i64 nAlloc;
+};
+
+/*
+** NONE:    No BEGIN CONCURRENT transaction is open. 
+** INUSE:   BEGIN CONCURRENT transaction is open and object is accumulating
+**          reads and writes.
+** RETIRED: BEGIN CONCURRENT transaction is open but object is not in use.
+**          Because it grew too large or some such reason.
+*/
+#define BTCONC_STATE_NONE    0
+#define BTCONC_STATE_INUSE   1
+#define BTCONC_STATE_RETIRED 2
+
+/*
+** Each read of an intkey or index btree by a BEGIN CONCURRENT transaction is
+** represented by an object of one of the following types.
+*/
+struct BtReadIntkey {
+  Pgno iRoot;                     /* Root page of table read */
+  i64 iMin;                       /* Smallest rowid scanned */
+  i64 iMax;                       /* Largest rowid scanned */
+};
+
+/*
+** Each scan of an index is represented by an instance of the following
+** structure. The start of the range is encoded in (aRecMin, nRecMin, 
+** drc_min) and the end of the range by (aRecMax, nRecMax, drc_max). 
+** According to the sort order defined by pKeyInfo:
+**
+**     (aRecMin, nRecMin, drc_min) <= (aRecMax, nRecMax, drc_max)
+*/
+struct BtReadIndex {
+  Pgno iRoot;                     /* Root page of index b-tree read */
+  i16 drc_min;
+  i16 drc_max;
+  KeyInfo *pKeyInfo;              /* KeyInfo structure for b-tree */
+  int nRecMin;                    /* Size of aRecFirst[] in bytes */
+  int nRecMax;                    /* Size of aRecLast[] in bytes */
+  u8 *aRecMin;                    /* Record for this change */
+  u8 *aRecMax;                    /* Record for this change */
+};
+
+/*
+** During a BEGIN CONCURRENT transaction, each insert or delete of a b-tree 
+** key is represented by an instance of this structure.
+**
+** If the transaction is succesfully committed, the information in an 
+** array of this type is used to populate arrays of BtWriteIntkey and
+** BtWriteIndex structs, which are stored as part of the shared-log
+** entry to use for future conflict detection.
+*/
+struct BtWrite {
+  Pgno iRoot;                     /* Root page of btree this change affects */
+  int bDel;                       /* True for a delete, false for insert */
+  KeyInfo *pKeyInfo;              /* KeyInfo for index-btree, NULL for intkey */
+  i64 iKey;                       /* Key for intkey updates */             
+  int nRec;                       /* Size of aRec[] in bytes */
+  u8 *aRec;                       /* Record for this change */
+};
+
+struct BtWriteIntkey {
+  Pgno iRoot;                     /* Root page of table read */
+  i64 iKey;                       /* Key written */
+};
+
+struct BtWriteIndex {
+  Pgno iRoot;                     /* Root page of table read */
+  int nRec;                       /* Size of aRec[] in bytes (or 0) */
+  u8 *aRec;                       /* Pointer to record */
+};
+
+/* Values for BtRead.eRead - how the scan was started. */
+#define BTCONC_READ_FIRST  1
+#define BTCONC_READ_MOVETO 2
+#define BTCONC_READ_LAST   3
+#define BTCONC_READ_COUNT  4
+
+int sqlite3BcSerializeRecord(
+  UnpackedRecord *pRec,           /* Record to serialize */
+  u8 **ppRec,                     /* OUT: buffer containing serialization */
+  int *pnRec                      /* OUT: size of (*ppRec) in bytes */
+);
+
+
 /* A Btree handle
 **
 ** A database connection contains a pointer to an instance of
@@ -468,6 +657,7 @@ struct BtShared {
   u8 *pTmpSpace;        /* Temp space sufficient to hold a single cell */
 #ifndef SQLITE_OMIT_CONCURRENT
   BtreePtrmap *pMap;
+  BtConcurrent conc;
 #endif
   int nPreformatSize;   /* Size of last cell written by TransferRow() */
 };
@@ -551,6 +741,11 @@ struct BtCursor {
   Btree *pBtree;            /* The Btree to which this cursor belongs */
   Pgno *aOverflow;          /* Cache of overflow page locations */
   void *pKey;               /* Saved key that was cursor last known position */
+#ifndef SQLITE_OMIT_CONCURRENT
+  int eScanType;            /* BTCONC_READ_FIRST, LAST or MOVETO */
+  int iScanDir;             /* +1 if Next(), -1 if Prev(), 0 if neither */
+  int iScanIndex;           /* Index of pBt->conc.aReadXXX[] array plus 1 */
+#endif
   /* All fields above are zeroed when the cursor is allocated.  See
   ** sqlite3BtreeCursorZero().  Fields that follow must be manually
   ** initialized. */
@@ -758,3 +953,4 @@ struct IntegrityCk {
 #else
 # define get2byteAligned(x)  ((x)[0]<<8 | (x)[1])
 #endif
+
index b4864a446cb80f570d7e8af637d7aaf4bb0bbd6b..137778e49f2f6ad59f6b687dbd50b3851a9b86e3 100644 (file)
@@ -296,6 +296,9 @@ SQLITE_WSD struct Sqlite3Config sqlite3Config = {
    0x7ffffffe,                /* iOnceResetThreshold */
    SQLITE_DEFAULT_SORTERREF_SIZE,   /* szSorterRef */
    0,                         /* iPrngSeed */
+#ifndef SQLITE_OMIT_CONCURRENT
+   SQLITE_DEFAULT_MAXSHAREDLOGENTRY,/* nMaxSharedLogEntry */
+#endif
 #ifdef SQLITE_DEBUG
    {0,0,0,0,0,0},             /* aTune */
 #endif
index 04b781e4d67783d6a4b85b35120a9f6581b3c3b0..5950bebbfedf0897f3843399bdaa7c77be2f1d40 100644 (file)
@@ -83,6 +83,9 @@ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = {
 #ifdef SQLITE_EXTRA_AUTOEXT
   SQLITE_EXTRA_AUTOEXT,
 #endif
+#ifndef SQLITE_OMIT_CONCURRENT
+  sqlite3ConcurrentRegister,
+#endif
 };
 
 #ifndef SQLITE_AMALGAMATION
index 1259c711b8bd18b8c51adf24e12c3d3cbcdd2b5d..f29c60e777d6e937c577edf4316b156944d28797 100644 (file)
@@ -1865,6 +1865,25 @@ void sqlite3PagerEndConcurrent(Pager *pPager){
 int sqlite3PagerIsWal(Pager *pPager){
   return pPager->pWal!=0;
 }
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** If in wal-mode, return a 64-bit value that identifies the snapshot currently
+** in use. Otherwise, return 0.
+*/
+u64 sqlite3PagerWalCommitId(Pager *pPager){
+  return pPager->pWal ? sqlite3WalCommitId(pPager->pWal) : 0;
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** If in wal-mode, return a 64-bit value that identifies the snapshot 
+** currently at the head of the wal file. Otherwise, return 0.
+*/
+u64 sqlite3PagerWalLiveId(Pager *pPager){
+  return pPager->pWal ? sqlite3WalLiveId(pPager->pWal) : 0;
+}
+
 #endif /* SQLITE_OMIT_CONCURRENT */
 
 /*
@@ -5575,6 +5594,7 @@ int sqlite3PagerUsePage(Pager *pPager, Pgno pgno){
   }
   return rc;
 }
+
 #endif
 
 /*
@@ -6578,9 +6598,10 @@ int sqlite3PagerExclusiveLock(Pager *pPager, PgHdr *pPage1, u32 *aConflict){
 #ifndef SQLITE_OMIT_CONCURRENT
 /*
 ** This function is called as part of committing an CONCURRENT transaction.
-** At this point the wal WRITER lock is held, and all pages in the cache 
-** except for page 1 are compatible with the snapshot at the head of the
-** wal file. 
+** At this point the wal WRITER lock is held. If parameter bReset is 0,
+** all pages in the cache except for page 1 are compatible with the snapshot 
+** at the head of the wal file. Or, if bReset is non-zero, the cache is
+** cleared of all pages except for page 1.
 **
 ** This function updates the in-memory data structures and reloads the
 ** contents of page 1 so that the client is operating on the snapshot 
@@ -6588,7 +6609,7 @@ int sqlite3PagerExclusiveLock(Pager *pPager, PgHdr *pPage1, u32 *aConflict){
 **
 ** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
 */
-int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage *pPage1){
+int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage *pPage1, int bReset){
   int rc;
 
   assert( pPager->pWal && pPager->pAllRead );
@@ -6596,7 +6617,9 @@ int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage *pPage1){
   if( rc==SQLITE_OK ){
     rc = readDbPage(pPage1);
   }
-
+  if( bReset && rc==SQLITE_OK ){
+    sqlite3PcacheTruncate(pPager->pPCache, 1);
+  }
   return rc;
 }
 
index 5828c37f86ae0d4d048277cefe93961a8745eaf9..076d1c30297ce57bece0860a458310ba90d94e80 100644 (file)
@@ -248,9 +248,11 @@ int sqlite3PagerUsePage(Pager*, Pgno);
 void sqlite3PagerEndConcurrent(Pager*);
 int sqlite3PagerBeginConcurrent(Pager*);
 void sqlite3PagerDropExclusiveLock(Pager*);
-int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage*);
+int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage*, int);
 void sqlite3PagerSetDbsize(Pager *pPager, Pgno);
 int sqlite3PagerIsWal(Pager*);
+u64 sqlite3PagerWalCommitId(Pager *pPager);
+u64 sqlite3PagerWalLiveId(Pager *pPager);
 #else
 # define sqlite3PagerEndConcurrent(x)
 # define sqlite3PagerUsePage(x, y) SQLITE_OK
index f04578d47f2c49a702d2fe62594e24bb4a11164b..a13f957b98cb2fcef719d9d4b082fa4e42b4ebae 100644 (file)
@@ -1831,6 +1831,10 @@ struct sqlite3 {
 #define CONCURRENT_OPEN   1
 #define CONCURRENT_SCHEMA 2
 
+#ifndef SQLITE_OMIT_CONCURRENT
+int sqlite3ConcurrentRegister(sqlite3 *db);
+#endif
+
 /*
 ** A macro to discover the encoding of a database.
 */
@@ -4390,6 +4394,9 @@ struct Sqlite3Config {
   int iOnceResetThreshold;          /* When to reset OP_Once counters */
   u32 szSorterRef;                  /* Min size in bytes to use sorter-refs */
   unsigned int iPrngSeed;           /* Alternative fixed seed for the PRNG */
+#ifndef SQLITE_OMIT_CONCURRENT
+  int nMaxSharedLogEntry;           /* Max number of shared-log entries */
+#endif
   /* vvvv--- must be last ---vvv */
 #ifdef SQLITE_DEBUG
   sqlite3_int64 aTune[SQLITE_NTUNE]; /* Tuning parameters */
@@ -5928,4 +5935,6 @@ sqlite3_uint64 sqlite3Hwtime(void);
 # define IS_STMT_SCANSTATUS(db) 0
 #endif
 
+#define SQLITE_DEFAULT_MAXSHAREDLOGENTRY 64
+
 #endif /* SQLITEINT_H */
index 80360d68e0551d9bbfd04523698ce1ef75e87979..819c0adeb400ad9ac3fc5831716bafb893050f4c 100644 (file)
--- a/src/wal.c
+++ b/src/wal.c
@@ -4823,6 +4823,29 @@ int sqlite3WalUpgradeSnapshot(Wal *pWal){
   } SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; )
   return rc;
 }
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Return a 64-bit value that identifies the snapshot currently in use.
+*/
+u64 sqlite3WalCommitId(Wal *pWal){
+  return ((u64)(pWal->hdr.aCksum[0]) << 32) + pWal->hdr.aCksum[1];
+}
+
+/* !defined(SQLITE_OMIT_CONCURRENT)
+**
+** Return a 64-bit value that identifies the snapshot currently at the
+** head of the wal file.
+*/
+u64 sqlite3WalLiveId(Wal *pWal){
+  WalIndexHdr hdr;
+  assert( pWal->writeLock );
+  SEH_TRY {
+    walIndexLoadHdr(pWal, &hdr);
+  } SEH_EXCEPT( memset(&hdr, 0, sizeof(hdr)); )
+  return ((u64)(hdr.aCksum[0]) << 32) + hdr.aCksum[1];
+}
+
 #endif   /* SQLITE_OMIT_CONCURRENT */
 
 /*
index cfe3dc2f6966ff25c2628c22d0c567ccf3f53fd0..a72dae196abb980856843e14537ae867b0734727 100644 (file)
--- a/src/wal.h
+++ b/src/wal.h
@@ -145,6 +145,9 @@ int sqlite3WalLockForCommit(Wal *pWal, PgHdr *pPg, Bitvec *pRead, u32*);
 /* Upgrade the state of the client to take into account changes written
 ** by other connections */
 int sqlite3WalUpgradeSnapshot(Wal *pWal);
+
+u64 sqlite3WalCommitId(Wal *pWal);
+u64 sqlite3WalLiveId(Wal *pWal);
 #endif /* SQLITE_OMIT_CONCURRENT */
 
 #ifdef SQLITE_ENABLE_ZIPVFS
diff --git a/test/concurrentA.test b/test/concurrentA.test
new file mode 100644 (file)
index 0000000..eeb2132
--- /dev/null
@@ -0,0 +1,328 @@
+# 2023 January 12
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#*************************************************************************
+# This file implements regression tests for SQLite library.
+#
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+set testprefix concurrentA
+
+do_execsql_test 1.0 {
+  PRAGMA journal_mode = wal;
+  CREATE TABLE t1(x INTEGER PRIMARY KEY, b);
+  INSERT INTO t1 VALUES(1, 'one');
+  INSERT INTO t1 VALUES(2, 'two');
+  INSERT INTO t1 VALUES(3, 'three');
+  INSERT INTO t1 VALUES(4, 'four');
+  INSERT INTO t1 VALUES(5, 'five');
+} {wal}
+
+do_execsql_test 1.1 {
+  BEGIN CONCURRENT;
+    SELECT * FROM t1 WHERE x=4;
+} {4 four}
+
+do_execsql_test 1.2 {
+    INSERT INTO t1 VALUES(6, 'six');
+}
+
+do_execsql_test 1.3 {
+  SELECT * FROM sqlite_conc;
+} {
+  2 read   5  6
+  2 read   4  4
+  2 insert 6 (NULL,'six')
+}
+
+do_execsql_test 1.4 {
+  COMMIT;
+}
+
+do_execsql_test 2.0 {
+  BEGIN CONCURRENT;
+    UPDATE t1 SET b='four+1' WHERE x=4;
+    DELETE FROM t1 WHERE x=1;
+}
+
+sqlite3 db2 test.db
+do_execsql_test -db db2 2.1 {
+  BEGIN CONCURRENT;
+    UPDATE t1 SET b='two+1' WHERE x=2;
+  COMMIT;
+}
+
+do_execsql_test 2.2 {
+  COMMIT;
+  SELECT * FROM t1;
+} {
+  2 two+1 3 three 4 four+1 5 five 6 six
+}
+
+do_execsql_test 2.3 {
+  BEGIN CONCURRENT;
+    UPDATE t1 SET b='three+1' WHERE x=3;
+    SELECT * FROM t1 WHERE x=6;
+} {6 six}
+
+do_execsql_test -db db2 2.4 {
+  BEGIN CONCURRENT;
+    UPDATE t1 SET b='six+1' WHERE x=6;
+  COMMIT;
+}
+
+do_catchsql_test 2.5 {
+  COMMIT
+} {1 {database is locked}}
+
+#--------------------------------------------------------------------------
+reset_db
+
+do_execsql_test 3.0 {
+  PRAGMA journal_mode = wal;
+  CREATE TABLE t1(x INTEGER PRIMARY KEY, b);
+  INSERT INTO t1 VALUES(2, 'two');
+  INSERT INTO t1 VALUES(4, 'four');
+  INSERT INTO t1 VALUES(6, 'six');
+  INSERT INTO t1 VALUES(8, 'eight');
+  INSERT INTO t1 VALUES(10, 'ten');
+  INSERT INTO t1 VALUES(12, 'twelve');
+  INSERT INTO t1 VALUES(14, 'fourteen');
+  INSERT INTO t1 VALUES(16, 'sixteen');
+  INSERT INTO t1 VALUES(18, 'eightteen');
+  INSERT INTO t1 VALUES(20, 'twenty');
+} {wal}
+
+set START -9223372036854775808
+set END    9223372036854775807
+foreach {tn sql range} {
+  1 { SELECT * FROM t1 }                         {$START $END}
+  2 { SELECT * FROM t1 WHERE x BETWEEN 4 AND 8 } {4 10}
+  3 { SELECT * FROM t1 WHERE x BETWEEN 4 AND 7 } {4 8}
+  4 { SELECT * FROM t1 WHERE x = 4 }             {4 4}
+  5 { SELECT * FROM t1 WHERE x = 5 }             {5 6}
+  6 { SELECT * FROM t1 WHERE x > 6 }             {6 $END}
+  7 { SELECT * FROM t1 WHERE x > 5 }             {5 $END}
+  8 { SELECT max(x) FROM t1 }                    {20 $END}
+  9 { SELECT min(x) FROM t1 }                    {$START 2}
+  10 { INSERT INTO t1 VALUES(NULL, NULL) }       {20 21  20 $END}
+
+  11 { SELECT * FROM t1 WHERE x < 6 ORDER BY x DESC} {$START 6}
+} {
+
+  set lRes [list]
+  foreach {a b} [subst $range] { lappend lRes 2 read $a $b }
+
+  execsql "BEGIN CONCURRENT"
+  execsql $sql
+  do_execsql_test 3.1.$tn {
+    SELECT * FROM sqlite_conc WHERE op='read'
+  } $lRes
+
+  execsql ROLLBACK
+}
+
+db close
+sqlite3 db test.db
+sqlite3 db2 test.db
+
+do_test 3.2 {
+  execsql {
+    BEGIN CONCURRENT;
+      UPDATE t1 SET b='six+1' WHERE x=6;
+    COMMIT;
+  }
+  execsql {
+    BEGIN CONCURRENT;
+      UPDATE t1 SET b='ten+1' WHERE x=10;
+      SELECT * FROM t1 WHERE x=6;
+  } 
+  execsql {
+    BEGIN CONCURRENT;
+      UPDATE t1 SET b='sixteen+1' WHERE x=16;
+    COMMIT;
+  } db2
+  execsql {
+    COMMIT
+  }
+} {}
+do_test 3.3 {
+  execsql {
+    BEGIN CONCURRENT;
+      UPDATE t1 SET b='four+1' WHERE x=4;
+  }
+  execsql {
+    UPDATE t1 SET b='four+2' WHERE x=4;
+  } db2
+  catchsql {
+    COMMIT
+  }
+} {1 {database is locked}}
+execsql { ROLLBACK }
+
+do_test 3.4 {
+  execsql {
+    BEGIN CONCURRENT;
+      UPDATE t1 SET b='four+3' WHERE x=4;
+  }
+  execsql {
+    BEGIN CONCURRENT;
+      UPDATE t1 SET b='four+4' WHERE x=4;
+    COMMIT;
+  } db2
+  catchsql {
+    COMMIT
+  }
+} {1 {database is locked}}
+execsql { ROLLBACK }
+
+#-------------------------------------------------------------------------
+reset_db
+do_execsql_test 4.0 {
+  PRAGMA journal_mode = wal;
+  CREATE TABLE t1(x INTEGER PRIMARY KEY, y INTEGER);
+  INSERT INTO t1 VALUES(1, 10), (2, 20), (3, 30), (4, 40), (5, 50);
+  INSERT INTO t1 VALUES(6, 60), (7, 70), (8, 80), (9, 90), (10, 100);
+  CREATE INDEX i1 ON t1(y);
+} {wal}
+
+foreach {tn sql idx} {
+  1 "SELECT * FROM t1 WHERE y=50" {(50)- (60,6)}
+  2 "SELECT * FROM t1 WHERE y<50" {(NULL)+ (50,5)}
+
+  3 "SELECT * FROM t1 WHERE y>50" {(50)+ ()}
+  4 "SELECT * FROM t1 WHERE y>=50" {(50)- ()}
+
+  4 "SELECT * FROM t1 WHERE y<50 ORDER BY y DESC" {() (50)-}
+
+
+  5 "SELECT * FROM t1 WHERE y>50" {(50)+ ()}
+
+} {
+
+  execsql "BEGIN CONCURRENT"
+  execsql $sql
+
+  set lRes [list]
+  foreach {a b} $idx {
+    lappend lRes 3 read $a $b
+  }
+    
+  do_execsql_test 3.1.$tn {
+    SELECT * FROM sqlite_conc WHERE op='read' 
+  } $lRes
+
+
+  execsql ROLLBACK
+}
+
+#-------------------------------------------------------------------------
+reset_db 
+do_execsql_test 5.0 {
+  PRAGMA journal_mode = wal;
+  CREATE TABLE t1(a INTEGER PRIMARY KEY, b UNIQUE);
+  BEGIN CONCURRENT;
+    INSERT INTO t1 VALUES(1, 'one');
+    INSERT INTO t1 VALUES(2, 'two');
+  COMMIT;
+} {wal}
+
+#-------------------------------------------------------------------------
+reset_db 
+do_execsql_test 6.0 {
+  PRAGMA journal_mode = wal;
+  CREATE TABLE t1(a INTEGER PRIMARY KEY, b);
+  CREATE TABLE t2(x);
+  INSERT INTO t1 VALUES
+    (1, 1), (2, 1), (3, 1), (4, 1),
+    (5, 2), (6, 2), (7, 2), (8, 2),
+    (9, 3), (10, 3), (11, 3), (12, 3);
+  CREATE INDEX i1 ON t1(b);
+} {wal}
+
+sqlite3 db2 test.db
+
+do_execsql_test 6.1.1 {
+  BEGIN CONCURRENT;
+    SELECT a FROM t1 WHERE b=1;
+    INSERT INTO t2 VALUES('hello');
+} {1 2 3 4}
+
+do_test 6.1.2 {
+  execsql {
+    BEGIN CONCURRENT;
+      INSERT INTO t1 VALUES(13, 1);
+    COMMIT;
+  } db2
+} {}
+
+do_catchsql_test 6.1.3 {
+  COMMIT
+} {1 {database is locked}}
+execsql ROLLBACK
+
+do_execsql_test 6.2.1 {
+  BEGIN CONCURRENT;
+    SELECT count(*) FROM t1;
+    INSERT INTO t2 VALUES('world');
+} {13}
+
+do_test 6.2.2 {
+  execsql {
+    BEGIN CONCURRENT;
+      INSERT INTO t1 VALUES(14, 4);
+    COMMIT;
+  } db2
+} {}
+
+do_catchsql_test 6.2.3 {
+  COMMIT
+} {1 {database is locked}}
+execsql ROLLBACK
+
+#-------------------------------------------------------------------------
+reset_db 
+do_execsql_test 7.0 {
+  PRAGMA journal_mode = wal;
+  CREATE TABLE t1(x, y);
+  CREATE TABLE t2(a INTEGER PRIMARY KEY, b);
+  INSERT INTO t2 VALUES(1, 'one');
+  INSERT INTO t2 VALUES(2, 'two');
+  INSERT INTO t2 VALUES(3, 'three');
+} {wal}
+
+do_execsql_test 7.1.1 {
+  BEGIN CONCURRENT;
+    UPDATE t2 SET b='one+1' WHERE a=1;
+    SAVEPOINT rb;
+      INSERT INTO t1 VALUES(1, 2);
+    ROLLBACK TO rb;
+}
+
+sqlite3 db2 test.db
+
+do_test 7.1.2 {
+  execsql {
+    BEGIN CONCURRENT;
+      UPDATE t2 SET b='three+1' WHERE a=3;
+    COMMIT;
+  } db2
+} {}
+
+do_execsql_test 7.1.3 {
+  COMMIT;
+  SELECT * FROM t1;
+} {}
+
+
+
+finish_test
+
+
index 0c058672f95933ba88ce052c1afbf740e8e41a32..75b3f2a6e3824e6e9d654391b5acb98fabe44a98 100644 (file)
@@ -495,6 +495,8 @@ set flist {
    sqlite3session.c
    fts5.c
    stmt.c
+
+   bcrecord.c
 }
 if {$enable_recover} {
   lappend flist sqlite3recover.c dbdata.c