From: drh <>
Date: Tue, 5 Aug 2025 12:01:43 +0000 (+0000)
Subject: Remove the experimental lsm1 extension from trunk, inasmuch as
X-Git-Url: http://git.ipfire.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=cd70a44d45b57afe63d87540e586c4595aa046b0;p=thirdparty%2Fsqlite.git
Remove the experimental lsm1 extension from trunk, inasmuch as
readers were thinking that this was a supported extension and were reporting
bugs against it.
FossilOrigin-Name: ae9d7c9c922bb241363aa690b42b9664c4ad6e76ed5ce474daf1ab44461bc6a3
---
diff --git a/ext/lsm1/Makefile b/ext/lsm1/Makefile
deleted file mode 100644
index d497a1d133..0000000000
--- a/ext/lsm1/Makefile
+++ /dev/null
@@ -1,56 +0,0 @@
-#
-# This Makefile is designed for use with main.mk in the root directory of
-# this project. After including main.mk, the users makefile should contain:
-#
-# LSMDIR=$(TOP)/ext/lsm1/
-# LSMOPTS=-fPIC
-# include $(LSMDIR)/Makefile
-#
-# The most useful targets are [lsmtest] and [lsm.so].
-#
-
-LSMOBJ = \
- lsm_ckpt.o \
- lsm_file.o \
- lsm_log.o \
- lsm_main.o \
- lsm_mem.o \
- lsm_mutex.o \
- lsm_shared.o \
- lsm_sorted.o \
- lsm_str.o \
- lsm_tree.o \
- lsm_unix.o \
- lsm_win32.o \
- lsm_varint.o \
- lsm_vtab.o
-
-LSMHDR = \
- $(LSMDIR)/lsm.h \
- $(LSMDIR)/lsmInt.h
-
-LSMTESTSRC = $(LSMDIR)/lsm-test/lsmtest1.c $(LSMDIR)/lsm-test/lsmtest2.c \
- $(LSMDIR)/lsm-test/lsmtest3.c $(LSMDIR)/lsm-test/lsmtest4.c \
- $(LSMDIR)/lsm-test/lsmtest5.c $(LSMDIR)/lsm-test/lsmtest6.c \
- $(LSMDIR)/lsm-test/lsmtest7.c $(LSMDIR)/lsm-test/lsmtest8.c \
- $(LSMDIR)/lsm-test/lsmtest9.c \
- $(LSMDIR)/lsm-test/lsmtest_datasource.c \
- $(LSMDIR)/lsm-test/lsmtest_func.c $(LSMDIR)/lsm-test/lsmtest_io.c \
- $(LSMDIR)/lsm-test/lsmtest_main.c $(LSMDIR)/lsm-test/lsmtest_mem.c \
- $(LSMDIR)/lsm-test/lsmtest_tdb.c $(LSMDIR)/lsm-test/lsmtest_tdb3.c \
- $(LSMDIR)/lsm-test/lsmtest_util.c $(LSMDIR)/lsm-test/lsmtest_win32.c
-
-
-# all: lsm.so
-
-LSMOPTS += -fPIC -DLSM_MUTEX_PTHREADS=1 -I$(LSMDIR) -DHAVE_ZLIB
-
-lsm.so: $(LSMOBJ)
- $(T.link) -shared -fPIC -o lsm.so $(LSMOBJ)
-
-%.o: $(LSMDIR)/%.c $(LSMHDR) sqlite3.h
- $(T.link) $(LSMOPTS) -c $<
-
-lsmtest$(EXE): $(LSMOBJ) $(LSMTESTSRC) $(LSMTESTHDR) sqlite3.o
- # $(T.link) -c $(TOP)/lsm-test/lsmtest_tdb2.cc
- $(T.link) $(LSMOPTS) $(LSMTESTSRC) $(LSMOBJ) sqlite3.o -o lsmtest$(EXE) $(THREADLIB) -lz
diff --git a/ext/lsm1/Makefile.msc b/ext/lsm1/Makefile.msc
deleted file mode 100644
index 3e5a3b3310..0000000000
--- a/ext/lsm1/Makefile.msc
+++ /dev/null
@@ -1,102 +0,0 @@
-#
-# This Makefile is designed for use with Makefile.msc in the root directory
-# of this project. The Makefile.msc should contain:
-#
-# LSMDIR=$(TOP)\ext\lsm1
-# !INCLUDE $(LSMDIR)\Makefile.msc
-#
-# The most useful targets are [lsmtest.exe] and [lsm.dll].
-#
-
-LSMOBJ = \
- lsm_ckpt.lo \
- lsm_file.lo \
- lsm_log.lo \
- lsm_main.lo \
- lsm_mem.lo \
- lsm_mutex.lo \
- lsm_shared.lo \
- lsm_sorted.lo \
- lsm_str.lo \
- lsm_tree.lo \
- lsm_unix.lo \
- lsm_win32.lo \
- lsm_varint.lo \
- lsm_vtab.lo
-
-LSMHDR = \
- $(LSMDIR)\lsm.h \
- $(LSMDIR)\lsmInt.h
-
-LSMTESTSRC = $(LSMDIR)\lsm-test\lsmtest1.c $(LSMDIR)\lsm-test\lsmtest2.c \
- $(LSMDIR)\lsm-test\lsmtest3.c $(LSMDIR)\lsm-test\lsmtest4.c \
- $(LSMDIR)\lsm-test\lsmtest5.c $(LSMDIR)\lsm-test\lsmtest6.c \
- $(LSMDIR)\lsm-test\lsmtest7.c $(LSMDIR)\lsm-test\lsmtest8.c \
- $(LSMDIR)\lsm-test\lsmtest9.c \
- $(LSMDIR)\lsm-test\lsmtest_datasource.c \
- $(LSMDIR)\lsm-test\lsmtest_func.c $(LSMDIR)\lsm-test\lsmtest_io.c \
- $(LSMDIR)\lsm-test\lsmtest_main.c $(LSMDIR)\lsm-test\lsmtest_mem.c \
- $(LSMDIR)\lsm-test\lsmtest_tdb.c $(LSMDIR)\lsm-test\lsmtest_tdb3.c \
- $(LSMDIR)\lsm-test\lsmtest_util.c $(LSMDIR)\lsm-test\lsmtest_win32.c
-
-# all: lsm.dll lsmtest.exe
-
-LSMOPTS = $(NO_WARN) -DLSM_MUTEX_WIN32=1 -I$(LSMDIR)
-
-!IF $(DEBUG)>2
-LSMOPTS = $(LSMOPTS) -DLSM_DEBUG=1
-!ENDIF
-
-!IF $(MEMDEBUG)!=0
-LSMOPTS = $(LSMOPTS) -DLSM_DEBUG_MEM=1
-!ENDIF
-
-lsm_ckpt.lo: $(LSMDIR)\lsm_ckpt.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_ckpt.c
-
-lsm_file.lo: $(LSMDIR)\lsm_file.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_file.c
-
-lsm_log.lo: $(LSMDIR)\lsm_log.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_log.c
-
-lsm_main.lo: $(LSMDIR)\lsm_main.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_main.c
-
-lsm_mem.lo: $(LSMDIR)\lsm_mem.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_mem.c
-
-lsm_mutex.lo: $(LSMDIR)\lsm_mutex.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_mutex.c
-
-lsm_shared.lo: $(LSMDIR)\lsm_shared.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_shared.c
-
-lsm_sorted.lo: $(LSMDIR)\lsm_sorted.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_sorted.c
-
-lsm_str.lo: $(LSMDIR)\lsm_str.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_str.c
-
-lsm_tree.lo: $(LSMDIR)\lsm_tree.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_tree.c
-
-lsm_unix.lo: $(LSMDIR)\lsm_unix.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_unix.c
-
-lsm_win32.lo: $(LSMDIR)\lsm_win32.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_win32.c
-
-lsm_varint.lo: $(LSMDIR)\lsm_varint.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_varint.c
-
-lsm_vtab.lo: $(LSMDIR)\lsm_vtab.c $(LSMHDR) $(SQLITE3H)
- $(LTCOMPILE) $(LSMOPTS) -c $(LSMDIR)\lsm_vtab.c
-
-lsm.dll: $(LSMOBJ)
- $(LD) $(LDFLAGS) $(LTLINKOPTS) $(LTLIBPATHS) /DLL /OUT:$@ $(LSMOBJ)
- copy /Y $@ $(LSMDIR)\$@
-
-lsmtest.exe: $(LSMOBJ) $(LSMTESTSRC) $(LSMTESTHDR) $(LIBOBJ)
- $(LTLINK) $(LSMOPTS) $(LSMTESTSRC) /link $(LSMOBJ) $(LIBOBJ)
- copy /Y $@ $(LSMDIR)\$@
diff --git a/ext/lsm1/lsm-test/README b/ext/lsm1/lsm-test/README
deleted file mode 100644
index 80654ee97e..0000000000
--- a/ext/lsm1/lsm-test/README
+++ /dev/null
@@ -1,40 +0,0 @@
-
-
-Organization of test case files:
-
- lsmtest1.c: Data tests. Tests that perform many inserts and deletes on a
- database file, then verify that the contents of the database can
- be queried.
-
- lsmtest2.c: Crash tests. Tests that attempt to verify that the database
- recovers correctly following an application or system crash.
-
- lsmtest3.c: Rollback tests. Tests that focus on the explicit rollback of
- transactions and sub-transactions.
-
- lsmtest4.c: Multi-client tests.
-
- lsmtest5.c: Multi-client tests with a different thread for each client.
-
- lsmtest6.c: OOM injection tests.
-
- lsmtest7.c: API tests.
-
- lsmtest8.c: Writer crash tests. Tests in this file attempt to verify that
- the system recovers and other clients proceed unaffected if
- a process fails in the middle of a write transaction.
-
- The difference from lsmtest2.c is that this file tests
- live-recovery (recovery from a failure that occurs while other
- clients are still running) whereas lsmtest2.c tests recovery
- from a system or power failure.
-
- lsmtest9.c: More data tests. These focus on testing that calling
- lsm_work(nMerge=1) to compact the database does not corrupt it.
- In other words, that databases containing block-redirects
- can be read and written.
-
-
-
-
-
diff --git a/ext/lsm1/lsm-test/lsmtest.h b/ext/lsm1/lsm-test/lsmtest.h
deleted file mode 100644
index ca60424add..0000000000
--- a/ext/lsm1/lsm-test/lsmtest.h
+++ /dev/null
@@ -1,303 +0,0 @@
-
-#ifndef __WRAPPER_INT_H_
-#define __WRAPPER_INT_H_
-
-#include "lsmtest_tdb.h"
-#include "sqlite3.h"
-#include "lsm.h"
-
-#include
-#include
-#include
-#include
-#include
-#ifndef _WIN32
-# include
-#endif
-#include
-#include
-#include
-#include
-#include
-#include
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _WIN32
-# include "windows.h"
-# define gettimeofday win32GetTimeOfDay
-# define F_OK (0)
-# define sleep(sec) Sleep(1000 * (sec))
-# define usleep(usec) Sleep(((usec) + 999) / 1000)
-# ifdef _MSC_VER
-# include
-# define snprintf _snprintf
-# define fsync(fd) FlushFileBuffers((HANDLE)_get_osfhandle((fd)))
-# define fdatasync(fd) FlushFileBuffers((HANDLE)_get_osfhandle((fd)))
-# define __va_copy(dst,src) ((dst) = (src))
-# define ftruncate(fd,sz) ((_chsize_s((fd), (sz))==0) ? 0 : -1)
-# else
-# error Unsupported C compiler for Windows.
-# endif
-int win32GetTimeOfDay(struct timeval *, void *);
-#endif
-
-#ifndef _LSM_INT_H
-typedef unsigned int u32;
-typedef unsigned char u8;
-typedef long long int i64;
-typedef unsigned long long int u64;
-#endif
-
-
-#define ArraySize(x) ((int)(sizeof(x) / sizeof((x)[0])))
-
-#define MIN(x,y) ((x)<(y) ? (x) : (y))
-#define MAX(x,y) ((x)>(y) ? (x) : (y))
-
-#define unused_parameter(x) (void)(x)
-
-#define TESTDB_DEFAULT_PAGE_SIZE 4096
-#define TESTDB_DEFAULT_CACHE_SIZE 2048
-
-#ifndef _O_BINARY
-# define _O_BINARY (0)
-#endif
-
-/*
-** Ideally, these should be in wrapper.c. But they are here instead so that
-** they can be used by the C++ database wrappers in wrapper2.cc.
-*/
-typedef struct DatabaseMethods DatabaseMethods;
-struct TestDb {
- DatabaseMethods const *pMethods; /* Database methods */
- const char *zLibrary; /* Library name for tdb_open() */
-};
-struct DatabaseMethods {
- int (*xClose)(TestDb *);
- int (*xWrite)(TestDb *, void *, int , void *, int);
- int (*xDelete)(TestDb *, void *, int);
- int (*xDeleteRange)(TestDb *, void *, int, void *, int);
- int (*xFetch)(TestDb *, void *, int, void **, int *);
- int (*xScan)(TestDb *, void *, int, void *, int, void *, int,
- void (*)(void *, void *, int , void *, int)
- );
- int (*xBegin)(TestDb *, int);
- int (*xCommit)(TestDb *, int);
- int (*xRollback)(TestDb *, int);
-};
-
-/*
-** Functions in wrapper2.cc (a C++ source file). wrapper2.cc contains the
-** wrapper for Kyoto Cabinet. Kyoto cabinet has a C API, but
-** the primary interface is the C++ API.
-*/
-int test_kc_open(const char*, const char *zFilename, int bClear, TestDb **ppDb);
-int test_kc_close(TestDb *);
-int test_kc_write(TestDb *, void *, int , void *, int);
-int test_kc_delete(TestDb *, void *, int);
-int test_kc_delete_range(TestDb *, void *, int, void *, int);
-int test_kc_fetch(TestDb *, void *, int, void **, int *);
-int test_kc_scan(TestDb *, void *, int, void *, int, void *, int,
- void (*)(void *, void *, int , void *, int)
-);
-
-int test_mdb_open(const char*, const char *zFile, int bClear, TestDb **ppDb);
-int test_mdb_close(TestDb *);
-int test_mdb_write(TestDb *, void *, int , void *, int);
-int test_mdb_delete(TestDb *, void *, int);
-int test_mdb_fetch(TestDb *, void *, int, void **, int *);
-int test_mdb_scan(TestDb *, void *, int, void *, int, void *, int,
- void (*)(void *, void *, int , void *, int)
-);
-
-/*
-** Functions in wrapper3.c. This file contains the tdb wrapper for lsm.
-** The wrapper for lsm is a bit more involved than the others, as it
-** includes code for a couple of different lsm configurations, and for
-** various types of fault injection and robustness testing.
-*/
-int test_lsm_open(const char*, const char *zFile, int bClear, TestDb **ppDb);
-int test_lsm_lomem_open(const char*, const char*, int bClear, TestDb **ppDb);
-int test_lsm_lomem2_open(const char*, const char*, int bClear, TestDb **ppDb);
-int test_lsm_zip_open(const char*, const char*, int bClear, TestDb **ppDb);
-int test_lsm_small_open(const char*, const char*, int bClear, TestDb **ppDb);
-int test_lsm_mt2(const char*, const char *zFile, int bClear, TestDb **ppDb);
-int test_lsm_mt3(const char*, const char *zFile, int bClear, TestDb **ppDb);
-
-int tdb_lsm_configure(lsm_db *, const char *);
-
-/* Functions in lsmtest_tdb4.c */
-int test_bt_open(const char*, const char *zFile, int bClear, TestDb **ppDb);
-int test_fbt_open(const char*, const char *zFile, int bClear, TestDb **ppDb);
-int test_fbts_open(const char*, const char *zFile, int bClear, TestDb **ppDb);
-
-
-/* Functions in testutil.c. */
-int testPrngInit(void);
-u32 testPrngValue(u32 iVal);
-void testPrngArray(u32 iVal, u32 *aOut, int nOut);
-void testPrngString(u32 iVal, char *aOut, int nOut);
-
-void testErrorInit(int argc, char **);
-void testPrintError(const char *zFormat, ...);
-void testPrintUsage(const char *zArgs);
-void testPrintFUsage(const char *zFormat, ...);
-void testTimeInit(void);
-int testTimeGet(void);
-
-/* Functions in testmem.c. */
-void testMallocInstall(lsm_env *pEnv);
-void testMallocUninstall(lsm_env *pEnv);
-void testMallocCheck(lsm_env *pEnv, int *, int *, FILE *);
-void testMallocOom(lsm_env *pEnv, int, int, void(*)(void*), void *);
-void testMallocOomEnable(lsm_env *pEnv, int);
-
-/* lsmtest.c */
-TestDb *testOpen(const char *zSystem, int, int *pRc);
-void testReopen(TestDb **ppDb, int *pRc);
-void testClose(TestDb **ppDb);
-
-void testFetch(TestDb *, void *, int, void *, int, int *);
-void testWrite(TestDb *, void *, int, void *, int, int *);
-void testDelete(TestDb *, void *, int, int *);
-void testDeleteRange(TestDb *, void *, int, void *, int, int *);
-void testWriteStr(TestDb *, const char *, const char *zVal, int *pRc);
-void testFetchStr(TestDb *, const char *, const char *, int *pRc);
-
-void testBegin(TestDb *pDb, int iTrans, int *pRc);
-void testCommit(TestDb *pDb, int iTrans, int *pRc);
-
-void test_failed(void);
-
-char *testMallocPrintf(const char *zFormat, ...);
-char *testMallocVPrintf(const char *zFormat, va_list ap);
-int testGlobMatch(const char *zPattern, const char *zStr);
-
-void testScanCompare(TestDb *, TestDb *, int, void *, int, void *, int, int *);
-void testFetchCompare(TestDb *, TestDb *, void *, int, int *);
-
-void *testMalloc(int);
-void *testMallocCopy(void *pCopy, int nByte);
-void *testRealloc(void *, int);
-void testFree(void *);
-
-/* lsmtest_bt.c */
-int do_bt(int nArg, char **azArg);
-
-/* testio.c */
-int testVfsConfigureDb(TestDb *pDb);
-
-/* testfunc.c */
-int do_show(int nArg, char **azArg);
-int do_work(int nArg, char **azArg);
-
-/* testio.c */
-int do_io(int nArg, char **azArg);
-
-/* lsmtest2.c */
-void do_crash_test(const char *zPattern, int *pRc);
-int do_rollback_test(int nArg, char **azArg);
-
-/* test3.c */
-void test_rollback(const char *zSystem, const char *zPattern, int *pRc);
-
-/* test4.c */
-void test_mc(const char *zSystem, const char *zPattern, int *pRc);
-
-/* test5.c */
-void test_mt(const char *zSystem, const char *zPattern, int *pRc);
-
-/* lsmtest6.c */
-void test_oom(const char *zPattern, int *pRc);
-void testDeleteLsmdb(const char *zFile);
-
-void testSaveDb(const char *zFile, const char *zAuxExt);
-void testRestoreDb(const char *zFile, const char *zAuxExt);
-void testCopyLsmdb(const char *zFrom, const char *zTo);
-
-/* lsmtest7.c */
-void test_api(const char *zPattern, int *pRc);
-
-/* lsmtest8.c */
-void do_writer_crash_test(const char *zPattern, int *pRc);
-
-/*************************************************************************
-** Interface to functionality in test_datasource.c.
-*/
-typedef struct Datasource Datasource;
-typedef struct DatasourceDefn DatasourceDefn;
-
-struct DatasourceDefn {
- int eType; /* A TEST_DATASOURCE_* value */
- int nMinKey; /* Minimum key size */
- int nMaxKey; /* Maximum key size */
- int nMinVal; /* Minimum value size */
- int nMaxVal; /* Maximum value size */
-};
-
-#define TEST_DATASOURCE_RANDOM 1
-#define TEST_DATASOURCE_SEQUENCE 2
-
-char *testDatasourceName(const DatasourceDefn *);
-Datasource *testDatasourceNew(const DatasourceDefn *);
-void testDatasourceFree(Datasource *);
-void testDatasourceEntry(Datasource *, int, void **, int *, void **, int *);
-/* End of test_datasource.c interface.
-*************************************************************************/
-void testDatasourceFetch(
- TestDb *pDb, /* Database handle */
- Datasource *pData,
- int iKey,
- int *pRc /* IN/OUT: Error code */
-);
-
-void testWriteDatasource(TestDb *, Datasource *, int, int *);
-void testWriteDatasourceRange(TestDb *, Datasource *, int, int, int *);
-void testDeleteDatasource(TestDb *, Datasource *, int, int *);
-void testDeleteDatasourceRange(TestDb *, Datasource *, int, int, int *);
-
-
-/* test1.c */
-void test_data_1(const char *, const char *, int *pRc);
-void test_data_2(const char *, const char *, int *pRc);
-void test_data_3(const char *, const char *, int *pRc);
-void testDbContents(TestDb *, Datasource *, int, int, int, int, int, int *);
-void testCaseProgress(int, int, int, int *);
-int testCaseNDot(void);
-
-void testCompareDb(Datasource *, int, int, TestDb *, TestDb *, int *);
-int testControlDb(TestDb **ppDb);
-
-typedef struct CksumDb CksumDb;
-CksumDb *testCksumArrayNew(Datasource *, int, int, int);
-char *testCksumArrayGet(CksumDb *, int);
-void testCksumArrayFree(CksumDb *);
-void testCaseStart(int *pRc, char *zFmt, ...);
-void testCaseFinish(int rc);
-void testCaseSkip(void);
-int testCaseBegin(int *, const char *, const char *, ...);
-
-#define TEST_CKSUM_BYTES 29
-int testCksumDatabase(TestDb *pDb, char *zOut);
-int testCountDatabase(TestDb *pDb);
-void testCompareInt(int, int, int *);
-void testCompareStr(const char *z1, const char *z2, int *pRc);
-
-/* lsmtest9.c */
-void test_data_4(const char *, const char *, int *pRc);
-
-
-/*
-** Similar to the Tcl_GetIndexFromObjStruct() Tcl library function.
-*/
-#define testArgSelect(w,x,y,z) testArgSelectX(w,x,sizeof(w[0]),y,z)
-int testArgSelectX(void *, const char *, int, const char *, int *);
-
-#ifdef __cplusplus
-} /* End of the 'extern "C"' block */
-#endif
-
-#endif
diff --git a/ext/lsm1/lsm-test/lsmtest1.c b/ext/lsm1/lsm-test/lsmtest1.c
deleted file mode 100644
index 1ce2cc0588..0000000000
--- a/ext/lsm1/lsm-test/lsmtest1.c
+++ /dev/null
@@ -1,656 +0,0 @@
-
-#include "lsmtest.h"
-
-#define DATA_SEQUENTIAL TEST_DATASOURCE_SEQUENCE
-#define DATA_RANDOM TEST_DATASOURCE_RANDOM
-
-typedef struct Datatest1 Datatest1;
-typedef struct Datatest2 Datatest2;
-
-/*
-** An instance of the following structure contains parameters used to
-** customize the test function in this file. Test procedure:
-**
-** 1. Create a data-source based on the "datasource definition" vars.
-**
-** 2. Insert nRow key value pairs into the database.
-**
-** 3. Delete all keys from the database. Deletes are done in the same
-** order as the inserts.
-**
-** During steps 2 and 3 above, after each Datatest1.nVerify inserts or
-** deletes, the following:
-**
-** a. Run Datasource.nTest key lookups and check the results are as expected.
-**
-** b. If Datasource.bTestScan is true, run a handful (8) of range
-** queries (scanning forwards and backwards). Check that the results
-** are as expected.
-**
-** c. Close and reopen the database. Then run (a) and (b) again.
-*/
-struct Datatest1 {
- /* Datasource definition */
- DatasourceDefn defn;
-
- /* Test procedure parameters */
- int nRow; /* Number of rows to insert then delete */
- int nVerify; /* How often to verify the db contents */
- int nTest; /* Number of keys to test (0==all) */
- int bTestScan; /* True to do scan tests */
-};
-
-/*
-** An instance of the following data structure is used to describe the
-** second type of test case in this file. The chief difference between
-** these tests and those described by Datatest1 is that these tests also
-** experiment with range-delete operations. Tests proceed as follows:
-**
-** 1. Open the datasource described by Datatest2.defn.
-**
-** 2. Open a connection on an empty database.
-**
-** 3. Do this Datatest2.nIter times:
-**
-** a) Insert Datatest2.nWrite key-value pairs from the datasource.
-**
-** b) Select two pseudo-random keys and use them as the start
-** and end points of a range-delete operation.
-**
-** c) Verify that the contents of the database are as expected (see
-** below for details).
-**
-** d) Close and then reopen the database handle.
-**
-** e) Verify that the contents of the database are still as expected.
-**
-** The inserts and range deletes are run twice - once on the database being
-** tested and once using a control system (sqlite3, kc etc. - something that
-** works). In order to verify that the contents of the db being tested are
-** correct, the test runs a bunch of scans and lookups on both the test and
-** control databases. If the results are the same, the test passes.
-*/
-struct Datatest2 {
- DatasourceDefn defn;
- int nRange;
- int nWrite; /* Number of writes per iteration */
- int nIter; /* Total number of iterations to run */
-};
-
-/*
-** Generate a unique name for the test case pTest with database system
-** zSystem.
-*/
-static char *getName(const char *zSystem, int bRecover, Datatest1 *pTest){
- char *zRet;
- char *zData;
- zData = testDatasourceName(&pTest->defn);
- zRet = testMallocPrintf("data.%s.%s.rec=%d.%d.%d",
- zSystem, zData, bRecover, pTest->nRow, pTest->nVerify
- );
- testFree(zData);
- return zRet;
-}
-
-int testControlDb(TestDb **ppDb){
-#ifdef HAVE_KYOTOCABINET
- return tdb_open("kyotocabinet", "tmp.db", 1, ppDb);
-#else
- return tdb_open("sqlite3", "", 1, ppDb);
-#endif
-}
-
-void testDatasourceFetch(
- TestDb *pDb, /* Database handle */
- Datasource *pData,
- int iKey,
- int *pRc /* IN/OUT: Error code */
-){
- void *pKey; int nKey; /* Database key to query for */
- void *pVal; int nVal; /* Expected result of query */
-
- testDatasourceEntry(pData, iKey, &pKey, &nKey, &pVal, &nVal);
- testFetch(pDb, pKey, nKey, pVal, nVal, pRc);
-}
-
-/*
-** This function is called to test that the contents of database pDb
-** are as expected. In this case, expected is defined as containing
-** key-value pairs iFirst through iLast, inclusive, from data source
-** pData. In other words, a loop like the following could be used to
-** construct a database with identical contents from scratch.
-**
-** for(i=iFirst; i<=iLast; i++){
-** testDatasourceEntry(pData, i, &pKey, &nKey, &pVal, &nVal);
-** // insert (pKey, nKey) -> (pVal, nVal) into database
-** }
-**
-** The key domain consists of keys 0 to (nRow-1), inclusive, from
-** data source pData. For both scan and lookup tests, keys are selected
-** pseudo-randomly from within this set.
-**
-** This function runs nLookupTest lookup tests and nScanTest scan tests.
-**
-** A lookup test consists of selecting a key from the domain and querying
-** pDb for it. The test fails if the presence of the key and, if present,
-** the associated value do not match the expectations defined above.
-**
-** A scan test involves selecting a key from the domain and running
-** the following queries:
-**
-** 1. Scan all keys equal to or greater than the key, in ascending order.
-** 2. Scan all keys equal to or smaller than the key, in descending order.
-**
-** Additionally, if nLookupTest is greater than zero, the following are
-** run once:
-**
-** 1. Scan all keys in the db, in ascending order.
-** 2. Scan all keys in the db, in descending order.
-**
-** As you would assume, the test fails if the returned values do not match
-** expectations.
-*/
-void testDbContents(
- TestDb *pDb, /* Database handle being tested */
- Datasource *pData, /* pDb contains data from here */
- int nRow, /* Size of key domain */
- int iFirst, /* Index of first key from pData in pDb */
- int iLast, /* Index of last key from pData in pDb */
- int nLookupTest, /* Number of lookup tests to run */
- int nScanTest, /* Number of scan tests to run */
- int *pRc /* IN/OUT: Error code */
-){
- int j;
- int rc = *pRc;
-
- if( rc==0 && nScanTest ){
- TestDb *pDb2 = 0;
-
- /* Open a control db (i.e. one that we assume works) */
- rc = testControlDb(&pDb2);
-
- for(j=iFirst; rc==0 && j<=iLast; j++){
- void *pKey; int nKey; /* Database key to insert */
- void *pVal; int nVal; /* Database value to insert */
- testDatasourceEntry(pData, j, &pKey, &nKey, &pVal, &nVal);
- rc = tdb_write(pDb2, pKey, nKey, pVal, nVal);
- }
-
- if( rc==0 ){
- int iKey1;
- int iKey2;
- void *pKey1; int nKey1; /* Start key */
- void *pKey2; int nKey2; /* Final key */
-
- iKey1 = testPrngValue((iFirst<<8) + (iLast<<16)) % nRow;
- iKey2 = testPrngValue((iLast<<8) + (iFirst<<16)) % nRow;
- testDatasourceEntry(pData, iKey1, &pKey2, &nKey1, 0, 0);
- pKey1 = testMalloc(nKey1+1);
- memcpy(pKey1, pKey2, nKey1+1);
- testDatasourceEntry(pData, iKey2, &pKey2, &nKey2, 0, 0);
-
- testScanCompare(pDb2, pDb, 0, 0, 0, 0, 0, &rc);
- testScanCompare(pDb2, pDb, 0, 0, 0, pKey2, nKey2, &rc);
- testScanCompare(pDb2, pDb, 0, pKey1, nKey1, 0, 0, &rc);
- testScanCompare(pDb2, pDb, 0, pKey1, nKey1, pKey2, nKey2, &rc);
- testScanCompare(pDb2, pDb, 1, 0, 0, 0, 0, &rc);
- testScanCompare(pDb2, pDb, 1, 0, 0, pKey2, nKey2, &rc);
- testScanCompare(pDb2, pDb, 1, pKey1, nKey1, 0, 0, &rc);
- testScanCompare(pDb2, pDb, 1, pKey1, nKey1, pKey2, nKey2, &rc);
- testFree(pKey1);
- }
- tdb_close(pDb2);
- }
-
- /* Test some lookups. */
- for(j=0; rc==0 && j=nRow ){
- iKey = j;
- }else{
- iKey = testPrngValue(j + (iFirst<<8) + (iLast<<16)) % nRow;
- }
-
- testDatasourceEntry(pData, iKey, &pKey, &nKey, &pVal, &nVal);
- if( iFirst>iKey || iKey>iLast ){
- pVal = 0;
- nVal = -1;
- }
-
- testFetch(pDb, pKey, nKey, pVal, nVal, &rc);
- }
-
- *pRc = rc;
-}
-
-/*
-** This function should be called during long running test cases to output
-** the progress dots (...) to stdout.
-*/
-void testCaseProgress(int i, int n, int nDot, int *piDot){
- int iDot = *piDot;
- while( iDot < ( ((nDot*2+1) * i) / (n*2) ) ){
- printf(".");
- fflush(stdout);
- iDot++;
- }
- *piDot = iDot;
-}
-
-int testCaseNDot(void){ return 20; }
-
-#if 0
-static void printScanCb(
- void *pCtx, void *pKey, int nKey, void *pVal, int nVal
-){
- printf("%s\n", (char *)pKey);
- fflush(stdout);
-}
-#endif
-
-void testReopenRecover(TestDb **ppDb, int *pRc){
- if( *pRc==0 ){
- const char *zLib = tdb_library_name(*ppDb);
- const char *zDflt = tdb_default_db(zLib);
- testCopyLsmdb(zDflt, "bak.db");
- testClose(ppDb);
- testCopyLsmdb("bak.db", zDflt);
- *pRc = tdb_open(zLib, 0, 0, ppDb);
- }
-}
-
-
-static void doDataTest1(
- const char *zSystem, /* Database system to test */
- int bRecover,
- Datatest1 *p, /* Structure containing test parameters */
- int *pRc /* OUT: Error code */
-){
- int i;
- int iDot;
- int rc = LSM_OK;
- Datasource *pData;
- TestDb *pDb;
- int iToggle = 0;
-
- /* Start the test case, open a database and allocate the datasource. */
- pDb = testOpen(zSystem, 1, &rc);
- pData = testDatasourceNew(&p->defn);
-
- i = 0;
- iDot = 0;
- while( rc==LSM_OK && inRow ){
-
- /* Insert some data */
- testWriteDatasourceRange(pDb, pData, i, p->nVerify, &rc);
- i += p->nVerify;
-
- if( iToggle ) testBegin(pDb, 1, &rc);
- /* Check that the db content is correct. */
- testDbContents(pDb, pData, p->nRow, 0, i-1, p->nTest, p->bTestScan, &rc);
- if( iToggle ) testCommit(pDb, 0, &rc);
- iToggle = (iToggle+1)%2;
-
- if( bRecover ){
- testReopenRecover(&pDb, &rc);
- }else{
- testReopen(&pDb, &rc);
- }
-
- /* Check that the db content is still correct. */
- testDbContents(pDb, pData, p->nRow, 0, i-1, p->nTest, p->bTestScan, &rc);
-
- /* Update the progress dots... */
- testCaseProgress(i, p->nRow, testCaseNDot()/2, &iDot);
- }
-
- i = 0;
- iDot = 0;
- while( rc==LSM_OK && inRow ){
-
- /* Delete some entries */
- testDeleteDatasourceRange(pDb, pData, i, p->nVerify, &rc);
- i += p->nVerify;
-
- /* Check that the db content is correct. */
- testDbContents(pDb, pData, p->nRow, i, p->nRow-1,p->nTest,p->bTestScan,&rc);
-
- /* Close and reopen the database. */
- if( bRecover ){
- testReopenRecover(&pDb, &rc);
- }else{
- testReopen(&pDb, &rc);
- }
-
- /* Check that the db content is still correct. */
- testDbContents(pDb, pData, p->nRow, i, p->nRow-1,p->nTest,p->bTestScan,&rc);
-
- /* Update the progress dots... */
- testCaseProgress(i, p->nRow, testCaseNDot()/2, &iDot);
- }
-
- /* Free the datasource, close the database and finish the test case. */
- testDatasourceFree(pData);
- tdb_close(pDb);
- testCaseFinish(rc);
- *pRc = rc;
-}
-
-
-void test_data_1(
- const char *zSystem, /* Database system name */
- const char *zPattern, /* Run test cases that match this pattern */
- int *pRc /* IN/OUT: Error code */
-){
- Datatest1 aTest[] = {
- { {DATA_RANDOM, 500,600, 1000,2000}, 1000, 100, 10, 0},
- { {DATA_RANDOM, 20,25, 100,200}, 1000, 250, 1000, 1},
- { {DATA_RANDOM, 8,10, 100,200}, 1000, 250, 1000, 1},
- { {DATA_RANDOM, 8,10, 10,20}, 1000, 250, 1000, 1},
- { {DATA_RANDOM, 8,10, 1000,2000}, 1000, 250, 1000, 1},
- { {DATA_RANDOM, 8,100, 10000,20000}, 100, 25, 100, 1},
- { {DATA_RANDOM, 80,100, 10,20}, 1000, 250, 1000, 1},
- { {DATA_RANDOM, 5000,6000, 10,20}, 100, 25, 100, 1},
- { {DATA_SEQUENTIAL, 5,10, 10,20}, 1000, 250, 1000, 1},
- { {DATA_SEQUENTIAL, 5,10, 100,200}, 1000, 250, 1000, 1},
- { {DATA_SEQUENTIAL, 5,10, 1000,2000}, 1000, 250, 1000, 1},
- { {DATA_SEQUENTIAL, 5,100, 10000,20000}, 100, 25, 100, 1},
- { {DATA_RANDOM, 10,10, 100,100}, 100000, 1000, 100, 0},
- { {DATA_SEQUENTIAL, 10,10, 100,100}, 100000, 1000, 100, 0},
- };
-
- int i;
- int bRecover;
-
- for(bRecover=0; bRecover<2; bRecover++){
- if( bRecover==1 && memcmp(zSystem, "lsm", 3) ) break;
- for(i=0; *pRc==LSM_OK && idefn);
- rc = testControlDb(&pControl);
-
- if( tdb_lsm(pDb) ){
- int nBuf = 32 * 1024 * 1024;
- lsm_config(tdb_lsm(pDb), LSM_CONFIG_AUTOFLUSH, &nBuf);
- }
-
- for(i=0; rc==0 && inIter; i++){
- void *pKey1; int nKey1;
- void *pKey2; int nKey2;
- int ii;
- int nRange = MIN(p->nIter*p->nWrite, p->nRange);
-
- for(ii=0; rc==0 && iinWrite; ii++){
- int iKey = (i*p->nWrite + ii) % p->nRange;
- testWriteDatasource(pControl, pData, iKey, &rc);
- testWriteDatasource(pDb, pData, iKey, &rc);
- }
-
- testDatasourceEntry(pData, i+1000000, &pKey1, &nKey1, 0, 0);
- pKey1 = testMallocCopy(pKey1, nKey1);
- testDatasourceEntry(pData, i+2000000, &pKey2, &nKey2, 0, 0);
-
- testDeleteRange(pDb, pKey1, nKey1, pKey2, nKey2, &rc);
- testDeleteRange(pControl, pKey1, nKey1, pKey2, nKey2, &rc);
- testFree(pKey1);
-
- testCompareDb(pData, nRange, i, pControl, pDb, &rc);
- if( bRecover ){
- testReopenRecover(&pDb, &rc);
- }else{
- testReopen(&pDb, &rc);
- }
- testCompareDb(pData, nRange, i, pControl, pDb, &rc);
-
- /* Update the progress dots... */
- testCaseProgress(i, p->nIter, testCaseNDot(), &iDot);
- }
-
- testClose(&pDb);
- testClose(&pControl);
- testDatasourceFree(pData);
- testCaseFinish(rc);
- *pRc = rc;
-}
-
-static char *getName2(const char *zSystem, int bRecover, Datatest2 *pTest){
- char *zRet;
- char *zData;
- zData = testDatasourceName(&pTest->defn);
- zRet = testMallocPrintf("data2.%s.%s.rec=%d.%d.%d.%d",
- zSystem, zData, bRecover, pTest->nRange, pTest->nWrite, pTest->nIter
- );
- testFree(zData);
- return zRet;
-}
-
-void test_data_2(
- const char *zSystem, /* Database system name */
- const char *zPattern, /* Run test cases that match this pattern */
- int *pRc /* IN/OUT: Error code */
-){
- Datatest2 aTest[] = {
- /* defn, nRange, nWrite, nIter */
- { {DATA_RANDOM, 20,25, 100,200}, 10000, 10, 50 },
- { {DATA_RANDOM, 20,25, 100,200}, 10000, 200, 50 },
- { {DATA_RANDOM, 20,25, 100,200}, 100, 10, 1000 },
- { {DATA_RANDOM, 20,25, 100,200}, 100, 200, 50 },
- };
-
- int i;
- int bRecover;
-
- for(bRecover=0; bRecover<2; bRecover++){
- if( bRecover==1 && memcmp(zSystem, "lsm", 3) ) break;
- for(i=0; *pRc==LSM_OK && i> 24) & 0xFF;
- aBuf[1] = (iVal >> 16) & 0xFF;
- aBuf[2] = (iVal >> 8) & 0xFF;
- aBuf[3] = (iVal >> 0) & 0xFF;
-}
-
-void dt3PutKey(u8 *aBuf, int iKey){
- assert( iKey<100000 && iKey>=0 );
- sprintf((char *)aBuf, "%.5d", iKey);
-}
-
-static void doDataTest3(
- const char *zSystem, /* Database system to test */
- Datatest3 *p, /* Structure containing test parameters */
- int *pRc /* OUT: Error code */
-){
- int iDot = 0;
- int rc = *pRc;
- TestDb *pDb;
- u8 *abPresent; /* Array of boolean */
- char *aVal; /* Buffer to hold values */
- int i;
- u32 iSeq = 10; /* prng counter */
-
- abPresent = (u8 *)testMalloc(p->nRange+1);
- aVal = (char *)testMalloc(p->nValMax+1);
- pDb = testOpen(zSystem, 1, &rc);
-
- for(i=0; inIter && rc==0; i++){
- int ii;
-
- testCaseProgress(i, p->nIter, testCaseNDot(), &iDot);
-
- /* Perform nWrite inserts */
- for(ii=0; iinWrite; ii++){
- u8 aKey[6];
- u32 iKey;
- int nVal;
-
- iKey = (testPrngValue(iSeq++) % p->nRange) + 1;
- nVal = (testPrngValue(iSeq++) % (p->nValMax - p->nValMin)) + p->nValMin;
- testPrngString(testPrngValue(iSeq++), aVal, nVal);
- dt3PutKey(aKey, iKey);
-
- testWrite(pDb, aKey, sizeof(aKey)-1, aVal, nVal, &rc);
- abPresent[iKey] = 1;
- }
-
- /* Perform nDelete deletes */
- for(ii=0; ii<p->nDelete; ii++){
- u8 aKey1[6];
- u8 aKey2[6];
- u32 iKey;
-
- iKey = (testPrngValue(iSeq++) % p->nRange) + 1;
- dt3PutKey(aKey1, iKey-1);
- dt3PutKey(aKey2, iKey+1);
-
- testDeleteRange(pDb, aKey1, sizeof(aKey1)-1, aKey2, sizeof(aKey2)-1, &rc);
- abPresent[iKey] = 0;
- }
-
- testReopen(&pDb, &rc);
-
- for(ii=1; rc==0 && ii<=p->nRange; ii++){
- int nDbVal;
- void *pDbVal;
- u8 aKey[6];
- int dbrc;
-
- dt3PutKey(aKey, ii);
- dbrc = tdb_fetch(pDb, aKey, sizeof(aKey)-1, &pDbVal, &nDbVal);
- testCompareInt(0, dbrc, &rc);
-
- if( abPresent[ii] ){
- testCompareInt(1, (nDbVal>0), &rc);
- }else{
- testCompareInt(1, (nDbVal<0), &rc);
- }
- }
- }
-
- testClose(&pDb);
- testCaseFinish(rc);
- *pRc = rc;
-}
-
-static char *getName3(const char *zSystem, Datatest3 *p){
- return testMallocPrintf("data3.%s.%d.%d.%d.%d.(%d..%d)",
- zSystem, p->nRange, p->nIter, p->nWrite, p->nDelete,
- p->nValMin, p->nValMax
- );
-}
-
-void test_data_3(
- const char *zSystem, /* Database system name */
- const char *zPattern, /* Run test cases that match this pattern */
- int *pRc /* IN/OUT: Error code */
-){
- Datatest3 aTest[] = {
- /* nRange, nIter, nWrite, nDelete, nValMin, nValMax */
- { 100, 1000, 5, 5, 50, 100 },
- { 100, 1000, 2, 2, 5, 10 },
- };
-
- int i;
-
- for(i=0; *pRc==LSM_OK && inRow++;
- for(i=0; icksum1 += ((u8 *)pKey)[i];
- p->cksum2 += p->cksum1;
- }
- for(i=0; icksum1 += ((u8 *)pVal)[i];
- p->cksum2 += p->cksum1;
- }
-}
-
-/*
-** tdb_scan() callback used by testCountDatabase()
-*/
-static void scanCountDb(
- void *pCtx,
- void *pKey, int nKey,
- void *pVal, int nVal
-){
- Cksum *p = (Cksum *)pCtx;
- p->nRow++;
-
- unused_parameter(pKey);
- unused_parameter(nKey);
- unused_parameter(pVal);
- unused_parameter(nVal);
-}
-
-
-/*
-** Iterate through the entire contents of database pDb. Write a checksum
-** string based on the db contents into buffer zOut before returning. A
-** checksum string is at most 29 (TEST_CKSUM_BYTES) bytes in size:
-**
-** * 32-bit integer (10 bytes)
-** * 1 space (1 byte)
-** * 32-bit hex (8 bytes)
-** * 1 space (1 byte)
-** * 32-bit hex (8 bytes)
-** * nul-terminator (1 byte)
-**
-** The number of entries in the database is returned.
-*/
-int testCksumDatabase(
- TestDb *pDb, /* Database handle */
- char *zOut /* Buffer to write checksum to */
-){
- Cksum cksum;
- memset(&cksum, 0, sizeof(Cksum));
- tdb_scan(pDb, (void *)&cksum, 0, 0, 0, 0, 0, scanCksumDb);
- sprintf(zOut, "%d %x %x",
- cksum.nRow, (u32)cksum.cksum1, (u32)cksum.cksum2
- );
- assert( strlen(zOut)0 ); */
- if( testrc==0 ) testrc = lsm_checkpoint(db, 0);
- }
- tdb_close(pDb);
-
- /* Check that the database content is still correct */
- testCompareCksumLsmdb(DBNAME,
- bCompress, testCksumArrayGet(pCksumDb, nRow), 0, pRc);
- }
-
- testCksumArrayFree(pCksumDb);
- testDatasourceFree(pData);
-}
-
-/*
-** This test verifies that if a system crash occurs while committing a
-** transaction to the log file, no earlier transactions are lost or damaged.
-*/
-static void crash_test2(int bCompress, int *pRc){
- const char *DBNAME = "testdb.lsm";
- const DatasourceDefn defn = {TEST_DATASOURCE_RANDOM, 12, 16, 1000, 1000};
-
- const int nIter = 200;
- const int nInsert = 20;
-
- int i;
- int iDot = 0;
- Datasource *pData;
- CksumDb *pCksumDb;
- TestDb *pDb;
-
- /* Allocate datasource. And calculate the expected checksums. */
- pData = testDatasourceNew(&defn);
- pCksumDb = testCksumArrayNew(pData, 100, 100+nInsert, 1);
-
- /* Setup and save the initial database. */
- testSetupSavedLsmdb("", DBNAME, pData, 100, pRc);
-
- for(i=0; izTest) ){
- p->x(p->bCompress, pRc);
- testCaseFinish(*pRc);
- }
- }
-}
diff --git a/ext/lsm1/lsm-test/lsmtest3.c b/ext/lsm1/lsm-test/lsmtest3.c
deleted file mode 100644
index 760dec300f..0000000000
--- a/ext/lsm1/lsm-test/lsmtest3.c
+++ /dev/null
@@ -1,238 +0,0 @@
-
-
-/*
-** This file contains tests related to the explicit rollback of database
-** transactions and sub-transactions.
-*/
-
-
-/*
-** Repeat 2000 times (until the db contains 100,000 entries):
-**
-** 1. Open a transaction and insert 500 rows, opening a nested
-** sub-transaction each 100 rows.
-**
-** 2. Roll back to each sub-transaction savepoint. Check the database
-** checksum looks Ok.
-**
-** 3. Every second iteration, roll back the main transaction. Check the
-** db checksum is correct. Every other iteration, commit the main
-** transaction (increasing the size of the db by 100 rows).
-*/
-
-
-#include "lsmtest.h"
-
-struct CksumDb {
- int nFirst;
- int nLast;
- int nStep;
- char **azCksum;
-};
-
-CksumDb *testCksumArrayNew(
- Datasource *pData,
- int nFirst,
- int nLast,
- int nStep
-){
- TestDb *pDb;
- CksumDb *pRet;
- int i;
- int nEntry;
- int rc = 0;
-
- assert( nLast>=nFirst && ((nLast-nFirst)%nStep)==0 );
-
- pRet = malloc(sizeof(CksumDb));
- memset(pRet, 0, sizeof(CksumDb));
- pRet->nFirst = nFirst;
- pRet->nLast = nLast;
- pRet->nStep = nStep;
- nEntry = 1 + ((nLast - nFirst) / nStep);
-
- /* Allocate space so that azCksum is an array of nEntry pointers to
- ** buffers each TEST_CKSUM_BYTES in size. */
- pRet->azCksum = (char **)malloc(nEntry * (sizeof(char *) + TEST_CKSUM_BYTES));
- for(i=0; iazCksum[nEntry]);
- pRet->azCksum[i] = &pStart[i * TEST_CKSUM_BYTES];
- }
-
- tdb_open("lsm", "tempdb.lsm", 1, &pDb);
- testWriteDatasourceRange(pDb, pData, 0, nFirst, &rc);
- for(i=0; iazCksum[i]);
- if( i==nEntry ) break;
- testWriteDatasourceRange(pDb, pData, nFirst+i*nStep, nStep, &rc);
- }
-
- tdb_close(pDb);
-
- return pRet;
-}
-
-char *testCksumArrayGet(CksumDb *p, int nRow){
- int i;
- assert( nRow>=p->nFirst );
- assert( nRow<=p->nLast );
- assert( ((nRow-p->nFirst) % p->nStep)==0 );
-
- i = (nRow - p->nFirst) / p->nStep;
- return p->azCksum[i];
-}
-
-void testCksumArrayFree(CksumDb *p){
- free(p->azCksum);
- memset(p, 0x55, sizeof(*p));
- free(p);
-}
-
-/* End of CksumDb code.
-**************************************************************************/
-
-/*
-** Test utility function. Write key-value pair $i from datasource pData
-** into database pDb.
-*/
-void testWriteDatasource(TestDb *pDb, Datasource *pData, int i, int *pRc){
- void *pKey; int nKey;
- void *pVal; int nVal;
- testDatasourceEntry(pData, i, &pKey, &nKey, &pVal, &nVal);
- testWrite(pDb, pKey, nKey, pVal, nVal, pRc);
-}
-
-/*
-** Test utility function. Delete datasource pData key $i from database pDb.
-*/
-void testDeleteDatasource(TestDb *pDb, Datasource *pData, int i, int *pRc){
- void *pKey; int nKey;
- testDatasourceEntry(pData, i, &pKey, &nKey, 0, 0);
- testDelete(pDb, pKey, nKey, pRc);
-}
-
-/*
-** This function inserts nWrite key/value pairs into database pDb - the
-** nWrite key value pairs starting at iFirst from data source pData.
-*/
-void testWriteDatasourceRange(
- TestDb *pDb, /* Database to write to */
- Datasource *pData, /* Data source to read values from */
- int iFirst, /* Index of first key/value pair */
- int nWrite, /* Number of key/value pairs to write */
- int *pRc /* IN/OUT: Error code */
-){
- int i;
- for(i=0; i2 && rc==0; iTrans--){
- tdb_rollback(pDb, iTrans);
- nCurrent -= 100;
- testCksumDatabase(pDb, zCksum);
- testCompareStr(zCksum, testCksumArrayGet(pCksum, nCurrent), &rc);
- }
-
- if( i%2 ){
- tdb_rollback(pDb, 0);
- nCurrent -= 100;
- testCksumDatabase(pDb, zCksum);
- testCompareStr(zCksum, testCksumArrayGet(pCksum, nCurrent), &rc);
- }else{
- tdb_commit(pDb, 0);
- }
- }
- testCaseFinish(rc);
-
- skip_rollback_test:
- tdb_close(pDb);
- testCksumArrayFree(pCksum);
- return rc;
-}
-
-void test_rollback(
- const char *zSystem,
- const char *zPattern,
- int *pRc
-){
- if( *pRc==0 ){
- int bRun = 1;
-
- if( zPattern ){
- char *zName = getName(zSystem);
- bRun = testGlobMatch(zPattern, zName);
- testFree(zName);
- }
-
- if( bRun ){
- DatasourceDefn defn = { TEST_DATASOURCE_RANDOM, 10, 15, 50, 100 };
- Datasource *pData = testDatasourceNew(&defn);
- *pRc = rollback_test_1(zSystem, pData);
- testDatasourceFree(pData);
- }
- }
-}
diff --git a/ext/lsm1/lsm-test/lsmtest4.c b/ext/lsm1/lsm-test/lsmtest4.c
deleted file mode 100644
index a47241db92..0000000000
--- a/ext/lsm1/lsm-test/lsmtest4.c
+++ /dev/null
@@ -1,127 +0,0 @@
-
-/*
-** This file contains test cases involving multiple database clients.
-*/
-
-#include "lsmtest.h"
-
-/*
-** The following code implements test cases "mc1.*".
-**
-** This test case uses one writer and $nReader readers. All connections
-** are driven by a single thread. All connections are opened at the start
-** of the test and remain open until the test is finished.
-**
-** The test consists of $nStep steps. Each step the following is performed:
-**
-** 1. The writer inserts $nWriteStep records into the db.
-**
-** 2. The writer checks that the contents of the db are as expected.
-**
-** 3. Each reader that currently has an open read transaction also checks
-** that the contents of the db are as expected (according to the snapshot
-** the read transaction is reading - see below).
-**
-** After step 1, reader 1 opens a read transaction. After step 2, reader
-** 2 opens a read transaction, and so on. At step ($nReader+1), reader 1
-** closes the current read transaction and opens a new one. And so on.
-** The result is that at step N (for N > $nReader), there exists a reader
-** with an open read transaction reading the snapshot committed following
-** steps (N-$nReader-1) to N.
-*/
-typedef struct Mctest Mctest;
-struct Mctest {
- DatasourceDefn defn; /* Datasource to use */
- int nStep; /* Total number of steps in test */
- int nWriteStep; /* Number of rows to insert each step */
- int nReader; /* Number of read connections */
-};
-static void do_mc_test(
- const char *zSystem, /* Database system to test */
- Mctest *pTest,
- int *pRc /* IN/OUT: return code */
-){
- const int nDomain = pTest->nStep * pTest->nWriteStep;
- Datasource *pData; /* Source of data */
- TestDb *pDb; /* First database connection (writer) */
- int iReader; /* Used to iterate through aReader */
- int iStep; /* Current step in test */
- int iDot = 0; /* Current step in test */
-
- /* Array of reader connections */
- struct Reader {
- TestDb *pDb; /* Connection handle */
- int iLast; /* Current snapshot contains keys 0..iLast */
- } *aReader;
-
- /* Create a data source */
- pData = testDatasourceNew(&pTest->defn);
-
- /* Open the writer connection */
- pDb = testOpen(zSystem, 1, pRc);
-
- /* Allocate aReader */
- aReader = (struct Reader *)testMalloc(sizeof(aReader[0]) * pTest->nReader);
- for(iReader=0; iReader<pTest->nReader; iReader++){
- aReader[iReader].pDb = testOpen(zSystem, 0, pRc);
- }
-
- for(iStep=0; iStep<pTest->nStep; iStep++){
- int iLast;
- int iBegin; /* Start read trans using aReader[iBegin] */
-
- /* Insert nWriteStep more records into the database */
- int iFirst = iStep*pTest->nWriteStep;
- testWriteDatasourceRange(pDb, pData, iFirst, pTest->nWriteStep, pRc);
-
- /* Check that the db is Ok according to the writer */
- iLast = (iStep+1) * pTest->nWriteStep - 1;
- testDbContents(pDb, pData, nDomain, 0, iLast, iLast, 1, pRc);
-
- /* Have reader (iStep % nReader) open a read transaction here. */
- iBegin = (iStep % pTest->nReader);
- if( iBeginnReader && aReader[iReader].iLast; iReader++){
- iLast = aReader[iReader].iLast;
- testDbContents(
- aReader[iReader].pDb, pData, nDomain, 0, iLast, iLast, 1, pRc
- );
- }
-
- /* Report progress */
- testCaseProgress(iStep, pTest->nStep, testCaseNDot(), &iDot);
- }
-
- /* Close all readers */
- for(iReader=0; iReader<pTest->nReader; iReader++){
- testClose(&aReader[iReader].pDb);
- }
- testFree(aReader);
-
- /* Close the writer-connection and free the datasource */
- testClose(&pDb);
- testDatasourceFree(pData);
-}
-
-
-void test_mc(
- const char *zSystem, /* Database system name */
- const char *zPattern, /* Run test cases that match this pattern */
- int *pRc /* IN/OUT: Error code */
-){
- int i;
- Mctest aTest[] = {
- { { TEST_DATASOURCE_RANDOM, 10,10, 100,100 }, 100, 10, 5 },
- };
-
- for(i=0; i "k.0000000045".
-**
-** As well as the key/value pairs, the database also contains checksum
-** entries. The checksums form a hierarchy - for every F key/value
-** entries there is one level 1 checksum. And for each F level 1 checksums
-** there is one level 2 checksum. And so on.
-**
-** Checksum keys are encoded as the two byte "c." followed by the
-** checksum level, followed by a 10 digit decimal number containing
-** the value of the first key that contributes to the checksum value.
-** For example, assuming F==10, the level 1 checksum that spans keys
-** 10 to 19 is "c.1.0000000010".
-**
-** Clients may perform one of two operations on the database: a read
-** or a write.
-**
-** READ OPERATIONS:
-**
-** A read operation scans a range of F key/value pairs. It computes
-** the expected checksum and then compares the computed value to the
-** actual value stored in the level 1 checksum entry. It then scans
-** the group of F level 1 checksums, and compares the computed checksum
-** to the associated level 2 checksum value, and so on until the
-** highest level checksum value has been verified.
-**
-** If a checksum ever fails to match the expected value, the test
-** has failed.
-**
-** WRITE OPERATIONS:
-**
-** A write operation involves writing (possibly clobbering) a single
-** key/value pair. The associated level 1 checksum is then recalculated
-** updated. Then the level 2 checksum, and so on until the highest
-** level checksum has been modified.
-**
-** All updates occur inside a single transaction.
-**
-** INTERFACE:
-**
-** The interface used by test cases to read and write the db consists
-** of type DbParameters and the following functions:
-**
-** dbReadOperation()
-** dbWriteOperation()
-*/
-
-#include "lsmtest.h"
-
-typedef struct DbParameters DbParameters;
-struct DbParameters {
- int nFanout; /* Checksum fanout (F) */
- int nKey; /* Size of key space (N) */
-};
-
-#define DB_KEY_BYTES (2+5+10+1)
-
-/*
-** Argument aBuf[] must point to a buffer at least DB_KEY_BYTES in size.
-** This function populates the buffer with a nul-terminated key string
-** corresponding to key iKey.
-*/
-static void dbFormatKey(
- DbParameters *pParam,
- int iLevel,
- int iKey, /* Key value */
- char *aBuf /* Write key string here */
-){
- if( iLevel==0 ){
- snprintf(aBuf, DB_KEY_BYTES, "k.%.10d", iKey);
- }else{
- int f = 1;
- int i;
- for(i=0; i<iLevel; i++) f = f * pParam->nFanout;
- snprintf(aBuf, DB_KEY_BYTES, "c.%d.%.10d", iLevel, f*(iKey/f));
- }
-}
-
-/*
-** Argument aBuf[] must point to a buffer at least DB_KEY_BYTES in size.
-** This function populates the buffer with the string representation of
-** checksum value iVal.
-*/
-static void dbFormatCksumValue(u32 iVal, char *aBuf){
- snprintf(aBuf, DB_KEY_BYTES, "%.10u", iVal);
-}
-
-/*
-** Return the highest level of checksum in the database described
-** by *pParam.
-*/
-static int dbMaxLevel(DbParameters *pParam){
- int iMax;
- int n = 1;
- for(iMax=0; n<pParam->nKey; iMax++){
- n = n * pParam->nFanout;
- }
- return iMax;
-}
-
-static void dbCksum(
- void *pCtx, /* IN/OUT: Pointer to u32 containing cksum */
- void *pKey, int nKey, /* Database key. Unused. */
- void *pVal, int nVal /* Database value. Checksum this. */
-){
- u8 *aVal = (u8 *)pVal;
- u32 *pCksum = (u32 *)pCtx;
- u32 cksum = *pCksum;
- int i;
-
- unused_parameter(pKey);
- unused_parameter(nKey);
-
- for(i=0; inFanout entries at level
-** iLevel-1.
-*/
-static u32 dbComputeCksum(
- DbParameters *pParam, /* Database parameters */
- TestDb *pDb, /* Database connection handle */
- int iLevel, /* Level of checksum to compute */
- int iKey, /* Compute checksum for this key */
- int *pRc /* IN/OUT: Error code */
-){
- u32 cksum = 0;
- if( *pRc==0 ){
- int nFirst;
- int nLast;
- int iFirst = 0;
- int iLast = 0;
- int i;
- int f = 1;
- char zFirst[DB_KEY_BYTES];
- char zLast[DB_KEY_BYTES];
-
- assert( iLevel>=1 );
- for(i=0; i<iLevel; i++) f = f * pParam->nFanout;
-
- iFirst = f*(iKey/f);
- iLast = iFirst + f - 1;
- dbFormatKey(pParam, iLevel-1, iFirst, zFirst);
- dbFormatKey(pParam, iLevel-1, iLast, zLast);
- nFirst = strlen(zFirst);
- nLast = strlen(zLast);
-
- *pRc = tdb_scan(pDb, (u32*)&cksum, 0, zFirst, nFirst, zLast, nLast,dbCksum);
- }
-
- return cksum;
-}
-
-static void dbReadOperation(
- DbParameters *pParam, /* Database parameters */
- TestDb *pDb, /* Database connection handle */
- void (*xDelay)(void *),
- void *pDelayCtx,
- int iKey, /* Key to read */
- int *pRc /* IN/OUT: Error code */
-){
- const int iMax = dbMaxLevel(pParam);
- int i;
-
- if( tdb_transaction_support(pDb) ) testBegin(pDb, 1, pRc);
- for(i=1; *pRc==0 && i<=iMax; i++){
- char zCksum[DB_KEY_BYTES];
- char zKey[DB_KEY_BYTES];
- u32 iCksum = 0;
-
- iCksum = dbComputeCksum(pParam, pDb, i, iKey, pRc);
- if( iCksum ){
- if( xDelay && i==1 ) xDelay(pDelayCtx);
- dbFormatCksumValue(iCksum, zCksum);
- dbFormatKey(pParam, i, iKey, zKey);
- testFetchStr(pDb, zKey, zCksum, pRc);
- }
- }
- if( tdb_transaction_support(pDb) ) testCommit(pDb, 0, pRc);
-}
-
-static int dbWriteOperation(
- DbParameters *pParam, /* Database parameters */
- TestDb *pDb, /* Database connection handle */
- int iKey, /* Key to write to */
- const char *zValue, /* Nul-terminated value to write */
- int *pRc /* IN/OUT: Error code */
-){
- const int iMax = dbMaxLevel(pParam);
- char zKey[DB_KEY_BYTES];
- int i;
- int rc;
-
- assert( iKey>=0 && iKey<pParam->nKey );
- dbFormatKey(pParam, 0, iKey, zKey);
-
- /* Open a write transaction. This may fail - SQLITE4_BUSY */
- if( *pRc==0 && tdb_transaction_support(pDb) ){
- rc = tdb_begin(pDb, 2);
- if( rc==5 ) return 0;
- *pRc = rc;
- }
-
- testWriteStr(pDb, zKey, zValue, pRc);
- for(i=1; i<=iMax; i++){
- char zCksum[DB_KEY_BYTES];
- u32 iCksum = 0;
-
- iCksum = dbComputeCksum(pParam, pDb, i, iKey, pRc);
- dbFormatCksumValue(iCksum, zCksum);
- dbFormatKey(pParam, i, iKey, zKey);
- testWriteStr(pDb, zKey, zCksum, pRc);
- }
- if( tdb_transaction_support(pDb) ) testCommit(pDb, 0, pRc);
- return 1;
-}
-
-/*************************************************************************
-** The following block contains testXXX() functions that implement a
-** wrapper around the systems native multi-thread support. There are no
-** synchronization primitives - just functions to launch and join
-** threads. Wrapper functions are:
-**
-** testThreadSupport()
-**
-** testThreadInit()
-** testThreadShutdown()
-** testThreadLaunch()
-** testThreadWait()
-**
-** testThreadSetHalt()
-** testThreadGetHalt()
-** testThreadSetResult()
-** testThreadGetResult()
-**
-** testThreadEnterMutex()
-** testThreadLeaveMutex()
-*/
-typedef struct ThreadSet ThreadSet;
-#ifdef LSM_MUTEX_PTHREADS
-
-#include <pthread.h>
-#include <unistd.h>
-
-typedef struct Thread Thread;
-struct Thread {
- int rc;
- char *zMsg;
- pthread_t id;
- void (*xMain)(ThreadSet *, int, void *);
- void *pCtx;
- ThreadSet *pThreadSet;
-};
-
-struct ThreadSet {
- int bHalt; /* Halt flag */
- int nThread; /* Number of threads */
- Thread *aThread; /* Array of Thread structures */
- pthread_mutex_t mutex; /* Mutex used for cheating */
-};
-
-/*
-** Return true if this build supports threads, or false otherwise. If
-** this function returns false, no other testThreadXXX() functions should
-** be called.
-*/
-static int testThreadSupport(){ return 1; }
-
-/*
-** Allocate and return a thread-set handle with enough space allocated
-** to handle up to nMax threads. Each call to this function should be
-** matched by a call to testThreadShutdown() to delete the object.
-*/
-static ThreadSet *testThreadInit(int nMax){
- int nByte; /* Total space to allocate */
- ThreadSet *p; /* Return value */
-
- nByte = sizeof(ThreadSet) + sizeof(struct Thread) * nMax;
- p = (ThreadSet *)testMalloc(nByte);
- p->nThread = nMax;
- p->aThread = (Thread *)&p[1];
- pthread_mutex_init(&p->mutex, 0);
-
- return p;
-}
-
-/*
-** Delete a thread-set object and release all resources held by it.
-*/
-static void testThreadShutdown(ThreadSet *p){
- int i;
- for(i=0; i<p->nThread; i++){
- testFree(p->aThread[i].zMsg);
- }
- pthread_mutex_destroy(&p->mutex);
- testFree(p);
-}
-
-static void *ttMain(void *pArg){
- Thread *pThread = (Thread *)pArg;
- int iThread;
- iThread = (pThread - pThread->pThreadSet->aThread);
- pThread->xMain(pThread->pThreadSet, iThread, pThread->pCtx);
- return 0;
-}
-
-/*
-** Launch a new thread.
-*/
-static int testThreadLaunch(
- ThreadSet *p,
- int iThread,
- void (*xMain)(ThreadSet *, int, void *),
- void *pCtx
-){
- int rc;
- Thread *pThread;
-
- assert( iThread>=0 && iThread<p->nThread );
-
- pThread = &p->aThread[iThread];
- assert( pThread->pThreadSet==0 );
- pThread->xMain = xMain;
- pThread->pCtx = pCtx;
- pThread->pThreadSet = p;
- rc = pthread_create(&pThread->id, 0, ttMain, (void *)pThread);
-
- return rc;
-}
-
-/*
-** Set the thread-set "halt" flag.
-*/
-static void testThreadSetHalt(ThreadSet *pThreadSet){
- pThreadSet->bHalt = 1;
-}
-
-/*
-** Return the current value of the thread-set "halt" flag.
-*/
-static int testThreadGetHalt(ThreadSet *pThreadSet){
- return pThreadSet->bHalt;
-}
-
-static void testThreadSleep(ThreadSet *pThreadSet, int nMs){
- int nRem = nMs;
- while( nRem>0 && testThreadGetHalt(pThreadSet)==0 ){
- usleep(50000);
- nRem -= 50;
- }
-}
-
-/*
-** Wait for all threads launched to finish before returning. If nMs
-** is greater than zero, set the "halt" flag to tell all threads
-** to halt after waiting nMs milliseconds.
-*/
-static void testThreadWait(ThreadSet *pThreadSet, int nMs){
- int i;
-
- testThreadSleep(pThreadSet, nMs);
- testThreadSetHalt(pThreadSet);
- for(i=0; i<pThreadSet->nThread; i++){
- Thread *pThread = &pThreadSet->aThread[i];
- if( pThread->xMain ){
- pthread_join(pThread->id, 0);
- }
- }
-}
-
-/*
-** Set the result for thread iThread.
-*/
-static void testThreadSetResult(
- ThreadSet *pThreadSet, /* Thread-set handle */
- int iThread, /* Set result for this thread */
- int rc, /* Result error code */
- char *zFmt, /* Result string format */
- ... /* Result string formatting args... */
-){
- va_list ap;
-
- testFree(pThreadSet->aThread[iThread].zMsg);
- pThreadSet->aThread[iThread].rc = rc;
- pThreadSet->aThread[iThread].zMsg = 0;
- if( zFmt ){
- va_start(ap, zFmt);
- pThreadSet->aThread[iThread].zMsg = testMallocVPrintf(zFmt, ap);
- va_end(ap);
- }
-}
-
-/*
-** Retrieve the result for thread iThread.
-*/
-static int testThreadGetResult(
- ThreadSet *pThreadSet, /* Thread-set handle */
- int iThread, /* Get result for this thread */
- const char **pzRes /* OUT: Pointer to result string */
-){
- if( pzRes ) *pzRes = pThreadSet->aThread[iThread].zMsg;
- return pThreadSet->aThread[iThread].rc;
-}
-
-/*
-** Enter and leave the test case mutex.
-*/
-#if 0
-static void testThreadEnterMutex(ThreadSet *p){
- pthread_mutex_lock(&p->mutex);
-}
-static void testThreadLeaveMutex(ThreadSet *p){
- pthread_mutex_unlock(&p->mutex);
-}
-#endif
-#endif
-
-#if !defined(LSM_MUTEX_PTHREADS)
-static int testThreadSupport(){ return 0; }
-
-#define testThreadInit(a) 0
-#define testThreadShutdown(a)
-#define testThreadLaunch(a,b,c,d) 0
-#define testThreadWait(a,b)
-#define testThreadSetHalt(a)
-#define testThreadGetHalt(a) 0
-#define testThreadGetResult(a,b,c) 0
-#define testThreadSleep(a,b) 0
-
-static void testThreadSetResult(ThreadSet *a, int b, int c, char *d, ...){
- unused_parameter(a);
- unused_parameter(b);
- unused_parameter(c);
- unused_parameter(d);
-}
-#endif
-/* End of threads wrapper.
-*************************************************************************/
-
-/*************************************************************************
-** Below this point is the third part of this file - the implementation
-** of the mt1.* tests.
-*/
-typedef struct Mt1Test Mt1Test;
-struct Mt1Test {
- DbParameters param; /* Description of database to read/write */
- int nReadwrite; /* Number of read/write threads */
- int nFastReader; /* Number of fast reader threads */
- int nSlowReader; /* Number of slow reader threads */
- int nMs; /* How long to run for */
- const char *zSystem; /* Database system to test */
-};
-
-typedef struct Mt1DelayCtx Mt1DelayCtx;
-struct Mt1DelayCtx {
- ThreadSet *pSet; /* Threadset to sleep within */
- int nMs; /* Sleep in ms */
-};
-
-static void xMt1Delay(void *pCtx){
- Mt1DelayCtx *p = (Mt1DelayCtx *)pCtx;
- testThreadSleep(p->pSet, p->nMs);
-}
-
-#define MT1_THREAD_RDWR 0
-#define MT1_THREAD_SLOW 1
-#define MT1_THREAD_FAST 2
-
-static void xMt1Work(lsm_db *pDb, void *pCtx){
-#if 0
- char *z = 0;
- lsm_info(pDb, LSM_INFO_DB_STRUCTURE, &z);
- printf("%s\n", z);
- fflush(stdout);
-#endif
-}
-
-/*
-** This is the main() proc for all threads in test case "mt1".
-*/
-static void mt1Main(ThreadSet *pThreadSet, int iThread, void *pCtx){
- Mt1Test *p = (Mt1Test *)pCtx; /* Test parameters */
- Mt1DelayCtx delay;
- int nRead = 0; /* Number of calls to dbReadOperation() */
- int nWrite = 0; /* Number of completed database writes */
- int rc = 0; /* Error code */
- int iPrng; /* Prng argument variable */
- TestDb *pDb; /* Database handle */
- int eType;
-
- delay.pSet = pThreadSet;
- delay.nMs = 0;
- if( iThread<p->nReadwrite ){
- eType = MT1_THREAD_RDWR;
- }else if( iThread<(p->nReadwrite+p->nFastReader) ){
- eType = MT1_THREAD_FAST;
- }else{
- eType = MT1_THREAD_SLOW;
- delay.nMs = (p->nMs / 20);
- }
-
- /* Open a new database connection. Initialize the pseudo-random number
- ** argument based on the thread number. */
- iPrng = testPrngValue(iThread);
- pDb = testOpen(p->zSystem, 0, &rc);
-
- if( rc==0 ){
- tdb_lsm_config_work_hook(pDb, xMt1Work, 0);
- }
-
- /* Loop until either an error occurs or some other thread sets the
- ** halt flag. */
- while( rc==0 && testThreadGetHalt(pThreadSet)==0 ){
- int iKey;
-
- /* Perform a read operation on an arbitrarily selected key. */
- iKey = (testPrngValue(iPrng++) % p->param.nKey);
- dbReadOperation(&p->param, pDb, xMt1Delay, (void *)&delay, iKey, &rc);
- if( rc ) continue;
- nRead++;
-
- /* Attempt to write an arbitrary key value pair (and update the associated
- ** checksum entries). dbWriteOperation() returns 1 if the write is
- ** successful, or 0 if it failed with an LSM_BUSY error. */
- if( eType==MT1_THREAD_RDWR ){
- char aValue[50];
- char aRnd[25];
-
- iKey = (testPrngValue(iPrng++) % p->param.nKey);
- testPrngString(iPrng, aRnd, sizeof(aRnd));
- iPrng += sizeof(aRnd);
- snprintf(aValue, sizeof(aValue), "%d.%s", iThread, aRnd);
- nWrite += dbWriteOperation(&p->param, pDb, iKey, aValue, &rc);
- }
- }
- testClose(&pDb);
-
- /* If an error has occured, set the thread error code and the threadset
- ** halt flag to tell the other test threads to halt. Otherwise, set the
- ** thread error code to 0 and post a message with the number of read
- ** and write operations completed. */
- if( rc ){
- testThreadSetResult(pThreadSet, iThread, rc, 0);
- testThreadSetHalt(pThreadSet);
- }else{
- testThreadSetResult(pThreadSet, iThread, 0, "r/w: %d/%d", nRead, nWrite);
- }
-}
-
-static void do_test_mt1(
- const char *zSystem, /* Database system name */
- const char *zPattern, /* Run test cases that match this pattern */
- int *pRc /* IN/OUT: Error code */
-){
- Mt1Test aTest[] = {
- /* param, nReadwrite, nFastReader, nSlowReader, nMs, zSystem */
- { {10, 1000}, 4, 0, 0, 10000, 0 },
- { {10, 1000}, 4, 4, 2, 100000, 0 },
- { {10, 100000}, 4, 0, 0, 10000, 0 },
- { {10, 100000}, 4, 4, 2, 100000, 0 },
- };
- int i;
-
- for(i=0; *pRc==0 && iparam.nFanout, p->param.nKey,
- p->nMs, p->nReadwrite, p->nFastReader, p->nSlowReader
- );
- if( bRun ){
- TestDb *pDb;
- ThreadSet *pSet;
- int iThread;
- int nThread;
-
- p->zSystem = zSystem;
- pDb = testOpen(zSystem, 1, pRc);
-
- nThread = p->nReadwrite + p->nFastReader + p->nSlowReader;
- pSet = testThreadInit(nThread);
- for(iThread=0; *pRc==0 && iThreadnMs);
- for(iThread=0; *pRc==0 && iThreadiNext = 1;
- p->bEnable = 1;
- p->nFail = 1;
- p->pEnv = tdb_lsm_env();
-}
-
-static void xOomHook(OomTest *p){
- p->nFail++;
-}
-
-static int testOomContinue(OomTest *p){
- if( p->rc!=0 || (p->iNext>1 && p->nFail==0) ){
- return 0;
- }
- p->nFail = 0;
- testMallocOom(p->pEnv, p->iNext, 0, (void (*)(void*))xOomHook, (void *)p);
- return 1;
-}
-
-static void testOomEnable(OomTest *p, int bEnable){
- p->bEnable = bEnable;
- testMallocOomEnable(p->pEnv, bEnable);
-}
-
-static void testOomNext(OomTest *p){
- p->iNext++;
-}
-
-static int testOomHit(OomTest *p){
- return (p->nFail>0);
-}
-
-static int testOomFinish(OomTest *p){
- return p->rc;
-}
-
-static void testOomAssert(OomTest *p, int bVal){
- if( bVal==0 ){
- test_failed();
- p->rc = 1;
- }
-}
-
-/*
-** Test that the error code matches the state of the OomTest object passed
-** as the first argument. Specifically, check that rc is LSM_NOMEM if an
-** OOM error has already been injected, or LSM_OK if not.
-*/
-static void testOomAssertRc(OomTest *p, int rc){
- testOomAssert(p, rc==LSM_OK || rc==LSM_NOMEM);
- testOomAssert(p, testOomHit(p)==(rc==LSM_NOMEM) || p->bEnable==0 );
-}
-
-static void testOomOpen(
- OomTest *pOom,
- const char *zName,
- lsm_db **ppDb,
- int *pRc
-){
- if( *pRc==LSM_OK ){
- int rc;
- rc = lsm_new(tdb_lsm_env(), ppDb);
- if( rc==LSM_OK ) rc = lsm_open(*ppDb, zName);
- testOomAssertRc(pOom, rc);
- *pRc = rc;
- }
-}
-
-static void testOomFetch(
- OomTest *pOom,
- lsm_db *pDb,
- void *pKey, int nKey,
- void *pVal, int nVal,
- int *pRc
-){
- testOomAssertRc(pOom, *pRc);
- if( *pRc==LSM_OK ){
- lsm_cursor *pCsr;
- int rc;
-
- rc = lsm_csr_open(pDb, &pCsr);
- if( rc==LSM_OK ) rc = lsm_csr_seek(pCsr, pKey, nKey, 0);
- testOomAssertRc(pOom, rc);
-
- if( rc==LSM_OK ){
- const void *p; int n;
- testOomAssert(pOom, lsm_csr_valid(pCsr));
-
- rc = lsm_csr_key(pCsr, &p, &n);
- testOomAssertRc(pOom, rc);
- testOomAssert(pOom, rc!=LSM_OK || (n==nKey && memcmp(pKey, p, nKey)==0) );
- }
-
- if( rc==LSM_OK ){
- const void *p; int n;
- testOomAssert(pOom, lsm_csr_valid(pCsr));
-
- rc = lsm_csr_value(pCsr, &p, &n);
- testOomAssertRc(pOom, rc);
- testOomAssert(pOom, rc!=LSM_OK || (n==nVal && memcmp(pVal, p, nVal)==0) );
- }
-
- lsm_csr_close(pCsr);
- *pRc = rc;
- }
-}
-
-static void testOomWrite(
- OomTest *pOom,
- lsm_db *pDb,
- void *pKey, int nKey,
- void *pVal, int nVal,
- int *pRc
-){
- testOomAssertRc(pOom, *pRc);
- if( *pRc==LSM_OK ){
- int rc;
-
- rc = lsm_insert(pDb, pKey, nKey, pVal, nVal);
- testOomAssertRc(pOom, rc);
-
- *pRc = rc;
- }
-}
-
-
-static void testOomFetchStr(
- OomTest *pOom,
- lsm_db *pDb,
- const char *zKey,
- const char *zVal,
- int *pRc
-){
- int nKey = strlen(zKey);
- int nVal = strlen(zVal);
- testOomFetch(pOom, pDb, (void *)zKey, nKey, (void *)zVal, nVal, pRc);
-}
-
-static void testOomFetchData(
- OomTest *pOom,
- lsm_db *pDb,
- Datasource *pData,
- int iKey,
- int *pRc
-){
- void *pKey; int nKey;
- void *pVal; int nVal;
- testDatasourceEntry(pData, iKey, &pKey, &nKey, &pVal, &nVal);
- testOomFetch(pOom, pDb, pKey, nKey, pVal, nVal, pRc);
-}
-
-static void testOomWriteStr(
- OomTest *pOom,
- lsm_db *pDb,
- const char *zKey,
- const char *zVal,
- int *pRc
-){
- int nKey = strlen(zKey);
- int nVal = strlen(zVal);
- testOomWrite(pOom, pDb, (void *)zKey, nKey, (void *)zVal, nVal, pRc);
-}
-
-static void testOomWriteData(
- OomTest *pOom,
- lsm_db *pDb,
- Datasource *pData,
- int iKey,
- int *pRc
-){
- void *pKey; int nKey;
- void *pVal; int nVal;
- testDatasourceEntry(pData, iKey, &pKey, &nKey, &pVal, &nVal);
- testOomWrite(pOom, pDb, pKey, nKey, pVal, nVal, pRc);
-}
-
-static void testOomScan(
- OomTest *pOom,
- lsm_db *pDb,
- int bReverse,
- const void *pKey, int nKey,
- int nScan,
- int *pRc
-){
- if( *pRc==0 ){
- int rc;
- int iScan = 0;
- lsm_cursor *pCsr;
- int (*xAdvance)(lsm_cursor *) = 0;
-
-
- rc = lsm_csr_open(pDb, &pCsr);
- testOomAssertRc(pOom, rc);
-
- if( rc==LSM_OK ){
- if( bReverse ){
- rc = lsm_csr_seek(pCsr, pKey, nKey, LSM_SEEK_LE);
- xAdvance = lsm_csr_prev;
- }else{
- rc = lsm_csr_seek(pCsr, pKey, nKey, LSM_SEEK_GE);
- xAdvance = lsm_csr_next;
- }
- }
- testOomAssertRc(pOom, rc);
-
- while( rc==LSM_OK && lsm_csr_valid(pCsr) && iScan "one"
-** "two" -> "four"
-** "three" -> "nine"
-** "four" -> "sixteen"
-** "five" -> "twentyfive"
-** "six" -> "thirtysix"
-** "seven" -> "fourtynine"
-** "eight" -> "sixtyfour"
-*/
-static void setup_populate_db(void){
- const char *azStr[] = {
- "one", "one",
- "two", "four",
- "three", "nine",
- "four", "sixteen",
- "five", "twentyfive",
- "six", "thirtysix",
- "seven", "fourtynine",
- "eight", "sixtyfour",
- };
- int rc;
- int ii;
- lsm_db *pDb;
-
- testDeleteLsmdb(LSMTEST6_TESTDB);
-
- rc = lsm_new(tdb_lsm_env(), &pDb);
- if( rc==LSM_OK ) rc = lsm_open(pDb, LSMTEST6_TESTDB);
-
- for(ii=0; rc==LSM_OK && iiiInsStart, pStep->nIns, pRc);
- testDeleteDatasourceRange(pDb, pData, pStep->iDelStart, pStep->nDel, pRc);
- if( *pRc==0 ){
- int nSave = -1;
- int nBuf = 64;
- lsm_db *db = tdb_lsm(pDb);
-
- lsm_config(db, LSM_CONFIG_AUTOFLUSH, &nSave);
- lsm_config(db, LSM_CONFIG_AUTOFLUSH, &nBuf);
- lsm_begin(db, 1);
- lsm_commit(db, 0);
- lsm_config(db, LSM_CONFIG_AUTOFLUSH, &nSave);
-
- *pRc = lsm_work(db, 0, 0, 0);
- if( *pRc==0 ){
- *pRc = lsm_checkpoint(db, 0);
- }
- }
-}
-
-static void doSetupStepArray(
- TestDb *pDb,
- Datasource *pData,
- const SetupStep *aStep,
- int nStep
-){
- int i;
- for(i=0; i
-/*
-** Read nByte bytes from offset iOff of file zFile into buffer pOut. This is
-** a no-op if *pRc is non-zero on entry. If the file cannot be opened, the
-** seek fails, or fewer than nByte bytes are read, *pRc is set to 1.
-*/
-void testReadFile(const char *zFile, int iOff, void *pOut, int nByte, int *pRc){
-  FILE *pFile;
-  int bOk = 0;
-
-  if( *pRc!=0 ) return;
-
-  pFile = fopen(zFile, "rb");
-  if( pFile ){
-    if( fseek(pFile, iOff, SEEK_SET)==0 ){
-      assert( nByte>=0 );
-      bOk = (fread(pOut, 1, nByte, pFile)==(size_t)nByte);
-    }
-    fclose(pFile);
-  }
-  if( !bOk ) *pRc = 1;
-}
-
-/*
-** Write nByte bytes from buffer pOut to offset iOff of file zFile. The file
-** is opened in "r+b" mode, so it must already exist. This is a no-op if
-** *pRc is non-zero on entry. *pRc is set to 1 if the file cannot be opened,
-** if the seek or the write fails, or if closing the file fails.
-*/
-void testWriteFile(
-  const char *zFile,
-  int iOff,
-  void *pOut,
-  int nByte,
-  int *pRc
-){
-  if( *pRc==0 ){
-    FILE *fd;
-    fd = fopen(zFile, "r+b");
-    if( fd==0 ){
-      *pRc = 1;
-    }else{
-      if( 0!=fseek(fd, iOff, SEEK_SET) ){
-        *pRc = 1;
-      }else{
-        assert( nByte>=0 );
-        if( (size_t)nByte!=fwrite(pOut, 1, nByte, fd) ){
-          *pRc = 1;
-        }
-      }
-      /* stdio buffers the data, so a failed write may only be reported
-      ** when the stream is flushed by fclose(). */
-      if( 0!=fclose(fd) ){
-        *pRc = 1;
-      }
-    }
-  }
-}
-
-/*
-** Read a ShmHeader from the start of the file named by appending "-shm" to
-** zDb, and return it in a buffer obtained from testMalloc(). The caller
-** should eventually release the returned pointer with testFree().
-*/
-static ShmHeader *getShmHeader(const char *zDb){
-  int rc = 0;
-  char *zShm = testMallocPrintf("%s-shm", zDb);
-  ShmHeader *pHdr;
-
-  pHdr = testMalloc(sizeof(ShmHeader));
-  testReadFile(zShm, 0, (void *)pHdr, sizeof(ShmHeader), &rc);
-  assert( rc==0 );
-
-  /* The file name is only needed for the read above; release it so that
-  ** each call does not leak one string. */
-  testFree(zShm);
-  return pHdr;
-}
-
-/*
-** This function makes a copy of the three files associated with LSM
-** database zDb (i.e. if zDb is "test.db", it makes copies of "test.db",
-** "test.db-log" and "test.db-shm").
-**
-** It then opens a new database connection to the copy with the xLock() call
-** instrumented so that it appears that some other process already connected
-** to the db (holding a shared lock on DMS2). This prevents recovery from
-** running. Then:
-**
-** 1) Check that the checksum of the database is zCksum.
-** 2) Write a few keys to the database. Then delete the same keys.
-** 3) Check that the checksum is zCksum.
-** 4) Flush the db to disk and run a checkpoint.
-** 5) Check once more that the checksum is still zCksum.
-*/
-/* No-op unless *pRc is LSM_OK on entry; on exit *pRc holds the outcome of
-** the checks below (0 if they all passed). */
-static void doLiveRecovery(const char *zDb, const char *zCksum, int *pRc){
- if( *pRc==LSM_OK ){
- const DatasourceDefn defn = {TEST_DATASOURCE_RANDOM, 20, 25, 100, 500};
- Datasource *pData;
- const char *zCopy = "testcopy.lsm";
- char zCksum2[TEST_CKSUM_BYTES];
- TestDb *pDb = 0;
- int rc;
-
- pData = testDatasourceNew(&defn);
-
- /* Work on a copy of the database, opened with "test_no_recovery=1". */
- testCopyLsmdb(zDb, zCopy);
- rc = tdb_lsm_open("test_no_recovery=1", zCopy, 0, &pDb);
- if( rc==0 ){
- ShmHeader *pHdr;
- lsm_db *db;
- /* The checksum of the freshly opened copy must match zCksum. */
- testCksumDatabase(pDb, zCksum2);
- testCompareStr(zCksum, zCksum2, &rc);
-
- /* Write entries 1..10 of the datasource, then delete them again. */
- testWriteDatasourceRange(pDb, pData, 1, 10, &rc);
- testDeleteDatasourceRange(pDb, pData, 1, 10, &rc);
-
- /* Test that the two tree-headers are now consistent. */
- pHdr = getShmHeader(zCopy);
- if( rc==0 && memcmp(&pHdr->hdr1, &pHdr->hdr2, sizeof(pHdr->hdr1)) ){
- rc = 1;
- }
- testFree(pHdr);
-
- /* NOTE(review): the comment preceding this function mentions a checksum
- ** check after the write/delete step and a checkpoint after the flush;
- ** neither call is visible in this function - confirm whether that is
- ** intentional. */
- if( rc==0 ){
- int nBuf = 64;
- db = tdb_lsm(pDb);
- lsm_config(db, LSM_CONFIG_AUTOFLUSH, &nBuf);
- lsm_begin(db, 1);
- lsm_commit(db, 0);
- rc = lsm_work(db, 0, 0, 0);
- }
-
- /* Final check: the checksum must still be zCksum. */
- testCksumDatabase(pDb, zCksum2);
- testCompareStr(zCksum, zCksum2, &rc);
- }
-
- /* Clean up the datasource, the connection and the copied database. */
- testDatasourceFree(pData);
- testClose(&pDb);
- testDeleteLsmdb(zCopy);
- *pRc = rc;
- }
-}
-
-static void doWriterCrash1(int *pRc){
- const int nWrite = 2000;
- const int nStep = 10;
- const int iWriteStart = 20000;
- int rc = 0;
- TestDb *pDb = 0;
- Datasource *pData = 0;
-
- rc = tdb_lsm_open("autowork=0", "testdb.lsm", 1, &pDb);
- if( rc==0 ){
- int iDot = 0;
- char zCksum[TEST_CKSUM_BYTES];
- int i;
- setupDatabase1(pDb, &pData);
- testCksumDatabase(pDb, zCksum);
- testBegin(pDb, 2, &rc);
- for(i=0; rc==0 && ihdr1, &pHdr1->hdr1, sizeof(pHdr1->hdr1));
- pHdr2->bWriter = 1;
- testWriteFile("testdb.lsm-shm", 0, (void *)pHdr2, sizeof(ShmHeader), &rc);
- doLiveRecovery("testdb.lsm", zCksum1, &rc);
-
- /* If both tree-headers are valid, tree-header-1 is used. */
- memcpy(&pHdr2->hdr1, &pHdr2->hdr2, sizeof(pHdr1->hdr1));
- memcpy(&pHdr2->hdr2, &pHdr1->hdr1, sizeof(pHdr1->hdr1));
- pHdr2->bWriter = 1;
- testWriteFile("testdb.lsm-shm", 0, (void *)pHdr2, sizeof(ShmHeader), &rc);
- doLiveRecovery("testdb.lsm", zCksum2, &rc);
-
- /* If tree-header 1 is invalid, tree-header-2 is used */
- memcpy(&pHdr2->hdr2, &pHdr2->hdr1, sizeof(pHdr1->hdr1));
- pHdr2->hdr1.aCksum[0] = 5;
- pHdr2->hdr1.aCksum[0] = 6;
- pHdr2->bWriter = 1;
- testWriteFile("testdb.lsm-shm", 0, (void *)pHdr2, sizeof(ShmHeader), &rc);
- doLiveRecovery("testdb.lsm", zCksum2, &rc);
-
- /* If tree-header 2 is invalid, tree-header-1 is used */
- memcpy(&pHdr2->hdr1, &pHdr2->hdr2, sizeof(pHdr1->hdr1));
- pHdr2->hdr2.aCksum[0] = 5;
- pHdr2->hdr2.aCksum[0] = 6;
- pHdr2->bWriter = 1;
- testWriteFile("testdb.lsm-shm", 0, (void *)pHdr2, sizeof(ShmHeader), &rc);
- doLiveRecovery("testdb.lsm", zCksum2, &rc);
-
- testFree(pHdr1);
- testFree(pHdr2);
- testClose(&pDb);
- }
-
- *pRc = rc;
-}
-
-void do_writer_crash_test(const char *zPattern, int *pRc){
- struct Test {
- const char *zName;
- void (*xFunc)(int *);
- } aTest[] = {
- { "writercrash1.lsm", doWriterCrash1 },
- { "writercrash2.lsm", doWriterCrash2 },
- };
- int i;
- for(i=0; izName) ){
- p->xFunc(pRc);
- testCaseFinish(*pRc);
- }
- }
-
-}
diff --git a/ext/lsm1/lsm-test/lsmtest9.c b/ext/lsm1/lsm-test/lsmtest9.c
deleted file mode 100644
index b01de0d4e5..0000000000
--- a/ext/lsm1/lsm-test/lsmtest9.c
+++ /dev/null
@@ -1,140 +0,0 @@
-
-#include "lsmtest.h"
-
-#define DATA_SEQUENTIAL TEST_DATASOURCE_SEQUENCE
-#define DATA_RANDOM TEST_DATASOURCE_RANDOM
-
-typedef struct Datatest4 Datatest4;
-
-/*
-** Test overview:
-**
-** 1. Insert (Datatest4.nRec) records into a database.
-**
-** 2. Repeat (Datatest4.nRepeat) times:
-**
-** 2a. Delete 2/3 of the records in the database.
-**
-** 2b. Run lsm_work(nMerge=1).
-**
-** 2c. Insert as many records as were deleted in 2a.
-**
-** 2d. Check database content is as expected.
-**
-** 2e. If (Datatest4.bReopen) is true, close and reopen the database.
-*/
-struct Datatest4 {
- /* Datasource definition */
- DatasourceDefn defn;
-
- int nRec; /* Number of records inserted in step 1 */
- int nRepeat; /* Number of times step 2 is repeated */
- int bReopen; /* True to close and reopen the database (step 2e) */
-};
-
-static void doDataTest4(
- const char *zSystem, /* Database system to test */
- Datatest4 *p, /* Structure containing test parameters */
- int *pRc /* OUT: Error code */
-){
- lsm_db *db = 0;
- TestDb *pDb;
- TestDb *pControl;
- Datasource *pData;
- int i;
- int rc = 0;
- int iDot = 0;
- int bMultiThreaded = 0; /* True for MT LSM database */
-
- int nRecOn3 = (p->nRec / 3);
- int iData = 0;
-
- /* Start the test case, open a database and allocate the datasource. */
- rc = testControlDb(&pControl);
- pDb = testOpen(zSystem, 1, &rc);
- pData = testDatasourceNew(&p->defn);
- if( rc==0 ){
- db = tdb_lsm(pDb);
- bMultiThreaded = tdb_lsm_multithread(pDb);
- }
-
- testWriteDatasourceRange(pControl, pData, iData, nRecOn3*3, &rc);
- testWriteDatasourceRange(pDb, pData, iData, nRecOn3*3, &rc);
-
- for(i=0; rc==0 && inRepeat; i++){
-
- testDeleteDatasourceRange(pControl, pData, iData, nRecOn3*2, &rc);
- testDeleteDatasourceRange(pDb, pData, iData, nRecOn3*2, &rc);
-
- if( db ){
- int nDone;
-#if 0
- fprintf(stderr, "lsm_work() start...\n"); fflush(stderr);
-#endif
- do {
- nDone = 0;
- rc = lsm_work(db, 1, (1<<30), &nDone);
- }while( rc==0 && nDone>0 );
- if( bMultiThreaded && rc==LSM_BUSY ) rc = LSM_OK;
-#if 0
- fprintf(stderr, "lsm_work() done...\n"); fflush(stderr);
-#endif
- }
-
-if( i+1nRepeat ){
- iData += (nRecOn3*2);
- testWriteDatasourceRange(pControl, pData, iData+nRecOn3, nRecOn3*2, &rc);
- testWriteDatasourceRange(pDb, pData, iData+nRecOn3, nRecOn3*2, &rc);
-
- testCompareDb(pData, nRecOn3*3, iData, pControl, pDb, &rc);
-
- /* If Datatest4.bReopen is true, close and reopen the database */
- if( p->bReopen ){
- testReopen(&pDb, &rc);
- if( rc==0 ) db = tdb_lsm(pDb);
- }
-}
-
- /* Update the progress dots... */
- testCaseProgress(i, p->nRepeat, testCaseNDot(), &iDot);
- }
-
- testClose(&pDb);
- testClose(&pControl);
- testDatasourceFree(pData);
- testCaseFinish(rc);
- *pRc = rc;
-}
-
-/*
-** Build the name of a data4 test case from the system name, the datasource
-** name and the nRec, nRepeat and bReopen fields of pTest. The returned
-** string is allocated by testMallocPrintf().
-*/
-static char *getName4(const char *zSystem, Datatest4 *pTest){
-  char *zSource = testDatasourceName(&pTest->defn);
-  char *zName = testMallocPrintf("data4.%s.%s.%d.%d.%d",
-      zSystem, zSource, pTest->nRec, pTest->nRepeat, pTest->bReopen
-  );
-  testFree(zSource);
-  return zName;
-}
-
-void test_data_4(
- const char *zSystem, /* Database system name */
- const char *zPattern, /* Run test cases that match this pattern */
- int *pRc /* IN/OUT: Error code */
-){
- Datatest4 aTest[] = {
- /* defn, nRec, nRepeat, bReopen */
- { {DATA_RANDOM, 20,25, 500,600}, 10000, 10, 0 },
- { {DATA_RANDOM, 20,25, 500,600}, 10000, 10, 1 },
- };
-
- int i;
-
- for(i=0; *pRc==LSM_OK && ieType ){
- case TEST_DATASOURCE_RANDOM: {
- int nRange = (1 + p->nMaxKey - p->nMinKey);
- nKey = (int)( testPrngValue((u32)iData) % nRange ) + p->nMinKey;
- testPrngString((u32)iData, p->aKey, nKey);
- break;
- }
- case TEST_DATASOURCE_SEQUENCE:
- nKey = sprintf(p->aKey, "%012d", iData);
- break;
- }
- *ppKey = p->aKey;
- *pnKey = nKey;
- }
- if( ppVal ){
- u32 nVal = testPrngValue((u32)iData)%(1+p->nMaxVal-p->nMinVal)+p->nMinVal;
- testPrngString((u32)~iData, p->aVal, (int)nVal);
- *ppVal = p->aVal;
- *pnVal = (int)nVal;
- }
-}
-
-/* Release a Datasource object by passing it to testFree(). */
-void testDatasourceFree(Datasource *p){
- testFree(p);
-}
-
-/*
-** Format the datasource definition *p as a nul-terminated string of the
-** form "TYPE.(MINKEY-MAXKEY).(MINVAL-MAXVAL)", where TYPE is "seq" for
-** TEST_DATASOURCE_SEQUENCE and "rnd" for any other type. The string is
-** allocated by testMallocPrintf(); the caller should eventually release
-** it with testFree().
-*/
-char *testDatasourceName(const DatasourceDefn *p){
-  const char *zType = "rnd";
-  if( p->eType==TEST_DATASOURCE_SEQUENCE ) zType = "seq";
-  return testMallocPrintf("%s.(%d-%d).(%d-%d)",
-      zType,
-      p->nMinKey, p->nMaxKey,
-      p->nMinVal, p->nMaxVal
-  );
-}
-
-/*
-** Allocate and return a new Datasource configured from *pDefn.
-**
-** For TEST_DATASOURCE_SEQUENCE both key-size limits are fixed at 128.
-** Otherwise the minimum key size is MAX(0, pDefn->nMinKey) and the maximum
-** is MAX of that minimum and pDefn->nMaxKey. The value-size limits are
-** derived the same way for every type.
-**
-** The key and value buffers (aKey and aVal) live in the same testMalloc()
-** allocation as the object itself: aKey starts immediately after the
-** structure and aVal starts nMaxKey bytes after aKey.
-*/
-Datasource *testDatasourceNew(const DatasourceDefn *pDefn){
-  Datasource *p;
-  int nMinKey;
-  int nMaxKey;
-  int nMinVal;
-  int nMaxVal;
-
-  if( pDefn->eType==TEST_DATASOURCE_SEQUENCE ){
-    nMinKey = 128;
-    nMaxKey = 128;
-  }else{
-    nMinKey = MAX(0, pDefn->nMinKey);
-    nMaxKey = MAX(nMinKey, pDefn->nMaxKey);
-  }
-  nMinVal = MAX(0, pDefn->nMinVal);
-  nMaxVal = MAX(nMinVal, pDefn->nMaxVal);
-
-  p = (Datasource *)testMalloc(sizeof(Datasource) + nMaxKey + nMaxVal + 1);
-  p->eType = pDefn->eType;
-  p->nMinKey = nMinKey;
-  p->nMinVal = nMinVal;
-  p->nMaxKey = nMaxKey;
-  p->nMaxVal = nMaxVal;
-
-  p->aKey = (char *)&p[1];
-  p->aVal = &p->aKey[nMaxKey];
-  return p;
-}
diff --git a/ext/lsm1/lsm-test/lsmtest_func.c b/ext/lsm1/lsm-test/lsmtest_func.c
deleted file mode 100644
index eb8346aa83..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_func.c
+++ /dev/null
@@ -1,177 +0,0 @@
-
-#include "lsmtest.h"
-
-
-/*
-** Command-line handler. Arguments:
-**
-**   ?-nmerge N? ?-nkb N? DATABASE
-**
-** Opens LSM database DATABASE, queries LSM_CONFIG_BLOCK_SIZE (by passing
-** -1) and passes twice the resulting value to LSM_CONFIG_AUTOCHECKPOINT,
-** then calls lsm_work() with the requested nMerge (default 1) and nKB
-** (default 1<<30) values followed, if everything succeeded, by
-** lsm_checkpoint().
-**
-** Returns LSM_OK on success, the error code of the failing call otherwise,
-** or -1 after printing a usage message.
-*/
-int do_work(int nArg, char **azArg){
-  struct Option {
-    const char *zName;
-  } aOpt [] = {
-    { "-nmerge" },
-    { "-nkb" },
-    { 0 }
-  };
-
-  lsm_db *pDb = 0;
-  int rc;
-  int i;
-  const char *zDb;
-  int nMerge = 1;
-  int nKB = (1<<30);
-
-  if( nArg==0 ) goto usage;
-  zDb = azArg[nArg-1];
-  for(i=0; i<(nArg-1); i++){
-    int iSel;
-    rc = testArgSelect(aOpt, "option", azArg[i], &iSel);
-    if( rc ) return rc;
-    switch( iSel ){
-      case 0:
-        /* -nmerge N: the value may not be the final (DATABASE) argument */
-        i++;
-        if( i==(nArg-1) ) goto usage;
-        nMerge = atoi(azArg[i]);
-        break;
-      case 1:
-        /* -nkb N */
-        i++;
-        if( i==(nArg-1) ) goto usage;
-        nKB = atoi(azArg[i]);
-        break;
-    }
-  }
-
-  rc = lsm_new(0, &pDb);
-  if( rc!=LSM_OK ){
-    testPrintError("lsm_new(): rc=%d\n", rc);
-  }else{
-    rc = lsm_open(pDb, zDb);
-    if( rc!=LSM_OK ){
-      testPrintError("lsm_open(): rc=%d\n", rc);
-    }else{
-      int n = -1;
-      lsm_config(pDb, LSM_CONFIG_BLOCK_SIZE, &n);
-      n = n*2;
-      lsm_config(pDb, LSM_CONFIG_AUTOCHECKPOINT, &n);
-
-      rc = lsm_work(pDb, nMerge, nKB, 0);
-      if( rc!=LSM_OK ){
-        testPrintError("lsm_work(): rc=%d\n", rc);
-      }
-    }
-  }
-  if( rc==LSM_OK ){
-    rc = lsm_checkpoint(pDb, 0);
-  }
-
-  /* pDb is still NULL if lsm_new() failed without setting it. */
-  if( pDb ) lsm_close(pDb);
-  return rc;
-
- usage:
-  testPrintUsage("?-nmerge N? ?-nkb N? DATABASE");
-  return -1;
-}
-
-
-/*
-** lsmtest show ?-config LSM-CONFIG? DATABASE ?COMMAND ?PGNO??
-**
-** With no COMMAND, the LSM_INFO_DB_STRUCTURE report for DATABASE is
-** printed. COMMAND is matched against the names in aOpt[] below:
-** "freelist", "blocksize" and "pagesize" take no further argument, all
-** other commands require a PGNO argument. Entries flagged bConfig are
-** read with lsm_config(), the rest with lsm_info().
-**
-** Returns LSM_OK on success, an error code if a call fails, or -1 after
-** printing a usage message.
-*/
-int do_show(int nArg, char **azArg){
-  lsm_db *pDb = 0;
-  int rc;
-  const char *zDb;
-
-  int eOpt = LSM_INFO_DB_STRUCTURE;
-  unsigned int iPg = 0;
-  int bConfig = 0;
-  const char *zConfig = "";
-
-  struct Option {
-    const char *zName;
-    int bConfig;
-    int eOpt;
-  } aOpt [] = {
-    { "array",       0, LSM_INFO_ARRAY_STRUCTURE },
-    { "array-pages", 0, LSM_INFO_ARRAY_PAGES },
-    { "blocksize",   1, LSM_CONFIG_BLOCK_SIZE },
-    { "pagesize",    1, LSM_CONFIG_PAGE_SIZE },
-    { "freelist",    0, LSM_INFO_FREELIST },
-    { "page-ascii",  0, LSM_INFO_PAGE_ASCII_DUMP },
-    { "page-hex",    0, LSM_INFO_PAGE_HEX_DUMP },
-    { 0, 0 }
-  };
-
-  char *z = 0;
-  int iDb = 0;                    /* Index of DATABASE in azArg[] */
-
-  /* Check if there is a "-config" option. Any prefix of "-config" at least
-  ** two characters long is accepted. strncmp() stops at the terminator of
-  ** the literal, so an argument longer than "-config" is rejected without
-  ** reading past the end of the literal. */
-  if( nArg>2 && strlen(azArg[0])>1
-   && strncmp(azArg[0], "-config", strlen(azArg[0]))==0
-  ){
-    zConfig = azArg[1];
-    iDb = 2;
-  }
-  if( nArg<(iDb+1) ) goto usage;
-
-  if( nArg>(iDb+1) ){
-    rc = testArgSelect(aOpt, "option", azArg[iDb+1], &eOpt);
-    if( rc!=0 ) return rc;
-    /* eOpt is an index into aOpt[] here; read bConfig before it is
-    ** replaced by the selected entry's eOpt value. */
-    bConfig = aOpt[eOpt].bConfig;
-    eOpt = aOpt[eOpt].eOpt;
-    if( (bConfig==0 && eOpt==LSM_INFO_FREELIST)
-     || (bConfig==1 && eOpt==LSM_CONFIG_BLOCK_SIZE)
-     || (bConfig==1 && eOpt==LSM_CONFIG_PAGE_SIZE)
-    ){
-      if( nArg!=(iDb+2) ) goto usage;
-    }else{
-      if( nArg!=(iDb+3) ) goto usage;
-      iPg = atoi(azArg[iDb+2]);
-    }
-  }
-  zDb = azArg[iDb];
-
-  rc = lsm_new(0, &pDb);
-  if( rc!=LSM_OK ){
-    testPrintError("lsm_new(): rc=%d\n", rc);
-  }else{
-    /* Only configure a handle that lsm_new() actually created. */
-    tdb_lsm_configure(pDb, zConfig);
-    rc = lsm_open(pDb, zDb);
-    if( rc!=LSM_OK ){
-      testPrintError("lsm_open(): rc=%d\n", rc);
-    }
-  }
-
-  if( rc==LSM_OK ){
-    if( bConfig==0 ){
-      switch( eOpt ){
-        case LSM_INFO_DB_STRUCTURE:
-        case LSM_INFO_FREELIST:
-          rc = lsm_info(pDb, eOpt, &z);
-          break;
-        case LSM_INFO_ARRAY_STRUCTURE:
-        case LSM_INFO_ARRAY_PAGES:
-        case LSM_INFO_PAGE_ASCII_DUMP:
-        case LSM_INFO_PAGE_HEX_DUMP:
-          rc = lsm_info(pDb, eOpt, iPg, &z);
-          break;
-        default:
-          assert( !"no chance" );
-      }
-
-      if( rc==LSM_OK ){
-        printf("%s\n", z ? z : "");
-        fflush(stdout);
-      }
-      lsm_free(lsm_get_env(pDb), z);
-    }else{
-      int iRes = -1;
-      lsm_config(pDb, eOpt, &iRes);
-      printf("%d\n", iRes);
-      fflush(stdout);
-    }
-  }
-
-  /* pDb is still NULL if lsm_new() failed without setting it. */
-  if( pDb ) lsm_close(pDb);
-  return rc;
-
- usage:
-  testPrintUsage("DATABASE ?array|page-ascii|page-hex PGNO?");
-  return -1;
-}
diff --git a/ext/lsm1/lsm-test/lsmtest_io.c b/ext/lsm1/lsm-test/lsmtest_io.c
deleted file mode 100644
index 7aa5d10948..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_io.c
+++ /dev/null
@@ -1,248 +0,0 @@
-
-/*
-** SUMMARY
-**
-** This file implements the 'io' subcommand of the test program. It is used
-** for testing the performance of various combinations of write() and fsync()
-** system calls. All operations occur on a single file, which may or may not
-** exist when a test is started.
-**
-** A test consists of a series of commands. Each command is either a write
-** or an fsync. A write is specified as "<amount>@<offset>", where <amount>
-** is the amount of data written, and <offset> is the offset of the file
-** to write to. An <amount> or an <offset> is specified as an integer number
-** of bytes. Or, if postfixed with a "K", "M" or "G", an integer number of
-** KB, MB or GB, respectively. An fsync is simply "S". All commands are
-** case-insensitive.
-**
-** Example test program:
-**
-** 2M@6M 1492K@4M S 4096@4K S
-**
-** This program writes 2 MB of data starting at the 6MB offset of
-** the file, followed by 1492 KB of data written at the 4MB offset of the
-** file, followed by a call to fsync(), a write of 4KB of data at byte
-** offset 4096, and finally another call to fsync().
-**
-** Commands may either be specified on the command line (one command per
-** command line argument) or read from stdin. Commands read from stdin
-** must be separated by white-space.
-**
-** COMMAND LINE INVOCATION
-**
-** The sub-command implemented in this file must be invoked with at least
-** two arguments - the path to the file to write to and the page-size to
-** use for writing. If there are more than two arguments, then each
-** subsequent argument is assumed to be a test command. If there are exactly
-** two arguments, the test commands are read from stdin.
-**
-** A write command does not result in a single call to system call write().
-** Instead, the specified region is written sequentially using one or
-** more calls to write(), each of which writes not more than one page of
-** data. For example, if the page-size is 4KB, the command "2M@6M" results
-** in 512 calls to write(), each of which writes 4KB of data.
-**
-** EXAMPLES
-**
-** Two equivalent examples:
-**
-** $ lsmtest io testfile.db 4KB 2M@6M 1492K@4M S 4096@4K S
-** 3544K written in 129 ms
-** $ echo "2M@6M 1492K@4M S 4096@4K S" | lsmtest io testfile.db 4096
-** 3544K written in 127 ms
-**
-*/
-
-#include "lsmtest.h"
-
-typedef struct IoContext IoContext;
-
-struct IoContext {
- int fd; /* File descriptor passed to lseek() and fdatasync() by doOneCmd() */
- int nWrite; /* Incremented by (nByte/1024) for each write command */
-};
-
-/*
-** Like isspace(3), except that 0 is returned for any character that has
-** the 0x80 bit set; such characters are never passed to isspace().
-*/
-static int safe_isspace(char c){
-  return (c & 0x80) ? 0 : isspace(c);
-}
-
-/*
-** Like isdigit(3), except that 0 is returned for any character that has
-** the 0x80 bit set; such characters are never passed to isdigit().
-*/
-static int safe_isdigit(char c){
-  return (c & 0x80) ? 0 : isdigit(c);
-}
-
-/*
-** Parse a size from the start of zIn: one or more decimal digits followed
-** by an optional one-character suffix - "k"/"K", "m"/"M" or "g"/"G" - that
-** multiplies the value by 1024, 1024*1024 or 1024*1024*1024 respectively.
-**
-** If *pRc is non-zero on entry this is a no-op that returns 0. If zIn does
-** not begin with a digit, *pRc is set to 1 and 0 is returned. Otherwise the
-** parsed value is returned and, if pzOut is not NULL, *pzOut is set to
-** point at the first character following the parsed text.
-*/
-static i64 getNextSize(char *zIn, char **pzOut, int *pRc){
-  i64 nSize = 0;
-  i64 nUnit = 1;
-  char *zCsr = zIn;
-
-  if( *pRc!=0 ) return 0;
-  if( !safe_isdigit(*zCsr) ){
-    *pRc = 1;
-    return 0;
-  }
-
-  /* Accumulate the decimal digits */
-  for(; safe_isdigit(*zCsr); zCsr++){
-    nSize = nSize*10 + (*zCsr - '0');
-  }
-
-  /* An optional suffix selects the unit */
-  switch( *zCsr ){
-    case 'k': case 'K': nUnit = 1024; break;
-    case 'm': case 'M': nUnit = 1024 * 1024; break;
-    case 'g': case 'G': nUnit = 1024 * 1024 * 1024; break;
-  }
-  if( nUnit!=1 ){
-    nSize = nSize * nUnit;
-    zCsr++;
-  }
-
-  if( pzOut ) *pzOut = zCsr;
-  return nSize;
-}
-
-static int doOneCmd(
- IoContext *pCtx,
- u8 *aData,
- int pgsz,
- char *zCmd,
- char **pzOut
-){
- char c;
- char *z = zCmd;
-
- while( safe_isspace(*z) ) z++;
- c = *z;
-
- if( c==0 ){
- if( pzOut ) *pzOut = z;
- return 0;
- }
-
- if( c=='s' || c=='S' ){
- if( pzOut ) *pzOut = &z[1];
- return fdatasync(pCtx->fd);
- }
-
- if( safe_isdigit(c) ){
- i64 iOff = 0;
- int nByte = 0;
- int rc = 0;
- int nPg;
- int iPg;
-
- nByte = (int)getNextSize(z, &z, &rc);
- if( rc || *z!='@' ) goto bad_command;
- z++;
- iOff = getNextSize(z, &z, &rc);
- if( rc || (safe_isspace(*z)==0 && *z!='\0') ) goto bad_command;
- if( pzOut ) *pzOut = z;
-
- nPg = (nByte+pgsz-1) / pgsz;
- lseek(pCtx->fd, (off_t)iOff, SEEK_SET);
- for(iPg=0; iPgfd, aData, pgsz);
- }
- pCtx->nWrite += nByte/1024;
-
- return 0;
- }
-
- bad_command:
- testPrintError("unrecognized command: %s", zCmd);
- return 1;
-}
-
-/*
-** Read all of stdin into a nul-terminated buffer obtained from realloc()
-** and store a pointer to it in *pzOut. If stdin is empty, *pzOut is set to
-** point at an empty string. Returns 0 on success. If memory cannot be
-** allocated, *pzOut is set to NULL and 1 is returned.
-*/
-static int readStdin(char **pzOut){
-  int nAlloc = 128;
-  char *zOut = 0;
-  int nOut = 0;
-
-  do{
-    char *zNew;
-    int nRead;
-
-    nAlloc = nAlloc*2;
-    zNew = realloc(zOut, nAlloc);
-    if( zNew==0 ){
-      /* realloc() leaves the old block allocated on failure */
-      free(zOut);
-      *pzOut = 0;
-      return 1;
-    }
-    zOut = zNew;
-
-    nRead = fread(&zOut[nOut], 1, nAlloc-nOut-1, stdin);
-    nOut += nRead;
-    /* Terminate after every read - including one that returns no data - so
-    ** that the buffer is a valid string even when stdin is empty. */
-    zOut[nOut] = '\0';
-    if( nRead==0 ) break;
-  }while( !feof(stdin) );
-
-  *pzOut = zOut;
-  return 0;
-}
-
-int do_io(int nArg, char **azArg){
- IoContext ctx;
- int pgsz;
- char *zFile;
- char *zPgsz;
- int i;
- int rc = 0;
-
- char *zStdin = 0;
- char *z;
-
- u8 *aData;
-
- memset(&ctx, 0, sizeof(IoContext));
- if( nArg<2 ){
- testPrintUsage("FILE PGSZ ?CMD-1 ...?");
- return -1;
- }
- zFile = azArg[0];
- zPgsz = azArg[1];
-
- pgsz = (int)getNextSize(zPgsz, 0, &rc);
- if( pgsz<=0 ){
- testPrintError("Ridiculous page size: %d", pgsz);
- return -1;
- }
- aData = malloc(pgsz);
- memset(aData, 0x77, pgsz);
-
- ctx.fd = open(zFile, O_RDWR|O_CREAT|_O_BINARY, 0644);
- if( ctx.fd<0 ){
- perror("open: ");
- return -1;
- }
-
- if( nArg==2 ){
- readStdin(&zStdin);
- testTimeInit();
- z = zStdin;
- while( *z && rc==0 ){
- rc = doOneCmd(&ctx, aData, pgsz, z, &z);
- }
- }else{
- testTimeInit();
- for(i=2; i
-
-/* Called by testSetErrorFunc() whenever a non-zero error code is recorded.
-** The assert(0) stops the program unless assert() is compiled out. */
-void test_failed(){
- assert( 0 );
- return;
-}
-
-#define testSetError(rc) testSetErrorFunc(rc, pRc, __FILE__, __LINE__)
-/*
-** If rc is non-zero: store it in *pRc, print a "FAILED" message naming
-** source location zFile:iLine to stderr and call test_failed(). Does
-** nothing if rc is zero. Invoked through the testSetError() macro.
-*/
-static void testSetErrorFunc(int rc, int *pRc, const char *zFile, int iLine){
-  if( rc==0 ) return;
-  *pRc = rc;
-  fprintf(stderr, "FAILED (%s:%d) rc=%d ", zFile, iLine, rc);
-  test_failed();
-}
-
-static int lsm_memcmp(u8 *a, u8 *b, int c){
- int i;
- for(i=0; i0 && lsm_memcmp(pVal, pDbVal, nVal))) ){
- testSetError(1);
- }
- }
-}
-
-/*
-** Write the key/value pair to pDb using tdb_write(). This is a no-op if
-** *pRc is non-zero on entry. A non-zero result from tdb_write() is
-** reported through testSetError(), which also stores it in *pRc.
-*/
-void testWrite(
- TestDb *pDb, /* Database handle */
- void *pKey, int nKey, /* Key to write */
- void *pVal, int nVal, /* Value to write */
- int *pRc /* IN/OUT: Error code */
-){
- if( *pRc==0 ){
- int rc;
-/* nCall counts invocations but is never read in this function;
-** presumably a debugging aid - TODO confirm. */
-static int nCall = 0;
-nCall++;
- rc = tdb_write(pDb, pKey, nKey, pVal, nVal);
- testSetError(rc);
- }
-}
-/*
-** Delete key (pKey/nKey) from pDb using tdb_delete(). No-op if *pRc is
-** non-zero on entry; otherwise *pRc is set to the result of tdb_delete()
-** and any error is reported through testSetError().
-*/
-void testDelete(
-  TestDb *pDb,                    /* Database handle */
-  void *pKey, int nKey,           /* Key to delete */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  int rcDel;
-
-  if( *pRc!=0 ) return;
-  rcDel = tdb_delete(pDb, pKey, nKey);
-  *pRc = rcDel;
-  testSetError(rcDel);
-}
-/*
-** Call tdb_delete_range() on pDb with the two keys supplied. No-op if *pRc
-** is non-zero on entry; otherwise *pRc is set to the result of
-** tdb_delete_range() and any error is reported through testSetError().
-*/
-void testDeleteRange(
-  TestDb *pDb,                    /* Database handle */
-  void *pKey1, int nKey1,         /* First key of the range */
-  void *pKey2, int nKey2,         /* Second key of the range */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  int rcDel;
-
-  if( *pRc!=0 ) return;
-  rcDel = tdb_delete_range(pDb, pKey1, nKey1, pKey2, nKey2);
-  *pRc = rcDel;
-  testSetError(rcDel);
-}
-
-/* Call tdb_begin(pDb, iTrans) unless *pRc is already non-zero. A non-zero
-** result is reported through testSetError(). */
-void testBegin(TestDb *pDb, int iTrans, int *pRc){
-  int rcBegin;
-
-  if( *pRc!=0 ) return;
-  rcBegin = tdb_begin(pDb, iTrans);
-  testSetError(rcBegin);
-}
-/* Call tdb_commit(pDb, iTrans) unless *pRc is already non-zero. A non-zero
-** result is reported through testSetError(). */
-void testCommit(TestDb *pDb, int iTrans, int *pRc){
-  int rcCommit;
-
-  if( *pRc!=0 ) return;
-  rcCommit = tdb_commit(pDb, iTrans);
-  testSetError(rcCommit);
-}
-#if 0 /* unused */
-static void testRollback(TestDb *pDb, int iTrans, int *pRc){
- if( *pRc==0 ){
- int rc;
- rc = tdb_rollback(pDb, iTrans);
- testSetError(rc);
- }
-}
-#endif
-
-/*
-** String wrapper around testWrite(). zKey must be a nul-terminated string;
-** zVal may be NULL, in which case a value length of 0 is passed.
-*/
-void testWriteStr(
-  TestDb *pDb,                    /* Database handle */
-  const char *zKey,               /* Key to write */
-  const char *zVal,               /* Value to write (may be NULL) */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  int nKey = strlen(zKey);
-  int nVal = 0;
-
-  if( zVal ) nVal = strlen(zVal);
-  testWrite(pDb, (void *)zKey, nKey, (void *)zVal, nVal, pRc);
-}
-
-#if 0 /* unused */
-static void testDeleteStr(TestDb *pDb, const char *zKey, int *pRc){
- testDelete(pDb, (void *)zKey, strlen(zKey), pRc);
-}
-#endif
-/*
-** String wrapper around testFetch(). zKey must be a nul-terminated string;
-** zVal may be NULL, in which case a value length of 0 is passed.
-*/
-void testFetchStr(
-  TestDb *pDb,                    /* Database handle */
-  const char *zKey,               /* Key to query database for */
-  const char *zVal,               /* Value passed to testFetch() (may be NULL) */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  int nKey = strlen(zKey);
-  int nVal = 0;
-
-  if( zVal ) nVal = strlen(zVal);
-  testFetch(pDb, (void *)zKey, nKey, (void *)zVal, nVal, pRc);
-}
-
-/*
-** Fetch key (pKey/nKey) from both pControl and pDb and check that the two
-** databases return values of the same size and content.
-**
-** This is a no-op if *pRc is non-zero on entry, so an earlier error code is
-** never overwritten. A failed fetch or a mismatch between the two values is
-** reported through testSetError(), which sets *pRc.
-*/
-void testFetchCompare(
-  TestDb *pControl,
-  TestDb *pDb,
-  void *pKey, int nKey,
-  int *pRc
-){
-  if( *pRc==0 ){
-    int rc;
-    void *pDbVal1 = 0;
-    void *pDbVal2 = 0;
-    int nDbVal1 = 0;
-    int nDbVal2 = 0;
-
-    /* Invocation counter; never read here. */
-    static int nCall = 0;
-    nCall++;
-
-    rc = tdb_fetch(pControl, pKey, nKey, &pDbVal1, &nDbVal1);
-    testSetError(rc);
-
-    rc = tdb_fetch(pDb, pKey, nKey, &pDbVal2, &nDbVal2);
-    testSetError(rc);
-
-    /* Only compare if both fetches succeeded. */
-    if( *pRc==0
-     && (nDbVal1!=nDbVal2 || (nDbVal1>0 && memcmp(pDbVal1, pDbVal2, nDbVal1)))
-    ){
-      testSetError(1);
-    }
-  }
-}
-
-typedef struct ScanResult ScanResult;
-struct ScanResult {
- TestDb *pDb;
-
- int nRow;
- u32 cksum1;
- u32 cksum2;
- void *pKey1; int nKey1;
- void *pKey2; int nKey2;
-
- int bReverse;
- int nPrevKey;
- u8 aPrevKey[256];
-};
-
-/*
-** Compare two keys. The first MIN(nKey1, nKey2) bytes are compared with
-** memcmp(); if they are equal the result is (nKey1 - nKey2). Returns a
-** negative, zero or positive value accordingly.
-*/
-static int keyCompare(void *pKey1, int nKey1, void *pKey2, int nKey2){
-  int nCommon = MIN(nKey1, nKey2);
-  int cmp = memcmp(pKey1, pKey2, nCommon);
-  return cmp ? cmp : (nKey1 - nKey2);
-}
-
-int test_scan_debug = 0;
-
-static void scanCompareCb(
- void *pCtx,
- void *pKey, int nKey,
- void *pVal, int nVal
-){
- ScanResult *p = (ScanResult *)pCtx;
- u8 *aKey = (u8 *)pKey;
- u8 *aVal = (u8 *)pVal;
- int i;
-
- if( test_scan_debug ){
- printf("%d: %.*s\n", p->nRow, nKey, (char *)pKey);
- fflush(stdout);
- }
-#if 0
- if( test_scan_debug ) printf("%.20s\n", (char *)pVal);
-#endif
-
-#if 0
- /* Check tdb_fetch() matches */
- int rc = 0;
- testFetch(p->pDb, pKey, nKey, pVal, nVal, &rc);
- assert( rc==0 );
-#endif
-
- /* Update the checksum data */
- p->nRow++;
- for(i=0; icksum1 += ((int)aKey[i] << (i&0x0F));
- p->cksum2 += p->cksum1;
- }
- for(i=0; icksum1 += ((int)aVal[i] << (i&0x0F));
- p->cksum2 += p->cksum1;
- }
-
- /* Check that the delivered row is not out of order. */
- if( nKey<(int)sizeof(p->aPrevKey) ){
- if( p->nPrevKey ){
- int res = keyCompare(p->aPrevKey, p->nPrevKey, pKey, nKey);
- if( (res<0 && p->bReverse) || (res>0 && p->bReverse==0) ){
- testPrintError("Returned key out of order at %s:%d\n",
- __FILE__, __LINE__
- );
- }
- }
-
- p->nPrevKey = nKey;
- memcpy(p->aPrevKey, pKey, MIN(p->nPrevKey, nKey));
- }
-
- /* Check that the delivered row is within range. */
- if( p->pKey1 && (
- (memcmp(p->pKey1, pKey, MIN(p->nKey1, nKey))>0)
- || (memcmp(p->pKey1, pKey, MIN(p->nKey1, nKey))==0 && p->nKey1>nKey)
- )){
- testPrintError("Returned key too small at %s:%d\n", __FILE__, __LINE__);
- }
- if( p->pKey2 && (
- (memcmp(p->pKey2, pKey, MIN(p->nKey2, nKey))<0)
- || (memcmp(p->pKey2, pKey, MIN(p->nKey2, nKey))==0 && p->nKey2=0 );
- zRet = (char *)testMalloc(nByte+1);
- vsnprintf(zRet, nByte+1, zFormat, ap);
- return zRet;
-}
-
-/*
-** printf()-style front end to testMallocVPrintf(): packages the variable
-** arguments into a va_list and returns whatever testMallocVPrintf() returns.
-*/
-char *testMallocPrintf(const char *zFormat, ...){
- va_list ap;
- char *zRet;
-
- va_start(ap, zFormat);
- zRet = testMallocVPrintf(zFormat, ap);
- va_end(ap);
-
- return zRet;
-}
-
-
-/*
-** A wrapper around malloc(3).
-**
-** This function should be used for all allocations made by test procedures.
-** It has the following properties:
-**
-**   * Test code may assume that allocations may not fail. If malloc()
-**     does fail, a message is printed to stderr and the program aborts.
-**   * Returned memory is always zeroed.
-**
-** Each allocation is preceded by an 8-byte header, the start of which holds
-** the requested size n as an int. The returned pointer points just past
-** the header.
-**
-** Allocations made using testMalloc() should be freed using testFree().
-*/
-void *testMalloc(int n){
-  u8 *p = (u8*)malloc(n + 8);
-  if( p==0 ){
-    /* Callers never check for NULL, so fail loudly here instead of
-    ** dereferencing a NULL pointer in the memset() below. */
-    fprintf(stderr, "testMalloc(): out of memory allocating %d bytes\n", n);
-    abort();
-  }
-  memset(p, 0, n+8);
-  *(int*)p = n;
-  return (void*)&p[8];
-}
-
-/* Return a copy of the nByte bytes at pCopy, held in a new buffer obtained
-** from testMalloc(). */
-void *testMallocCopy(void *pCopy, int nByte){
-  void *pNew = testMalloc(nByte);
-  return memcpy(pNew, pCopy, nByte);
-}
-
-void *testRealloc(void *ptr, int n){
- if( ptr ){
- u8 *p = (u8*)ptr - 8;
- int nOrig = *(int*)p;
- p = (u8*)realloc(p, n+8);
- if( nOrig1 ){
- testPrintError("Usage: test ?PATTERN?\n");
- return 1;
- }
- if( nArg==1 ){
- zPattern = azArg[0];
- }
-
- for(j=0; tdb_system_name(j); j++){
- rc = 0;
-
- test_data_1(tdb_system_name(j), zPattern, &rc);
- test_data_2(tdb_system_name(j), zPattern, &rc);
- test_data_3(tdb_system_name(j), zPattern, &rc);
- test_data_4(tdb_system_name(j), zPattern, &rc);
- test_rollback(tdb_system_name(j), zPattern, &rc);
- test_mc(tdb_system_name(j), zPattern, &rc);
- test_mt(tdb_system_name(j), zPattern, &rc);
-
- if( rc ) nFail++;
- }
-
- rc = 0;
- test_oom(zPattern, &rc);
- if( rc ) nFail++;
-
- rc = 0;
- test_api(zPattern, &rc);
- if( rc ) nFail++;
-
- rc = 0;
- do_crash_test(zPattern, &rc);
- if( rc ) nFail++;
-
- rc = 0;
- do_writer_crash_test(zPattern, &rc);
- if( rc ) nFail++;
-
- return (nFail!=0);
-}
-
-/*
-** If tdb_lsm() returns a non-NULL handle for pDb, apply the configuration
-** string "mmap=1 autowork=1 automerge=4 worker_automerge=4" to pDb with
-** tdb_lsm_config_str(). Returns the value returned by tdb_lsm().
-*/
-static lsm_db *configure_lsm_db(TestDb *pDb){
-  lsm_db *pHandle = tdb_lsm(pDb);
-  if( pHandle==0 ) return pHandle;
-  tdb_lsm_config_str(pDb, "mmap=1 autowork=1 automerge=4 worker_automerge=4");
-  return pHandle;
-}
-
-typedef struct WriteHookEvent WriteHookEvent;
-struct WriteHookEvent {
- i64 iOff;
- int nData;
- int nUs;
-};
-WriteHookEvent prev = {0, 0, 0};
-
-/* If the global "prev" event holds data (prev.nData!=0), print it to pOut
-** as a "w d OFFSET NDATA NUS" line and reset prev.nData to zero. */
-static void flushPrev(FILE *pOut){
-  if( prev.nData==0 ) return;
-  fprintf(pOut, "w %s %lld %d %d\n", "d", prev.iOff, prev.nData, prev.nUs);
-  prev.nData = 0;
-}
-
-#if 0 /* unused */
-static void do_speed_write_hook2(
- void *pCtx,
- int bLog,
- i64 iOff,
- int nData,
- int nUs
-){
- FILE *pOut = (FILE *)pCtx;
- if( bLog ) return;
-
- if( prev.nData && nData && iOff==prev.iOff+prev.nData ){
- prev.nData += nData;
- prev.nUs += nUs;
- }else{
- flushPrev(pOut);
- if( nData==0 ){
- fprintf(pOut, "s %s 0 0 %d\n", (bLog ? "l" : "d"), nUs);
- }else{
- prev.iOff = iOff;
- prev.nData = nData;
- prev.nUs = nUs;
- }
- }
-}
-#endif
-
-#define ST_REPEAT 0
-#define ST_WRITE 1
-#define ST_PAUSE 2
-#define ST_FETCH 3
-#define ST_SCAN 4
-#define ST_NSCAN 5
-#define ST_KEYSIZE 6
-#define ST_VALSIZE 7
-#define ST_TRANS 8
-
-
-/*
-** Print the help text for the speed test to stdout.
-**
-** NOTE(review): the option table in do_speed_test2() also contains "-scan"
-** and "-nscan" entries (both with default 0), which are not listed in the
-** text below - confirm whether they should be documented here.
-*/
-static void print_speed_test_help(){
- printf(
-"\n"
-"Repeat the following $repeat times:\n"
-" 1. Insert $write key-value pairs. One transaction for each write op.\n"
-" 2. Pause for $pause ms.\n"
-" 3. Perform $fetch queries on the database.\n"
-"\n"
-" Keys are $keysize bytes in size. Values are $valsize bytes in size\n"
-" Both keys and values are pseudo-randomly generated\n"
-"\n"
-"Options are:\n"
-" -repeat $repeat (default value 10)\n"
-" -write $write (default value 10000)\n"
-" -pause $pause (default value 0)\n"
-" -fetch $fetch (default value 0)\n"
-" -keysize $keysize (default value 12)\n"
-" -valsize $valsize (default value 100)\n"
-" -system $system (default value \"lsm\")\n"
-" -trans $trans (default value 0)\n"
-"\n"
-);
-}
-
-int do_speed_test2(int nArg, char **azArg){
- struct Option {
- const char *zOpt;
- int eVal;
- int iDefault;
- } aOpt[] = {
- { "-repeat", ST_REPEAT, 10},
- { "-write", ST_WRITE, 10000},
- { "-pause", ST_PAUSE, 0},
- { "-fetch", ST_FETCH, 0},
- { "-scan", ST_SCAN, 0},
- { "-nscan", ST_NSCAN, 0},
- { "-keysize", ST_KEYSIZE, 12},
- { "-valsize", ST_VALSIZE, 100},
- { "-trans", ST_TRANS, 0},
- { "-system", -1, 0},
- { "help", -2, 0},
- {0, 0, 0}
- };
- int i;
- int aParam[9];
- int rc = 0;
- int bReadonly = 0;
- int nContent = 0;
-
- TestDb *pDb;
- Datasource *pData;
- DatasourceDefn defn = { TEST_DATASOURCE_RANDOM, 0, 0, 0, 0 };
- char *zSystem = "";
- int bLsm = 1;
- FILE *pLog = 0;
-
-#ifdef NDEBUG
- /* If NDEBUG is defined, disable the dynamic memory related checks in
- ** lsmtest_mem.c. They slow things down. */
- testMallocUninstall(tdb_lsm_env());
-#endif
-
- /* Initialize aParam[] with default values. */
- for(i=0; i=0 ){
- aParam[aOpt[iSel].eVal] = atoi(azArg[i+1]);
- }else{
- zSystem = azArg[i+1];
- bLsm = 0;
-#if 0
- for(j=0; zSystem[j]; j++){
- if( zSystem[j]=='=' ) bLsm = 1;
- }
-#endif
- }
- }
-
- printf("#");
- for(i=0; i=0 ){
- printf(" %s=%d", &aOpt[i].zOpt[1], aParam[aOpt[i].eVal]);
- }else if( aOpt[i].eVal==-1 ){
- printf(" %s=\"%s\"", &aOpt[i].zOpt[1], zSystem);
- }
- }
- }
- printf("\n");
-
- defn.nMinKey = defn.nMaxKey = aParam[ST_KEYSIZE];
- defn.nMinVal = defn.nMaxVal = aParam[ST_VALSIZE];
- pData = testDatasourceNew(&defn);
-
- if( aParam[ST_WRITE]==0 ){
- bReadonly = 1;
- }
-
- if( bLsm ){
- rc = tdb_lsm_open(zSystem, "testdb.lsm", !bReadonly, &pDb);
- }else{
- pDb = testOpen(zSystem, !bReadonly, &rc);
- }
- if( rc!=0 ) return rc;
- if( bReadonly ){
- nContent = testCountDatabase(pDb);
- }
-
-#if 0
- pLog = fopen("/tmp/speed.log", "w");
- tdb_lsm_write_hook(pDb, do_speed_write_hook2, (void *)pLog);
-#endif
-
- for(i=0; i=nArg ){
- testPrintError("option %s requires an argument\n", aOpt[iSel].zOpt);
- return 1;
- }
- if( aOpt[iSel].isSwitch==1 ){
- nRow = atoi(azArg[i]);
- }
- if( aOpt[iSel].isSwitch==2 ){
- nSleep = atoi(azArg[i]);
- }
- if( aOpt[iSel].isSwitch==3 ){
- struct Mode {
- const char *zMode;
- int doReadTest;
- int doWriteTest;
- } aMode[] = {{"ro", 1, 0} , {"rw", 1, 1}, {"wo", 0, 1}, {0, 0, 0}};
- int iMode;
- rc = testArgSelect(aMode, "option", azArg[i], &iMode);
- if( rc ) return rc;
- doReadTest = aMode[iMode].doReadTest;
- doWriteTest = aMode[iMode].doWriteTest;
- }
- if( aOpt[iSel].isSwitch==4 ){
- /* The "-out FILE" switch. This option is used to specify a file to
- ** write the gnuplot script to. */
- zOut = azArg[i];
- }
- }else{
- /* A db name */
- rc = testArgSelect(aOpt, "system", azArg[i], &iSel);
- if( rc ) return rc;
- sys_mask |= (1< 100000) ? 100000 : nSelStep;
-
- aTime = malloc(sizeof(int) * ArraySize(aSys) * nRow/nStep);
- aWrite = malloc(sizeof(int) * nRow/nStep);
- aSelTime = malloc(sizeof(int) * ArraySize(aSys) * nRow/nSelStep);
-
- /* This loop collects the INSERT speed data. */
- if( doWriteTest ){
- printf("Writing output to file \"%s\".\n", zOut);
-
- for(j=0; aSys[j].zLibrary; j++){
- FILE *pLog = 0;
- TestDb *pDb; /* Database being tested */
- lsm_db *pLsm;
- int iDot = 0;
-
- if( ((1<nData ){
- fprintf(pHook->pOut, "write %s %d %d\n",
- (pHook->bLog ? "log" : "db"), (int)pHook->iOff, pHook->nData
- );
- pHook->nData = 0;
- fflush(pHook->pOut);
- }
-}
-
-static void do_insert_write_hook(
- void *pCtx,
- int bLog,
- i64 iOff,
- int nData,
- int nUs
-){
- InsertWriteHook *pHook = (InsertWriteHook *)pCtx;
- if( bLog ) return;
-
- if( nData==0 ){
- flushHook(pHook);
- fprintf(pHook->pOut, "sync %s\n", (bLog ? "log" : "db"));
- }else if( pHook->nData
- && bLog==pHook->bLog
- && iOff==(pHook->iOff+pHook->nData)
- ){
- pHook->nData += nData;
- }else{
- flushHook(pHook);
- pHook->bLog = bLog;
- pHook->iOff = iOff;
- pHook->nData = nData;
- }
-}
-
-static int do_replay(int nArg, char **azArg){
- char aBuf[4096];
- FILE *pInput;
- FILE *pClose = 0;
- const char *zDb;
-
- lsm_env *pEnv;
- lsm_file *pOut;
- int rc;
-
- if( nArg!=2 ){
- testPrintError("Usage: replay WRITELOG FILE\n");
- return 1;
- }
-
- if( strcmp(azArg[0], "-")==0 ){
- pInput = stdin;
- }else{
- pClose = pInput = fopen(azArg[0], "r");
- }
- zDb = azArg[1];
- pEnv = tdb_lsm_env();
- rc = pEnv->xOpen(pEnv, zDb, 0, &pOut);
- if( rc!=LSM_OK ) return rc;
-
- while( feof(pInput)==0 ){
- char zLine[80];
- fgets(zLine, sizeof(zLine)-1, pInput);
- zLine[sizeof(zLine)-1] = '\0';
-
- if( 0==memcmp("sync db", zLine, 7) ){
- rc = pEnv->xSync(pOut);
- if( rc!=0 ) break;
- }else{
- int iOff;
- int nData;
- int nMatch;
- nMatch = sscanf(zLine, "write db %d %d", &iOff, &nData);
- if( nMatch==2 ){
- int i;
- for(i=0; ixWrite(pOut, iOff+i, aBuf, sizeof(aBuf));
- if( rc!=0 ) break;
- }
- }
- }
- }
- if( pClose ) fclose(pClose);
- pEnv->xClose(pOut);
-
- return rc;
-}
-
-static int do_insert(int nArg, char **azArg){
- const char *zDb = "lsm";
- TestDb *pDb = 0;
- int i;
- int rc;
- const int nRow = 1 * 1000 * 1000;
-
- DatasourceDefn defn = { TEST_DATASOURCE_RANDOM, 8, 15, 80, 150 };
- Datasource *pData = 0;
-
- if( nArg>1 ){
- testPrintError("Usage: insert ?DATABASE?\n");
- return 1;
- }
- if( nArg==1 ){ zDb = azArg[0]; }
-
- testMallocUninstall(tdb_lsm_env());
- for(i=0; zDb[i] && zDb[i]!='='; i++);
- if( zDb[i] ){
- rc = tdb_lsm_open(zDb, "testdb.lsm", 1, &pDb);
- }else{
- rc = tdb_open(zDb, 0, 1, &pDb);
- }
-
- if( rc!=0 ){
- testPrintError("Error opening db \"%s\": %d\n", zDb, rc);
- }else{
- InsertWriteHook hook;
- memset(&hook, 0, sizeof(hook));
- hook.pOut = fopen("writelog.txt", "w");
-
- pData = testDatasourceNew(&defn);
- tdb_lsm_config_work_hook(pDb, do_insert_work_hook, 0);
- tdb_lsm_write_hook(pDb, do_insert_write_hook, (void *)&hook);
-
- if( rc==0 ){
- for(i=0; i
-#include
-
-static void lsmtest_rusage_report(void){
- struct rusage r;
- memset(&r, 0, sizeof(r));
-
- getrusage(RUSAGE_SELF, &r);
- printf("# getrusage: { ru_maxrss %d ru_oublock %d ru_inblock %d }\n",
- (int)r.ru_maxrss, (int)r.ru_oublock, (int)r.ru_inblock
- );
-}
-#else
-static void lsmtest_rusage_report(void){
- /* no-op */
-}
-#endif
-
-int main(int argc, char **argv){
- struct TestFunc {
- const char *zName;
- int bRusageReport;
- int (*xFunc)(int, char **);
- } aTest[] = {
- {"random", 1, do_random_tests},
- {"writespeed", 1, do_writer_test},
- {"io", 1, st_do_io},
-
- {"insert", 1, do_insert},
- {"replay", 1, do_replay},
-
- {"speed", 1, do_speed_tests},
- {"speed2", 1, do_speed_test2},
- {"show", 0, st_do_show},
- {"work", 1, st_do_work},
- {"test", 1, do_test},
-
- {0, 0}
- };
- int rc; /* Return Code */
- int iFunc; /* Index into aTest[] */
-
- int nLeakAlloc = 0; /* Allocations leaked by lsm */
- int nLeakByte = 0; /* Bytes leaked by lsm */
-
-#ifdef LSM_DEBUG_MEM
- FILE *pReport = 0; /* lsm malloc() report file */
- const char *zReport = "malloc.txt generated";
-#else
- const char *zReport = "malloc.txt NOT generated";
-#endif
-
- testMallocInstall(tdb_lsm_env());
-
- if( argc<2 ){
- testPrintError("Usage: %s sub-command ?args...?\n", argv[0]);
- return -1;
- }
-
- /* Initialize error reporting */
- testErrorInit(argc, argv);
-
- /* Initialize PRNG system */
- testPrngInit();
-
- rc = testArgSelect(aTest, "sub-command", argv[1], &iFunc);
- if( rc==0 ){
- rc = aTest[iFunc].xFunc(argc-2, &argv[2]);
- }
-
-#ifdef LSM_DEBUG_MEM
- pReport = fopen("malloc.txt", "w");
- testMallocCheck(tdb_lsm_env(), &nLeakAlloc, &nLeakByte, pReport);
- fclose(pReport);
-#else
- testMallocCheck(tdb_lsm_env(), &nLeakAlloc, &nLeakByte, 0);
-#endif
-
- if( nLeakAlloc ){
- testPrintError("Leaked %d bytes in %d allocations (%s)\n",
- nLeakByte, nLeakAlloc, zReport
- );
- if( rc==0 ) rc = -1;
- }
- testMallocUninstall(tdb_lsm_env());
-
- if( aTest[iFunc].bRusageReport ){
- lsmtest_rusage_report();
- }
- return rc;
-}
diff --git a/ext/lsm1/lsm-test/lsmtest_mem.c b/ext/lsm1/lsm-test/lsmtest_mem.c
deleted file mode 100644
index 4c35e849f2..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_mem.c
+++ /dev/null
@@ -1,409 +0,0 @@
-
-#include
-#include
-#include
-
-#define ArraySize(x) ((int)(sizeof(x) / sizeof((x)[0])))
-
-#define MIN(x,y) ((x)<(y) ? (x) : (y))
-
-typedef unsigned int u32;
-typedef unsigned char u8;
-typedef long long int i64;
-typedef unsigned long long int u64;
-
-#if defined(__GLIBC__) && defined(LSM_DEBUG_MEM)
- extern int backtrace(void**,int);
- extern void backtrace_symbols_fd(void*const*,int,int);
-# define TM_BACKTRACE 12
-#else
-# define backtrace(A,B) 1
-# define backtrace_symbols_fd(A,B,C)
-#endif
-
-
-typedef struct TmBlockHdr TmBlockHdr;
-typedef struct TmAgg TmAgg;
-typedef struct TmGlobal TmGlobal;
-
-struct TmGlobal {
- /* Linked list of all currently outstanding allocations. And a table of
- ** all allocations, past and present, indexed by backtrace() info. */
- TmBlockHdr *pFirst;
-#ifdef TM_BACKTRACE
- TmAgg *aHash[10000];
-#endif
-
- /* Underlying malloc/realloc/free functions */
- void *(*xMalloc)(int); /* underlying malloc(3) function */
- void *(*xRealloc)(void *, int); /* underlying realloc(3) function */
- void (*xFree)(void *); /* underlying free(3) function */
-
- /* Mutex to protect pFirst and aHash */
- void (*xEnterMutex)(TmGlobal*); /* Call this to enter the mutex */
- void (*xLeaveMutex)(TmGlobal*); /* Call this to leave mutex */
- void (*xDelMutex)(TmGlobal*); /* Call this to delete mutex */
- void *pMutex; /* Mutex handle */
-
- void *(*xSaveMalloc)(void *, size_t);
- void *(*xSaveRealloc)(void *, void *, size_t);
- void (*xSaveFree)(void *, void *);
-
- /* OOM injection scheduling. If nCountdown is greater than zero when a
- ** malloc attempt is made, it is decremented. If this means nCountdown
- ** transitions from 1 to 0, then the allocation fails. If bPersist is true
- ** when this happens, nCountdown is then incremented back to 1 (so that the
- ** next attempt fails too).
- */
- int nCountdown;
- int bPersist;
- int bEnable;
- void (*xHook)(void *);
- void *pHookCtx;
-};
-
-struct TmBlockHdr {
- TmBlockHdr *pNext;
- TmBlockHdr *pPrev;
- int nByte;
-#ifdef TM_BACKTRACE
- TmAgg *pAgg;
-#endif
- u32 iForeGuard;
-};
-
-#ifdef TM_BACKTRACE
-struct TmAgg {
- int nAlloc; /* Number of allocations at this path */
- int nByte; /* Total number of bytes allocated */
- int nOutAlloc; /* Number of outstanding allocations */
- int nOutByte; /* Number of outstanding bytes */
- void *aFrame[TM_BACKTRACE]; /* backtrace() output */
- TmAgg *pNext; /* Next object in hash-table collision */
-};
-#endif
-
-#define FOREGUARD 0x80F5E153
-#define REARGUARD 0xE4676B53
-static const u32 rearguard = REARGUARD;
-
-#define ROUND8(x) (((x)+7)&~7)
-
-#define BLOCK_HDR_SIZE (ROUND8( sizeof(TmBlockHdr) ))
-
-static void lsmtest_oom_error(void){
- static int nErr = 0;
- nErr++;
-}
-
-static void tmEnterMutex(TmGlobal *pTm){
- pTm->xEnterMutex(pTm);
-}
-static void tmLeaveMutex(TmGlobal *pTm){
- pTm->xLeaveMutex(pTm);
-}
-
-static void *tmMalloc(TmGlobal *pTm, int nByte){
- TmBlockHdr *pNew; /* New allocation header block */
- u8 *pUser; /* Return value */
- int nReq; /* Total number of bytes requested */
-
- assert( sizeof(rearguard)==4 );
- nReq = BLOCK_HDR_SIZE + nByte + 4;
- pNew = (TmBlockHdr *)pTm->xMalloc(nReq);
- memset(pNew, 0, sizeof(TmBlockHdr));
-
- tmEnterMutex(pTm);
- assert( pTm->nCountdown>=0 );
- assert( pTm->bPersist==0 || pTm->bPersist==1 );
-
- if( pTm->bEnable && pTm->nCountdown==1 ){
- /* Simulate an OOM error. */
- lsmtest_oom_error();
- pTm->xFree(pNew);
- pTm->nCountdown = pTm->bPersist;
- if( pTm->xHook ) pTm->xHook(pTm->pHookCtx);
- pUser = 0;
- }else{
- if( pTm->bEnable && pTm->nCountdown ) pTm->nCountdown--;
-
- pNew->iForeGuard = FOREGUARD;
- pNew->nByte = nByte;
- pNew->pNext = pTm->pFirst;
-
- if( pTm->pFirst ){
- pTm->pFirst->pPrev = pNew;
- }
- pTm->pFirst = pNew;
-
- pUser = &((u8 *)pNew)[BLOCK_HDR_SIZE];
- memset(pUser, 0x56, nByte);
- memcpy(&pUser[nByte], &rearguard, 4);
-
-#ifdef TM_BACKTRACE
- {
- TmAgg *pAgg;
- int i;
- u32 iHash = 0;
- void *aFrame[TM_BACKTRACE];
- memset(aFrame, 0, sizeof(aFrame));
- backtrace(aFrame, TM_BACKTRACE);
-
- for(i=0; iaHash);
-
- for(pAgg=pTm->aHash[iHash]; pAgg; pAgg=pAgg->pNext){
- if( memcmp(pAgg->aFrame, aFrame, sizeof(aFrame))==0 ) break;
- }
- if( !pAgg ){
- pAgg = (TmAgg *)pTm->xMalloc(sizeof(TmAgg));
- memset(pAgg, 0, sizeof(TmAgg));
- memcpy(pAgg->aFrame, aFrame, sizeof(aFrame));
- pAgg->pNext = pTm->aHash[iHash];
- pTm->aHash[iHash] = pAgg;
- }
- pAgg->nAlloc++;
- pAgg->nByte += nByte;
- pAgg->nOutAlloc++;
- pAgg->nOutByte += nByte;
- pNew->pAgg = pAgg;
- }
-#endif
- }
-
- tmLeaveMutex(pTm);
- return pUser;
-}
-
-static void tmFree(TmGlobal *pTm, void *p){
- if( p ){
- TmBlockHdr *pHdr;
- u8 *pUser = (u8 *)p;
-
- tmEnterMutex(pTm);
- pHdr = (TmBlockHdr *)(pUser - BLOCK_HDR_SIZE);
- assert( pHdr->iForeGuard==FOREGUARD );
- assert( 0==memcmp(&pUser[pHdr->nByte], &rearguard, 4) );
-
- if( pHdr->pPrev ){
- assert( pHdr->pPrev->pNext==pHdr );
- pHdr->pPrev->pNext = pHdr->pNext;
- }else{
- assert( pHdr==pTm->pFirst );
- pTm->pFirst = pHdr->pNext;
- }
- if( pHdr->pNext ){
- assert( pHdr->pNext->pPrev==pHdr );
- pHdr->pNext->pPrev = pHdr->pPrev;
- }
-
-#ifdef TM_BACKTRACE
- pHdr->pAgg->nOutAlloc--;
- pHdr->pAgg->nOutByte -= pHdr->nByte;
-#endif
-
- tmLeaveMutex(pTm);
- memset(pUser, 0x58, pHdr->nByte);
- memset(pHdr, 0x57, sizeof(TmBlockHdr));
- pTm->xFree(pHdr);
- }
-}
-
-static void *tmRealloc(TmGlobal *pTm, void *p, int nByte){
- void *pNew;
-
- pNew = tmMalloc(pTm, nByte);
- if( pNew && p ){
- TmBlockHdr *pHdr;
- u8 *pUser = (u8 *)p;
- pHdr = (TmBlockHdr *)(pUser - BLOCK_HDR_SIZE);
- memcpy(pNew, p, MIN(nByte, pHdr->nByte));
- tmFree(pTm, p);
- }
- return pNew;
-}
-
-static void tmMallocOom(
- TmGlobal *pTm,
- int nCountdown,
- int bPersist,
- void (*xHook)(void *),
- void *pHookCtx
-){
- assert( nCountdown>=0 );
- assert( bPersist==0 || bPersist==1 );
- pTm->nCountdown = nCountdown;
- pTm->bPersist = bPersist;
- pTm->xHook = xHook;
- pTm->pHookCtx = pHookCtx;
- pTm->bEnable = 1;
-}
-
-static void tmMallocOomEnable(
- TmGlobal *pTm,
- int bEnable
-){
- pTm->bEnable = bEnable;
-}
-
-static void tmMallocCheck(
- TmGlobal *pTm,
- int *pnLeakAlloc,
- int *pnLeakByte,
- FILE *pFile
-){
- TmBlockHdr *pHdr;
- int nLeak = 0;
- int nByte = 0;
-
- if( pTm==0 ) return;
-
- for(pHdr=pTm->pFirst; pHdr; pHdr=pHdr->pNext){
- nLeak++;
- nByte += pHdr->nByte;
- }
- if( pnLeakAlloc ) *pnLeakAlloc = nLeak;
- if( pnLeakByte ) *pnLeakByte = nByte;
-
-#ifdef TM_BACKTRACE
- if( pFile ){
- int i;
- fprintf(pFile, "LEAKS\n");
- for(i=0; iaHash); i++){
- TmAgg *pAgg;
- for(pAgg=pTm->aHash[i]; pAgg; pAgg=pAgg->pNext){
- if( pAgg->nOutAlloc ){
- int j;
- fprintf(pFile, "%d %d ", pAgg->nOutByte, pAgg->nOutAlloc);
- for(j=0; jaFrame[j]);
- }
- fprintf(pFile, "\n");
- }
- }
- }
- fprintf(pFile, "\nALLOCATIONS\n");
- for(i=0; iaHash); i++){
- TmAgg *pAgg;
- for(pAgg=pTm->aHash[i]; pAgg; pAgg=pAgg->pNext){
- int j;
- fprintf(pFile, "%d %d ", pAgg->nByte, pAgg->nAlloc);
- for(j=0; jaFrame[j]);
- fprintf(pFile, "\n");
- }
- }
- }
-#else
- (void)pFile;
-#endif
-}
-
-
-#include "lsm.h"
-#include "stdlib.h"
-
-typedef struct LsmMutex LsmMutex;
-struct LsmMutex {
- lsm_env *pEnv;
- lsm_mutex *pMutex;
-};
-
-static void tmLsmMutexEnter(TmGlobal *pTm){
- LsmMutex *p = (LsmMutex *)pTm->pMutex;
- p->pEnv->xMutexEnter(p->pMutex);
-}
-static void tmLsmMutexLeave(TmGlobal *pTm){
- LsmMutex *p = (LsmMutex *)(pTm->pMutex);
- p->pEnv->xMutexLeave(p->pMutex);
-}
-static void tmLsmMutexDel(TmGlobal *pTm){
- LsmMutex *p = (LsmMutex *)pTm->pMutex;
- pTm->xFree(p);
-}
-static void *tmLsmMalloc(int n){ return malloc(n); }
-static void tmLsmFree(void *ptr){ free(ptr); }
-static void *tmLsmRealloc(void *ptr, int n){ return realloc(ptr, n); }
-
-static void *tmLsmEnvMalloc(lsm_env *p, size_t n){
- return tmMalloc((TmGlobal *)(p->pMemCtx), n);
-}
-static void tmLsmEnvFree(lsm_env *p, void *ptr){
- tmFree((TmGlobal *)(p->pMemCtx), ptr);
-}
-static void *tmLsmEnvRealloc(lsm_env *p, void *ptr, size_t n){
- return tmRealloc((TmGlobal *)(p->pMemCtx), ptr, n);
-}
-
-void testMallocInstall(lsm_env *pEnv){
- TmGlobal *pGlobal;
- LsmMutex *pMutex;
- assert( pEnv->pMemCtx==0 );
-
- /* Allocate and populate a TmGlobal structure. */
- pGlobal = (TmGlobal *)tmLsmMalloc(sizeof(TmGlobal));
- memset(pGlobal, 0, sizeof(TmGlobal));
- pGlobal->xMalloc = tmLsmMalloc;
- pGlobal->xRealloc = tmLsmRealloc;
- pGlobal->xFree = tmLsmFree;
- pMutex = (LsmMutex *)pGlobal->xMalloc(sizeof(LsmMutex));
- pMutex->pEnv = pEnv;
- pEnv->xMutexStatic(pEnv, LSM_MUTEX_HEAP, &pMutex->pMutex);
- pGlobal->xEnterMutex = tmLsmMutexEnter;
- pGlobal->xLeaveMutex = tmLsmMutexLeave;
- pGlobal->xDelMutex = tmLsmMutexDel;
- pGlobal->pMutex = (void *)pMutex;
-
- pGlobal->xSaveMalloc = pEnv->xMalloc;
- pGlobal->xSaveRealloc = pEnv->xRealloc;
- pGlobal->xSaveFree = pEnv->xFree;
-
- /* Set up pEnv to the use the new TmGlobal */
- pEnv->pMemCtx = (void *)pGlobal;
- pEnv->xMalloc = tmLsmEnvMalloc;
- pEnv->xRealloc = tmLsmEnvRealloc;
- pEnv->xFree = tmLsmEnvFree;
-}
-
-void testMallocUninstall(lsm_env *pEnv){
- TmGlobal *p = (TmGlobal *)pEnv->pMemCtx;
- pEnv->pMemCtx = 0;
- if( p ){
- pEnv->xMalloc = p->xSaveMalloc;
- pEnv->xRealloc = p->xSaveRealloc;
- pEnv->xFree = p->xSaveFree;
- p->xDelMutex(p);
- tmLsmFree(p);
- }
-}
-
-void testMallocCheck(
- lsm_env *pEnv,
- int *pnLeakAlloc,
- int *pnLeakByte,
- FILE *pFile
-){
- if( pEnv->pMemCtx==0 ){
- *pnLeakAlloc = 0;
- *pnLeakByte = 0;
- }else{
- tmMallocCheck((TmGlobal *)(pEnv->pMemCtx), pnLeakAlloc, pnLeakByte, pFile);
- }
-}
-
-void testMallocOom(
- lsm_env *pEnv,
- int nCountdown,
- int bPersist,
- void (*xHook)(void *),
- void *pHookCtx
-){
- TmGlobal *pTm = (TmGlobal *)(pEnv->pMemCtx);
- tmMallocOom(pTm, nCountdown, bPersist, xHook, pHookCtx);
-}
-
-void testMallocOomEnable(lsm_env *pEnv, int bEnable){
- TmGlobal *pTm = (TmGlobal *)(pEnv->pMemCtx);
- tmMallocOomEnable(pTm, bEnable);
-}
diff --git a/ext/lsm1/lsm-test/lsmtest_tdb.c b/ext/lsm1/lsm-test/lsmtest_tdb.c
deleted file mode 100644
index 8f63f64acb..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_tdb.c
+++ /dev/null
@@ -1,846 +0,0 @@
-
-/*
-** This program attempts to test the correctness of some facets of the
-** LSM database library. Specifically, that the contents of the database
-** are maintained correctly during a series of inserts and deletes.
-*/
-
-
-#include "lsmtest_tdb.h"
-#include "lsm.h"
-
-#include "lsmtest.h"
-
-#include
-#include
-#include
-#ifndef _WIN32
-# include
-#endif
-#include
-
-
-typedef struct SqlDb SqlDb;
-
-static int error_transaction_function(TestDb *p, int iLevel){
- unused_parameter(p);
- unused_parameter(iLevel);
- return -1;
-}
-
-
-/*************************************************************************
-** Begin wrapper for LevelDB.
-*/
-#ifdef HAVE_LEVELDB
-
-#include
-
-typedef struct LevelDb LevelDb;
-struct LevelDb {
- TestDb base;
- leveldb_t *db;
- leveldb_options_t *pOpt;
- leveldb_writeoptions_t *pWriteOpt;
- leveldb_readoptions_t *pReadOpt;
-
- char *pVal;
-};
-
-static int test_leveldb_close(TestDb *pTestDb){
- LevelDb *pDb = (LevelDb *)pTestDb;
-
- leveldb_close(pDb->db);
- leveldb_writeoptions_destroy(pDb->pWriteOpt);
- leveldb_readoptions_destroy(pDb->pReadOpt);
- leveldb_options_destroy(pDb->pOpt);
- free(pDb->pVal);
- free(pDb);
-
- return 0;
-}
-
-static int test_leveldb_write(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void *pVal,
- int nVal
-){
- LevelDb *pDb = (LevelDb *)pTestDb;
- char *zErr = 0;
- leveldb_put(pDb->db, pDb->pWriteOpt, pKey, nKey, pVal, nVal, &zErr);
- return (zErr!=0);
-}
-
-static int test_leveldb_delete(TestDb *pTestDb, void *pKey, int nKey){
- LevelDb *pDb = (LevelDb *)pTestDb;
- char *zErr = 0;
- leveldb_delete(pDb->db, pDb->pWriteOpt, pKey, nKey, &zErr);
- return (zErr!=0);
-}
-
-static int test_leveldb_fetch(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void **ppVal,
- int *pnVal
-){
- LevelDb *pDb = (LevelDb *)pTestDb;
- char *zErr = 0;
- size_t nVal = 0;
-
- if( pKey==0 ) return 0;
- free(pDb->pVal);
- pDb->pVal = leveldb_get(pDb->db, pDb->pReadOpt, pKey, nKey, &nVal, &zErr);
- *ppVal = (void *)(pDb->pVal);
- if( pDb->pVal==0 ){
- *pnVal = -1;
- }else{
- *pnVal = (int)nVal;
- }
-
- return (zErr!=0);
-}
-
-static int test_leveldb_scan(
- TestDb *pTestDb,
- void *pCtx,
- int bReverse,
- void *pKey1, int nKey1, /* Start of search */
- void *pKey2, int nKey2, /* End of search */
- void (*xCallback)(void *, void *, int , void *, int)
-){
- LevelDb *pDb = (LevelDb *)pTestDb;
- leveldb_iterator_t *iter;
-
- iter = leveldb_create_iterator(pDb->db, pDb->pReadOpt);
-
- if( bReverse==0 ){
- if( pKey1 ){
- leveldb_iter_seek(iter, pKey1, nKey1);
- }else{
- leveldb_iter_seek_to_first(iter);
- }
- }else{
- if( pKey2 ){
- leveldb_iter_seek(iter, pKey2, nKey2);
-
- if( leveldb_iter_valid(iter)==0 ){
- leveldb_iter_seek_to_last(iter);
- }else{
- const char *k; size_t n;
- int res;
- k = leveldb_iter_key(iter, &n);
- res = memcmp(k, pKey2, MIN(n, nKey2));
- if( res==0 ) res = n - nKey2;
- assert( res>=0 );
- if( res>0 ){
- leveldb_iter_prev(iter);
- }
- }
- }else{
- leveldb_iter_seek_to_last(iter);
- }
- }
-
-
- while( leveldb_iter_valid(iter) ){
- const char *k; size_t n;
- const char *v; size_t n2;
- int res;
-
- k = leveldb_iter_key(iter, &n);
- if( bReverse==0 && pKey2 ){
- res = memcmp(k, pKey2, MIN(n, nKey2));
- if( res==0 ) res = n - nKey2;
- if( res>0 ) break;
- }
- if( bReverse!=0 && pKey1 ){
- res = memcmp(k, pKey1, MIN(n, nKey1));
- if( res==0 ) res = n - nKey1;
- if( res<0 ) break;
- }
-
- v = leveldb_iter_value(iter, &n2);
-
- xCallback(pCtx, (void *)k, n, (void *)v, n2);
-
- if( bReverse==0 ){
- leveldb_iter_next(iter);
- }else{
- leveldb_iter_prev(iter);
- }
- }
-
- leveldb_iter_destroy(iter);
- return 0;
-}
-
-static int test_leveldb_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- static const DatabaseMethods LeveldbMethods = {
- test_leveldb_close,
- test_leveldb_write,
- test_leveldb_delete,
- 0,
- test_leveldb_fetch,
- test_leveldb_scan,
- error_transaction_function,
- error_transaction_function,
- error_transaction_function
- };
-
- LevelDb *pLevelDb;
- char *zErr = 0;
-
- if( bClear ){
- char *zCmd = sqlite3_mprintf("rm -rf %s\n", zFilename);
- system(zCmd);
- sqlite3_free(zCmd);
- }
-
- pLevelDb = (LevelDb *)malloc(sizeof(LevelDb));
- memset(pLevelDb, 0, sizeof(LevelDb));
-
- pLevelDb->pOpt = leveldb_options_create();
- leveldb_options_set_create_if_missing(pLevelDb->pOpt, 1);
- pLevelDb->pWriteOpt = leveldb_writeoptions_create();
- pLevelDb->pReadOpt = leveldb_readoptions_create();
-
- pLevelDb->db = leveldb_open(pLevelDb->pOpt, zFilename, &zErr);
-
- if( zErr ){
- test_leveldb_close((TestDb *)pLevelDb);
- *ppDb = 0;
- return 1;
- }
-
- *ppDb = (TestDb *)pLevelDb;
- pLevelDb->base.pMethods = &LeveldbMethods;
- return 0;
-}
-#endif /* HAVE_LEVELDB */
-/*
-** End wrapper for LevelDB.
-*************************************************************************/
-
-#ifdef HAVE_KYOTOCABINET
-static int kc_close(TestDb *pTestDb){
- return test_kc_close(pTestDb);
-}
-
-static int kc_write(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void *pVal,
- int nVal
-){
- return test_kc_write(pTestDb, pKey, nKey, pVal, nVal);
-}
-
-static int kc_delete(TestDb *pTestDb, void *pKey, int nKey){
- return test_kc_delete(pTestDb, pKey, nKey);
-}
-
-static int kc_delete_range(
- TestDb *pTestDb,
- void *pKey1, int nKey1,
- void *pKey2, int nKey2
-){
- return test_kc_delete_range(pTestDb, pKey1, nKey1, pKey2, nKey2);
-}
-
-static int kc_fetch(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void **ppVal,
- int *pnVal
-){
- if( pKey==0 ) return LSM_OK;
- return test_kc_fetch(pTestDb, pKey, nKey, ppVal, pnVal);
-}
-
-static int kc_scan(
- TestDb *pTestDb,
- void *pCtx,
- int bReverse,
- void *pFirst, int nFirst,
- void *pLast, int nLast,
- void (*xCallback)(void *, void *, int , void *, int)
-){
- return test_kc_scan(
- pTestDb, pCtx, bReverse, pFirst, nFirst, pLast, nLast, xCallback
- );
-}
-
-static int kc_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- static const DatabaseMethods KcdbMethods = {
- kc_close,
- kc_write,
- kc_delete,
- kc_delete_range,
- kc_fetch,
- kc_scan,
- error_transaction_function,
- error_transaction_function,
- error_transaction_function
- };
-
- int rc;
- TestDb *pTestDb = 0;
-
- rc = test_kc_open(zFilename, bClear, &pTestDb);
- if( rc!=0 ){
- *ppDb = 0;
- return rc;
- }
- pTestDb->pMethods = &KcdbMethods;
- *ppDb = pTestDb;
- return 0;
-}
-#endif /* HAVE_KYOTOCABINET */
-/*
-** End wrapper for Kyoto cabinet.
-*************************************************************************/
-
-#ifdef HAVE_MDB
-static int mdb_close(TestDb *pTestDb){
- return test_mdb_close(pTestDb);
-}
-
-static int mdb_write(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void *pVal,
- int nVal
-){
- return test_mdb_write(pTestDb, pKey, nKey, pVal, nVal);
-}
-
-static int mdb_delete(TestDb *pTestDb, void *pKey, int nKey){
- return test_mdb_delete(pTestDb, pKey, nKey);
-}
-
-static int mdb_fetch(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void **ppVal,
- int *pnVal
-){
- if( pKey==0 ) return LSM_OK;
- return test_mdb_fetch(pTestDb, pKey, nKey, ppVal, pnVal);
-}
-
-static int mdb_scan(
- TestDb *pTestDb,
- void *pCtx,
- int bReverse,
- void *pFirst, int nFirst,
- void *pLast, int nLast,
- void (*xCallback)(void *, void *, int , void *, int)
-){
- return test_mdb_scan(
- pTestDb, pCtx, bReverse, pFirst, nFirst, pLast, nLast, xCallback
- );
-}
-
-static int mdb_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- static const DatabaseMethods KcdbMethods = {
- mdb_close,
- mdb_write,
- mdb_delete,
- 0,
- mdb_fetch,
- mdb_scan,
- error_transaction_function,
- error_transaction_function,
- error_transaction_function
- };
-
- int rc;
- TestDb *pTestDb = 0;
-
- rc = test_mdb_open(zSpec, zFilename, bClear, &pTestDb);
- if( rc!=0 ){
- *ppDb = 0;
- return rc;
- }
- pTestDb->pMethods = &KcdbMethods;
- *ppDb = pTestDb;
- return 0;
-}
-#endif /* HAVE_MDB */
-
-/*************************************************************************
-** Begin wrapper for SQLite.
-*/
-
-/*
-** nOpenTrans:
-** The number of open nested transactions, in the same sense as used
-** by the tdb_begin/commit/rollback and SQLite 4 KV interfaces. If this
-** value is 0, there are no transactions open at all. If it is 1, then
-** there is a read transaction. If it is 2 or greater, then there are
-** (nOpenTrans-1) nested write transactions open.
-*/
-struct SqlDb {
- TestDb base;
- sqlite3 *db;
- sqlite3_stmt *pInsert;
- sqlite3_stmt *pDelete;
- sqlite3_stmt *pDeleteRange;
- sqlite3_stmt *pFetch;
- sqlite3_stmt *apScan[8];
-
- int nOpenTrans;
-
- /* Used by sql_fetch() to allocate space for results */
- int nAlloc;
- u8 *aAlloc;
-};
-
-static int sql_close(TestDb *pTestDb){
- SqlDb *pDb = (SqlDb *)pTestDb;
- sqlite3_finalize(pDb->pInsert);
- sqlite3_finalize(pDb->pDelete);
- sqlite3_finalize(pDb->pDeleteRange);
- sqlite3_finalize(pDb->pFetch);
- sqlite3_finalize(pDb->apScan[0]);
- sqlite3_finalize(pDb->apScan[1]);
- sqlite3_finalize(pDb->apScan[2]);
- sqlite3_finalize(pDb->apScan[3]);
- sqlite3_finalize(pDb->apScan[4]);
- sqlite3_finalize(pDb->apScan[5]);
- sqlite3_finalize(pDb->apScan[6]);
- sqlite3_finalize(pDb->apScan[7]);
- sqlite3_close(pDb->db);
- free((char *)pDb->aAlloc);
- free((char *)pDb);
- return SQLITE_OK;
-}
-
-static int sql_write(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void *pVal,
- int nVal
-){
- SqlDb *pDb = (SqlDb *)pTestDb;
- sqlite3_bind_blob(pDb->pInsert, 1, pKey, nKey, SQLITE_STATIC);
- sqlite3_bind_blob(pDb->pInsert, 2, pVal, nVal, SQLITE_STATIC);
- sqlite3_step(pDb->pInsert);
- return sqlite3_reset(pDb->pInsert);
-}
-
-static int sql_delete(TestDb *pTestDb, void *pKey, int nKey){
- SqlDb *pDb = (SqlDb *)pTestDb;
- sqlite3_bind_blob(pDb->pDelete, 1, pKey, nKey, SQLITE_STATIC);
- sqlite3_step(pDb->pDelete);
- return sqlite3_reset(pDb->pDelete);
-}
-
-static int sql_delete_range(
- TestDb *pTestDb,
- void *pKey1, int nKey1,
- void *pKey2, int nKey2
-){
- SqlDb *pDb = (SqlDb *)pTestDb;
- sqlite3_bind_blob(pDb->pDeleteRange, 1, pKey1, nKey1, SQLITE_STATIC);
- sqlite3_bind_blob(pDb->pDeleteRange, 2, pKey2, nKey2, SQLITE_STATIC);
- sqlite3_step(pDb->pDeleteRange);
- return sqlite3_reset(pDb->pDeleteRange);
-}
-
-static int sql_fetch(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void **ppVal,
- int *pnVal
-){
- SqlDb *pDb = (SqlDb *)pTestDb;
- int rc;
-
- sqlite3_reset(pDb->pFetch);
- if( pKey==0 ){
- assert( ppVal==0 );
- assert( pnVal==0 );
- return LSM_OK;
- }
-
- sqlite3_bind_blob(pDb->pFetch, 1, pKey, nKey, SQLITE_STATIC);
- rc = sqlite3_step(pDb->pFetch);
- if( rc==SQLITE_ROW ){
- int nVal = sqlite3_column_bytes(pDb->pFetch, 0);
- u8 *aVal = (void *)sqlite3_column_blob(pDb->pFetch, 0);
-
- if( nVal>pDb->nAlloc ){
- free(pDb->aAlloc);
- pDb->aAlloc = (u8 *)malloc(nVal*2);
- pDb->nAlloc = nVal*2;
- }
- memcpy(pDb->aAlloc, aVal, nVal);
- *pnVal = nVal;
- *ppVal = (void *)pDb->aAlloc;
- }else{
- *pnVal = -1;
- *ppVal = 0;
- }
-
- rc = sqlite3_reset(pDb->pFetch);
- return rc;
-}
-
-static int sql_scan(
- TestDb *pTestDb,
- void *pCtx,
- int bReverse,
- void *pFirst, int nFirst,
- void *pLast, int nLast,
- void (*xCallback)(void *, void *, int , void *, int)
-){
- SqlDb *pDb = (SqlDb *)pTestDb;
- sqlite3_stmt *pScan;
-
- assert( bReverse==1 || bReverse==0 );
- pScan = pDb->apScan[(pFirst==0) + (pLast==0)*2 + bReverse*4];
-
- if( pFirst ) sqlite3_bind_blob(pScan, 1, pFirst, nFirst, SQLITE_STATIC);
- if( pLast ) sqlite3_bind_blob(pScan, 2, pLast, nLast, SQLITE_STATIC);
-
- while( SQLITE_ROW==sqlite3_step(pScan) ){
- void *pKey; int nKey;
- void *pVal; int nVal;
-
- nKey = sqlite3_column_bytes(pScan, 0);
- pKey = (void *)sqlite3_column_blob(pScan, 0);
- nVal = sqlite3_column_bytes(pScan, 1);
- pVal = (void *)sqlite3_column_blob(pScan, 1);
-
- xCallback(pCtx, pKey, nKey, pVal, nVal);
- }
- return sqlite3_reset(pScan);
-}
-
-static int sql_begin(TestDb *pTestDb, int iLevel){
- int i;
- SqlDb *pDb = (SqlDb *)pTestDb;
-
- /* iLevel==0 is a no-op */
- if( iLevel==0 ) return 0;
-
- /* If there are no transactions at all open, open a read transaction. */
- if( pDb->nOpenTrans==0 ){
- int rc = sqlite3_exec(pDb->db,
- "BEGIN; SELECT * FROM sqlite_schema LIMIT 1;" , 0, 0, 0
- );
- if( rc!=0 ) return rc;
- pDb->nOpenTrans = 1;
- }
-
- /* Open any required write transactions */
- for(i=pDb->nOpenTrans; idb, zSql, 0, 0, 0);
- sqlite3_free(zSql);
- if( rc!=SQLITE_OK ) return rc;
- }
-
- pDb->nOpenTrans = iLevel;
- return 0;
-}
-
-static int sql_commit(TestDb *pTestDb, int iLevel){
- SqlDb *pDb = (SqlDb *)pTestDb;
- assert( iLevel>=0 );
-
- /* Close the read transaction if requested. */
- if( pDb->nOpenTrans>=1 && iLevel==0 ){
- int rc = sqlite3_exec(pDb->db, "COMMIT", 0, 0, 0);
- if( rc!=0 ) return rc;
- pDb->nOpenTrans = 0;
- }
-
- /* Close write transactions as required */
- if( pDb->nOpenTrans>iLevel ){
- char *zSql = sqlite3_mprintf("RELEASE x%d", iLevel);
- int rc = sqlite3_exec(pDb->db, zSql, 0, 0, 0);
- sqlite3_free(zSql);
- if( rc!=0 ) return rc;
- }
-
- pDb->nOpenTrans = iLevel;
- return 0;
-}
-
-static int sql_rollback(TestDb *pTestDb, int iLevel){
- SqlDb *pDb = (SqlDb *)pTestDb;
- assert( iLevel>=0 );
-
- if( pDb->nOpenTrans>=1 && iLevel==0 ){
- /* Close the read transaction if requested. */
- int rc = sqlite3_exec(pDb->db, "ROLLBACK", 0, 0, 0);
- if( rc!=0 ) return rc;
- }else if( pDb->nOpenTrans>1 && iLevel==1 ){
- /* Or, rollback and close the top-level write transaction */
- int rc = sqlite3_exec(pDb->db, "ROLLBACK TO x1; RELEASE x1;", 0, 0, 0);
- if( rc!=0 ) return rc;
- }else{
- /* Or, just roll back some nested transactions */
- char *zSql = sqlite3_mprintf("ROLLBACK TO x%d", iLevel-1);
- int rc = sqlite3_exec(pDb->db, zSql, 0, 0, 0);
- sqlite3_free(zSql);
- if( rc!=0 ) return rc;
- }
-
- pDb->nOpenTrans = iLevel;
- return 0;
-}
-
-static int sql_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- static const DatabaseMethods SqlMethods = {
- sql_close,
- sql_write,
- sql_delete,
- sql_delete_range,
- sql_fetch,
- sql_scan,
- sql_begin,
- sql_commit,
- sql_rollback
- };
- const char *zCreate = "CREATE TABLE IF NOT EXISTS t1(k PRIMARY KEY, v)";
- const char *zInsert = "REPLACE INTO t1 VALUES(?, ?)";
- const char *zDelete = "DELETE FROM t1 WHERE k = ?";
- const char *zRange = "DELETE FROM t1 WHERE k>? AND k";
- const char *zFetch = "SELECT v FROM t1 WHERE k = ?";
-
- const char *zScan0 = "SELECT * FROM t1 WHERE k BETWEEN ?1 AND ?2 ORDER BY k";
- const char *zScan1 = "SELECT * FROM t1 WHERE k <= ?2 ORDER BY k";
- const char *zScan2 = "SELECT * FROM t1 WHERE k >= ?1 ORDER BY k";
- const char *zScan3 = "SELECT * FROM t1 ORDER BY k";
-
- const char *zScan4 =
- "SELECT * FROM t1 WHERE k BETWEEN ?1 AND ?2 ORDER BY k DESC";
- const char *zScan5 = "SELECT * FROM t1 WHERE k <= ?2 ORDER BY k DESC";
- const char *zScan6 = "SELECT * FROM t1 WHERE k >= ?1 ORDER BY k DESC";
- const char *zScan7 = "SELECT * FROM t1 ORDER BY k DESC";
-
- int rc;
- SqlDb *pDb;
- char *zPragma;
-
- if( bClear && zFilename && zFilename[0] ){
- unlink(zFilename);
- }
-
- pDb = (SqlDb *)malloc(sizeof(SqlDb));
- memset(pDb, 0, sizeof(SqlDb));
- pDb->base.pMethods = &SqlMethods;
-
- if( 0!=(rc = sqlite3_open(zFilename, &pDb->db))
- || 0!=(rc = sqlite3_exec(pDb->db, zCreate, 0, 0, 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zInsert, -1, &pDb->pInsert, 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zDelete, -1, &pDb->pDelete, 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zRange, -1, &pDb->pDeleteRange, 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zFetch, -1, &pDb->pFetch, 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zScan0, -1, &pDb->apScan[0], 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zScan1, -1, &pDb->apScan[1], 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zScan2, -1, &pDb->apScan[2], 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zScan3, -1, &pDb->apScan[3], 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zScan4, -1, &pDb->apScan[4], 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zScan5, -1, &pDb->apScan[5], 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zScan6, -1, &pDb->apScan[6], 0))
- || 0!=(rc = sqlite3_prepare_v2(pDb->db, zScan7, -1, &pDb->apScan[7], 0))
- ){
- *ppDb = 0;
- sql_close((TestDb *)pDb);
- return rc;
- }
-
- zPragma = sqlite3_mprintf("PRAGMA page_size=%d", TESTDB_DEFAULT_PAGE_SIZE);
- sqlite3_exec(pDb->db, zPragma, 0, 0, 0);
- sqlite3_free(zPragma);
- zPragma = sqlite3_mprintf("PRAGMA cache_size=%d", TESTDB_DEFAULT_CACHE_SIZE);
- sqlite3_exec(pDb->db, zPragma, 0, 0, 0);
- sqlite3_free(zPragma);
-
- /* sqlite3_exec(pDb->db, "PRAGMA locking_mode=EXCLUSIVE", 0, 0, 0); */
- sqlite3_exec(pDb->db, "PRAGMA synchronous=OFF", 0, 0, 0);
- sqlite3_exec(pDb->db, "PRAGMA journal_mode=WAL", 0, 0, 0);
- sqlite3_exec(pDb->db, "PRAGMA wal_autocheckpoint=4096", 0, 0, 0);
- if( zSpec ){
- rc = sqlite3_exec(pDb->db, zSpec, 0, 0, 0);
- if( rc!=SQLITE_OK ){
- sql_close((TestDb *)pDb);
- return rc;
- }
- }
-
- *ppDb = (TestDb *)pDb;
- return 0;
-}
-/*
-** End wrapper for SQLite.
-*************************************************************************/
-
-/*************************************************************************
-** Begin exported functions.
-*/
-static struct Lib {
- const char *zName;
- const char *zDefaultDb;
- int (*xOpen)(const char *, const char *zFilename, int bClear, TestDb **ppDb);
-} aLib[] = {
- { "sqlite3", "testdb.sqlite", sql_open },
- { "lsm_small", "testdb.lsm_small", test_lsm_small_open },
- { "lsm_lomem", "testdb.lsm_lomem", test_lsm_lomem_open },
- { "lsm_lomem2", "testdb.lsm_lomem2", test_lsm_lomem2_open },
-#ifdef HAVE_ZLIB
- { "lsm_zip", "testdb.lsm_zip", test_lsm_zip_open },
-#endif
- { "lsm", "testdb.lsm", test_lsm_open },
-#ifdef LSM_MUTEX_PTHREADS
- { "lsm_mt2", "testdb.lsm_mt2", test_lsm_mt2 },
- { "lsm_mt3", "testdb.lsm_mt3", test_lsm_mt3 },
-#endif
-#ifdef HAVE_LEVELDB
- { "leveldb", "testdb.leveldb", test_leveldb_open },
-#endif
-#ifdef HAVE_KYOTOCABINET
- { "kyotocabinet", "testdb.kc", kc_open },
-#endif
-#ifdef HAVE_MDB
- { "mdb", "./testdb.mdb", mdb_open }
-#endif
-};
-
-const char *tdb_system_name(int i){
- if( i<0 || i>=ArraySize(aLib) ) return 0;
- return aLib[i].zName;
-}
-
-const char *tdb_default_db(const char *zSys){
- int i;
- for(i=0; izLibrary = aLib[i].zName;
- }
- break;
- }
- }
-
- if( rc ){
- /* Failed to find the requested database library. Return an error. */
- *ppDb = 0;
- }
- return rc;
-}
-
-int tdb_close(TestDb *pDb){
- if( pDb ){
- return pDb->pMethods->xClose(pDb);
- }
- return 0;
-}
-
-int tdb_write(TestDb *pDb, void *pKey, int nKey, void *pVal, int nVal){
- return pDb->pMethods->xWrite(pDb, pKey, nKey, pVal, nVal);
-}
-
-int tdb_delete(TestDb *pDb, void *pKey, int nKey){
- return pDb->pMethods->xDelete(pDb, pKey, nKey);
-}
-
-int tdb_delete_range(
- TestDb *pDb, void *pKey1, int nKey1, void *pKey2, int nKey2
-){
- return pDb->pMethods->xDeleteRange(pDb, pKey1, nKey1, pKey2, nKey2);
-}
-
-int tdb_fetch(TestDb *pDb, void *pKey, int nKey, void **ppVal, int *pnVal){
- return pDb->pMethods->xFetch(pDb, pKey, nKey, ppVal, pnVal);
-}
-
-int tdb_scan(
- TestDb *pDb, /* Database handle */
- void *pCtx, /* Context pointer to pass to xCallback */
- int bReverse, /* True to scan in reverse order */
- void *pKey1, int nKey1, /* Start of search */
- void *pKey2, int nKey2, /* End of search */
- void (*xCallback)(void *pCtx, void *pKey, int nKey, void *pVal, int nVal)
-){
- return pDb->pMethods->xScan(
- pDb, pCtx, bReverse, pKey1, nKey1, pKey2, nKey2, xCallback
- );
-}
-
-int tdb_begin(TestDb *pDb, int iLevel){
- return pDb->pMethods->xBegin(pDb, iLevel);
-}
-int tdb_commit(TestDb *pDb, int iLevel){
- return pDb->pMethods->xCommit(pDb, iLevel);
-}
-int tdb_rollback(TestDb *pDb, int iLevel){
- return pDb->pMethods->xRollback(pDb, iLevel);
-}
-
-int tdb_transaction_support(TestDb *pDb){
- return (pDb->pMethods->xBegin != error_transaction_function);
-}
-
-const char *tdb_library_name(TestDb *pDb){
- return pDb->zLibrary;
-}
-
-/*
-** End exported functions.
-*************************************************************************/
diff --git a/ext/lsm1/lsm-test/lsmtest_tdb.h b/ext/lsm1/lsm-test/lsmtest_tdb.h
deleted file mode 100644
index c55b6e2f80..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_tdb.h
+++ /dev/null
@@ -1,174 +0,0 @@
-
-/*
-** This file is the interface to a very simple database library used for
-** testing. The interface is similar to that of the LSM. The main virtue
-** of this library is that the same API may be used to access a key-value
-** store implemented by LSM, SQLite or another database system. Which
-** makes it easy to use for correctness and performance tests.
-*/
-
-#ifndef __WRAPPER_H_
-#define __WRAPPER_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include "lsm.h"
-
-typedef struct TestDb TestDb;
-
-/*
-** Open a new database connection. The first argument is the name of the
-** database library to use. e.g. something like:
-**
-** "sqlite3"
-** "lsm"
-**
-** See function tdb_system_name() for a list of available database systems.
-**
-** The second argument is the name of the database to open (e.g. a filename).
-**
-** If the third parameter is non-zero, then any existing database by the
-** name of zDb is removed before opening a new one. If it is zero, then an
-** existing database may be opened.
-*/
-int tdb_open(const char *zLibrary, const char *zDb, int bClear, TestDb **ppDb);
-
-/*
-** Close a database handle.
-*/
-int tdb_close(TestDb *pDb);
-
-/*
-** Write a new key/value into the database.
-*/
-int tdb_write(TestDb *pDb, void *pKey, int nKey, void *pVal, int nVal);
-
-/*
-** Delete a key from the database.
-*/
-int tdb_delete(TestDb *pDb, void *pKey, int nKey);
-
-/*
-** Delete a range of keys from the database.
-*/
-int tdb_delete_range(TestDb *, void *pKey1, int nKey1, void *pKey2, int nKey2);
-
-/*
-** Query the database for key (pKey/nKey). If no entry is found, set *ppVal
-** to 0 and *pnVal to -1 before returning. Otherwise, set *ppVal and *pnVal
-** to a pointer to and size of the value associated with (pKey/nKey).
-*/
-int tdb_fetch(TestDb *pDb, void *pKey, int nKey, void **ppVal, int *pnVal);
-
-/*
-** Open and close nested transactions. Currently, these functions only
-** work for SQLite3 and LSM systems. Use the tdb_transaction_support()
-** function to determine if a given TestDb handle supports these methods.
-**
-** These functions and the iLevel parameter follow the same conventions as
-** the SQLite 4 transaction interface. Note that this is slightly different
-** from the way LSM does things. As follows:
-**
-** tdb_begin():
-** A successful call to tdb_begin() with (iLevel>1) guarantees that
-** there are at least (iLevel-1) write transactions open. If iLevel==1,
-** then it guarantees that at least a read-transaction is open. Calling
-** tdb_begin() with iLevel==0 is a no-op.
-**
-** tdb_commit():
-** A successful call to tdb_commit() with (iLevel>1) guarantees that
-** there are at most (iLevel-1) write transactions open. If iLevel==1,
-** then it guarantees that there are no write transactions open (although
-** a read-transaction may remain open). Calling tdb_commit() with
-** iLevel==0 ensures that all transactions, read or write, have been
-** closed and committed.
-**
-** tdb_rollback():
-** This call is similar to tdb_commit(), except that instead of committing
-** transactions, it reverts them. For example, calling tdb_rollback() with
-** iLevel==2 ensures that there is at most one write transaction open, and
-** restores the database to the state that it was in when that transaction
-** was opened.
-**
-** In other words, tdb_commit() just closes transactions - tdb_rollback()
-** closes transactions and then restores the database to the state it
-** was in before those transactions were even opened.
-*/
-int tdb_begin(TestDb *pDb, int iLevel);
-int tdb_commit(TestDb *pDb, int iLevel);
-int tdb_rollback(TestDb *pDb, int iLevel);
-
-/*
-** Return true if transactions are supported, or false otherwise.
-*/
-int tdb_transaction_support(TestDb *pDb);
-
-/*
-** Return the name of the database library (as passed to tdb_open()) used
-** by the handled passed as the first argument.
-*/
-const char *tdb_library_name(TestDb *pDb);
-
-/*
-** Scan a range of database keys. Invoke the callback function for each
-** key visited.
-*/
-int tdb_scan(
- TestDb *pDb, /* Database handle */
- void *pCtx, /* Context pointer to pass to xCallback */
- int bReverse, /* True to scan in reverse order */
- void *pKey1, int nKey1, /* Start of search */
- void *pKey2, int nKey2, /* End of search */
- void (*xCallback)(void *pCtx, void *pKey, int nKey, void *pVal, int nVal)
-);
-
-const char *tdb_system_name(int i);
-const char *tdb_default_db(const char *zSys);
-
-int tdb_lsm_open(const char *zCfg, const char *zDb, int bClear, TestDb **ppDb);
-
-/*
-** If the TestDb handle passed as an argument is a wrapper around an LSM
-** database, return the LSM handle. Otherwise, if the argument is some other
-** database system, return NULL.
-*/
-lsm_db *tdb_lsm(TestDb *pDb);
-
-/*
-** Return true if the db passed as an argument is a multi-threaded LSM
-** connection.
-*/
-int tdb_lsm_multithread(TestDb *pDb);
-
-/*
-** Return a pointer to the lsm_env object used by all lsm database
-** connections initialized as a copy of the object returned by
-** lsm_default_env(). It may be modified (e.g. to override functions)
-** if the caller can guarantee that it is not already in use.
-*/
-lsm_env *tdb_lsm_env(void);
-
-/*
-** The following functions only work with LSM database handles. It is
-** illegal to call them with any other type of database handle specified
-** as an argument.
-*/
-void tdb_lsm_enable_log(TestDb *pDb, int bEnable);
-void tdb_lsm_application_crash(TestDb *pDb);
-void tdb_lsm_prepare_system_crash(TestDb *pDb);
-void tdb_lsm_system_crash(TestDb *pDb);
-void tdb_lsm_prepare_sync_crash(TestDb *pDb, int iSync);
-
-
-void tdb_lsm_safety(TestDb *pDb, int eMode);
-void tdb_lsm_config_work_hook(TestDb *pDb, void (*)(lsm_db *, void *), void *);
-void tdb_lsm_write_hook(TestDb *, void(*)(void*,int,lsm_i64,int,int), void*);
-int tdb_lsm_config_str(TestDb *pDb, const char *zStr);
-
-#ifdef __cplusplus
-} /* End of the 'extern "C"' block */
-#endif
-
-#endif
diff --git a/ext/lsm1/lsm-test/lsmtest_tdb2.cc b/ext/lsm1/lsm-test/lsmtest_tdb2.cc
deleted file mode 100644
index 86ebb49583..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_tdb2.cc
+++ /dev/null
@@ -1,369 +0,0 @@
-
-
-#include "lsmtest.h"
-#include
-
-#ifdef HAVE_KYOTOCABINET
-#include "kcpolydb.h"
-extern "C" {
- struct KcDb {
- TestDb base;
- kyotocabinet::TreeDB* db;
- char *pVal;
- };
-}
-
-int test_kc_open(const char *zFilename, int bClear, TestDb **ppDb){
- KcDb *pKcDb;
- int ok;
- int rc = 0;
-
- if( bClear ){
- char *zCmd = sqlite3_mprintf("rm -rf %s\n", zFilename);
- system(zCmd);
- sqlite3_free(zCmd);
- }
-
- pKcDb = (KcDb *)malloc(sizeof(KcDb));
- memset(pKcDb, 0, sizeof(KcDb));
-
-
- pKcDb->db = new kyotocabinet::TreeDB();
- pKcDb->db->tune_page(TESTDB_DEFAULT_PAGE_SIZE);
- pKcDb->db->tune_page_cache(
- TESTDB_DEFAULT_PAGE_SIZE * TESTDB_DEFAULT_CACHE_SIZE
- );
- ok = pKcDb->db->open(zFilename,
- kyotocabinet::PolyDB::OWRITER | kyotocabinet::PolyDB::OCREATE
- );
- if( ok==0 ){
- free(pKcDb);
- pKcDb = 0;
- rc = 1;
- }
-
- *ppDb = (TestDb *)pKcDb;
- return rc;
-}
-
-int test_kc_close(TestDb *pDb){
- KcDb *pKcDb = (KcDb *)pDb;
- if( pKcDb->pVal ){
- delete [] pKcDb->pVal;
- }
- pKcDb->db->close();
- delete pKcDb->db;
- free(pKcDb);
- return 0;
-}
-
-int test_kc_write(TestDb *pDb, void *pKey, int nKey, void *pVal, int nVal){
- KcDb *pKcDb = (KcDb *)pDb;
- int ok;
-
- ok = pKcDb->db->set((const char *)pKey, nKey, (const char *)pVal, nVal);
- return (ok ? 0 : 1);
-}
-
-int test_kc_delete(TestDb *pDb, void *pKey, int nKey){
- KcDb *pKcDb = (KcDb *)pDb;
- int ok;
-
- ok = pKcDb->db->remove((const char *)pKey, nKey);
- return (ok ? 0 : 1);
-}
-
-int test_kc_delete_range(
- TestDb *pDb,
- void *pKey1, int nKey1,
- void *pKey2, int nKey2
-){
- int res;
- KcDb *pKcDb = (KcDb *)pDb;
- kyotocabinet::DB::Cursor* pCur = pKcDb->db->cursor();
-
- if( pKey1 ){
- res = pCur->jump((const char *)pKey1, nKey1);
- }else{
- res = pCur->jump();
- }
-
- while( 1 ){
- const char *pKey; size_t nKey;
- const char *pVal; size_t nVal;
-
- pKey = pCur->get(&nKey, &pVal, &nVal);
- if( pKey==0 ) break;
-
-#ifndef NDEBUG
- if( pKey1 ){
- res = memcmp(pKey, pKey1, MIN((size_t)nKey1, nKey));
- assert( res>0 || (res==0 && nKey>nKey1) );
- }
-#endif
-
- if( pKey2 ){
- res = memcmp(pKey, pKey2, MIN((size_t)nKey2, nKey));
- if( res>0 || (res==0 && (size_t)nKey2remove();
- delete [] pKey;
- }
-
- delete pCur;
- return 0;
-}
-
-int test_kc_fetch(
- TestDb *pDb,
- void *pKey,
- int nKey,
- void **ppVal,
- int *pnVal
-){
- KcDb *pKcDb = (KcDb *)pDb;
- size_t nVal;
-
- if( pKcDb->pVal ){
- delete [] pKcDb->pVal;
- pKcDb->pVal = 0;
- }
-
- pKcDb->pVal = pKcDb->db->get((const char *)pKey, nKey, &nVal);
- if( pKcDb->pVal ){
- *ppVal = pKcDb->pVal;
- *pnVal = nVal;
- }else{
- *ppVal = 0;
- *pnVal = -1;
- }
-
- return 0;
-}
-
-int test_kc_scan(
- TestDb *pDb, /* Database handle */
- void *pCtx, /* Context pointer to pass to xCallback */
- int bReverse, /* True for a reverse order scan */
- void *pKey1, int nKey1, /* Start of search */
- void *pKey2, int nKey2, /* End of search */
- void (*xCallback)(void *pCtx, void *pKey, int nKey, void *pVal, int nVal)
-){
- KcDb *pKcDb = (KcDb *)pDb;
- kyotocabinet::DB::Cursor* pCur = pKcDb->db->cursor();
- int res;
-
- if( bReverse==0 ){
- if( pKey1 ){
- res = pCur->jump((const char *)pKey1, nKey1);
- }else{
- res = pCur->jump();
- }
- }else{
- if( pKey2 ){
- res = pCur->jump_back((const char *)pKey2, nKey2);
- }else{
- res = pCur->jump_back();
- }
- }
-
- while( res ){
- const char *pKey; size_t nKey;
- const char *pVal; size_t nVal;
- pKey = pCur->get(&nKey, &pVal, &nVal);
-
- if( bReverse==0 && pKey2 ){
- res = memcmp(pKey, pKey2, MIN((size_t)nKey2, nKey));
- if( res>0 || (res==0 && (size_t)nKey2nKey) ){
- delete [] pKey;
- break;
- }
- }
-
- xCallback(pCtx, (void *)pKey, (int)nKey, (void *)pVal, (int)nVal);
- delete [] pKey;
-
- if( bReverse ){
- res = pCur->step_back();
- }else{
- res = pCur->step();
- }
- }
-
- delete pCur;
- return 0;
-}
-#endif /* HAVE_KYOTOCABINET */
-
-#ifdef HAVE_MDB
-#include "lmdb.h"
-
-extern "C" {
- struct MdbDb {
- TestDb base;
- MDB_env *env;
- MDB_dbi dbi;
- };
-}
-
-int test_mdb_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- MDB_txn *txn;
- MdbDb *pMdb;
- int rc;
-
- if( bClear ){
- char *zCmd = sqlite3_mprintf("rm -rf %s\n", zFilename);
- system(zCmd);
- sqlite3_free(zCmd);
- }
-
- pMdb = (MdbDb *)malloc(sizeof(MdbDb));
- memset(pMdb, 0, sizeof(MdbDb));
-
- rc = mdb_env_create(&pMdb->env);
- if( rc==0 ) rc = mdb_env_set_mapsize(pMdb->env, 1*1024*1024*1024);
- if( rc==0 ) rc = mdb_env_open(pMdb->env, zFilename, MDB_NOSYNC|MDB_NOSUBDIR, 0600);
- if( rc==0 ) rc = mdb_txn_begin(pMdb->env, NULL, 0, &txn);
- if( rc==0 ){
- rc = mdb_open(txn, NULL, 0, &pMdb->dbi);
- mdb_txn_commit(txn);
- }
-
- *ppDb = (TestDb *)pMdb;
- return rc;
-}
-
-int test_mdb_close(TestDb *pDb){
- MdbDb *pMdb = (MdbDb *)pDb;
-
- mdb_close(pMdb->env, pMdb->dbi);
- mdb_env_close(pMdb->env);
- free(pMdb);
- return 0;
-}
-
-int test_mdb_write(TestDb *pDb, void *pKey, int nKey, void *pVal, int nVal){
- int rc;
- MdbDb *pMdb = (MdbDb *)pDb;
- MDB_val val;
- MDB_val key;
- MDB_txn *txn;
-
- val.mv_size = nVal;
- val.mv_data = pVal;
- key.mv_size = nKey;
- key.mv_data = pKey;
-
- rc = mdb_txn_begin(pMdb->env, NULL, 0, &txn);
- if( rc==0 ){
- rc = mdb_put(txn, pMdb->dbi, &key, &val, 0);
- if( rc==0 ){
- rc = mdb_txn_commit(txn);
- }else{
- mdb_txn_abort(txn);
- }
- }
-
- return rc;
-}
-
-int test_mdb_delete(TestDb *pDb, void *pKey, int nKey){
- int rc;
- MdbDb *pMdb = (MdbDb *)pDb;
- MDB_val key;
- MDB_txn *txn;
-
- key.mv_size = nKey;
- key.mv_data = pKey;
- rc = mdb_txn_begin(pMdb->env, NULL, 0, &txn);
- if( rc==0 ){
- rc = mdb_del(txn, pMdb->dbi, &key, 0);
- if( rc==0 ){
- rc = mdb_txn_commit(txn);
- }else{
- mdb_txn_abort(txn);
- }
- }
-
- return rc;
-}
-
-int test_mdb_fetch(
- TestDb *pDb,
- void *pKey,
- int nKey,
- void **ppVal,
- int *pnVal
-){
- int rc;
- MdbDb *pMdb = (MdbDb *)pDb;
- MDB_val key;
- MDB_txn *txn;
-
- key.mv_size = nKey;
- key.mv_data = pKey;
-
- rc = mdb_txn_begin(pMdb->env, NULL, MDB_RDONLY, &txn);
- if( rc==0 ){
- MDB_val val = {0, 0};
- rc = mdb_get(txn, pMdb->dbi, &key, &val);
- if( rc==MDB_NOTFOUND ){
- rc = 0;
- *ppVal = 0;
- *pnVal = -1;
- }else{
- *ppVal = val.mv_data;
- *pnVal = val.mv_size;
- }
- mdb_txn_commit(txn);
- }
-
- return rc;
-}
-
-int test_mdb_scan(
- TestDb *pDb, /* Database handle */
- void *pCtx, /* Context pointer to pass to xCallback */
- int bReverse, /* True for a reverse order scan */
- void *pKey1, int nKey1, /* Start of search */
- void *pKey2, int nKey2, /* End of search */
- void (*xCallback)(void *pCtx, void *pKey, int nKey, void *pVal, int nVal)
-){
- MdbDb *pMdb = (MdbDb *)pDb;
- int rc;
- MDB_cursor_op op = bReverse ? MDB_PREV : MDB_NEXT;
- MDB_txn *txn;
-
- rc = mdb_txn_begin(pMdb->env, NULL, MDB_RDONLY, &txn);
- if( rc==0 ){
- MDB_cursor *csr;
- MDB_val key = {0, 0};
- MDB_val val = {0, 0};
-
- rc = mdb_cursor_open(txn, pMdb->dbi, &csr);
- if( rc==0 ){
- while( mdb_cursor_get(csr, &key, &val, op)==0 ){
- xCallback(pCtx, key.mv_data, key.mv_size, val.mv_data, val.mv_size);
- }
- mdb_cursor_close(csr);
- }
- }
-
- return rc;
-}
-
-#endif /* HAVE_MDB */
diff --git a/ext/lsm1/lsm-test/lsmtest_tdb3.c b/ext/lsm1/lsm-test/lsmtest_tdb3.c
deleted file mode 100644
index e29497af20..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_tdb3.c
+++ /dev/null
@@ -1,1429 +0,0 @@
-
-#include "lsmtest_tdb.h"
-#include "lsm.h"
-#include "lsmtest.h"
-
-#include
-#include
-#include
-#ifndef _WIN32
-# include
-#endif
-#include
-
-#ifndef _WIN32
-# include
-#endif
-
-typedef struct LsmDb LsmDb;
-typedef struct LsmWorker LsmWorker;
-typedef struct LsmFile LsmFile;
-
-#define LSMTEST_DFLT_MT_MAX_CKPT (8*1024)
-#define LSMTEST_DFLT_MT_MIN_CKPT (2*1024)
-
-#ifdef LSM_MUTEX_PTHREADS
-#include
-
-#define LSMTEST_THREAD_CKPT 1
-#define LSMTEST_THREAD_WORKER 2
-#define LSMTEST_THREAD_WORKER_AC 3
-
-/*
-** There are several different types of worker threads that run in different
-** test configurations, depending on the value of LsmWorker.eType.
-**
-** 1. Checkpointer.
-** 2. Worker with auto-checkpoint.
-** 3. Worker without auto-checkpoint.
-*/
-struct LsmWorker {
- LsmDb *pDb; /* Main database structure */
- lsm_db *pWorker; /* Worker database handle */
- pthread_t worker_thread; /* Worker thread */
- pthread_cond_t worker_cond; /* Condition var the worker waits on */
- pthread_mutex_t worker_mutex; /* Mutex used with worker_cond */
- int bDoWork; /* Set to true by client when there is work */
- int worker_rc; /* Store error code here */
- int eType; /* LSMTEST_THREAD_XXX constant */
- int bBlock;
-};
-#else
-struct LsmWorker { int worker_rc; int bBlock; };
-#endif
-
-static void mt_shutdown(LsmDb *);
-
-lsm_env *tdb_lsm_env(void){
- static int bInit = 0;
- static lsm_env env;
- if( bInit==0 ){
- memcpy(&env, lsm_default_env(), sizeof(env));
- bInit = 1;
- }
- return &env;
-}
-
-typedef struct FileSector FileSector;
-typedef struct FileData FileData;
-
-struct FileSector {
- u8 *aOld; /* Old data for this sector */
-};
-
-struct FileData {
- int nSector; /* Allocated size of apSector[] array */
- FileSector *aSector; /* Array of file sectors */
-};
-
-/*
-** bPrepareCrash:
-** If non-zero, the file wrappers maintain enough in-memory data to
-** simulate the effect of a power-failure on the file-system (i.e. that
-** unsynced sectors may be written, not written, or overwritten with
-** arbitrary data when the crash occurs).
-**
-** bCrashed:
-** Set to true after a crash is simulated. Once this variable is true, all
-** VFS methods other than xClose() return LSM_IOERR as soon as they are
-** called (without affecting the contents of the file-system).
-**
-** env:
-** The environment object used by all lsm_db* handles opened by this
-** object (i.e. LsmDb.db plus any worker connections). Variable env.pVfsCtx
-** always points to the containing LsmDb structure.
-*/
-struct LsmDb {
- TestDb base; /* Base class - methods table */
- lsm_env env; /* Environment used by connection db */
- char *zName; /* Database file name */
- lsm_db *db; /* LSM database handle */
-
- lsm_cursor *pCsr; /* Cursor held open during read transaction */
- void *pBuf; /* Buffer for tdb_fetch() output */
- int nBuf; /* Allocated (not used) size of pBuf */
-
- /* Crash testing related state */
- int bCrashed; /* True once a crash has occurred */
- int nAutoCrash; /* Number of syncs until a crash */
- int bPrepareCrash; /* True to store writes in memory */
-
- /* Unsynced data (while crash testing) */
- int szSector; /* Assumed size of disk sectors (512B) */
- FileData aFile[2]; /* Database and log file data */
-
- /* Other test instrumentation */
- int bNoRecovery; /* If true, assume DMS2 is locked */
-
- /* Work hook redirection */
- void (*xWork)(lsm_db *, void *);
- void *pWorkCtx;
-
- /* IO logging hook */
- void (*xWriteHook)(void *, int, lsm_i64, int, int);
- void *pWriteCtx;
-
- /* Worker threads (for lsm_mt) */
- int nMtMinCkpt;
- int nMtMaxCkpt;
- int eMode;
- int nWorker;
- LsmWorker *aWorker;
-};
-
-#define LSMTEST_MODE_SINGLETHREAD 1
-#define LSMTEST_MODE_BACKGROUND_CKPT 2
-#define LSMTEST_MODE_BACKGROUND_WORK 3
-#define LSMTEST_MODE_BACKGROUND_BOTH 4
-
-/*************************************************************************
-**************************************************************************
-** Begin test VFS code.
-*/
-
-struct LsmFile {
- lsm_file *pReal; /* Real underlying file */
- int bLog; /* True for log file. False for db file */
- LsmDb *pDb; /* Database handle that uses this file */
-};
-
-static int testEnvFullpath(
- lsm_env *pEnv, /* Environment for current LsmDb */
- const char *zFile, /* Relative path name */
- char *zOut, /* Output buffer */
- int *pnOut /* IN/OUT: Size of output buffer */
-){
- lsm_env *pRealEnv = tdb_lsm_env();
- return pRealEnv->xFullpath(pRealEnv, zFile, zOut, pnOut);
-}
-
-static int testEnvOpen(
- lsm_env *pEnv, /* Environment for current LsmDb */
- const char *zFile, /* Name of file to open */
- int flags,
- lsm_file **ppFile /* OUT: New file handle object */
-){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmDb *pDb = (LsmDb *)pEnv->pVfsCtx;
- int rc; /* Return Code */
- LsmFile *pRet; /* The new file handle */
- int nFile; /* Length of string zFile in bytes */
-
- nFile = strlen(zFile);
- pRet = (LsmFile *)testMalloc(sizeof(LsmFile));
- pRet->pDb = pDb;
- pRet->bLog = (nFile > 4 && 0==memcmp("-log", &zFile[nFile-4], 4));
-
- rc = pRealEnv->xOpen(pRealEnv, zFile, flags, &pRet->pReal);
- if( rc!=LSM_OK ){
- testFree(pRet);
- pRet = 0;
- }
-
- *ppFile = (lsm_file *)pRet;
- return rc;
-}
-
-static int testEnvRead(lsm_file *pFile, lsm_i64 iOff, void *pData, int nData){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmFile *p = (LsmFile *)pFile;
- if( p->pDb->bCrashed ) return LSM_IOERR;
- return pRealEnv->xRead(p->pReal, iOff, pData, nData);
-}
-
-static int testEnvWrite(lsm_file *pFile, lsm_i64 iOff, void *pData, int nData){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmFile *p = (LsmFile *)pFile;
- LsmDb *pDb = p->pDb;
-
- if( pDb->bCrashed ) return LSM_IOERR;
-
- if( pDb->bPrepareCrash ){
- FileData *pData2 = &pDb->aFile[p->bLog];
- int iFirst;
- int iLast;
- int iSector;
-
- iFirst = (int)(iOff / pDb->szSector);
- iLast = (int)((iOff + nData - 1) / pDb->szSector);
-
- if( pData2->nSector<(iLast+1) ){
- int nNew = ( ((iLast + 1) + 63) / 64 ) * 64;
- assert( nNew>iLast );
- pData2->aSector = (FileSector *)testRealloc(
- pData2->aSector, nNew*sizeof(FileSector)
- );
- memset(&pData2->aSector[pData2->nSector],
- 0, (nNew - pData2->nSector) * sizeof(FileSector)
- );
- pData2->nSector = nNew;
- }
-
- for(iSector=iFirst; iSector<=iLast; iSector++){
- if( pData2->aSector[iSector].aOld==0 ){
- u8 *aOld = (u8 *)testMalloc(pDb->szSector);
- pRealEnv->xRead(
- p->pReal, (lsm_i64)iSector*pDb->szSector, aOld, pDb->szSector
- );
- pData2->aSector[iSector].aOld = aOld;
- }
- }
- }
-
- if( pDb->xWriteHook ){
- int rc;
- int nUs;
- struct timeval t1;
- struct timeval t2;
-
- gettimeofday(&t1, 0);
- assert( nData>0 );
- rc = pRealEnv->xWrite(p->pReal, iOff, pData, nData);
- gettimeofday(&t2, 0);
-
- nUs = (t2.tv_sec - t1.tv_sec) * 1000000 + (t2.tv_usec - t1.tv_usec);
- pDb->xWriteHook(pDb->pWriteCtx, p->bLog, iOff, nData, nUs);
- return rc;
- }
-
- return pRealEnv->xWrite(p->pReal, iOff, pData, nData);
-}
-
-static void doSystemCrash(LsmDb *pDb);
-
-static int testEnvSync(lsm_file *pFile){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmFile *p = (LsmFile *)pFile;
- LsmDb *pDb = p->pDb;
- FileData *pData = &pDb->aFile[p->bLog];
- int i;
-
- if( pDb->bCrashed ) return LSM_IOERR;
-
- if( pDb->nAutoCrash ){
- pDb->nAutoCrash--;
- if( pDb->nAutoCrash==0 ){
- doSystemCrash(pDb);
- pDb->bCrashed = 1;
- return LSM_IOERR;
- }
- }
-
- if( pDb->bPrepareCrash ){
- for(i=0; inSector; i++){
- testFree(pData->aSector[i].aOld);
- pData->aSector[i].aOld = 0;
- }
- }
-
- if( pDb->xWriteHook ){
- int rc;
- int nUs;
- struct timeval t1;
- struct timeval t2;
-
- gettimeofday(&t1, 0);
- rc = pRealEnv->xSync(p->pReal);
- gettimeofday(&t2, 0);
-
- nUs = (t2.tv_sec - t1.tv_sec) * 1000000 + (t2.tv_usec - t1.tv_usec);
- pDb->xWriteHook(pDb->pWriteCtx, p->bLog, 0, 0, nUs);
- return rc;
- }
-
- return pRealEnv->xSync(p->pReal);
-}
-
-static int testEnvTruncate(lsm_file *pFile, lsm_i64 iOff){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmFile *p = (LsmFile *)pFile;
- if( p->pDb->bCrashed ) return LSM_IOERR;
- return pRealEnv->xTruncate(p->pReal, iOff);
-}
-
-static int testEnvSectorSize(lsm_file *pFile){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmFile *p = (LsmFile *)pFile;
- return pRealEnv->xSectorSize(p->pReal);
-}
-
-static int testEnvRemap(
- lsm_file *pFile,
- lsm_i64 iMin,
- void **ppOut,
- lsm_i64 *pnOut
-){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmFile *p = (LsmFile *)pFile;
- return pRealEnv->xRemap(p->pReal, iMin, ppOut, pnOut);
-}
-
-static int testEnvFileid(
- lsm_file *pFile,
- void *ppOut,
- int *pnOut
-){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmFile *p = (LsmFile *)pFile;
- return pRealEnv->xFileid(p->pReal, ppOut, pnOut);
-}
-
-static int testEnvClose(lsm_file *pFile){
- lsm_env *pRealEnv = tdb_lsm_env();
- LsmFile *p = (LsmFile *)pFile;
-
- pRealEnv->xClose(p->pReal);
- testFree(p);
- return LSM_OK;
-}
-
-static int testEnvUnlink(lsm_env *pEnv, const char *zFile){
- lsm_env *pRealEnv = tdb_lsm_env();
- unused_parameter(pEnv);
- return pRealEnv->xUnlink(pRealEnv, zFile);
-}
-
-static int testEnvLock(lsm_file *pFile, int iLock, int eType){
- LsmFile *p = (LsmFile *)pFile;
- lsm_env *pRealEnv = tdb_lsm_env();
-
- if( iLock==2 && eType==LSM_LOCK_EXCL && p->pDb->bNoRecovery ){
- return LSM_BUSY;
- }
- return pRealEnv->xLock(p->pReal, iLock, eType);
-}
-
-static int testEnvTestLock(lsm_file *pFile, int iLock, int nLock, int eType){
- LsmFile *p = (LsmFile *)pFile;
- lsm_env *pRealEnv = tdb_lsm_env();
-
- if( iLock==2 && eType==LSM_LOCK_EXCL && p->pDb->bNoRecovery ){
- return LSM_BUSY;
- }
- return pRealEnv->xTestLock(p->pReal, iLock, nLock, eType);
-}
-
-static int testEnvShmMap(lsm_file *pFile, int iRegion, int sz, void **pp){
- LsmFile *p = (LsmFile *)pFile;
- lsm_env *pRealEnv = tdb_lsm_env();
- return pRealEnv->xShmMap(p->pReal, iRegion, sz, pp);
-}
-
-static void testEnvShmBarrier(void){
-}
-
-static int testEnvShmUnmap(lsm_file *pFile, int bDel){
- LsmFile *p = (LsmFile *)pFile;
- lsm_env *pRealEnv = tdb_lsm_env();
- return pRealEnv->xShmUnmap(p->pReal, bDel);
-}
-
-static int testEnvSleep(lsm_env *pEnv, int us){
- lsm_env *pRealEnv = tdb_lsm_env();
- return pRealEnv->xSleep(pRealEnv, us);
-}
-
-static void doSystemCrash(LsmDb *pDb){
- lsm_env *pEnv = tdb_lsm_env();
- int iFile;
- int iSeed = pDb->aFile[0].nSector + pDb->aFile[1].nSector;
-
- char *zFile = pDb->zName;
- char *zFree = 0;
-
- for(iFile=0; iFile<2; iFile++){
- lsm_file *pFile = 0;
- int i;
-
- pEnv->xOpen(pEnv, zFile, 0, &pFile);
- for(i=0; iaFile[iFile].nSector; i++){
- u8 *aOld = pDb->aFile[iFile].aSector[i].aOld;
- if( aOld ){
- int iOpt = testPrngValue(iSeed++) % 3;
- switch( iOpt ){
- case 0:
- break;
-
- case 1:
- testPrngArray(iSeed++, (u32 *)aOld, pDb->szSector/4);
- /* Fall-through */
-
- case 2:
- pEnv->xWrite(
- pFile, (lsm_i64)i * pDb->szSector, aOld, pDb->szSector
- );
- break;
- }
- testFree(aOld);
- pDb->aFile[iFile].aSector[i].aOld = 0;
- }
- }
- pEnv->xClose(pFile);
- zFree = zFile = sqlite3_mprintf("%s-log", pDb->zName);
- }
-
- sqlite3_free(zFree);
-}
-/*
-** End test VFS code.
-**************************************************************************
-*************************************************************************/
-
-/*************************************************************************
-**************************************************************************
-** Begin test compression hooks.
-*/
-
-#ifdef HAVE_ZLIB
-#include
-
-static int testZipBound(void *pCtx, int nSrc){
- return compressBound(nSrc);
-}
-
-static int testZipCompress(
- void *pCtx, /* Context pointer */
- char *aOut, int *pnOut, /* OUT: Buffer containing compressed data */
- const char *aIn, int nIn /* Buffer containing input data */
-){
- uLongf n = *pnOut; /* In/out buffer size for compress() */
- int rc; /* compress() return code */
-
- rc = compress((Bytef*)aOut, &n, (Bytef*)aIn, nIn);
- *pnOut = n;
- return (rc==Z_OK ? 0 : LSM_ERROR);
-}
-
-static int testZipUncompress(
- void *pCtx, /* Context pointer */
- char *aOut, int *pnOut, /* OUT: Buffer containing uncompressed data */
- const char *aIn, int nIn /* Buffer containing input data */
-){
- uLongf n = *pnOut; /* In/out buffer size for uncompress() */
- int rc; /* uncompress() return code */
-
- rc = uncompress((Bytef*)aOut, &n, (Bytef*)aIn, nIn);
- *pnOut = n;
- return (rc==Z_OK ? 0 : LSM_ERROR);
-}
-
-static int testConfigureCompression(lsm_db *pDb){
- static lsm_compress zip = {
- 0, /* Context pointer (unused) */
- 1, /* Id value */
- testZipBound, /* xBound method */
- testZipCompress, /* xCompress method */
- testZipUncompress /* xUncompress method */
- };
- return lsm_config(pDb, LSM_CONFIG_SET_COMPRESSION, &zip);
-}
-#endif /* ifdef HAVE_ZLIB */
-
-/*
-** End test compression hooks.
-**************************************************************************
-*************************************************************************/
-
-static int test_lsm_close(TestDb *pTestDb){
- int i;
- int rc = LSM_OK;
- LsmDb *pDb = (LsmDb *)pTestDb;
-
- lsm_csr_close(pDb->pCsr);
- lsm_close(pDb->db);
-
- /* If this is a multi-threaded database, wait on the worker threads. */
- mt_shutdown(pDb);
- for(i=0; inWorker && rc==LSM_OK; i++){
- rc = pDb->aWorker[i].worker_rc;
- }
-
- for(i=0; iaFile[0].nSector; i++){
- testFree(pDb->aFile[0].aSector[i].aOld);
- }
- testFree(pDb->aFile[0].aSector);
- for(i=0; iaFile[1].nSector; i++){
- testFree(pDb->aFile[1].aSector[i].aOld);
- }
- testFree(pDb->aFile[1].aSector);
-
- memset(pDb, sizeof(LsmDb), 0x11);
- testFree((char *)pDb->pBuf);
- testFree((char *)pDb);
- return rc;
-}
-
-static void mt_signal_worker(LsmDb*, int);
-
-static int waitOnCheckpointer(LsmDb *pDb, lsm_db *db){
- int nSleep = 0;
- int nKB;
- int rc;
-
- do {
- nKB = 0;
- rc = lsm_info(db, LSM_INFO_CHECKPOINT_SIZE, &nKB);
- if( rc!=LSM_OK || nKBnMtMaxCkpt ) break;
-#ifdef LSM_MUTEX_PTHREADS
- mt_signal_worker(pDb,
- (pDb->eMode==LSMTEST_MODE_BACKGROUND_CKPT ? 0 : 1)
- );
-#endif
- usleep(5000);
- nSleep += 5;
- }while( 1 );
-
-#if 0
- if( nSleep ) printf("# waitOnCheckpointer(): nSleep=%d\n", nSleep);
-#endif
-
- return rc;
-}
-
-static int waitOnWorker(LsmDb *pDb){
- int rc;
- int nLimit = -1;
- int nSleep = 0;
-
- rc = lsm_config(pDb->db, LSM_CONFIG_AUTOFLUSH, &nLimit);
- do {
- int nOld, nNew, rc2;
- rc2 = lsm_info(pDb->db, LSM_INFO_TREE_SIZE, &nOld, &nNew);
- if( rc2!=LSM_OK ) return rc2;
- if( nOld==0 || nNew<(nLimit/2) ) break;
-#ifdef LSM_MUTEX_PTHREADS
- mt_signal_worker(pDb, 0);
-#endif
- usleep(5000);
- nSleep += 5;
- }while( 1 );
-
-#if 0
- if( nSleep ) printf("# waitOnWorker(): nSleep=%d\n", nSleep);
-#endif
-
- return rc;
-}
-
-static int test_lsm_write(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void *pVal,
- int nVal
-){
- LsmDb *pDb = (LsmDb *)pTestDb;
- int rc = LSM_OK;
-
- if( pDb->eMode==LSMTEST_MODE_BACKGROUND_CKPT ){
- rc = waitOnCheckpointer(pDb, pDb->db);
- }else if(
- pDb->eMode==LSMTEST_MODE_BACKGROUND_WORK
- || pDb->eMode==LSMTEST_MODE_BACKGROUND_BOTH
- ){
- rc = waitOnWorker(pDb);
- }
-
- if( rc==LSM_OK ){
- rc = lsm_insert(pDb->db, pKey, nKey, pVal, nVal);
- }
- return rc;
-}
-
-static int test_lsm_delete(TestDb *pTestDb, void *pKey, int nKey){
- LsmDb *pDb = (LsmDb *)pTestDb;
- return lsm_delete(pDb->db, pKey, nKey);
-}
-
-static int test_lsm_delete_range(
- TestDb *pTestDb,
- void *pKey1, int nKey1,
- void *pKey2, int nKey2
-){
- LsmDb *pDb = (LsmDb *)pTestDb;
- return lsm_delete_range(pDb->db, pKey1, nKey1, pKey2, nKey2);
-}
-
-static int test_lsm_fetch(
- TestDb *pTestDb,
- void *pKey,
- int nKey,
- void **ppVal,
- int *pnVal
-){
- int rc;
- LsmDb *pDb = (LsmDb *)pTestDb;
- lsm_cursor *csr;
-
- if( pKey==0 ) return LSM_OK;
-
- if( pDb->pCsr==0 ){
- rc = lsm_csr_open(pDb->db, &csr);
- if( rc!=LSM_OK ) return rc;
- }else{
- csr = pDb->pCsr;
- }
-
- rc = lsm_csr_seek(csr, pKey, nKey, LSM_SEEK_EQ);
- if( rc==LSM_OK ){
- if( lsm_csr_valid(csr) ){
- const void *pVal; int nVal;
- rc = lsm_csr_value(csr, &pVal, &nVal);
- if( nVal>pDb->nBuf ){
- testFree(pDb->pBuf);
- pDb->pBuf = testMalloc(nVal*2);
- pDb->nBuf = nVal*2;
- }
- memcpy(pDb->pBuf, pVal, nVal);
- *ppVal = pDb->pBuf;
- *pnVal = nVal;
- }else{
- *ppVal = 0;
- *pnVal = -1;
- }
- }
- if( pDb->pCsr==0 ){
- lsm_csr_close(csr);
- }
- return rc;
-}
-
-static int test_lsm_scan(
- TestDb *pTestDb,
- void *pCtx,
- int bReverse,
- void *pFirst, int nFirst,
- void *pLast, int nLast,
- void (*xCallback)(void *, void *, int , void *, int)
-){
- LsmDb *pDb = (LsmDb *)pTestDb;
- lsm_cursor *csr;
- lsm_cursor *csr2 = 0;
- int rc;
-
- if( pDb->pCsr==0 ){
- rc = lsm_csr_open(pDb->db, &csr);
- if( rc!=LSM_OK ) return rc;
- }else{
- rc = LSM_OK;
- csr = pDb->pCsr;
- }
-
- /* To enhance testing, if both pLast and pFirst are defined, seek the
- ** cursor to the "end" boundary here. Then the next block seeks it to
- ** the "start" ready for the scan. The point is to test that cursors
- ** can be reused. */
- if( pLast && pFirst ){
- if( bReverse ){
- rc = lsm_csr_seek(csr, pFirst, nFirst, LSM_SEEK_LE);
- }else{
- rc = lsm_csr_seek(csr, pLast, nLast, LSM_SEEK_GE);
- }
- }
-
- if( bReverse ){
- if( pLast ){
- rc = lsm_csr_seek(csr, pLast, nLast, LSM_SEEK_LE);
- }else{
- rc = lsm_csr_last(csr);
- }
- }else{
- if( pFirst ){
- rc = lsm_csr_seek(csr, pFirst, nFirst, LSM_SEEK_GE);
- }else{
- rc = lsm_csr_first(csr);
- }
- }
-
- while( rc==LSM_OK && lsm_csr_valid(csr) ){
- const void *pKey; int nKey;
- const void *pVal; int nVal;
- int cmp;
-
- lsm_csr_key(csr, &pKey, &nKey);
- lsm_csr_value(csr, &pVal, &nVal);
-
- if( bReverse && pFirst ){
- cmp = memcmp(pFirst, pKey, MIN(nKey, nFirst));
- if( cmp>0 || (cmp==0 && nFirst>nKey) ) break;
- }else if( bReverse==0 && pLast ){
- cmp = memcmp(pLast, pKey, MIN(nKey, nLast));
- if( cmp<0 || (cmp==0 && nLastpCsr==0 ){
- lsm_csr_close(csr);
- }
- return rc;
-}
-
-static int test_lsm_begin(TestDb *pTestDb, int iLevel){
- int rc = LSM_OK;
- LsmDb *pDb = (LsmDb *)pTestDb;
-
- /* iLevel==0 is a no-op. */
- if( iLevel==0 ) return 0;
-
- if( pDb->pCsr==0 ) rc = lsm_csr_open(pDb->db, &pDb->pCsr);
- if( rc==LSM_OK && iLevel>1 ){
- rc = lsm_begin(pDb->db, iLevel-1);
- }
-
- return rc;
-}
-static int test_lsm_commit(TestDb *pTestDb, int iLevel){
- LsmDb *pDb = (LsmDb *)pTestDb;
-
- /* If iLevel==0, close any open read transaction */
- if( iLevel==0 && pDb->pCsr ){
- lsm_csr_close(pDb->pCsr);
- pDb->pCsr = 0;
- }
-
- /* If iLevel==0, close any open read transaction */
- return lsm_commit(pDb->db, MAX(0, iLevel-1));
-}
-static int test_lsm_rollback(TestDb *pTestDb, int iLevel){
- LsmDb *pDb = (LsmDb *)pTestDb;
-
- /* If iLevel==0, close any open read transaction */
- if( iLevel==0 && pDb->pCsr ){
- lsm_csr_close(pDb->pCsr);
- pDb->pCsr = 0;
- }
-
- return lsm_rollback(pDb->db, MAX(0, iLevel-1));
-}
-
-/*
-** A log message callback registered with lsm connections. Prints all
-** messages to stderr.
-*/
-static void xLog(void *pCtx, int rc, const char *z){
- unused_parameter(rc);
- /* fprintf(stderr, "lsm: rc=%d \"%s\"\n", rc, z); */
- if( pCtx ) fprintf(stderr, "%s: ", (char *)pCtx);
- fprintf(stderr, "%s\n", z);
- fflush(stderr);
-}
-
-static void xWorkHook(lsm_db *db, void *pArg){
- LsmDb *p = (LsmDb *)pArg;
- if( p->xWork ) p->xWork(db, p->pWorkCtx);
-}
-
-#define TEST_NO_RECOVERY -1
-#define TEST_COMPRESSION -3
-
-#define TEST_MT_MODE -2
-#define TEST_MT_MIN_CKPT -4
-#define TEST_MT_MAX_CKPT -5
-
-
-int test_lsm_config_str(
- LsmDb *pLsm,
- lsm_db *db,
- int bWorker,
- const char *zStr,
- int *pnThread
-){
- struct CfgParam {
- const char *zParam;
- int bWorker;
- int eParam;
- } aParam[] = {
- { "autoflush", 0, LSM_CONFIG_AUTOFLUSH },
- { "page_size", 0, LSM_CONFIG_PAGE_SIZE },
- { "block_size", 0, LSM_CONFIG_BLOCK_SIZE },
- { "safety", 0, LSM_CONFIG_SAFETY },
- { "autowork", 0, LSM_CONFIG_AUTOWORK },
- { "autocheckpoint", 0, LSM_CONFIG_AUTOCHECKPOINT },
- { "mmap", 0, LSM_CONFIG_MMAP },
- { "use_log", 0, LSM_CONFIG_USE_LOG },
- { "automerge", 0, LSM_CONFIG_AUTOMERGE },
- { "max_freelist", 0, LSM_CONFIG_MAX_FREELIST },
- { "multi_proc", 0, LSM_CONFIG_MULTIPLE_PROCESSES },
- { "worker_automerge", 1, LSM_CONFIG_AUTOMERGE },
- { "test_no_recovery", 0, TEST_NO_RECOVERY },
- { "bg_min_ckpt", 0, TEST_NO_RECOVERY },
-
- { "mt_mode", 0, TEST_MT_MODE },
- { "mt_min_ckpt", 0, TEST_MT_MIN_CKPT },
- { "mt_max_ckpt", 0, TEST_MT_MAX_CKPT },
-
-#ifdef HAVE_ZLIB
- { "compression", 0, TEST_COMPRESSION },
-#endif
- { 0, 0 }
- };
- const char *z = zStr;
- int nThread = 1;
-
- if( zStr==0 ) return 0;
-
- assert( db );
- while( z[0] ){
- const char *zStart;
-
- /* Skip whitespace */
- while( *z==' ' ) z++;
- zStart = z;
-
- while( *z && *z!='=' ) z++;
- if( *z ){
- int eParam;
- int i;
- int iVal;
- int iMul = 1;
- int rc;
- char zParam[32];
- int nParam = z-zStart;
- if( nParam==0 || nParam>sizeof(zParam)-1 ) goto syntax_error;
-
- memcpy(zParam, zStart, nParam);
- zParam[nParam] = '\0';
- rc = testArgSelect(aParam, "param", zParam, &i);
- if( rc!=0 ) return rc;
- eParam = aParam[i].eParam;
-
- z++;
- zStart = z;
- while( *z>='0' && *z<='9' ) z++;
- if( *z=='k' || *z=='K' ){
- iMul = 1;
- z++;
- }else if( *z=='M' || *z=='M' ){
- iMul = 1024;
- z++;
- }
- nParam = z-zStart;
- if( nParam==0 || nParam>sizeof(zParam)-1 ) goto syntax_error;
- memcpy(zParam, zStart, nParam);
- zParam[nParam] = '\0';
- iVal = atoi(zParam) * iMul;
-
- if( eParam>0 ){
- if( bWorker || aParam[i].bWorker==0 ){
- lsm_config(db, eParam, &iVal);
- }
- }else{
- switch( eParam ){
- case TEST_NO_RECOVERY:
- if( pLsm ) pLsm->bNoRecovery = iVal;
- break;
- case TEST_MT_MODE:
- if( pLsm ) nThread = iVal;
- break;
- case TEST_MT_MIN_CKPT:
- if( pLsm && iVal>0 ) pLsm->nMtMinCkpt = iVal*1024;
- break;
- case TEST_MT_MAX_CKPT:
- if( pLsm && iVal>0 ) pLsm->nMtMaxCkpt = iVal*1024;
- break;
-#ifdef HAVE_ZLIB
- case TEST_COMPRESSION:
- testConfigureCompression(db);
- break;
-#endif
- }
- }
- }else if( z!=zStart ){
- goto syntax_error;
- }
- }
-
- if( pnThread ) *pnThread = nThread;
- if( pLsm && pLsm->nMtMaxCkpt < pLsm->nMtMinCkpt ){
- pLsm->nMtMinCkpt = pLsm->nMtMaxCkpt;
- }
-
- return 0;
- syntax_error:
- testPrintError("syntax error at: \"%s\"\n", z);
- return 1;
-}
-
-int tdb_lsm_config_str(TestDb *pDb, const char *zStr){
- int rc = 0;
- if( tdb_lsm(pDb) ){
-#ifdef LSM_MUTEX_PTHREADS
- int i;
-#endif
- LsmDb *pLsm = (LsmDb *)pDb;
-
- rc = test_lsm_config_str(pLsm, pLsm->db, 0, zStr, 0);
-#ifdef LSM_MUTEX_PTHREADS
- for(i=0; rc==0 && i<pLsm->nWorker; i++){
- rc = test_lsm_config_str(0, pLsm->aWorker[i].pWorker, 1, zStr, 0);
- }
-#endif
- }
- return rc;
-}
-
-int tdb_lsm_configure(lsm_db *db, const char *zConfig){
- return test_lsm_config_str(0, db, 0, zConfig, 0);
-}
-
-static int testLsmStartWorkers(LsmDb *, int, const char *, const char *);
-
-static int testLsmOpen(
- const char *zCfg,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- static const DatabaseMethods LsmMethods = {
- test_lsm_close,
- test_lsm_write,
- test_lsm_delete,
- test_lsm_delete_range,
- test_lsm_fetch,
- test_lsm_scan,
- test_lsm_begin,
- test_lsm_commit,
- test_lsm_rollback
- };
-
- int rc;
- int nFilename;
- LsmDb *pDb;
-
- /* If the bClear flag is set, delete any existing database. */
- assert( zFilename);
- if( bClear ) testDeleteLsmdb(zFilename);
- nFilename = strlen(zFilename);
-
- pDb = (LsmDb *)testMalloc(sizeof(LsmDb) + nFilename + 1);
- memset(pDb, 0, sizeof(LsmDb));
- pDb->base.pMethods = &LsmMethods;
- pDb->zName = (char *)&pDb[1];
- memcpy(pDb->zName, zFilename, nFilename + 1);
-
- /* Default the sector size used for crash simulation to 512 bytes.
- ** Todo: There should be an OS method to obtain this value - just as
- ** there is in SQLite. For now, LSM assumes that it is smaller than
- ** the page size (default 4KB).
- */
- pDb->szSector = 256;
-
- /* Default values for the mt_min_ckpt and mt_max_ckpt parameters. */
- pDb->nMtMinCkpt = LSMTEST_DFLT_MT_MIN_CKPT;
- pDb->nMtMaxCkpt = LSMTEST_DFLT_MT_MAX_CKPT;
-
- memcpy(&pDb->env, tdb_lsm_env(), sizeof(lsm_env));
- pDb->env.pVfsCtx = (void *)pDb;
- pDb->env.xFullpath = testEnvFullpath;
- pDb->env.xOpen = testEnvOpen;
- pDb->env.xRead = testEnvRead;
- pDb->env.xWrite = testEnvWrite;
- pDb->env.xTruncate = testEnvTruncate;
- pDb->env.xSync = testEnvSync;
- pDb->env.xSectorSize = testEnvSectorSize;
- pDb->env.xRemap = testEnvRemap;
- pDb->env.xFileid = testEnvFileid;
- pDb->env.xClose = testEnvClose;
- pDb->env.xUnlink = testEnvUnlink;
- pDb->env.xLock = testEnvLock;
- pDb->env.xTestLock = testEnvTestLock;
- pDb->env.xShmBarrier = testEnvShmBarrier;
- pDb->env.xShmMap = testEnvShmMap;
- pDb->env.xShmUnmap = testEnvShmUnmap;
- pDb->env.xSleep = testEnvSleep;
-
- rc = lsm_new(&pDb->env, &pDb->db);
- if( rc==LSM_OK ){
- int nThread = 1;
- lsm_config_log(pDb->db, xLog, 0);
- lsm_config_work_hook(pDb->db, xWorkHook, (void *)pDb);
-
- rc = test_lsm_config_str(pDb, pDb->db, 0, zCfg, &nThread);
- if( rc==LSM_OK ) rc = lsm_open(pDb->db, zFilename);
-
- pDb->eMode = nThread;
-#ifdef LSM_MUTEX_PTHREADS
- if( rc==LSM_OK && nThread>1 ){
- testLsmStartWorkers(pDb, nThread, zFilename, zCfg);
- }
-#endif
-
- if( rc!=LSM_OK ){
- test_lsm_close((TestDb *)pDb);
- pDb = 0;
- }
- }
-
- *ppDb = (TestDb *)pDb;
- return rc;
-}
-
-int test_lsm_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- return testLsmOpen(zSpec, zFilename, bClear, ppDb);
-}
-
-int test_lsm_small_open(
- const char *zSpec,
- const char *zFile,
- int bClear,
- TestDb **ppDb
-){
- const char *zCfg = "page_size=256 block_size=64 mmap=1024";
- return testLsmOpen(zCfg, zFile, bClear, ppDb);
-}
-
-int test_lsm_lomem_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- /* "max_freelist=4 autocheckpoint=32" */
- const char *zCfg =
- "page_size=256 block_size=64 autoflush=16 "
- "autocheckpoint=32"
- "mmap=0 "
- ;
- return testLsmOpen(zCfg, zFilename, bClear, ppDb);
-}
-
-int test_lsm_lomem2_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- /* "max_freelist=4 autocheckpoint=32" */
- const char *zCfg =
- "page_size=512 block_size=64 autoflush=0 mmap=0 "
- ;
- return testLsmOpen(zCfg, zFilename, bClear, ppDb);
-}
-
-int test_lsm_zip_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- const char *zCfg =
- "page_size=256 block_size=64 autoflush=16 "
- "autocheckpoint=32 compression=1 mmap=0 "
- ;
- return testLsmOpen(zCfg, zFilename, bClear, ppDb);
-}
-
-lsm_db *tdb_lsm(TestDb *pDb){
- if( pDb->pMethods->xClose==test_lsm_close ){
- return ((LsmDb *)pDb)->db;
- }
- return 0;
-}
-
-int tdb_lsm_multithread(TestDb *pDb){
- int ret = 0;
- if( tdb_lsm(pDb) ){
- ret = ((LsmDb*)pDb)->eMode!=LSMTEST_MODE_SINGLETHREAD;
- }
- return ret;
-}
-
-void tdb_lsm_enable_log(TestDb *pDb, int bEnable){
- lsm_db *db = tdb_lsm(pDb);
- if( db ){
- lsm_config_log(db, (bEnable ? xLog : 0), (void *)"client");
- }
-}
-
-void tdb_lsm_application_crash(TestDb *pDb){
- if( tdb_lsm(pDb) ){
- LsmDb *p = (LsmDb *)pDb;
- p->bCrashed = 1;
- }
-}
-
-void tdb_lsm_prepare_system_crash(TestDb *pDb){
- if( tdb_lsm(pDb) ){
- LsmDb *p = (LsmDb *)pDb;
- p->bPrepareCrash = 1;
- }
-}
-
-void tdb_lsm_system_crash(TestDb *pDb){
- if( tdb_lsm(pDb) ){
- LsmDb *p = (LsmDb *)pDb;
- p->bCrashed = 1;
- doSystemCrash(p);
- }
-}
-
-void tdb_lsm_safety(TestDb *pDb, int eMode){
- assert( eMode==LSM_SAFETY_OFF
- || eMode==LSM_SAFETY_NORMAL
- || eMode==LSM_SAFETY_FULL
- );
- if( tdb_lsm(pDb) ){
- int iParam = eMode;
- LsmDb *p = (LsmDb *)pDb;
- lsm_config(p->db, LSM_CONFIG_SAFETY, &iParam);
- }
-}
-
-void tdb_lsm_prepare_sync_crash(TestDb *pDb, int iSync){
- assert( iSync>0 );
- if( tdb_lsm(pDb) ){
- LsmDb *p = (LsmDb *)pDb;
- p->nAutoCrash = iSync;
- p->bPrepareCrash = 1;
- }
-}
-
-void tdb_lsm_config_work_hook(
- TestDb *pDb,
- void (*xWork)(lsm_db *, void *),
- void *pWorkCtx
-){
- if( tdb_lsm(pDb) ){
- LsmDb *p = (LsmDb *)pDb;
- p->xWork = xWork;
- p->pWorkCtx = pWorkCtx;
- }
-}
-
-void tdb_lsm_write_hook(
- TestDb *pDb,
- void (*xWrite)(void *, int, lsm_i64, int, int),
- void *pWriteCtx
-){
- if( tdb_lsm(pDb) ){
- LsmDb *p = (LsmDb *)pDb;
- p->xWriteHook = xWrite;
- p->pWriteCtx = pWriteCtx;
- }
-}
-
-int tdb_lsm_open(const char *zCfg, const char *zDb, int bClear, TestDb **ppDb){
- return testLsmOpen(zCfg, zDb, bClear, ppDb);
-}
-
-#ifdef LSM_MUTEX_PTHREADS
-
-/*
-** Signal worker thread iWorker that there may be work to do.
-*/
-static void mt_signal_worker(LsmDb *pDb, int iWorker){
- LsmWorker *p = &pDb->aWorker[iWorker];
- pthread_mutex_lock(&p->worker_mutex);
- p->bDoWork = 1;
- pthread_cond_signal(&p->worker_cond);
- pthread_mutex_unlock(&p->worker_mutex);
-}
-
-/*
-** This routine is used as the main() for all worker threads.
-*/
-static void *worker_main(void *pArg){
- LsmWorker *p = (LsmWorker *)pArg;
- lsm_db *pWorker; /* Connection to access db through */
-
- pthread_mutex_lock(&p->worker_mutex);
- while( (pWorker = p->pWorker) ){
- int rc = LSM_OK;
-
- /* Do some work. If an error occurs, exit. */
-
- pthread_mutex_unlock(&p->worker_mutex);
- if( p->eType==LSMTEST_THREAD_CKPT ){
- int nKB = 0;
- rc = lsm_info(pWorker, LSM_INFO_CHECKPOINT_SIZE, &nKB);
- if( rc==LSM_OK && nKB>=p->pDb->nMtMinCkpt ){
- rc = lsm_checkpoint(pWorker, 0);
- }
- }else{
- int nWrite;
- do {
-
- if( p->eType==LSMTEST_THREAD_WORKER ){
- waitOnCheckpointer(p->pDb, pWorker);
- }
-
- nWrite = 0;
- rc = lsm_work(pWorker, 0, 256, &nWrite);
-
- if( p->eType==LSMTEST_THREAD_WORKER && nWrite ){
- mt_signal_worker(p->pDb, 1);
- }
- }while( nWrite && p->pWorker );
- }
- pthread_mutex_lock(&p->worker_mutex);
-
- if( rc!=LSM_OK && rc!=LSM_BUSY ){
- p->worker_rc = rc;
- break;
- }
-
- /* The thread will wake up when it is signaled either because another
- ** thread has created some work for this one or because the connection
- ** is being closed. */
- if( p->pWorker && p->bDoWork==0 ){
- pthread_cond_wait(&p->worker_cond, &p->worker_mutex);
- }
- p->bDoWork = 0;
- }
- pthread_mutex_unlock(&p->worker_mutex);
-
- return 0;
-}
-
-
-static void mt_stop_worker(LsmDb *pDb, int iWorker){
- LsmWorker *p = &pDb->aWorker[iWorker];
- if( p->pWorker ){
- void *pDummy;
- lsm_db *pWorker;
-
- /* Signal the worker to stop */
- pthread_mutex_lock(&p->worker_mutex);
- pWorker = p->pWorker;
- p->pWorker = 0;
- pthread_cond_signal(&p->worker_cond);
- pthread_mutex_unlock(&p->worker_mutex);
-
- /* Join the worker thread. */
- pthread_join(p->worker_thread, &pDummy);
-
- /* Free resources allocated in mt_start_worker() */
- pthread_cond_destroy(&p->worker_cond);
- pthread_mutex_destroy(&p->worker_mutex);
- lsm_close(pWorker);
- }
-}
-
-static void mt_shutdown(LsmDb *pDb){
- int i;
- for(i=0; i<pDb->nWorker; i++){
- mt_stop_worker(pDb, i);
- }
-}
-
-/*
-** This callback is invoked by LSM when the client database writes to
-** the database file (i.e. to flush the contents of the in-memory tree).
-** This implies there may be work to do on the database, so signal
-** the worker threads.
-*/
-static void mt_client_work_hook(lsm_db *db, void *pArg){
- LsmDb *pDb = (LsmDb *)pArg; /* LsmDb database handle */
-
- /* Invoke the user level work-hook, if any. */
- if( pDb->xWork ) pDb->xWork(db, pDb->pWorkCtx);
-
- /* Wake up worker thread 0. */
- mt_signal_worker(pDb, 0);
-}
-
-static void mt_worker_work_hook(lsm_db *db, void *pArg){
- LsmDb *pDb = (LsmDb *)pArg; /* LsmDb database handle */
-
- /* Invoke the user level work-hook, if any. */
- if( pDb->xWork ) pDb->xWork(db, pDb->pWorkCtx);
-}
-
-/*
-** Launch worker thread iWorker for database connection pDb.
-*/
-static int mt_start_worker(
- LsmDb *pDb, /* Main database structure */
- int iWorker, /* Worker number to start */
- const char *zFilename, /* File name of database to open */
- const char *zCfg, /* Connection configuration string */
- int eType /* Type of worker thread */
-){
- int rc = 0; /* Return code */
- LsmWorker *p; /* Object to initialize */
-
- assert( iWorker<pDb->nWorker );
- assert( eType==LSMTEST_THREAD_CKPT
- || eType==LSMTEST_THREAD_WORKER
- || eType==LSMTEST_THREAD_WORKER_AC
- );
-
- p = &pDb->aWorker[iWorker];
- p->eType = eType;
- p->pDb = pDb;
-
- /* Open the worker connection */
- if( rc==0 ) rc = lsm_new(&pDb->env, &p->pWorker);
- if( zCfg ){
- test_lsm_config_str(pDb, p->pWorker, 1, zCfg, 0);
- }
- if( rc==0 ) rc = lsm_open(p->pWorker, zFilename);
- lsm_config_log(p->pWorker, xLog, (void *)"worker");
-
- /* Configure the work-hook */
- if( rc==0 ){
- lsm_config_work_hook(p->pWorker, mt_worker_work_hook, (void *)pDb);
- }
-
- if( eType==LSMTEST_THREAD_WORKER ){
- test_lsm_config_str(0, p->pWorker, 1, "autocheckpoint=0", 0);
- }
-
- /* Kick off the worker thread. */
- if( rc==0 ) rc = pthread_cond_init(&p->worker_cond, 0);
- if( rc==0 ) rc = pthread_mutex_init(&p->worker_mutex, 0);
- if( rc==0 ) rc = pthread_create(&p->worker_thread, 0, worker_main, (void *)p);
-
- return rc;
-}
-
-
-static int testLsmStartWorkers(
- LsmDb *pDb, int eModel, const char *zFilename, const char *zCfg
-){
- int rc;
-
- if( eModel<1 || eModel>4 ) return 1;
- if( eModel==1 ) return 0;
-
- /* Configure a work-hook for the client connection. Worker 0 is signalled
- ** every time the users connection writes to the database. */
- lsm_config_work_hook(pDb->db, mt_client_work_hook, (void *)pDb);
-
- /* Allocate space for two worker connections. They may not both be
- ** used, but both are allocated. */
- pDb->aWorker = (LsmWorker *)testMalloc(sizeof(LsmWorker) * 2);
- memset(pDb->aWorker, 0, sizeof(LsmWorker) * 2);
-
- switch( eModel ){
- case LSMTEST_MODE_BACKGROUND_CKPT:
- pDb->nWorker = 1;
- test_lsm_config_str(0, pDb->db, 0, "autocheckpoint=0", 0);
- rc = mt_start_worker(pDb, 0, zFilename, zCfg, LSMTEST_THREAD_CKPT);
- break;
-
- case LSMTEST_MODE_BACKGROUND_WORK:
- pDb->nWorker = 1;
- test_lsm_config_str(0, pDb->db, 0, "autowork=0", 0);
- rc = mt_start_worker(pDb, 0, zFilename, zCfg, LSMTEST_THREAD_WORKER_AC);
- break;
-
- case LSMTEST_MODE_BACKGROUND_BOTH:
- pDb->nWorker = 2;
- test_lsm_config_str(0, pDb->db, 0, "autowork=0", 0);
- rc = mt_start_worker(pDb, 0, zFilename, zCfg, LSMTEST_THREAD_WORKER);
- if( rc==0 ){
- rc = mt_start_worker(pDb, 1, zFilename, zCfg, LSMTEST_THREAD_CKPT);
- }
- break;
- }
-
- return rc;
-}
-
-
-int test_lsm_mt2(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- const char *zCfg = "mt_mode=2";
- return testLsmOpen(zCfg, zFilename, bClear, ppDb);
-}
-
-int test_lsm_mt3(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- const char *zCfg = "mt_mode=4";
- return testLsmOpen(zCfg, zFilename, bClear, ppDb);
-}
-
-#else
-static void mt_shutdown(LsmDb *pDb) {
- unused_parameter(pDb);
-}
-int test_lsm_mt(const char *zFilename, int bClear, TestDb **ppDb){
- unused_parameter(zFilename);
- unused_parameter(bClear);
- unused_parameter(ppDb);
- testPrintError("threads unavailable - recompile with LSM_MUTEX_PTHREADS\n");
- return 1;
-}
-#endif
diff --git a/ext/lsm1/lsm-test/lsmtest_tdb4.c b/ext/lsm1/lsm-test/lsmtest_tdb4.c
deleted file mode 100644
index 1f92928522..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_tdb4.c
+++ /dev/null
@@ -1,980 +0,0 @@
-
-/*
-** This file contains the TestDb bt wrapper.
-*/
-
-#include "lsmtest_tdb.h"
-#include "lsmtest.h"
-#include <unistd.h>
-#include "bt.h"
-
-#include
-
-typedef struct BtDb BtDb;
-typedef struct BtFile BtFile;
-
-/* Background checkpointer interface (see implementations below). */
-typedef struct bt_ckpter bt_ckpter;
-static int bgc_attach(BtDb *pDb, const char*);
-static int bgc_detach(BtDb *pDb);
-
-/*
-** Each database or log file opened by a database handle is wrapped by
-** an object of the following type.
-*/
-struct BtFile {
- BtDb *pBt; /* Database handle that opened this file */
- bt_env *pVfs; /* Underlying VFS */
- bt_file *pFile; /* File handle belonging to underlying VFS */
- int nSectorSize; /* Size of sectors in bytes */
- int nSector; /* Allocated size of nSector array */
- u8 **apSector; /* Original sector data */
-};
-
-/*
-** nCrashSync:
-** If this value is non-zero, then a "crash-test" is running. If
-** nCrashSync==1, then the crash is simulated during the very next
-** call to the xSync() VFS method (on either the db or log file).
-** If nCrashSync==2, the following call to xSync(), and so on.
-**
-** bCrash:
-** After a crash is simulated, this variable is set. Any subsequent
-** attempts to write to a file or modify the file system in any way
-** fail once this is set. All the caller can do is close the connection.
-**
-** bFastInsert:
-** If this variable is set to true, then a BT_CONTROL_FAST_INSERT_OP
-** control is issued before each callto BtReplace() or BtCsrOpen().
-*/
-struct BtDb {
- TestDb base; /* Base class */
- bt_db *pBt; /* bt database handle */
- sqlite4_env *pEnv; /* SQLite environment (for malloc/free) */
- bt_env *pVfs; /* Underlying VFS */
- int bFastInsert; /* True to use fast-insert */
-
- /* Space for bt_fetch() results */
- u8 *aBuffer; /* Space to store results */
- int nBuffer; /* Allocated size of aBuffer[] in bytes */
- int nRef;
-
- /* Background checkpointer used by mt connections */
- bt_ckpter *pCkpter;
-
- /* Stuff used for crash test simulation */
- BtFile *apFile[2]; /* Database and log files used by pBt */
- bt_env env; /* Private VFS for this object */
- int nCrashSync; /* Number of syncs until crash (see above) */
- int bCrash; /* True once a crash has been simulated */
-};
-
-static int btVfsFullpath(
- sqlite4_env *pEnv,
- bt_env *pVfs,
- const char *z,
- char **pzOut
-){
- BtDb *pBt = (BtDb*)pVfs->pVfsCtx;
- if( pBt->bCrash ) return SQLITE4_IOERR;
- return pBt->pVfs->xFullpath(pEnv, pBt->pVfs, z, pzOut);
-}
-
-static int btVfsOpen(
- sqlite4_env *pEnv,
- bt_env *pVfs,
- const char *zFile,
- int flags, bt_file **ppFile
-){
- BtFile *p;
- BtDb *pBt = (BtDb*)pVfs->pVfsCtx;
- int rc;
-
- if( pBt->bCrash ) return SQLITE4_IOERR;
-
- p = (BtFile*)testMalloc(sizeof(BtFile));
- if( !p ) return SQLITE4_NOMEM;
- if( flags & BT_OPEN_DATABASE ){
- pBt->apFile[0] = p;
- }else if( flags & BT_OPEN_LOG ){
- pBt->apFile[1] = p;
- }
- if( (flags & BT_OPEN_SHARED)==0 ){
- p->pBt = pBt;
- }
- p->pVfs = pBt->pVfs;
-
- rc = pBt->pVfs->xOpen(pEnv, pVfs, zFile, flags, &p->pFile);
- if( rc!=SQLITE4_OK ){
- testFree(p);
- p = 0;
- }else{
- pBt->nRef++;
- }
-
- *ppFile = (bt_file*)p;
- return rc;
-}
-
-static int btVfsSize(bt_file *pFile, sqlite4_int64 *piRes){
- BtFile *p = (BtFile*)pFile;
- if( p->pBt && p->pBt->bCrash ) return SQLITE4_IOERR;
- return p->pVfs->xSize(p->pFile, piRes);
-}
-
-static int btVfsRead(bt_file *pFile, sqlite4_int64 iOff, void *pBuf, int nBuf){
- BtFile *p = (BtFile*)pFile;
- if( p->pBt && p->pBt->bCrash ) return SQLITE4_IOERR;
- return p->pVfs->xRead(p->pFile, iOff, pBuf, nBuf);
-}
-
-static int btFlushSectors(BtFile *p, int iFile){
- sqlite4_int64 iSz;
- int rc;
- int i;
- u8 *aTmp = 0;
-
- rc = p->pBt->pVfs->xSize(p->pFile, &iSz);
- for(i=0; rc==SQLITE4_OK && i<p->nSector; i++){
- if( p->pBt->bCrash && p->apSector[i] ){
-
- /* The system is simulating a crash. There are three choices for
- ** this sector:
- **
- ** 1) Leave it as it is (simulating a successful write),
- ** 2) Restore the original data (simulating a lost write),
- ** 3) Populate the disk sector with garbage data.
- */
- sqlite4_int64 iSOff = p->nSectorSize*i;
- int nWrite = MIN(p->nSectorSize, iSz - iSOff);
-
- if( nWrite ){
- u8 *aWrite = 0;
- int iOpt = (testPrngValue(i) % 3) + 1;
- if( iOpt==1 ){
- aWrite = p->apSector[i];
- }else if( iOpt==3 ){
- if( aTmp==0 ) aTmp = testMalloc(p->nSectorSize);
- aWrite = aTmp;
- testPrngArray(i*13, (u32*)aWrite, nWrite/sizeof(u32));
- }
-
-#if 0
-fprintf(stderr, "handle sector %d of %s with %s\n", i,
- iFile==0 ? "db" : "log",
- iOpt==1 ? "rollback" : iOpt==2 ? "write" : "omit"
-);
-fflush(stderr);
-#endif
-
- if( aWrite ){
- rc = p->pBt->pVfs->xWrite(p->pFile, iSOff, aWrite, nWrite);
- }
- }
- }
- testFree(p->apSector[i]);
- p->apSector[i] = 0;
- }
-
- testFree(aTmp);
- return rc;
-}
-
-static int btSaveSectors(BtFile *p, sqlite4_int64 iOff, int nBuf){
- int rc;
- sqlite4_int64 iSz; /* Size of file on disk */
- int iFirst; /* First sector affected */
- int iSector; /* Current sector */
- int iLast; /* Last sector affected */
-
- if( p->nSectorSize==0 ){
- p->nSectorSize = p->pBt->pVfs->xSectorSize(p->pFile);
- if( p->nSectorSize<512 ) p->nSectorSize = 512;
- }
- iLast = (iOff+nBuf-1) / p->nSectorSize;
- iFirst = iOff / p->nSectorSize;
-
- rc = p->pBt->pVfs->xSize(p->pFile, &iSz);
- for(iSector=iFirst; rc==SQLITE4_OK && iSector<=iLast; iSector++){
- int nRead;
- sqlite4_int64 iSOff = iSector * p->nSectorSize;
- u8 *aBuf = testMalloc(p->nSectorSize);
- nRead = MIN(p->nSectorSize, (iSz - iSOff));
- if( nRead>0 ){
- rc = p->pBt->pVfs->xRead(p->pFile, iSOff, aBuf, nRead);
- }
-
- while( rc==SQLITE4_OK && iSector>=p->nSector ){
- int nNew = p->nSector + 32;
- u8 **apNew = (u8**)testMalloc(nNew * sizeof(u8*));
- memcpy(apNew, p->apSector, p->nSector*sizeof(u8*));
- testFree(p->apSector);
- p->apSector = apNew;
- p->nSector = nNew;
- }
-
- p->apSector[iSector] = aBuf;
- }
-
- return rc;
-}
-
-static int btVfsWrite(bt_file *pFile, sqlite4_int64 iOff, void *pBuf, int nBuf){
- BtFile *p = (BtFile*)pFile;
- if( p->pBt && p->pBt->bCrash ) return SQLITE4_IOERR;
- if( p->pBt && p->pBt->nCrashSync ){
- btSaveSectors(p, iOff, nBuf);
- }
- return p->pVfs->xWrite(p->pFile, iOff, pBuf, nBuf);
-}
-
-static int btVfsTruncate(bt_file *pFile, sqlite4_int64 iOff){
- BtFile *p = (BtFile*)pFile;
- if( p->pBt && p->pBt->bCrash ) return SQLITE4_IOERR;
- return p->pVfs->xTruncate(p->pFile, iOff);
-}
-
-static int btVfsSync(bt_file *pFile){
- int rc = SQLITE4_OK;
- BtFile *p = (BtFile*)pFile;
- BtDb *pBt = p->pBt;
-
- if( pBt ){
- if( pBt->bCrash ) return SQLITE4_IOERR;
- if( pBt->nCrashSync ){
- pBt->nCrashSync--;
- pBt->bCrash = (pBt->nCrashSync==0);
- if( pBt->bCrash ){
- btFlushSectors(pBt->apFile[0], 0);
- btFlushSectors(pBt->apFile[1], 1);
- rc = SQLITE4_IOERR;
- }else{
- btFlushSectors(p, 0);
- }
- }
- }
-
- if( rc==SQLITE4_OK ){
- rc = p->pVfs->xSync(p->pFile);
- }
- return rc;
-}
-
-static int btVfsSectorSize(bt_file *pFile){
- BtFile *p = (BtFile*)pFile;
- return p->pVfs->xSectorSize(p->pFile);
-}
-
-static void btDeref(BtDb *p){
- p->nRef--;
- assert( p->nRef>=0 );
- if( p->nRef<=0 ) testFree(p);
-}
-
-static int btVfsClose(bt_file *pFile){
- BtFile *p = (BtFile*)pFile;
- BtDb *pBt = p->pBt;
- int rc;
- if( pBt ){
- btFlushSectors(p, 0);
- if( p==pBt->apFile[0] ) pBt->apFile[0] = 0;
- if( p==pBt->apFile[1] ) pBt->apFile[1] = 0;
- }
- testFree(p->apSector);
- rc = p->pVfs->xClose(p->pFile);
-#if 0
- btDeref(p->pBt);
-#endif
- testFree(p);
- return rc;
-}
-
-static int btVfsUnlink(sqlite4_env *pEnv, bt_env *pVfs, const char *zFile){
- BtDb *pBt = (BtDb*)pVfs->pVfsCtx;
- if( pBt->bCrash ) return SQLITE4_IOERR;
- return pBt->pVfs->xUnlink(pEnv, pBt->pVfs, zFile);
-}
-
-static int btVfsLock(bt_file *pFile, int iLock, int eType){
- BtFile *p = (BtFile*)pFile;
- if( p->pBt && p->pBt->bCrash ) return SQLITE4_IOERR;
- return p->pVfs->xLock(p->pFile, iLock, eType);
-}
-
-static int btVfsTestLock(bt_file *pFile, int iLock, int nLock, int eType){
- BtFile *p = (BtFile*)pFile;
- if( p->pBt && p->pBt->bCrash ) return SQLITE4_IOERR;
- return p->pVfs->xTestLock(p->pFile, iLock, nLock, eType);
-}
-
-static int btVfsShmMap(bt_file *pFile, int iChunk, int sz, void **ppOut){
- BtFile *p = (BtFile*)pFile;
- if( p->pBt && p->pBt->bCrash ) return SQLITE4_IOERR;
- return p->pVfs->xShmMap(p->pFile, iChunk, sz, ppOut);
-}
-
-static void btVfsShmBarrier(bt_file *pFile){
- BtFile *p = (BtFile*)pFile;
- return p->pVfs->xShmBarrier(p->pFile);
-}
-
-static int btVfsShmUnmap(bt_file *pFile, int bDelete){
- BtFile *p = (BtFile*)pFile;
- if( p->pBt && p->pBt->bCrash ) return SQLITE4_IOERR;
- return p->pVfs->xShmUnmap(p->pFile, bDelete);
-}
-
-static int bt_close(TestDb *pTestDb){
- BtDb *p = (BtDb*)pTestDb;
- int rc = sqlite4BtClose(p->pBt);
- free(p->aBuffer);
- if( p->apFile[0] ) p->apFile[0]->pBt = 0;
- if( p->apFile[1] ) p->apFile[1]->pBt = 0;
- bgc_detach(p);
- testFree(p);
- return rc;
-}
-
-static int btMinTransaction(BtDb *p, int iMin, int *piLevel){
- int iLevel;
- int rc = SQLITE4_OK;
-
- iLevel = sqlite4BtTransactionLevel(p->pBt);
- if( iLevel<iMin ){
- rc = sqlite4BtBegin(p->pBt, iMin);
- *piLevel = iLevel;
- }else{
- *piLevel = -1;
- }
-
- return rc;
-}
-static int btRestoreTransaction(BtDb *p, int iLevel, int rcin){
- int rc = rcin;
- if( iLevel>=0 ){
- if( rc==SQLITE4_OK ){
- rc = sqlite4BtCommit(p->pBt, iLevel);
- }else{
- sqlite4BtRollback(p->pBt, iLevel);
- }
- assert( iLevel==sqlite4BtTransactionLevel(p->pBt) );
- }
- return rc;
-}
-
-static int bt_write(TestDb *pTestDb, void *pK, int nK, void *pV, int nV){
- BtDb *p = (BtDb*)pTestDb;
- int iLevel;
- int rc;
-
- rc = btMinTransaction(p, 2, &iLevel);
- if( rc==SQLITE4_OK ){
- if( p->bFastInsert ) sqlite4BtControl(p->pBt, BT_CONTROL_FAST_INSERT_OP, 0);
- rc = sqlite4BtReplace(p->pBt, pK, nK, pV, nV);
- rc = btRestoreTransaction(p, iLevel, rc);
- }
- return rc;
-}
-
-static int bt_delete(TestDb *pTestDb, void *pK, int nK){
- return bt_write(pTestDb, pK, nK, 0, -1);
-}
-
-static int bt_delete_range(
- TestDb *pTestDb,
- void *pKey1, int nKey1,
- void *pKey2, int nKey2
-){
- BtDb *p = (BtDb*)pTestDb;
- bt_cursor *pCsr = 0;
- int rc = SQLITE4_OK;
- int iLevel;
-
- rc = btMinTransaction(p, 2, &iLevel);
- if( rc==SQLITE4_OK ){
- if( p->bFastInsert ) sqlite4BtControl(p->pBt, BT_CONTROL_FAST_INSERT_OP, 0);
- rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
- }
- while( rc==SQLITE4_OK ){
- const void *pK;
- int n;
- int nCmp;
- int res;
-
- rc = sqlite4BtCsrSeek(pCsr, pKey1, nKey1, BT_SEEK_GE);
- if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
- if( rc!=SQLITE4_OK ) break;
-
- rc = sqlite4BtCsrKey(pCsr, &pK, &n);
- if( rc!=SQLITE4_OK ) break;
-
- nCmp = MIN(n, nKey1);
- res = memcmp(pKey1, pK, nCmp);
- assert( res<0 || (res==0 && nKey1<=n) );
- if( res==0 && nKey1==n ){
- rc = sqlite4BtCsrNext(pCsr);
- if( rc!=SQLITE4_OK ) break;
- rc = sqlite4BtCsrKey(pCsr, &pK, &n);
- if( rc!=SQLITE4_OK ) break;
- }
-
- nCmp = MIN(n, nKey2);
- res = memcmp(pKey2, pK, nCmp);
- if( res<0 || (res==0 && nKey2<=n) ) break;
-
- rc = sqlite4BtDelete(pCsr);
- }
- if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;
-
- sqlite4BtCsrClose(pCsr);
-
- rc = btRestoreTransaction(p, iLevel, rc);
- return rc;
-}
-
-static int bt_fetch(
- TestDb *pTestDb,
- void *pK, int nK,
- void **ppVal, int *pnVal
-){
- BtDb *p = (BtDb*)pTestDb;
- bt_cursor *pCsr = 0;
- int iLevel;
- int rc = SQLITE4_OK;
-
- iLevel = sqlite4BtTransactionLevel(p->pBt);
- if( iLevel==0 ){
- rc = sqlite4BtBegin(p->pBt, 1);
- if( rc!=SQLITE4_OK ) return rc;
- }
-
- if( p->bFastInsert ) sqlite4BtControl(p->pBt, BT_CONTROL_FAST_INSERT_OP, 0);
- rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
- if( rc==SQLITE4_OK ){
- rc = sqlite4BtCsrSeek(pCsr, pK, nK, BT_SEEK_EQ);
- if( rc==SQLITE4_OK ){
- const void *pV = 0;
- int nV = 0;
- rc = sqlite4BtCsrData(pCsr, 0, -1, &pV, &nV);
- if( rc==SQLITE4_OK ){
- if( nV>p->nBuffer ){
- free(p->aBuffer);
- p->aBuffer = (u8*)malloc(nV*2);
- p->nBuffer = nV*2;
- }
- memcpy(p->aBuffer, pV, nV);
- *pnVal = nV;
- *ppVal = (void*)(p->aBuffer);
- }
-
- }else if( rc==SQLITE4_INEXACT || rc==SQLITE4_NOTFOUND ){
- *ppVal = 0;
- *pnVal = -1;
- rc = SQLITE4_OK;
- }
- sqlite4BtCsrClose(pCsr);
- }
-
- if( iLevel==0 ) sqlite4BtCommit(p->pBt, 0);
- return rc;
-}
-
-static int bt_scan(
- TestDb *pTestDb,
- void *pCtx,
- int bReverse,
- void *pFirst, int nFirst,
- void *pLast, int nLast,
- void (*xCallback)(void *, void *, int , void *, int)
-){
- BtDb *p = (BtDb*)pTestDb;
- bt_cursor *pCsr = 0;
- int rc;
- int iLevel;
-
- rc = btMinTransaction(p, 1, &iLevel);
-
- if( rc==SQLITE4_OK ){
- if( p->bFastInsert ) sqlite4BtControl(p->pBt, BT_CONTROL_FAST_INSERT_OP, 0);
- rc = sqlite4BtCsrOpen(p->pBt, 0, &pCsr);
- }
- if( rc==SQLITE4_OK ){
- if( bReverse ){
- if( pLast ){
- rc = sqlite4BtCsrSeek(pCsr, pLast, nLast, BT_SEEK_LE);
- }else{
- rc = sqlite4BtCsrLast(pCsr);
- }
- }else{
- rc = sqlite4BtCsrSeek(pCsr, pFirst, nFirst, BT_SEEK_GE);
- }
- if( rc==SQLITE4_INEXACT ) rc = SQLITE4_OK;
-
- while( rc==SQLITE4_OK ){
- const void *pK = 0; int nK = 0;
- const void *pV = 0; int nV = 0;
-
- rc = sqlite4BtCsrKey(pCsr, &pK, &nK);
- if( rc==SQLITE4_OK ){
- rc = sqlite4BtCsrData(pCsr, 0, -1, &pV, &nV);
- }
-
- if( rc!=SQLITE4_OK ) break;
- if( bReverse ){
- if( pFirst ){
- int res;
- int nCmp = MIN(nK, nFirst);
- res = memcmp(pFirst, pK, nCmp);
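- if( res>0 || (res==0 && nK<nFirst) ) break;
- }
- }else{
- if( pLast ){
- int res;
- int nCmp = MIN(nK, nLast);
- res = memcmp(pLast, pK, nCmp);
- if( res<0 || (res==0 && nK>nLast) ) break;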
- }
- }
-
- xCallback(pCtx, (void*)pK, nK, (void*)pV, nV);
- if( bReverse ){
- rc = sqlite4BtCsrPrev(pCsr);
- }else{
- rc = sqlite4BtCsrNext(pCsr);
- }
- }
- if( rc==SQLITE4_NOTFOUND ) rc = SQLITE4_OK;
-
- sqlite4BtCsrClose(pCsr);
- }
-
- rc = btRestoreTransaction(p, iLevel, rc);
- return rc;
-}
-
-static int bt_begin(TestDb *pTestDb, int iLvl){
- BtDb *p = (BtDb*)pTestDb;
- int rc = sqlite4BtBegin(p->pBt, iLvl);
- return rc;
-}
-
-static int bt_commit(TestDb *pTestDb, int iLvl){
- BtDb *p = (BtDb*)pTestDb;
- int rc = sqlite4BtCommit(p->pBt, iLvl);
- return rc;
-}
-
-static int bt_rollback(TestDb *pTestDb, int iLvl){
- BtDb *p = (BtDb*)pTestDb;
- int rc = sqlite4BtRollback(p->pBt, iLvl);
- return rc;
-}
-
-static int testParseOption(
- const char **pzIn, /* IN/OUT: pointer to next option */
- const char **pzOpt, /* OUT: nul-terminated option name */
- const char **pzArg, /* OUT: nul-terminated option argument */
- char *pSpace /* Temporary space for output params */
-){
- const char *p = *pzIn;
- const char *pStart;
- int n;
-
- char *pOut = pSpace;
-
- while( *p==' ' ) p++;
- pStart = p;
- while( *p && *p!='=' ) p++;
- if( *p==0 ) return 1;
-
- n = (p - pStart);
- memcpy(pOut, pStart, n);
- *pzOpt = pOut;
- pOut += n;
- *pOut++ = '\0';
-
- p++;
- pStart = p;
- while( *p && *p!=' ' ) p++;
- n = (p - pStart);
-
- memcpy(pOut, pStart, n);
- *pzArg = pOut;
- pOut += n;
- *pOut++ = '\0';
-
- *pzIn = p;
- return 0;
-}
-
-static int testParseInt(const char *z, int *piVal){
- int i = 0;
- const char *p = z;
-
- while( *p>='0' && *p<='9' ){
- i = i*10 + (*p - '0');
- p++;
- }
- if( *p=='K' || *p=='k' ){
- i = i * 1024;
- p++;
- }else if( *p=='M' || *p=='m' ){
- i = i * 1024 * 1024;
- p++;
- }
-
- if( *p ) return SQLITE4_ERROR;
- *piVal = i;
- return SQLITE4_OK;
-}
-
-static int testBtConfigure(BtDb *pDb, const char *zCfg, int *pbMt){
- int rc = SQLITE4_OK;
-
- if( zCfg ){
- struct CfgParam {
- const char *zParam;
- int eParam;
- } aParam[] = {
- { "safety", BT_CONTROL_SAFETY },
- { "autockpt", BT_CONTROL_AUTOCKPT },
- { "multiproc", BT_CONTROL_MULTIPROC },
- { "blksz", BT_CONTROL_BLKSZ },
- { "pagesz", BT_CONTROL_PAGESZ },
- { "mt", -1 },
- { "fastinsert", -2 },
- { 0, 0 }
- };
- const char *z = zCfg;
- int n = strlen(z);
- char *aSpace;
- const char *zOpt;
- const char *zArg;
-
- aSpace = (char*)testMalloc(n+2);
- while( rc==SQLITE4_OK && 0==testParseOption(&z, &zOpt, &zArg, aSpace) ){
- int i;
- int iVal;
- rc = testArgSelect(aParam, "param", zOpt, &i);
- if( rc!=SQLITE4_OK ) break;
-
- rc = testParseInt(zArg, &iVal);
- if( rc!=SQLITE4_OK ) break;
-
- switch( aParam[i].eParam ){
- case -1:
- *pbMt = iVal;
- break;
- case -2:
- pDb->bFastInsert = 1;
- break;
- default:
- rc = sqlite4BtControl(pDb->pBt, aParam[i].eParam, (void*)&iVal);
- break;
- }
- }
- testFree(aSpace);
- }
-
- return rc;
-}
-
-
-int test_bt_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
-
- static const DatabaseMethods SqlMethods = {
- bt_close,
- bt_write,
- bt_delete,
- bt_delete_range,
- bt_fetch,
- bt_scan,
- bt_begin,
- bt_commit,
- bt_rollback
- };
- BtDb *p = 0;
- bt_db *pBt = 0;
- int rc;
- sqlite4_env *pEnv = sqlite4_env_default();
-
- if( bClear && zFilename && zFilename[0] ){
- char *zLog = sqlite3_mprintf("%s-wal", zFilename);
- unlink(zFilename);
- unlink(zLog);
- sqlite3_free(zLog);
- }
-
- rc = sqlite4BtNew(pEnv, 0, &pBt);
- if( rc==SQLITE4_OK ){
- int mt = 0; /* True for multi-threaded connection */
-
- p = (BtDb*)testMalloc(sizeof(BtDb));
- p->base.pMethods = &SqlMethods;
- p->pBt = pBt;
- p->pEnv = pEnv;
- p->nRef = 1;
-
- p->env.pVfsCtx = (void*)p;
- p->env.xFullpath = btVfsFullpath;
- p->env.xOpen = btVfsOpen;
- p->env.xSize = btVfsSize;
- p->env.xRead = btVfsRead;
- p->env.xWrite = btVfsWrite;
- p->env.xTruncate = btVfsTruncate;
- p->env.xSync = btVfsSync;
- p->env.xSectorSize = btVfsSectorSize;
- p->env.xClose = btVfsClose;
- p->env.xUnlink = btVfsUnlink;
- p->env.xLock = btVfsLock;
- p->env.xTestLock = btVfsTestLock;
- p->env.xShmMap = btVfsShmMap;
- p->env.xShmBarrier = btVfsShmBarrier;
- p->env.xShmUnmap = btVfsShmUnmap;
-
- sqlite4BtControl(pBt, BT_CONTROL_GETVFS, (void*)&p->pVfs);
- sqlite4BtControl(pBt, BT_CONTROL_SETVFS, (void*)&p->env);
-
- rc = testBtConfigure(p, zSpec, &mt);
- if( rc==SQLITE4_OK ){
- rc = sqlite4BtOpen(pBt, zFilename);
- }
-
- if( rc==SQLITE4_OK && mt ){
- int nAuto = 0;
- rc = bgc_attach(p, zSpec);
- sqlite4BtControl(pBt, BT_CONTROL_AUTOCKPT, (void*)&nAuto);
- }
- }
-
- if( rc!=SQLITE4_OK && p ){
- bt_close(&p->base);
- }
-
- *ppDb = &p->base;
- return rc;
-}
-
-int test_fbt_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- return test_bt_open("fast=1", zFilename, bClear, ppDb);
-}
-
-int test_fbts_open(
- const char *zSpec,
- const char *zFilename,
- int bClear,
- TestDb **ppDb
-){
- return test_bt_open("fast=1 blksz=32K pagesz=512", zFilename, bClear, ppDb);
-}
-
-
-void tdb_bt_prepare_sync_crash(TestDb *pTestDb, int iSync){
- BtDb *p = (BtDb*)pTestDb;
- assert( pTestDb->pMethods->xClose==bt_close );
- assert( p->bCrash==0 );
- p->nCrashSync = iSync;
-}
-
-bt_db *tdb_bt(TestDb *pDb){
- if( pDb->pMethods->xClose==bt_close ){
- return ((BtDb *)pDb)->pBt;
- }
- return 0;
-}
-
-/*************************************************************************
-** Beginning of code for background checkpointer.
-*/
-
-struct bt_ckpter {
- sqlite4_buffer file; /* File name */
- sqlite4_buffer spec; /* Options */
- int nLogsize; /* Minimum log size to checkpoint */
- int nRef; /* Number of clients */
-
- int bDoWork; /* Set by client threads */
- pthread_t ckpter_thread; /* Checkpointer thread */
- pthread_cond_t ckpter_cond; /* Condition var the ckpter waits on */
- pthread_mutex_t ckpter_mutex; /* Mutex used with ckpter_cond */
-
- bt_ckpter *pNext; /* Next object in list at gBgc.pCkpter */
-};
-
-static struct GlobalBackgroundCheckpointer {
- bt_ckpter *pCkpter; /* Linked list of checkpointers */
-} gBgc;
-
-static void *bgc_main(void *pArg){
- BtDb *pDb = 0;
- int rc;
- int mt;
- bt_ckpter *pCkpter = (bt_ckpter*)pArg;
-
- rc = test_bt_open("", (char*)pCkpter->file.p, 0, (TestDb**)&pDb);
- assert( rc==SQLITE4_OK );
- rc = testBtConfigure(pDb, (char*)pCkpter->spec.p, &mt);
-
- while( pCkpter->nRef>0 ){
- bt_db *db = pDb->pBt;
- int nLog = 0;
-
- sqlite4BtBegin(db, 1);
- sqlite4BtCommit(db, 0);
- sqlite4BtControl(db, BT_CONTROL_LOGSIZE, (void*)&nLog);
-
- if( nLog>=pCkpter->nLogsize ){
- int rc;
- bt_checkpoint ckpt;
- memset(&ckpt, 0, sizeof(bt_checkpoint));
- ckpt.nFrameBuffer = nLog/2;
- rc = sqlite4BtControl(db, BT_CONTROL_CHECKPOINT, (void*)&ckpt);
- assert( rc==SQLITE4_OK );
- sqlite4BtControl(db, BT_CONTROL_LOGSIZE, (void*)&nLog);
- }
-
- /* The thread will wake up when it is signaled either because another
- ** thread has created some work for this one or because the connection
- ** is being closed. */
- pthread_mutex_lock(&pCkpter->ckpter_mutex);
- if( pCkpter->bDoWork==0 ){
- pthread_cond_wait(&pCkpter->ckpter_cond, &pCkpter->ckpter_mutex);
- }
- pCkpter->bDoWork = 0;
- pthread_mutex_unlock(&pCkpter->ckpter_mutex);
- }
-
- if( pDb ) bt_close((TestDb*)pDb);
- return 0;
-}
-
-static void bgc_logsize_cb(void *pCtx, int nLogsize){
- bt_ckpter *p = (bt_ckpter*)pCtx;
- if( nLogsize>=p->nLogsize ){
- pthread_mutex_lock(&p->ckpter_mutex);
- p->bDoWork = 1;
- pthread_cond_signal(&p->ckpter_cond);
- pthread_mutex_unlock(&p->ckpter_mutex);
- }
-}
-
-static int bgc_attach(BtDb *pDb, const char *zSpec){
- int rc;
- int n;
- bt_info info;
- bt_ckpter *pCkpter;
-
- /* Figure out the full path to the database opened by handle pDb. */
- info.eType = BT_INFO_FILENAME;
- info.pgno = 0;
- sqlite4_buffer_init(&info.output, 0);
- rc = sqlite4BtControl(pDb->pBt, BT_CONTROL_INFO, (void*)&info);
- if( rc!=SQLITE4_OK ) return rc;
-
- sqlite4_mutex_enter(sqlite4_mutex_alloc(pDb->pEnv, SQLITE4_MUTEX_STATIC_KV));
-
- /* Search for an existing bt_ckpter object. */
- n = info.output.n;
- for(pCkpter=gBgc.pCkpter; pCkpter; pCkpter=pCkpter->pNext){
- if( n==pCkpter->file.n && 0==memcmp(info.output.p, pCkpter->file.p, n) ){
- break;
- }
- }
-
- /* Failed to find a suitable checkpointer. Create a new one. */
- if( pCkpter==0 ){
- bt_logsizecb cb;
-
- pCkpter = testMalloc(sizeof(bt_ckpter));
- memcpy(&pCkpter->file, &info.output, sizeof(sqlite4_buffer));
- info.output.p = 0;
- pCkpter->pNext = gBgc.pCkpter;
- pCkpter->nLogsize = 1000;
- gBgc.pCkpter = pCkpter;
- pCkpter->nRef = 1;
-
- sqlite4_buffer_init(&pCkpter->spec, 0);
- rc = sqlite4_buffer_set(&pCkpter->spec, zSpec, strlen(zSpec)+1);
- assert( rc==SQLITE4_OK );
-
- /* Kick off the checkpointer thread. */
- if( rc==0 ) rc = pthread_cond_init(&pCkpter->ckpter_cond, 0);
- if( rc==0 ) rc = pthread_mutex_init(&pCkpter->ckpter_mutex, 0);
- if( rc==0 ){
- rc = pthread_create(&pCkpter->ckpter_thread, 0, bgc_main, (void*)pCkpter);
- }
- assert( rc==0 ); /* todo: Fix this */
-
- /* Set up the logsize callback for the client thread */
- cb.pCtx = (void*)pCkpter;
- cb.xLogsize = bgc_logsize_cb;
- sqlite4BtControl(pDb->pBt, BT_CONTROL_LOGSIZECB, (void*)&cb);
- }else{
- pCkpter->nRef++;
- }
-
- /* Assuming a checkpointer was encountered or effected, attach the
- ** connection to it. */
- if( pCkpter ){
- pDb->pCkpter = pCkpter;
- }
-
- sqlite4_mutex_leave(sqlite4_mutex_alloc(pDb->pEnv, SQLITE4_MUTEX_STATIC_KV));
- sqlite4_buffer_clear(&info.output);
- return rc;
-}
-
-/*
-** Release connection pDb's reference to its background checkpointer, if any.
-** Dropping the last reference unlinks the object from gBgc.pCkpter, signals
-** and joins the checkpointer thread, then frees it. Returns SQLITE4_OK.
-*/
-static int bgc_detach(BtDb *pDb){
-  int rc = SQLITE4_OK;
-  bt_ckpter *pCkpter = pDb->pCkpter;
-  if( pCkpter ){
-    int bShutdown = 0;            /* True if this is the last reference */
-    sqlite4_mutex_enter(sqlite4_mutex_alloc(pDb->pEnv,SQLITE4_MUTEX_STATIC_KV));
-    pCkpter->nRef--;
-    if( pCkpter->nRef==0 ){
-      bt_ckpter **pp;
-      /* Locate the link that points at pCkpter before writing through it. */
-      for(pp=&gBgc.pCkpter; *pp!=pCkpter; pp=&((*pp)->pNext));
-      *pp = pCkpter->pNext;
-      bShutdown = 1;
-    }
-    sqlite4_mutex_leave(sqlite4_mutex_alloc(pDb->pEnv,SQLITE4_MUTEX_STATIC_KV));
-
-    if( bShutdown ){
-      void *pDummy;
-      /* Signal the checkpointer thread. */
-      pthread_mutex_lock(&pCkpter->ckpter_mutex);
-      pCkpter->bDoWork = 1;
-      pthread_cond_signal(&pCkpter->ckpter_cond);
-      pthread_mutex_unlock(&pCkpter->ckpter_mutex);
-      /* Join the checkpointer thread. */
-      pthread_join(pCkpter->ckpter_thread, &pDummy);
-      pthread_cond_destroy(&pCkpter->ckpter_cond);
-      pthread_mutex_destroy(&pCkpter->ckpter_mutex);
-      sqlite4_buffer_clear(&pCkpter->file);
-      sqlite4_buffer_clear(&pCkpter->spec);
-      testFree(pCkpter);
-    }
-    pDb->pCkpter = 0;
-  }
-  return rc;
-}
-
-/*
-** End of background checkpointer.
-*************************************************************************/
diff --git a/ext/lsm1/lsm-test/lsmtest_util.c b/ext/lsm1/lsm-test/lsmtest_util.c
deleted file mode 100644
index adab8a53e8..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_util.c
+++ /dev/null
@@ -1,223 +0,0 @@
-
-#include "lsmtest.h"
-#include <stdarg.h>
-#include <stdio.h>
-#include <string.h>
-#ifndef _WIN32
-# include <sys/time.h>
-#endif
-
-/*
-** Global variables used within this module.
-*/
-static struct TestutilGlobal {
- char **argv;
- int argc;
-} g = {0, 0};
-
-static struct TestutilRnd {
- unsigned int aRand1[2048]; /* Bits 0..10 */
- unsigned int aRand2[2048]; /* Bits 11..21 */
- unsigned int aRand3[1024]; /* Bits 22..31 */
-} r;
-
-/*************************************************************************
-** The following block is a copy of the implementation of SQLite function
-** sqlite3_randomness. This version has two important differences:
-**
-** 1. It always uses the same seed. So the sequence of random data output
-** is the same for every run of the program.
-**
-** 2. It is not threadsafe.
-*/
-static struct sqlite3PrngType {
- unsigned char i, j; /* State variables */
- unsigned char s[256]; /* State variables */
-} sqlite3Prng = {
- 0xAF, 0x28,
- {
- 0x71, 0xF5, 0xB4, 0x6E, 0x80, 0xAB, 0x1D, 0xB8,
- 0xFB, 0xB7, 0x49, 0xBF, 0xFF, 0x72, 0x2D, 0x14,
- 0x79, 0x09, 0xE3, 0x78, 0x76, 0xB0, 0x2C, 0x0A,
- 0x8E, 0x23, 0xEE, 0xDF, 0xE0, 0x9A, 0x2F, 0x67,
- 0xE1, 0xBE, 0x0E, 0xA7, 0x08, 0x97, 0xEB, 0x77,
- 0x78, 0xBA, 0x9D, 0xCA, 0x49, 0x4C, 0x60, 0x9A,
- 0xF6, 0xBD, 0xDA, 0x7F, 0xBC, 0x48, 0x58, 0x52,
- 0xE5, 0xCD, 0x83, 0x72, 0x23, 0x52, 0xFF, 0x6D,
- 0xEF, 0x0F, 0x82, 0x29, 0xA0, 0x83, 0x3F, 0x7D,
- 0xA4, 0x88, 0x31, 0xE7, 0x88, 0x92, 0x3B, 0x9B,
- 0x3B, 0x2C, 0xC2, 0x4C, 0x71, 0xA2, 0xB0, 0xEA,
- 0x36, 0xD0, 0x00, 0xF1, 0xD3, 0x39, 0x17, 0x5D,
- 0x2A, 0x7A, 0xE4, 0xAD, 0xE1, 0x64, 0xCE, 0x0F,
- 0x9C, 0xD9, 0xF5, 0xED, 0xB0, 0x22, 0x5E, 0x62,
- 0x97, 0x02, 0xA3, 0x8C, 0x67, 0x80, 0xFC, 0x88,
- 0x14, 0x0B, 0x15, 0x10, 0x0F, 0xC7, 0x40, 0xD4,
- 0xF1, 0xF9, 0x0E, 0x1A, 0xCE, 0xB9, 0x1E, 0xA1,
- 0x72, 0x8E, 0xD7, 0x78, 0x39, 0xCD, 0xF4, 0x5D,
- 0x2A, 0x59, 0x26, 0x34, 0xF2, 0x73, 0x0B, 0xA0,
- 0x02, 0x51, 0x2C, 0x03, 0xA3, 0xA7, 0x43, 0x13,
- 0xE8, 0x98, 0x2B, 0xD2, 0x53, 0xF8, 0xEE, 0x91,
- 0x7D, 0xE7, 0xE3, 0xDA, 0xD5, 0xBB, 0xC0, 0x92,
- 0x9D, 0x98, 0x01, 0x2C, 0xF9, 0xB9, 0xA0, 0xEB,
- 0xCF, 0x32, 0xFA, 0x01, 0x49, 0xA5, 0x1D, 0x9A,
- 0x76, 0x86, 0x3F, 0x40, 0xD4, 0x89, 0x8F, 0x9C,
- 0xE2, 0xE3, 0x11, 0x31, 0x37, 0xB2, 0x49, 0x28,
- 0x35, 0xC0, 0x99, 0xB6, 0xD0, 0xBC, 0x66, 0x35,
- 0xF7, 0x83, 0x5B, 0xD7, 0x37, 0x1A, 0x2B, 0x18,
- 0xA6, 0xFF, 0x8D, 0x7C, 0x81, 0xA8, 0xFC, 0x9E,
- 0xC4, 0xEC, 0x80, 0xD0, 0x98, 0xA7, 0x76, 0xCC,
- 0x9C, 0x2F, 0x7B, 0xFF, 0x8E, 0x0E, 0xBB, 0x90,
- 0xAE, 0x13, 0x06, 0xF5, 0x1C, 0x4E, 0x52, 0xF7
- }
-};
-
-/* Generate and return single random byte */
-static unsigned char randomByte(void){
- unsigned char t;
- sqlite3Prng.i++;
- t = sqlite3Prng.s[sqlite3Prng.i];
- sqlite3Prng.j += t;
- sqlite3Prng.s[sqlite3Prng.i] = sqlite3Prng.s[sqlite3Prng.j];
- sqlite3Prng.s[sqlite3Prng.j] = t;
- t += sqlite3Prng.s[sqlite3Prng.i];
- return sqlite3Prng.s[t];
-}
-
-/*
-** Return N random bytes.
-*/
-static void randomBlob(int nBuf, unsigned char *zBuf){
- int i;
- for(i=0; i>11) & 0x000007FF] ^
- r.aRand3[(iVal>>22) & 0x000003FF]
- ;
-}
-
-void testPrngArray(unsigned int iVal, unsigned int *aOut, int nOut){
- int i;
- for(i=0; izName;
- pEntry=(struct Entry *)&((unsigned char *)pEntry)[sz]
- ){
- if( zPrev ){ testPrintError("%s, ", zPrev); }
- zPrev = pEntry->zName;
- }
- testPrintError("or %s\n", zPrev);
-}
-
-int testArgSelectX(
- void *aData,
- const char *zType,
- int sz,
- const char *zArg,
- int *piOut
-){
- struct Entry { const char *zName; };
- struct Entry *pEntry;
- int nArg = strlen(zArg);
-
- int i = 0;
- int iOut = -1;
- int nOut = 0;
-
- for(pEntry=(struct Entry *)aData;
- pEntry->zName;
- pEntry=(struct Entry *)&((unsigned char *)pEntry)[sz]
- ){
- int nName = strlen(pEntry->zName);
- if( nArg<=nName && memcmp(pEntry->zName, zArg, nArg)==0 ){
- iOut = i;
- if( nName==nArg ){
- nOut = 1;
- break;
- }
- nOut++;
- }
- i++;
- }
-
- if( nOut!=1 ){
- argError(aData, zType, sz, zArg);
- }else{
- *piOut = iOut;
- }
- return (nOut!=1);
-}
-
-struct timeval zero_time;
-
-void testTimeInit(void){
- gettimeofday(&zero_time, 0);
-}
-
-int testTimeGet(void){
- struct timeval now;
- gettimeofday(&now, 0);
- return
- (((int)now.tv_sec - (int)zero_time.tv_sec)*1000) +
- (((int)now.tv_usec - (int)zero_time.tv_usec)/1000);
-}
diff --git a/ext/lsm1/lsm-test/lsmtest_win32.c b/ext/lsm1/lsm-test/lsmtest_win32.c
deleted file mode 100644
index 9472723368..0000000000
--- a/ext/lsm1/lsm-test/lsmtest_win32.c
+++ /dev/null
@@ -1,30 +0,0 @@
-
-#include "lsmtest.h"
-
-#ifdef _WIN32
-
-#define TICKS_PER_SECOND (10000000)
-#define TICKS_PER_MICROSECOND (10)
-#define TICKS_UNIX_EPOCH (116444736000000000LL)
-
-int win32GetTimeOfDay(
- struct timeval *tp,
- void *tzp
-){
- FILETIME fileTime;
- ULONGLONG ticks;
- ULONGLONG unixTicks;
-
- unused_parameter(tzp);
- memset(&fileTime, 0, sizeof(FILETIME));
- GetSystemTimeAsFileTime(&fileTime);
- ticks = (ULONGLONG)fileTime.dwHighDateTime << 32;
- ticks |= (ULONGLONG)fileTime.dwLowDateTime;
- unixTicks = ticks - TICKS_UNIX_EPOCH;
- tp->tv_sec = (long)(unixTicks / TICKS_PER_SECOND);
- unixTicks -= ((ULONGLONG)tp->tv_sec * TICKS_PER_SECOND);
- tp->tv_usec = (long)(unixTicks / TICKS_PER_MICROSECOND);
-
- return 0;
-}
-#endif
diff --git a/ext/lsm1/lsm.h b/ext/lsm1/lsm.h
deleted file mode 100644
index 48701c4c5e..0000000000
--- a/ext/lsm1/lsm.h
+++ /dev/null
@@ -1,684 +0,0 @@
-/*
-** 2011-08-10
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** This file defines the LSM API.
-*/
-#ifndef _LSM_H
-#define _LSM_H
-#include <stddef.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
-** Opaque handle types.
-*/
-typedef struct lsm_compress lsm_compress; /* Compression library functions */
-typedef struct lsm_compress_factory lsm_compress_factory;
-typedef struct lsm_cursor lsm_cursor; /* Database cursor handle */
-typedef struct lsm_db lsm_db; /* Database connection handle */
-typedef struct lsm_env lsm_env; /* Runtime environment */
-typedef struct lsm_file lsm_file; /* OS file handle */
-typedef struct lsm_mutex lsm_mutex; /* Mutex handle */
-
-/* 64-bit integer type used for file offsets. */
-typedef long long int lsm_i64; /* 64-bit signed integer type */
-
-/* Candidate values for the 3rd argument to lsm_env.xLock() */
-#define LSM_LOCK_UNLOCK 0
-#define LSM_LOCK_SHARED 1
-#define LSM_LOCK_EXCL 2
-
-/* Flags for lsm_env.xOpen() */
-#define LSM_OPEN_READONLY 0x0001
-
-/*
-** CAPI: Database Runtime Environment
-**
-** Run-time environment used by LSM
-*/
-struct lsm_env {
- int nByte; /* Size of this structure in bytes */
- int iVersion; /* Version number of this structure (1) */
- /****** file i/o ***********************************************/
- void *pVfsCtx;
- int (*xFullpath)(lsm_env*, const char *, char *, int *);
- int (*xOpen)(lsm_env*, const char *, int flags, lsm_file **);
- int (*xRead)(lsm_file *, lsm_i64, void *, int);
- int (*xWrite)(lsm_file *, lsm_i64, void *, int);
- int (*xTruncate)(lsm_file *, lsm_i64);
- int (*xSync)(lsm_file *);
- int (*xSectorSize)(lsm_file *);
- int (*xRemap)(lsm_file *, lsm_i64, void **, lsm_i64*);
- int (*xFileid)(lsm_file *, void *pBuf, int *pnBuf);
- int (*xClose)(lsm_file *);
- int (*xUnlink)(lsm_env*, const char *);
- int (*xLock)(lsm_file*, int, int);
- int (*xTestLock)(lsm_file*, int, int, int);
- int (*xShmMap)(lsm_file*, int, int, void **);
- void (*xShmBarrier)(void);
- int (*xShmUnmap)(lsm_file*, int);
- /****** memory allocation ****************************************/
- void *pMemCtx;
- void *(*xMalloc)(lsm_env*, size_t); /* malloc(3) function */
- void *(*xRealloc)(lsm_env*, void *, size_t); /* realloc(3) function */
- void (*xFree)(lsm_env*, void *); /* free(3) function */
- size_t (*xSize)(lsm_env*, void *); /* xSize function */
- /****** mutexes ****************************************************/
- void *pMutexCtx;
- int (*xMutexStatic)(lsm_env*,int,lsm_mutex**); /* Obtain a static mutex */
- int (*xMutexNew)(lsm_env*, lsm_mutex**); /* Get a new dynamic mutex */
- void (*xMutexDel)(lsm_mutex *); /* Delete an allocated mutex */
- void (*xMutexEnter)(lsm_mutex *); /* Grab a mutex */
- int (*xMutexTry)(lsm_mutex *); /* Attempt to obtain a mutex */
- void (*xMutexLeave)(lsm_mutex *); /* Leave a mutex */
- int (*xMutexHeld)(lsm_mutex *); /* Return true if mutex is held */
- int (*xMutexNotHeld)(lsm_mutex *); /* Return true if mutex not held */
- /****** other ****************************************************/
- int (*xSleep)(lsm_env*, int microseconds);
-
- /* New fields may be added in future releases, in which case the
- ** iVersion value will increase. */
-};
-
-/*
-** Values that may be passed as the second argument to xMutexStatic.
-*/
-#define LSM_MUTEX_GLOBAL 1
-#define LSM_MUTEX_HEAP 2
-
-/*
-** CAPI: LSM Error Codes
-*/
-#define LSM_OK 0
-#define LSM_ERROR 1
-#define LSM_BUSY 5
-#define LSM_NOMEM 7
-#define LSM_READONLY 8
-#define LSM_IOERR 10
-#define LSM_CORRUPT 11
-#define LSM_FULL 13
-#define LSM_CANTOPEN 14
-#define LSM_PROTOCOL 15
-#define LSM_MISUSE 21
-
-#define LSM_MISMATCH 50
-
-
-#define LSM_IOERR_NOENT (LSM_IOERR | (1<<8))
-
-/*
-** CAPI: Creating and Destroying Database Connection Handles
-**
-** Open and close a database connection handle.
-*/
-int lsm_new(lsm_env*, lsm_db **ppDb);
-int lsm_close(lsm_db *pDb);
-
-/*
-** CAPI: Connecting to a Database
-*/
-int lsm_open(lsm_db *pDb, const char *zFilename);
-
-/*
-** CAPI: Obtaining pointers to database environments
-**
-** Return a pointer to the environment used by the database connection
-** passed as the first argument. Assuming the argument is valid, this
-** function always returns a valid environment pointer - it cannot fail.
-*/
-lsm_env *lsm_get_env(lsm_db *pDb);
-
-/*
-** The lsm_default_env() function returns a pointer to the default LSM
-** environment for the current platform.
-*/
-lsm_env *lsm_default_env(void);
-
-
-/*
-** CAPI: Configuring a database connection.
-**
-** The lsm_config() function is used to configure a database connection.
-*/
-int lsm_config(lsm_db *, int, ...);
-
-/*
-** The following values may be passed as the second argument to lsm_config().
-**
-** LSM_CONFIG_AUTOFLUSH:
-** A read/write integer parameter.
-**
-** This value determines the amount of data allowed to accumulate in a
-** live in-memory tree before it is marked as old. After committing a
-** transaction, a connection checks if the size of the live in-memory tree,
-** including data structure overhead, is greater than the value of this
-** option in KB. If it is, and there is not already an old in-memory tree,
-** the live in-memory tree is marked as old.
-**
-** The maximum allowable value is 1048576 (1GB). There is no minimum
-** value. If this parameter is set to zero, then an attempt is made to
-** mark the live in-memory tree as old after each transaction is committed.
-**
-** The default value is 1024 (1MB).
-**
-** LSM_CONFIG_PAGE_SIZE:
-** A read/write integer parameter. This parameter may only be set before
-** lsm_open() has been called.
-**
-** LSM_CONFIG_BLOCK_SIZE:
-** A read/write integer parameter.
-**
-** This parameter may only be set before lsm_open() has been called. It
-** must be set to a power of two between 64 and 65536, inclusive (block
-** sizes between 64KB and 64MB).
-**
-** If the connection creates a new database, the block size of the new
-** database is set to the value of this option in KB. After lsm_open()
-** has been called, querying this parameter returns the actual block
-** size of the opened database.
-**
-** The default value is 1024 (1MB blocks).
-**
-** LSM_CONFIG_SAFETY:
-** A read/write integer parameter. Valid values are 0, 1 (the default)
-** and 2. This parameter determines how robust the database is in the
-** face of a system crash (e.g. a power failure or operating system
-** crash). As follows:
-**
-** 0 (off): No robustness. A system crash may corrupt the database.
-**
-** 1 (normal): Some robustness. A system crash may not corrupt the
-** database file, but recently committed transactions may
-** be lost following recovery.
-**
-** 2 (full): Full robustness. A system crash may not corrupt the
-** database file. Following recovery the database file
-** contains all successfully committed transactions.
-**
-** LSM_CONFIG_AUTOWORK:
-** A read/write integer parameter.
-**
-** LSM_CONFIG_AUTOCHECKPOINT:
-** A read/write integer parameter.
-**
-** If this option is set to non-zero value N, then a checkpoint is
-** automatically attempted after each N KB of data have been written to
-** the database file.
-**
-** The amount of uncheckpointed data already written to the database file
-** is a global parameter. After performing database work (writing to the
-** database file), the process checks if the total amount of uncheckpointed
-** data exceeds the value of this parameter. If so, a checkpoint is performed.
-** This means that this option may cause the connection to perform a
-** checkpoint even if the current connection has itself written very little
-** data into the database file.
-**
-** The default value is 2048 (checkpoint every 2MB).
-**
-** LSM_CONFIG_MMAP:
-** A read/write integer parameter. If this value is set to 0, then the
-** database file is accessed using ordinary read/write IO functions. Or,
-** if it is set to 1, then the database file is memory mapped and accessed
-** that way. If this parameter is set to any value N greater than 1, then
-** up to the first N KB of the file are memory mapped, and any remainder
-** accessed using read/write IO.
-**
-** The default value is 1 on 64-bit platforms and 32768 on 32-bit platforms.
-**
-**
-** LSM_CONFIG_USE_LOG:
-** A read/write boolean parameter. True (the default) to use the log
-** file normally. False otherwise.
-**
-** LSM_CONFIG_AUTOMERGE:
-** A read/write integer parameter. The minimum number of segments to
-** merge together at a time. Default value 4.
-**
-** LSM_CONFIG_MAX_FREELIST:
-** A read/write integer parameter. The maximum number of free-list
-** entries that are stored in a database checkpoint (the others are
-** stored elsewhere in the database).
-**
-** There is no reason for an application to configure or query this
-** parameter. It is only present because configuring a small value
-** makes certain parts of the lsm code easier to test.
-**
-** LSM_CONFIG_MULTIPLE_PROCESSES:
-** A read/write boolean parameter. This parameter may only be set before
-** lsm_open() has been called. If true, the library uses shared-memory
-** and posix advisory locks to co-ordinate access by clients from within
-** multiple processes. Otherwise, if false, all database clients must be
-** located in the same process. The default value is true.
-**
-** LSM_CONFIG_SET_COMPRESSION:
-** Set the compression methods used to compress and decompress database
-** content. The argument to this option should be a pointer to a structure
-** of type lsm_compress. The lsm_config() method takes a copy of the
-** structure's contents.
-**
-** This option may only be used before lsm_open() is called. Invoking it
-** after lsm_open() has been called results in an LSM_MISUSE error.
-**
-** LSM_CONFIG_GET_COMPRESSION:
-** Query the compression methods used to compress and decompress database
-** content.
-**
-** LSM_CONFIG_SET_COMPRESSION_FACTORY:
-** Configure a factory method to be invoked in case of an LSM_MISMATCH
-** error.
-**
-** LSM_CONFIG_READONLY:
-** A read/write boolean parameter. This parameter may only be set before
-** lsm_open() is called.
-*/
-#define LSM_CONFIG_AUTOFLUSH 1
-#define LSM_CONFIG_PAGE_SIZE 2
-#define LSM_CONFIG_SAFETY 3
-#define LSM_CONFIG_BLOCK_SIZE 4
-#define LSM_CONFIG_AUTOWORK 5
-#define LSM_CONFIG_MMAP 7
-#define LSM_CONFIG_USE_LOG 8
-#define LSM_CONFIG_AUTOMERGE 9
-#define LSM_CONFIG_MAX_FREELIST 10
-#define LSM_CONFIG_MULTIPLE_PROCESSES 11
-#define LSM_CONFIG_AUTOCHECKPOINT 12
-#define LSM_CONFIG_SET_COMPRESSION 13
-#define LSM_CONFIG_GET_COMPRESSION 14
-#define LSM_CONFIG_SET_COMPRESSION_FACTORY 15
-#define LSM_CONFIG_READONLY 16
-
-#define LSM_SAFETY_OFF 0
-#define LSM_SAFETY_NORMAL 1
-#define LSM_SAFETY_FULL 2
-
-/*
-** CAPI: Compression and/or Encryption Hooks
-*/
-struct lsm_compress {
- void *pCtx;
- unsigned int iId;
- int (*xBound)(void *, int nSrc);
- int (*xCompress)(void *, char *, int *, const char *, int);
- int (*xUncompress)(void *, char *, int *, const char *, int);
- void (*xFree)(void *pCtx);
-};
-
-struct lsm_compress_factory {
- void *pCtx;
- int (*xFactory)(void *, lsm_db *, unsigned int);
- void (*xFree)(void *pCtx);
-};
-
-#define LSM_COMPRESSION_EMPTY 0
-#define LSM_COMPRESSION_NONE 1
-
-/*
-** CAPI: Allocating and Freeing Memory
-**
-** Invoke the memory allocation functions that belong to environment
-** pEnv. Or the system defaults if no memory allocation functions have
-** been registered.
-*/
-void *lsm_malloc(lsm_env*, size_t);
-void *lsm_realloc(lsm_env*, void *, size_t);
-void lsm_free(lsm_env*, void *);
-
-/*
-** CAPI: Querying a Connection For Operational Data
-**
-** Query a database connection for operational statistics or data.
-*/
-int lsm_info(lsm_db *, int, ...);
-
-int lsm_get_user_version(lsm_db *, unsigned int *);
-int lsm_set_user_version(lsm_db *, unsigned int);
-
-/*
-** The following values may be passed as the second argument to lsm_info().
-**
-** LSM_INFO_NWRITE:
-** The third parameter should be of type (int *). The location pointed
-** to by the third parameter is set to the number of 4KB pages written to
-** the database file during the lifetime of this connection.
-**
-** LSM_INFO_NREAD:
-** The third parameter should be of type (int *). The location pointed
-** to by the third parameter is set to the number of 4KB pages read from
-** the database file during the lifetime of this connection.
-**
-** LSM_INFO_DB_STRUCTURE:
-** The third argument should be of type (char **). The location pointed
-** to is populated with a pointer to a nul-terminated string containing
-** the string representation of a Tcl data-structure reflecting the
-** current structure of the database file. Specifically, the current state
-** of the worker snapshot. The returned string should be eventually freed
-** by the caller using lsm_free().
-**
-** The returned list contains one element for each level in the database,
-** in order from most to least recent. Each element contains a
-** single element for each segment comprising the corresponding level,
-** starting with the lhs segment, then each of the rhs segments (if any)
-** in order from most to least recent.
-**
-** Each segment element is itself a list of 4 integer values, as follows:
-**
-** <ol><li> First page of segment
-** <li> Last page of segment
-** <li> Root page of segment (if applicable)
-** <li> Total number of pages in segment
-** </ol>
-**
-** LSM_INFO_ARRAY_STRUCTURE:
-** There should be two arguments passed following this option (i.e. a
-** total of four arguments passed to lsm_info()). The first argument
-** should be the page number of the first page in a database array
-** (perhaps obtained from an earlier INFO_DB_STRUCTURE call). The second
-** trailing argument should be of type (char **). The location pointed
-** to is populated with a pointer to a nul-terminated string that must
-** be eventually freed using lsm_free() by the caller.
-**
-** The output string contains the text representation of a Tcl list of
-** integers. Each pair of integers represent a range of pages used by
-** the identified array. For example, if the array occupies database
-** pages 993 to 1024, then pages 2048 to 2777, then the returned string
-** will be "993 1024 2048 2777".
-**
-** If the specified integer argument does not correspond to the first
-** page of any database array, LSM_ERROR is returned and the output
-** pointer is set to a NULL value.
-**
-** LSM_INFO_LOG_STRUCTURE:
-** The third argument should be of type (char **). The location pointed
-** to is populated with a pointer to a nul-terminated string containing
-** the string representation of a Tcl data-structure. The returned
-** string should be eventually freed by the caller using lsm_free().
-**
-** The Tcl structure returned is a list of six integers that describe
-** the current structure of the log file.
-**
-** LSM_INFO_ARRAY_PAGES:
-**
-** LSM_INFO_PAGE_ASCII_DUMP:
-** As with LSM_INFO_ARRAY_STRUCTURE, there should be two arguments passed
-** with calls that specify this option - an integer page number and a
-** (char **) used to return a nul-terminated string that must be later
-** freed using lsm_free(). In this case the output string is populated
-** with a human-readable description of the page content.
-**
-** If the page cannot be decoded, it is not an error. In this case the
-** human-readable output message will report the system's failure to
-** interpret the page data.
-**
-** LSM_INFO_PAGE_HEX_DUMP:
-** This argument is similar to PAGE_ASCII_DUMP, except that keys and
-** values are represented using hexadecimal notation instead of ascii.
-**
-** LSM_INFO_FREELIST:
-** The third argument should be of type (char **). The location pointed
-** to is populated with a pointer to a nul-terminated string containing
-** the string representation of a Tcl data-structure. The returned
-** string should be eventually freed by the caller using lsm_free().
-**
-** The Tcl structure returned is a list containing one element for each
-** free block in the database. The element itself consists of two
-** integers - the block number and the id of the snapshot that freed it.
-**
-** LSM_INFO_CHECKPOINT_SIZE:
-** The third argument should be of type (int *). The location pointed to
-** by this argument is populated with the number of KB written to the
-** database file since the most recent checkpoint.
-**
-** LSM_INFO_TREE_SIZE:
-** If this value is passed as the second argument to an lsm_info() call, it
-** should be followed by two arguments of type (int *) (for a total of four
-** arguments).
-**
-** At any time, there are either one or two tree structures held in shared
-** memory that new database clients will access (there may also be additional
-** tree structures being used by older clients - this API does not provide
-** information on them). One tree structure - the current tree - is used to
-** accumulate new data written to the database. The other tree structure -
-** the old tree - is a read-only tree holding older data and may be flushed
-** to disk at any time.
-**
-** Assuming no error occurs, the location pointed to by the first of the two
-** (int *) arguments is set to the size of the old in-memory tree in KB.
-** The second is set to the size of the current, or live in-memory tree.
-**
-** LSM_INFO_COMPRESSION_ID:
-** This value should be followed by a single argument of type
-** (unsigned int *). If successful, the location pointed to is populated
-** with the database compression id before returning.
-*/
-#define LSM_INFO_NWRITE 1
-#define LSM_INFO_NREAD 2
-#define LSM_INFO_DB_STRUCTURE 3
-#define LSM_INFO_LOG_STRUCTURE 4
-#define LSM_INFO_ARRAY_STRUCTURE 5
-#define LSM_INFO_PAGE_ASCII_DUMP 6
-#define LSM_INFO_PAGE_HEX_DUMP 7
-#define LSM_INFO_FREELIST 8
-#define LSM_INFO_ARRAY_PAGES 9
-#define LSM_INFO_CHECKPOINT_SIZE 10
-#define LSM_INFO_TREE_SIZE 11
-#define LSM_INFO_FREELIST_SIZE 12
-#define LSM_INFO_COMPRESSION_ID 13
-
-
-/*
-** CAPI: Opening and Closing Write Transactions
-**
-** These functions are used to open and close transactions and nested
-** sub-transactions.
-**
-** The lsm_begin() function is used to open transactions and sub-transactions.
-** A successful call to lsm_begin() ensures that there are at least iLevel
-** nested transactions open. To open a top-level transaction, pass iLevel=1.
-** To open a sub-transaction within the top-level transaction, iLevel=2.
-** Passing iLevel=0 is a no-op.
-**
-** lsm_commit() is used to commit transactions and sub-transactions. A
-** successful call to lsm_commit() ensures that there are at most iLevel
-** nested transactions open. To commit a top-level transaction, pass iLevel=0.
-** To commit all sub-transactions inside the main transaction, pass iLevel=1.
-**
-** Function lsm_rollback() is used to roll back transactions and
-** sub-transactions. A successful call to lsm_rollback() restores the database
-** to the state it was in when the iLevel'th nested sub-transaction (if any)
-** was first opened. And then closes transactions to ensure that there are
-** at most iLevel nested transactions open. Passing iLevel=0 rolls back and
-** closes the top-level transaction. iLevel=1 also rolls back the top-level
-** transaction, but leaves it open. iLevel=2 rolls back the sub-transaction
-** nested directly inside the top-level transaction (and leaves it open).
-*/
-int lsm_begin(lsm_db *pDb, int iLevel);
-int lsm_commit(lsm_db *pDb, int iLevel);
-int lsm_rollback(lsm_db *pDb, int iLevel);
-
-/*
-** CAPI: Writing to a Database
-**
-** Write a new value into the database. If a value with a duplicate key
-** already exists it is replaced.
-*/
-int lsm_insert(lsm_db*, const void *pKey, int nKey, const void *pVal, int nVal);
-
-/*
-** Delete a value from the database. No error is returned if the specified
-** key value does not exist in the database.
-*/
-int lsm_delete(lsm_db *, const void *pKey, int nKey);
-
-/*
-** Delete all database entries with keys that are greater than (pKey1/nKey1)
-** and smaller than (pKey2/nKey2). Note that keys (pKey1/nKey1) and
-** (pKey2/nKey2) themselves, if they exist in the database, are not deleted.
-**
-** Return LSM_OK if successful, or an LSM error code otherwise.
-*/
-int lsm_delete_range(lsm_db *,
- const void *pKey1, int nKey1, const void *pKey2, int nKey2
-);
-
-/*
-** CAPI: Explicit Database Work and Checkpointing
-**
-** This function is called by a thread to work on the database structure.
-*/
-int lsm_work(lsm_db *pDb, int nMerge, int nKB, int *pnWrite);
-
-int lsm_flush(lsm_db *pDb);
-
-/*
-** Attempt to checkpoint the current database snapshot. Return an LSM
-** error code if an error occurs or LSM_OK otherwise.
-**
-** If the current snapshot has already been checkpointed, calling this
-** function is a no-op. In this case if pnKB is not NULL, *pnKB is
-** set to 0. Or, if the current snapshot is successfully checkpointed
-** by this function and pbKB is not NULL, *pnKB is set to the number
-** of bytes written to the database file since the previous checkpoint
-** (the same measure as returned by the LSM_INFO_CHECKPOINT_SIZE query).
-*/
-int lsm_checkpoint(lsm_db *pDb, int *pnKB);
-
-/*
-** CAPI: Opening and Closing Database Cursors
-**
-** Open and close a database cursor.
-*/
-int lsm_csr_open(lsm_db *pDb, lsm_cursor **ppCsr);
-int lsm_csr_close(lsm_cursor *pCsr);
-
-/*
-** CAPI: Positioning Database Cursors
-**
-** If the fourth parameter is LSM_SEEK_EQ, LSM_SEEK_GE or LSM_SEEK_LE,
-** this function searches the database for an entry with key (pKey/nKey).
-** If an error occurs, an LSM error code is returned. Otherwise, LSM_OK.
-**
-** If no error occurs and the requested key is present in the database, the
-** cursor is left pointing to the entry with the specified key. Or, if the
-** specified key is not present in the database the state of the cursor
-** depends on the value passed as the final parameter, as follows:
-**
-** LSM_SEEK_EQ:
-** The cursor is left at EOF (invalidated). A call to lsm_csr_valid()
-** returns non-zero.
-**
-** LSM_SEEK_LE:
-** The cursor is left pointing to the largest key in the database that
-** is smaller than (pKey/nKey). If the database contains no keys smaller
-** than (pKey/nKey), the cursor is left at EOF.
-**
-** LSM_SEEK_GE:
-** The cursor is left pointing to the smallest key in the database that
-** is larger than (pKey/nKey). If the database contains no keys larger
-** than (pKey/nKey), the cursor is left at EOF.
-**
-** If the fourth parameter is LSM_SEEK_LEFAST, this function searches the
-** database in a similar manner to LSM_SEEK_LE, with two differences:
-**
-** <ol><li> Even if a key can be found (the cursor is not left at EOF), the
-** lsm_csr_value() function may not be used (attempts to do so return
-** LSM_MISUSE).
-**
-** <li> The key that the cursor is left pointing to may be one that has
-** been recently deleted from the database. In this case it is
-** guaranteed that the returned key is larger than any key currently
-** in the database that is less than or equal to (pKey/nKey).
-** </ol>
-**
-** LSM_SEEK_LEFAST requests are intended to be used to allocate database
-** keys.
-*/
-int lsm_csr_seek(lsm_cursor *pCsr, const void *pKey, int nKey, int eSeek);
-
-int lsm_csr_first(lsm_cursor *pCsr);
-int lsm_csr_last(lsm_cursor *pCsr);
-
-/*
-** Advance the specified cursor to the next or previous key in the database.
-** Return LSM_OK if successful, or an LSM error code otherwise.
-**
-** Functions lsm_csr_seek(), lsm_csr_first() and lsm_csr_last() are "seek"
-** functions. Whether or not lsm_csr_next and lsm_csr_prev may be called
-** successfully also depends on the most recent seek function called on
-** the cursor. Specifically:
-**
-** <ul>
-** <li> At least one seek function must have been called on the cursor.
-** <li> To call lsm_csr_next(), the most recent call to a seek function must
-** have been either lsm_csr_first() or a call to lsm_csr_seek() specifying
-** LSM_SEEK_GE.
-** <li> To call lsm_csr_prev(), the most recent call to a seek function must
-** have been either lsm_csr_last() or a call to lsm_csr_seek() specifying
-** LSM_SEEK_LE.
-** </ul>
-**
-** Otherwise, if the above conditions are not met when lsm_csr_next or
-** lsm_csr_prev is called, LSM_MISUSE is returned and the cursor position
-** remains unchanged.
-*/
-int lsm_csr_next(lsm_cursor *pCsr);
-int lsm_csr_prev(lsm_cursor *pCsr);
-
-/*
-** Values that may be passed as the fourth argument to lsm_csr_seek().
-*/
-#define LSM_SEEK_LEFAST -2
-#define LSM_SEEK_LE -1
-#define LSM_SEEK_EQ 0
-#define LSM_SEEK_GE 1
-
-/*
-** CAPI: Extracting Data From Database Cursors
-**
-** Retrieve data from a database cursor.
-*/
-int lsm_csr_valid(lsm_cursor *pCsr);
-int lsm_csr_key(lsm_cursor *pCsr, const void **ppKey, int *pnKey);
-int lsm_csr_value(lsm_cursor *pCsr, const void **ppVal, int *pnVal);
-
-/*
-** If no error occurs, this function compares the database key passed via
-** the pKey/nKey arguments with the key that the cursor passed as the first
-** argument currently points to. If the cursors key is less than, equal to
-** or greater than pKey/nKey, *piRes is set to less than, equal to or greater
-** than zero before returning. LSM_OK is returned in this case.
-**
-** Or, if an error occurs, an LSM error code is returned and the final
-** value of *piRes is undefined. If the cursor does not point to a valid
-** key when this function is called, LSM_MISUSE is returned.
-*/
-int lsm_csr_cmp(lsm_cursor *pCsr, const void *pKey, int nKey, int *piRes);
-
-/*
-** CAPI: Change these!!
-**
-** Configure a callback to which debugging and other messages should
-** be directed. Only useful for debugging lsm.
-*/
-void lsm_config_log(lsm_db *, void (*)(void *, int, const char *), void *);
-
-/*
-** Configure a callback that is invoked if the database connection ever
-** writes to the database file.
-*/
-void lsm_config_work_hook(lsm_db *, void (*)(lsm_db *, void *), void *);
-
-/* ENDOFAPI */
-#ifdef __cplusplus
-} /* End of the 'extern "C"' block */
-#endif
-#endif /* ifndef _LSM_H */
diff --git a/ext/lsm1/lsmInt.h b/ext/lsm1/lsmInt.h
deleted file mode 100644
index 4e3c5e59ce..0000000000
--- a/ext/lsm1/lsmInt.h
+++ /dev/null
@@ -1,997 +0,0 @@
-/*
-** 2011-08-18
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-** Internal structure definitions for the LSM module.
-*/
-#ifndef _LSM_INT_H
-#define _LSM_INT_H
-
-#include "lsm.h"
-#include <assert.h>
-#include <string.h>
-
-#include <stdarg.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-
-#ifdef _WIN32
-# ifdef _MSC_VER
-# define snprintf _snprintf
-# endif
-#else
-# include <unistd.h>
-#endif
-
-#ifdef NDEBUG
-# ifdef LSM_DEBUG_EXPENSIVE
-# undef LSM_DEBUG_EXPENSIVE
-# endif
-# ifdef LSM_DEBUG
-# undef LSM_DEBUG
-# endif
-#else
-# ifndef LSM_DEBUG
-# define LSM_DEBUG
-# endif
-#endif
-
-/* #define LSM_DEBUG_EXPENSIVE 1 */
-
-/*
-** Default values for various data structure parameters. These may be
-** overridden by calls to lsm_config().
-*/
-#define LSM_DFLT_PAGE_SIZE (4 * 1024)
-#define LSM_DFLT_BLOCK_SIZE (1 * 1024 * 1024)
-#define LSM_DFLT_AUTOFLUSH (1 * 1024 * 1024)
-#define LSM_DFLT_AUTOCHECKPOINT (i64)(2 * 1024 * 1024)
-#define LSM_DFLT_AUTOWORK 1
-#define LSM_DFLT_LOG_SIZE (128*1024)
-#define LSM_DFLT_AUTOMERGE 4
-#define LSM_DFLT_SAFETY LSM_SAFETY_NORMAL
-#define LSM_DFLT_MMAP (LSM_IS_64_BIT ? 1 : 32768)
-#define LSM_DFLT_MULTIPLE_PROCESSES 1
-#define LSM_DFLT_USE_LOG 1
-
-/* Initial values for log file checksums. These are only used if the
-** database file does not contain a valid checkpoint. */
-#define LSM_CKSUM0_INIT 42
-#define LSM_CKSUM1_INIT 42
-
-/* "mmap" mode is currently only used in environments with 64-bit address
-** spaces. The following macro is used to test for this. */
-#define LSM_IS_64_BIT (sizeof(void*)==8)
-
-#define LSM_AUTOWORK_QUANT 32
-
-typedef struct Database Database;
-typedef struct DbLog DbLog;
-typedef struct FileSystem FileSystem;
-typedef struct Freelist Freelist;
-typedef struct FreelistEntry FreelistEntry;
-typedef struct Level Level;
-typedef struct LogMark LogMark;
-typedef struct LogRegion LogRegion;
-typedef struct LogWriter LogWriter;
-typedef struct LsmString LsmString;
-typedef struct Mempool Mempool;
-typedef struct Merge Merge;
-typedef struct MergeInput MergeInput;
-typedef struct MetaPage MetaPage;
-typedef struct MultiCursor MultiCursor;
-typedef struct Page Page;
-typedef struct Redirect Redirect;
-typedef struct Segment Segment;
-typedef struct SegmentMerger SegmentMerger;
-typedef struct ShmChunk ShmChunk;
-typedef struct ShmHeader ShmHeader;
-typedef struct ShmReader ShmReader;
-typedef struct Snapshot Snapshot;
-typedef struct TransMark TransMark;
-typedef struct Tree Tree;
-typedef struct TreeCursor TreeCursor;
-typedef struct TreeHeader TreeHeader;
-typedef struct TreeMark TreeMark;
-typedef struct TreeRoot TreeRoot;
-
-#ifndef _SQLITEINT_H_
-typedef unsigned char u8;
-typedef unsigned short int u16;
-typedef unsigned int u32;
-typedef lsm_i64 i64;
-typedef unsigned long long int u64;
-#endif
-
-/* A page number is a 64-bit integer. */
-typedef i64 LsmPgno;
-
-#ifdef LSM_DEBUG
-int lsmErrorBkpt(int);
-#else
-# define lsmErrorBkpt(x) (x)
-#endif
-
-#define LSM_PROTOCOL_BKPT lsmErrorBkpt(LSM_PROTOCOL)
-#define LSM_IOERR_BKPT lsmErrorBkpt(LSM_IOERR)
-#define LSM_NOMEM_BKPT lsmErrorBkpt(LSM_NOMEM)
-#define LSM_CORRUPT_BKPT lsmErrorBkpt(LSM_CORRUPT)
-#define LSM_MISUSE_BKPT lsmErrorBkpt(LSM_MISUSE)
-
-#define unused_parameter(x) (void)(x)
-#define array_size(x) (sizeof(x)/sizeof(x[0]))
-
-
-/* The size of each shared-memory chunk */
-#define LSM_SHM_CHUNK_SIZE (32*1024)
-
-/* The number of bytes reserved at the start of each shm chunk for MM. */
-#define LSM_SHM_CHUNK_HDR (sizeof(ShmChunk))
-
-/* The number of available read locks. */
-#define LSM_LOCK_NREADER 6
-
-/* The number of available read-write client locks. */
-#define LSM_LOCK_NRWCLIENT 16
-
-/* Lock definitions.
-*/
-#define LSM_LOCK_DMS1 1 /* Serialize connect/disconnect ops */
-#define LSM_LOCK_DMS2 2 /* Read-write connections */
-#define LSM_LOCK_DMS3 3 /* Read-only connections */
-#define LSM_LOCK_WRITER 4
-#define LSM_LOCK_WORKER 5
-#define LSM_LOCK_CHECKPOINTER 6
-#define LSM_LOCK_ROTRANS 7
-#define LSM_LOCK_READER(i) ((i) + LSM_LOCK_ROTRANS + 1)
-#define LSM_LOCK_RWCLIENT(i) ((i) + LSM_LOCK_READER(LSM_LOCK_NREADER))
-
-#define LSM_N_LOCK LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT)
-
-/*
-** Meta-page size and usable size.
-*/
-#define LSM_META_PAGE_SIZE 4096
-
-#define LSM_META_RW_PAGE_SIZE (LSM_META_PAGE_SIZE - LSM_N_LOCK)
-
-/*
-** Hard limit on the number of free-list entries that may be stored in
-** a checkpoint (the remainder are stored as a system record in the LSM).
-** See also LSM_CONFIG_MAX_FREELIST.
-*/
-#define LSM_MAX_FREELIST_ENTRIES 24
-
-#define LSM_MAX_BLOCK_REDIRECTS 16
-
-#define LSM_ATTEMPTS_BEFORE_PROTOCOL 10000
-
-
-/*
-** Each entry stored in the LSM (or in-memory tree structure) has an
-** associated mask of the following flags.
-*/
-#define LSM_START_DELETE 0x01 /* Start of open-ended delete range */
-#define LSM_END_DELETE 0x02 /* End of open-ended delete range */
-#define LSM_POINT_DELETE 0x04 /* Delete this key */
-#define LSM_INSERT 0x08 /* Insert this key and value */
-#define LSM_SEPARATOR 0x10 /* True if entry is separator key only */
-#define LSM_SYSTEMKEY 0x20 /* True if entry is a system key (FREELIST) */
-
-#define LSM_CONTIGUOUS 0x40 /* Used in lsm_tree.c */
-
-/*
-** A string that can grow by appending.
-*/
-struct LsmString {
- lsm_env *pEnv; /* Run-time environment */
- int n; /* Size of string. -1 indicates error */
- int nAlloc; /* Space allocated for z[] */
- char *z; /* The string content */
-};
-
-typedef struct LsmFile LsmFile;
-struct LsmFile {
- lsm_file *pFile;
- LsmFile *pNext;
-};
-
-/*
-** An instance of the following type is used to store an ordered list of
-** u32 values.
-**
-** Note: This is a place-holder implementation. It should be replaced by
-** a version that avoids making a single large allocation when the array
-** contains a large number of values. For this reason, the internals of
-** this object should only manipulated by the intArrayXXX() functions in
-** lsm_tree.c.
-*/
-typedef struct IntArray IntArray;
-struct IntArray {
- int nAlloc;
- int nArray;
- u32 *aArray;
-};
-
-struct Redirect {
- int n; /* Number of redirects */
- struct RedirectEntry {
- int iFrom;
- int iTo;
- } *a;
-};
-
-/*
-** An instance of this structure represents a point in the history of the
-** tree structure to roll back to. Refer to comments in lsm_tree.c for
-** details.
-*/
-struct TreeMark {
- u32 iRoot; /* Offset of root node in shm file */
- u32 nHeight; /* Current height of tree structure */
- u32 iWrite; /* Write offset in shm file */
- u32 nChunk; /* Number of chunks in shared-memory file */
- u32 iFirst; /* First chunk in linked list */
- u32 iNextShmid; /* Next id to allocate */
- int iRollback; /* Index in lsm->rollback to revert to */
-};
-
-/*
-** An instance of this structure represents a point in the database log.
-*/
-struct LogMark {
- i64 iOff; /* Offset into log (see lsm_log.c) */
- int nBuf; /* Size of in-memory buffer here */
- u8 aBuf[8]; /* Bytes of content in aBuf[] */
- u32 cksum0; /* Checksum 0 at offset (iOff-nBuf) */
- u32 cksum1; /* Checksum 1 at offset (iOff-nBuf) */
-};
-
-struct TransMark {
- TreeMark tree;
- LogMark log;
-};
-
-/*
-** A structure that defines the start and end offsets of a region in the
-** log file. The size of the region in bytes is (iEnd - iStart), so if
-** iEnd==iStart the region is zero bytes in size.
-*/
-struct LogRegion {
- i64 iStart; /* Start of region in log file */
- i64 iEnd; /* End of region in log file */
-};
-
-struct DbLog {
- u32 cksum0; /* Checksum 0 at offset iOff */
- u32 cksum1; /* Checksum 1 at offset iOff */
- i64 iSnapshotId; /* Log space has been reclaimed to this ss */
- LogRegion aRegion[3]; /* Log file regions (see docs in lsm_log.c) */
-};
-
-struct TreeRoot {
- u32 iRoot;
- u32 nHeight;
- u32 nByte; /* Total size of this tree in bytes */
- u32 iTransId;
-};
-
-/*
-** Tree header structure.
-*/
-struct TreeHeader {
- u32 iUsedShmid; /* Id of first shm chunk used by this tree */
- u32 iNextShmid; /* Shm-id of next chunk allocated */
- u32 iFirst; /* Chunk number of smallest shm-id */
- u32 nChunk; /* Number of chunks in shared-memory file */
- TreeRoot root; /* Root and height of current tree */
- u32 iWrite; /* Write offset in shm file */
- TreeRoot oldroot; /* Root and height of the previous tree */
- u32 iOldShmid; /* Last shm-id used by previous tree */
- u32 iUsrVersion; /* get/set_user_version() value */
- i64 iOldLog; /* Log offset associated with old tree */
- u32 oldcksum0;
- u32 oldcksum1;
- DbLog log; /* Current layout of log file */
- u32 aCksum[2]; /* Checksums 1 and 2. */
-};
-
-/*
-** Database handle structure.
-**
-** mLock:
-** A bitmask representing the locks currently held by the connection.
-** An LSM database supports N distinct locks, where N is some number less
-** than or equal to 32. Locks are numbered starting from 1 (see the
-** definitions for LSM_LOCK_WRITER and co.).
-**
-** The least significant 32-bits in mLock represent EXCLUSIVE locks. The
-** most significant are SHARED locks. So, if a connection holds a SHARED
-** lock on lock region iLock, then the following is true:
-**
-** (mLock & ((iLock+32-1) << 1))
-**
-** Or for an EXCLUSIVE lock:
-**
-** (mLock & ((iLock-1) << 1))
-**
-** pCsr:
-** Points to the head of a linked list that contains all currently open
-** cursors. Once this list becomes empty, the user has no outstanding
-** cursors and the database handle can be successfully closed.
-**
-** pCsrCache:
-** This list contains cursor objects that have been closed using
-** lsm_csr_close(). Each time a cursor is closed, it is shifted from
-** the pCsr list to this list. When a new cursor is opened, this list
-** is inspected to see if there exists a cursor object that can be
-** reused. This is an optimization only.
-*/
-struct lsm_db {
-
- /* Database handle configuration */
- lsm_env *pEnv; /* runtime environment */
- int (*xCmp)(void *, int, void *, int); /* Compare function */
-
- /* Values configured by calls to lsm_config */
- int eSafety; /* LSM_SAFETY_OFF, NORMAL or FULL */
- int bAutowork; /* Configured by LSM_CONFIG_AUTOWORK */
- int nTreeLimit; /* Configured by LSM_CONFIG_AUTOFLUSH */
- int nMerge; /* Configured by LSM_CONFIG_AUTOMERGE */
- int bUseLog; /* Configured by LSM_CONFIG_USE_LOG */
- int nDfltPgsz; /* Configured by LSM_CONFIG_PAGE_SIZE */
- int nDfltBlksz; /* Configured by LSM_CONFIG_BLOCK_SIZE */
- int nMaxFreelist; /* Configured by LSM_CONFIG_MAX_FREELIST */
- int iMmap; /* Configured by LSM_CONFIG_MMAP */
- i64 nAutockpt; /* Configured by LSM_CONFIG_AUTOCHECKPOINT */
- int bMultiProc; /* Configured by L_C_MULTIPLE_PROCESSES */
- int bReadonly; /* Configured by LSM_CONFIG_READONLY */
- lsm_compress compress; /* Compression callbacks */
- lsm_compress_factory factory; /* Compression callback factory */
-
- /* Sub-system handles */
- FileSystem *pFS; /* On-disk portion of database */
- Database *pDatabase; /* Database shared data */
-
- int iRwclient; /* Read-write client lock held (-1 == none) */
-
- /* Client transaction context */
- Snapshot *pClient; /* Client snapshot */
- int iReader; /* Read lock held (-1 == unlocked) */
- int bRoTrans; /* True if a read-only db trans is open */
- MultiCursor *pCsr; /* List of all open cursors */
- LogWriter *pLogWriter; /* Context for writing to the log file */
- int nTransOpen; /* Number of opened write transactions */
- int nTransAlloc; /* Allocated size of aTrans[] array */
- TransMark *aTrans; /* Array of marks for transaction rollback */
- IntArray rollback; /* List of tree-nodes to roll back */
- int bDiscardOld; /* True if lsmTreeDiscardOld() was called */
-
- MultiCursor *pCsrCache; /* List of all closed cursors */
-
- /* Worker context */
- Snapshot *pWorker; /* Worker snapshot (or NULL) */
- Freelist *pFreelist; /* See sortedNewToplevel() */
- int bUseFreelist; /* True to use pFreelist */
- int bIncrMerge; /* True if currently doing a merge */
-
- int bInFactory; /* True if within factory.xFactory() */
-
- /* Debugging message callback */
- void (*xLog)(void *, int, const char *);
- void *pLogCtx;
-
- /* Work done notification callback */
- void (*xWork)(lsm_db *, void *);
- void *pWorkCtx;
-
- u64 mLock; /* Mask of current locks. See lsmShmLock(). */
- lsm_db *pNext; /* Next connection to same database */
-
- int nShm; /* Size of apShm[] array */
- void **apShm; /* Shared memory chunks */
- ShmHeader *pShmhdr; /* Live shared-memory header */
- TreeHeader treehdr; /* Local copy of tree-header */
- u32 aSnapshot[LSM_META_PAGE_SIZE / sizeof(u32)];
-};
-
-struct Segment {
- LsmPgno iFirst; /* First page of this run */
- LsmPgno iLastPg; /* Last page of this run */
- LsmPgno iRoot; /* Root page number (if any) */
- LsmPgno nSize; /* Size of this run in pages */
-
- Redirect *pRedirect; /* Block redirects (or NULL) */
-};
-
-/*
-** iSplitTopic/pSplitKey/nSplitKey:
-** If nRight>0, this buffer contains a copy of the largest key that has
-** already been written to the left-hand-side of the level.
-*/
-struct Level {
- Segment lhs; /* Left-hand (main) segment */
- int nRight; /* Size of apRight[] array */
- Segment *aRhs; /* Old segments being merged into this */
- int iSplitTopic; /* Split key topic (if nRight>0) */
- void *pSplitKey; /* Pointer to split-key (if nRight>0) */
- int nSplitKey; /* Number of bytes in split-key */
-
- u16 iAge; /* Number of times data has been written */
- u16 flags; /* Mask of LEVEL_XXX bits */
- Merge *pMerge; /* Merge operation currently underway */
- Level *pNext; /* Next level in tree */
-};
-
-/*
-** The Level.flags field is set to a combination of the following bits.
-**
-** LEVEL_FREELIST_ONLY:
-** Set if the level consists entirely of free-list entries.
-**
-** LEVEL_INCOMPLETE:
-** This is set while a new toplevel level is being constructed. It is
-** never set for any level other than a new toplevel.
-*/
-#define LEVEL_FREELIST_ONLY 0x0001
-#define LEVEL_INCOMPLETE 0x0002
-
-
-/*
-** A structure describing an ongoing merge. There is an instance of this
-** structure for every Level currently undergoing a merge in the worker
-** snapshot.
-**
-** It is assumed that code that uses an instance of this structure has
-** access to the associated Level struct.
-**
-** iOutputOff:
-** The byte offset to write to next within the last page of the
-** output segment.
-*/
-struct MergeInput {
- LsmPgno iPg; /* Page on which next input is stored */
- int iCell; /* Cell containing next input to merge */
-};
-struct Merge {
- int nInput; /* Number of input runs being merged */
- MergeInput *aInput; /* Array nInput entries in size */
- MergeInput splitkey; /* Location in file of current splitkey */
- int nSkip; /* Number of separators entries to skip */
- int iOutputOff; /* Write offset on output page */
- LsmPgno iCurrentPtr; /* Current pointer value */
-};
-
-/*
-** The first argument to this macro is a pointer to a Segment structure.
-** Returns true if the structure instance indicates that the separators
-** array is valid.
-*/
-#define segmentHasSeparators(pSegment) ((pSegment)->sep.iFirst>0)
-
-/*
-** The values that accompany the lock held by a database reader.
-*/
-struct ShmReader {
- u32 iTreeId;
- i64 iLsmId;
-};
-
-/*
-** An instance of this structure is stored in the first shared-memory
-** page. The shared-memory header.
-**
-** bWriter:
-** Immediately after opening a write transaction taking the WRITER lock,
-** each writer client sets this flag. It is cleared right before the
-** WRITER lock is relinquished. If a subsequent writer finds that this
-** flag is already set when a write transaction is opened, this indicates
-** that a previous writer failed mid-transaction.
-**
-** iMetaPage:
-** If the database file does not contain a valid, synced, checkpoint, this
-** value is set to 0. Otherwise, it is set to the meta-page number that
-** contains the most recently written checkpoint (either 1 or 2).
-**
-** hdr1, hdr2:
-** The two copies of the in-memory tree header. Two copies are required
-** in case a writer fails while updating one of them.
-*/
-struct ShmHeader {
- u32 aSnap1[LSM_META_PAGE_SIZE / 4];
- u32 aSnap2[LSM_META_PAGE_SIZE / 4];
- u32 bWriter;
- u32 iMetaPage;
- TreeHeader hdr1;
- TreeHeader hdr2;
- ShmReader aReader[LSM_LOCK_NREADER];
-};
-
-/*
-** An instance of this structure is stored at the start of each shared-memory
-** chunk except the first (which is the header chunk - see above).
-*/
-struct ShmChunk {
- u32 iShmid;
- u32 iNext;
-};
-
-/*
-** Maximum number of shared-memory chunks allowed in the *-shm file. Since
-** each shared-memory chunk is 32KB in size, this is a theoretical limit only.
-*/
-#define LSM_MAX_SHMCHUNKS (1<<30)
-
-/* Return true if shm-sequence "a" is larger than or equal to "b" */
-#define shm_sequence_ge(a, b) (((u32)a-(u32)b) < LSM_MAX_SHMCHUNKS)
-
-#define LSM_APPLIST_SZ 4
-
-/*
-** An instance of the following structure stores the in-memory part of
-** the current free block list. This structure is to the free block list
-** as the in-memory tree is to the users database content. The contents
-** of the free block list is found by merging the in-memory components
-** with those stored in the LSM, just as the contents of the database is
-** found by merging the in-memory tree with the user data entries in the
-** LSM.
-**
-** Each FreelistEntry structure in the array represents either an insert
-** or delete operation on the free-list. For deletes, the FreelistEntry.iId
-** field is set to -1. For inserts, it is set to zero or greater.
-**
-** The array of FreelistEntry structures is always sorted in order of
-** block number (ascending).
-**
-** When the in-memory free block list is written into the LSM, each insert
-** operation is written separately. The entry key is the bitwise inverse
-** of the block number as a 32-bit big-endian integer. This is done so that
-** the entries in the LSM are sorted in descending order of block id.
-** The associated value is the snapshot id, formated as a varint.
-*/
-struct Freelist {
- FreelistEntry *aEntry; /* Free list entries */
- int nEntry; /* Number of valid slots in aEntry[] */
- int nAlloc; /* Allocated size of aEntry[] */
-};
-struct FreelistEntry {
- u32 iBlk; /* Block number */
- i64 iId; /* Largest snapshot id to use this block */
-};
-
-/*
-** A snapshot of a database. A snapshot contains all the information required
-** to read or write a database file on disk. See the description of struct
-** Database below for further details.
-*/
-struct Snapshot {
- Database *pDatabase; /* Database this snapshot belongs to */
- u32 iCmpId; /* Id of compression scheme */
- Level *pLevel; /* Pointer to level 0 of snapshot (or NULL) */
- i64 iId; /* Snapshot id */
- i64 iLogOff; /* Log file offset */
- Redirect redirect; /* Block redirection array */
-
- /* Used by worker snapshots only */
- int nBlock; /* Number of blocks in database file */
- LsmPgno aiAppend[LSM_APPLIST_SZ]; /* Append point list */
- Freelist freelist; /* Free block list */
- u32 nWrite; /* Total number of pages written to disk */
-};
-#define LSM_INITIAL_SNAPSHOT_ID 11
-
-/*
-** Functions from file "lsm_ckpt.c".
-*/
-int lsmCheckpointWrite(lsm_db *, u32 *);
-int lsmCheckpointLevels(lsm_db *, int, void **, int *);
-int lsmCheckpointLoadLevels(lsm_db *pDb, void *pVal, int nVal);
-
-int lsmCheckpointRecover(lsm_db *);
-int lsmCheckpointDeserialize(lsm_db *, int, u32 *, Snapshot **);
-
-int lsmCheckpointLoadWorker(lsm_db *pDb);
-int lsmCheckpointStore(lsm_db *pDb, int);
-
-int lsmCheckpointLoad(lsm_db *pDb, int *);
-int lsmCheckpointLoadOk(lsm_db *pDb, int);
-int lsmCheckpointClientCacheOk(lsm_db *);
-
-u32 lsmCheckpointNBlock(u32 *);
-i64 lsmCheckpointId(u32 *, int);
-u32 lsmCheckpointNWrite(u32 *, int);
-i64 lsmCheckpointLogOffset(u32 *);
-int lsmCheckpointPgsz(u32 *);
-int lsmCheckpointBlksz(u32 *);
-void lsmCheckpointLogoffset(u32 *aCkpt, DbLog *pLog);
-void lsmCheckpointZeroLogoffset(lsm_db *);
-
-int lsmCheckpointSaveWorker(lsm_db *pDb, int);
-int lsmDatabaseFull(lsm_db *pDb);
-int lsmCheckpointSynced(lsm_db *pDb, i64 *piId, i64 *piLog, u32 *pnWrite);
-
-int lsmCheckpointSize(lsm_db *db, int *pnByte);
-
-int lsmInfoCompressionId(lsm_db *db, u32 *piCmpId);
-
-/*
-** Functions from file "lsm_tree.c".
-*/
-int lsmTreeNew(lsm_env *, int (*)(void *, int, void *, int), Tree **ppTree);
-void lsmTreeRelease(lsm_env *, Tree *);
-int lsmTreeInit(lsm_db *);
-int lsmTreeRepair(lsm_db *);
-
-void lsmTreeMakeOld(lsm_db *pDb);
-void lsmTreeDiscardOld(lsm_db *pDb);
-int lsmTreeHasOld(lsm_db *pDb);
-
-int lsmTreeSize(lsm_db *);
-int lsmTreeEndTransaction(lsm_db *pDb, int bCommit);
-int lsmTreeLoadHeader(lsm_db *pDb, int *);
-int lsmTreeLoadHeaderOk(lsm_db *, int);
-
-int lsmTreeInsert(lsm_db *pDb, void *pKey, int nKey, void *pVal, int nVal);
-int lsmTreeDelete(lsm_db *db, void *pKey1, int nKey1, void *pKey2, int nKey2);
-void lsmTreeRollback(lsm_db *pDb, TreeMark *pMark);
-void lsmTreeMark(lsm_db *pDb, TreeMark *pMark);
-
-int lsmTreeCursorNew(lsm_db *pDb, int, TreeCursor **);
-void lsmTreeCursorDestroy(TreeCursor *);
-
-int lsmTreeCursorSeek(TreeCursor *pCsr, void *pKey, int nKey, int *pRes);
-int lsmTreeCursorNext(TreeCursor *pCsr);
-int lsmTreeCursorPrev(TreeCursor *pCsr);
-int lsmTreeCursorEnd(TreeCursor *pCsr, int bLast);
-void lsmTreeCursorReset(TreeCursor *pCsr);
-int lsmTreeCursorKey(TreeCursor *pCsr, int *pFlags, void **ppKey, int *pnKey);
-int lsmTreeCursorFlags(TreeCursor *pCsr);
-int lsmTreeCursorValue(TreeCursor *pCsr, void **ppVal, int *pnVal);
-int lsmTreeCursorValid(TreeCursor *pCsr);
-int lsmTreeCursorSave(TreeCursor *pCsr);
-
-void lsmFlagsToString(int flags, char *zFlags);
-
-/*
-** Functions from file "mem.c".
-*/
-void *lsmMalloc(lsm_env*, size_t);
-void lsmFree(lsm_env*, void *);
-void *lsmRealloc(lsm_env*, void *, size_t);
-void *lsmReallocOrFree(lsm_env*, void *, size_t);
-void *lsmReallocOrFreeRc(lsm_env *, void *, size_t, int *);
-
-void *lsmMallocZeroRc(lsm_env*, size_t, int *);
-void *lsmMallocRc(lsm_env*, size_t, int *);
-
-void *lsmMallocZero(lsm_env *pEnv, size_t);
-char *lsmMallocStrdup(lsm_env *pEnv, const char *);
-
-/*
-** Functions from file "lsm_mutex.c".
-*/
-int lsmMutexStatic(lsm_env*, int, lsm_mutex **);
-int lsmMutexNew(lsm_env*, lsm_mutex **);
-void lsmMutexDel(lsm_env*, lsm_mutex *);
-void lsmMutexEnter(lsm_env*, lsm_mutex *);
-int lsmMutexTry(lsm_env*, lsm_mutex *);
-void lsmMutexLeave(lsm_env*, lsm_mutex *);
-
-#ifndef NDEBUG
-int lsmMutexHeld(lsm_env *, lsm_mutex *);
-int lsmMutexNotHeld(lsm_env *, lsm_mutex *);
-#endif
-
-/**************************************************************************
-** Start of functions from "lsm_file.c".
-*/
-int lsmFsOpen(lsm_db *, const char *, int);
-int lsmFsOpenLog(lsm_db *, int *);
-void lsmFsCloseLog(lsm_db *);
-void lsmFsClose(FileSystem *);
-
-int lsmFsUnmap(FileSystem *);
-
-int lsmFsConfigure(lsm_db *db);
-
-int lsmFsBlockSize(FileSystem *);
-void lsmFsSetBlockSize(FileSystem *, int);
-int lsmFsMoveBlock(FileSystem *pFS, Segment *pSeg, int iTo, int iFrom);
-
-int lsmFsPageSize(FileSystem *);
-void lsmFsSetPageSize(FileSystem *, int);
-
-int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId);
-
-/* Creating, populating, gobbling and deleting sorted runs. */
-void lsmFsGobble(lsm_db *, Segment *, LsmPgno *, int);
-int lsmFsSortedDelete(FileSystem *, Snapshot *, int, Segment *);
-int lsmFsSortedFinish(FileSystem *, Segment *);
-int lsmFsSortedAppend(FileSystem *, Snapshot *, Level *, int, Page **);
-int lsmFsSortedPadding(FileSystem *, Snapshot *, Segment *);
-
-/* Functions to retrieve the lsm_env pointer from a FileSystem or Page object */
-lsm_env *lsmFsEnv(FileSystem *);
-lsm_env *lsmPageEnv(Page *);
-FileSystem *lsmPageFS(Page *);
-
-int lsmFsSectorSize(FileSystem *);
-
-void lsmSortedSplitkey(lsm_db *, Level *, int *);
-
-/* Reading sorted run content. */
-int lsmFsDbPageLast(FileSystem *pFS, Segment *pSeg, Page **ppPg);
-int lsmFsDbPageGet(FileSystem *, Segment *, LsmPgno, Page **);
-int lsmFsDbPageNext(Segment *, Page *, int eDir, Page **);
-
-u8 *lsmFsPageData(Page *, int *);
-int lsmFsPageRelease(Page *);
-int lsmFsPagePersist(Page *);
-void lsmFsPageRef(Page *);
-LsmPgno lsmFsPageNumber(Page *);
-
-int lsmFsNRead(FileSystem *);
-int lsmFsNWrite(FileSystem *);
-
-int lsmFsMetaPageGet(FileSystem *, int, int, MetaPage **);
-int lsmFsMetaPageRelease(MetaPage *);
-u8 *lsmFsMetaPageData(MetaPage *, int *);
-
-#ifdef LSM_DEBUG
-int lsmFsDbPageIsLast(Segment *pSeg, Page *pPg);
-int lsmFsIntegrityCheck(lsm_db *);
-#endif
-
-LsmPgno lsmFsRedirectPage(FileSystem *, Redirect *, LsmPgno);
-
-int lsmFsPageWritable(Page *);
-
-/* Functions to read, write and sync the log file. */
-int lsmFsWriteLog(FileSystem *pFS, i64 iOff, LsmString *pStr);
-int lsmFsSyncLog(FileSystem *pFS);
-int lsmFsReadLog(FileSystem *pFS, i64 iOff, int nRead, LsmString *pStr);
-int lsmFsTruncateLog(FileSystem *pFS, i64 nByte);
-int lsmFsTruncateDb(FileSystem *pFS, i64 nByte);
-int lsmFsCloseAndDeleteLog(FileSystem *pFS);
-
-LsmFile *lsmFsDeferClose(FileSystem *pFS);
-
-/* And to sync the db file */
-int lsmFsSyncDb(FileSystem *, int);
-
-void lsmFsFlushWaiting(FileSystem *, int *);
-
-/* Used by lsm_info(ARRAY_STRUCTURE) and lsm_config(MMAP) */
-int lsmInfoArrayStructure(lsm_db *pDb, int bBlock, LsmPgno iFirst, char **pz);
-int lsmInfoArrayPages(lsm_db *pDb, LsmPgno iFirst, char **pzOut);
-int lsmConfigMmap(lsm_db *pDb, int *piParam);
-
-int lsmEnvOpen(lsm_env *, const char *, int, lsm_file **);
-int lsmEnvClose(lsm_env *pEnv, lsm_file *pFile);
-int lsmEnvLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int eLock);
-int lsmEnvTestLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int nLock, int);
-
-int lsmEnvShmMap(lsm_env *, lsm_file *, int, int, void **);
-void lsmEnvShmBarrier(lsm_env *);
-void lsmEnvShmUnmap(lsm_env *, lsm_file *, int);
-
-void lsmEnvSleep(lsm_env *, int);
-
-int lsmFsReadSyncedId(lsm_db *db, int, i64 *piVal);
-
-int lsmFsSegmentContainsPg(FileSystem *pFS, Segment *, LsmPgno, int *);
-
-void lsmFsPurgeCache(FileSystem *);
-
-/*
-** End of functions from "lsm_file.c".
-**************************************************************************/
-
-/*
-** Functions from file "lsm_sorted.c".
-*/
-int lsmInfoPageDump(lsm_db *, LsmPgno, int, char **);
-void lsmSortedCleanup(lsm_db *);
-int lsmSortedAutoWork(lsm_db *, int nUnit);
-
-int lsmSortedWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *);
-
-int lsmSaveWorker(lsm_db *, int);
-
-int lsmFlushTreeToDisk(lsm_db *pDb);
-
-void lsmSortedRemap(lsm_db *pDb);
-
-void lsmSortedFreeLevel(lsm_env *pEnv, Level *);
-
-int lsmSortedAdvanceAll(lsm_db *pDb);
-
-int lsmSortedLoadMerge(lsm_db *, Level *, u32 *, int *);
-int lsmSortedLoadFreelist(lsm_db *pDb, void **, int *);
-
-void *lsmSortedSplitKey(Level *pLevel, int *pnByte);
-
-void lsmSortedSaveTreeCursors(lsm_db *);
-
-int lsmMCursorNew(lsm_db *, MultiCursor **);
-void lsmMCursorClose(MultiCursor *, int);
-int lsmMCursorSeek(MultiCursor *, int, void *, int , int);
-int lsmMCursorFirst(MultiCursor *);
-int lsmMCursorPrev(MultiCursor *);
-int lsmMCursorLast(MultiCursor *);
-int lsmMCursorValid(MultiCursor *);
-int lsmMCursorNext(MultiCursor *);
-int lsmMCursorKey(MultiCursor *, void **, int *);
-int lsmMCursorValue(MultiCursor *, void **, int *);
-int lsmMCursorType(MultiCursor *, int *);
-lsm_db *lsmMCursorDb(MultiCursor *);
-void lsmMCursorFreeCache(lsm_db *);
-
-int lsmSaveCursors(lsm_db *pDb);
-int lsmRestoreCursors(lsm_db *pDb);
-
-void lsmSortedDumpStructure(lsm_db *pDb, Snapshot *, int, int, const char *);
-void lsmFsDumpBlocklists(lsm_db *);
-
-void lsmSortedExpandBtreePage(Page *pPg, int nOrig);
-
-void lsmPutU32(u8 *, u32);
-u32 lsmGetU32(u8 *);
-u64 lsmGetU64(u8 *);
-
-/*
-** Functions from "lsm_varint.c".
-*/
-int lsmVarintPut32(u8 *, int);
-int lsmVarintGet32(u8 *, int *);
-int lsmVarintPut64(u8 *aData, i64 iVal);
-int lsmVarintGet64(const u8 *aData, i64 *piVal);
-
-int lsmVarintLen64(i64);
-
-int lsmVarintLen32(int);
-int lsmVarintSize(u8 c);
-
-/*
-** Functions from file "lsm_main.c".
-*/
-void lsmLogMessage(lsm_db *, int, const char *, ...);
-int lsmInfoFreelist(lsm_db *pDb, char **pzOut);
-
-/*
-** Functions from file "lsm_log.c".
-*/
-int lsmLogBegin(lsm_db *pDb);
-int lsmLogWrite(lsm_db *, int, void *, int, void *, int);
-int lsmLogCommit(lsm_db *);
-void lsmLogEnd(lsm_db *pDb, int bCommit);
-void lsmLogTell(lsm_db *, LogMark *);
-void lsmLogSeek(lsm_db *, LogMark *);
-void lsmLogClose(lsm_db *);
-
-int lsmLogRecover(lsm_db *);
-int lsmInfoLogStructure(lsm_db *pDb, char **pzVal);
-
-/* Valid values for the second argument to lsmLogWrite(). */
-#define LSM_WRITE 0x06
-#define LSM_DELETE 0x08
-#define LSM_DRANGE 0x0A
-
-/**************************************************************************
-** Functions from file "lsm_shared.c".
-*/
-
-int lsmDbDatabaseConnect(lsm_db*, const char *);
-void lsmDbDatabaseRelease(lsm_db *);
-
-int lsmBeginReadTrans(lsm_db *);
-int lsmBeginWriteTrans(lsm_db *);
-int lsmBeginFlush(lsm_db *);
-
-int lsmDetectRoTrans(lsm_db *db, int *);
-int lsmBeginRoTrans(lsm_db *db);
-
-int lsmBeginWork(lsm_db *);
-void lsmFinishWork(lsm_db *, int, int *);
-
-int lsmFinishRecovery(lsm_db *);
-void lsmFinishReadTrans(lsm_db *);
-int lsmFinishWriteTrans(lsm_db *, int);
-int lsmFinishFlush(lsm_db *, int);
-
-int lsmSnapshotSetFreelist(lsm_db *, int *, int);
-
-Snapshot *lsmDbSnapshotClient(lsm_db *);
-Snapshot *lsmDbSnapshotWorker(lsm_db *);
-
-void lsmSnapshotSetCkptid(Snapshot *, i64);
-
-Level *lsmDbSnapshotLevel(Snapshot *);
-void lsmDbSnapshotSetLevel(Snapshot *, Level *);
-
-void lsmDbRecoveryComplete(lsm_db *, int);
-
-int lsmBlockAllocate(lsm_db *, int, int *);
-int lsmBlockFree(lsm_db *, int);
-int lsmBlockRefree(lsm_db *, int);
-
-void lsmFreelistDeltaBegin(lsm_db *);
-void lsmFreelistDeltaEnd(lsm_db *);
-int lsmFreelistDelta(lsm_db *pDb);
-
-DbLog *lsmDatabaseLog(lsm_db *pDb);
-
-#ifdef LSM_DEBUG
- int lsmHoldingClientMutex(lsm_db *pDb);
- int lsmShmAssertLock(lsm_db *db, int iLock, int eOp);
- int lsmShmAssertWorker(lsm_db *db);
-#endif
-
-void lsmFreeSnapshot(lsm_env *, Snapshot *);
-
-
-/* Candidate values for the 3rd argument to lsmShmLock() */
-#define LSM_LOCK_UNLOCK 0
-#define LSM_LOCK_SHARED 1
-#define LSM_LOCK_EXCL 2
-
-int lsmShmCacheChunks(lsm_db *db, int nChunk);
-int lsmShmLock(lsm_db *db, int iLock, int eOp, int bBlock);
-int lsmShmTestLock(lsm_db *db, int iLock, int nLock, int eOp);
-void lsmShmBarrier(lsm_db *db);
-
-#ifdef LSM_DEBUG
-void lsmShmHasLock(lsm_db *db, int iLock, int eOp);
-#else
-# define lsmShmHasLock(x,y,z)
-#endif
-
-int lsmReadlock(lsm_db *, i64 iLsm, u32 iShmMin, u32 iShmMax);
-
-int lsmLsmInUse(lsm_db *db, i64 iLsmId, int *pbInUse);
-int lsmTreeInUse(lsm_db *db, u32 iLsmId, int *pbInUse);
-int lsmFreelistAppend(lsm_env *pEnv, Freelist *p, int iBlk, i64 iId);
-
-int lsmDbMultiProc(lsm_db *);
-void lsmDbDeferredClose(lsm_db *, lsm_file *, LsmFile *);
-LsmFile *lsmDbRecycleFd(lsm_db *);
-
-int lsmWalkFreelist(lsm_db *, int, int (*)(void *, int, i64), void *);
-
-int lsmCheckCompressionId(lsm_db *, u32);
-
-
-/**************************************************************************
-** functions in lsm_str.c
-*/
-void lsmStringInit(LsmString*, lsm_env *pEnv);
-int lsmStringExtend(LsmString*, int);
-int lsmStringAppend(LsmString*, const char *, int);
-void lsmStringVAppendf(LsmString*, const char *zFormat, va_list, va_list);
-void lsmStringAppendf(LsmString*, const char *zFormat, ...);
-void lsmStringClear(LsmString*);
-char *lsmMallocPrintf(lsm_env*, const char*, ...);
-int lsmStringBinAppend(LsmString *pStr, const u8 *a, int n);
-
-int lsmStrlen(const char *zName);
-
-
-
-/*
-** Round a number up to the nearest multiple of 8 (a value that is already
-** a multiple of 8 is unchanged). This is used to force 8-byte alignment.
-*/
-#define ROUND8(x) (((x)+7)&~7)
-
-#define LSM_MIN(x,y) ((x)>(y) ? (y) : (x))
-#define LSM_MAX(x,y) ((x)>(y) ? (x) : (y))
-
-#endif
diff --git a/ext/lsm1/lsm_ckpt.c b/ext/lsm1/lsm_ckpt.c
deleted file mode 100644
index dbfa1a61ff..0000000000
--- a/ext/lsm1/lsm_ckpt.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/*
-** 2011-09-11
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** This file contains code to read and write checkpoints.
-**
-** A checkpoint represents the database layout at a single point in time.
-** It includes a log offset. When an existing database is opened, the
-** current state is determined by reading the newest checkpoint and updating
-** it with all committed transactions from the log that follow the specified
-** offset.
-*/
-#include "lsmInt.h"
-
-/*
-** CHECKPOINT BLOB FORMAT:
-**
-** A checkpoint blob is a series of unsigned 32-bit integers stored in
-** big-endian byte order. As follows:
-**
-** Checkpoint header (see the CKPT_HDR_XXX #defines):
-**
-** 1. The checkpoint id MSW.
-** 2. The checkpoint id LSW.
-** 3. The number of integer values in the entire checkpoint, including
-** the two checksum values.
-** 4. The compression scheme id.
-** 5. The total number of blocks in the database.
-** 6. The block size.
-** 7. The number of levels.
-** 8. The nominal database page size.
-** 9. The number of pages (in total) written to the database file.
-**
-** Log pointer:
-**
-** 1. The log offset MSW.
-** 2. The log offset LSW.
-** 3. Log checksum 0.
-** 4. Log checksum 1.
-**
-** Note that the "log offset" is not the literal byte offset. Instead,
-** it is the byte offset multiplied by 2, with least significant bit
-** toggled each time the log pointer value is changed. This is to make
-** sure that this field changes each time the log pointer is updated,
-** even if the log file itself is disabled. See lsmTreeMakeOld().
-**
-** See ckptExportLog() and ckptImportLog().
-**
-** Append points:
-**
-** 8 integers (4 * 64-bit page numbers). See ckptExportAppendlist().
-**
-** For each level in the database, a level record. Formatted as follows:
-**
-** 0. Age of the level (least significant 16-bits). And flags mask (most
-** significant 16-bits).
-** 1. The number of right-hand segments (nRight, possibly 0),
-** 2. Segment record for left-hand segment (8 integers defined below),
-** 3. Segment record for each right-hand segment (8 integers defined below),
-** 4. If nRight>0, The number of segments involved in the merge
-** 5. if nRight>0, Current nSkip value (see Merge structure defn.),
-** 6. For each segment in the merge:
-** 6a. Page number of next cell to read during merge (this field
-** is 64-bits - 2 integers)
-** 6b. Cell number of next cell to read during merge
-** 7. Page containing current split-key (64-bits - 2 integers).
-** 8. Cell within page containing current split-key.
-** 9. Current pointer value (64-bits - 2 integers).
-**
-** The block redirect array:
-**
-** 1. Number of redirections (maximum LSM_MAX_BLOCK_REDIRECTS).
-** 2. For each redirection:
-** a. "from" block number
-** b. "to" block number
-**
-** The in-memory freelist entries. Each entry is either an insert or a
-** delete. The in-memory freelist is to the free-block-list as the
-** in-memory tree is to the users database content.
-**
-** 1. Number of free-list entries stored in checkpoint header.
-** 2. Number of free blocks (in total).
-** 3. Total number of blocks freed during database lifetime.
-** 4. For each entry:
-** 4a. Block number of free block.
-** 4b. A 64-bit integer (MSW followed by LSW). -1 for a delete entry,
-** or the associated checkpoint id for an insert.
-**
-** The checksum:
-**
-** 1. Checksum value 1.
-** 2. Checksum value 2.
-**
-** In the above, a segment record consists of the following four 64-bit
-** fields (converted to 2 * u32 by storing the MSW followed by LSW):
-**
-** 1. First page of array,
-** 2. Last page of array,
-** 3. Root page of array (or 0),
-** 4. Size of array in pages.
-*/
-
-/*
-** LARGE NUMBERS OF LEVEL RECORDS:
-**
-** A limit on the number of rhs segments that may be present in the database
-** file. Defining this limit ensures that all level records fit within
-** the 4096 byte limit for checkpoint blobs.
-**
-** The number of right-hand-side segments in a database is counted as
-** follows:
-**
-** * For each level in the database not undergoing a merge, add 1.
-**
-** * For each level in the database that is undergoing a merge, add
-** the number of segments on the rhs of the level.
-**
-** A level record not undergoing a merge is 10 integers. A level record
-** with nRhs rhs segments and (nRhs+1) input segments (i.e. including the
-** separators from the next level) is (11*nRhs+20) integers. The maximum
-** per right-hand-side level is therefore 21 integers. So the maximum
-** size of all level records in a checkpoint is 21*40=840 integers.
-**
-** TODO: Before pointer values were changed from 32 to 64 bits, the above
-** used to come to 420 bytes - leaving significant space for a free-list
-** prefix. No more. To fix this, reduce the size of the level records in
-** a db snapshot, and improve management of the free-list tail in
-** lsm_sorted.c.
-*/
-#define LSM_MAX_RHS_SEGMENTS 40
-
-/*
-** LARGE NUMBERS OF FREELIST ENTRIES:
-**
-** There is also a limit (LSM_MAX_FREELIST_ENTRIES - defined in lsmInt.h)
-** on the number of free-list entries stored in a checkpoint. Since each
-** free-list entry consists of 3 integers, the maximum free-list size is
-** 3*100=300 integers. Combined with the limit on rhs segments defined
-** above, this ensures that a checkpoint always fits within a 4096 byte
-** meta page.
-**
-** If the database contains more than 100 free blocks, the "overflow" flag
-** in the checkpoint header is set and the remainder are stored in the
-** system FREELIST entry in the LSM (along with user data). The value
-** accompanying the FREELIST key in the LSM is, like a checkpoint, an array
-** of 32-bit big-endian integers. As follows:
-**
-** For each entry:
-** a. Block number of free block.
-** b. MSW of associated checkpoint id.
-** c. LSW of associated checkpoint id.
-**
-** The number of entries is not required - it is implied by the size of the
-** value blob containing the integer array.
-**
-** Note that the limit defined by LSM_MAX_FREELIST_ENTRIES is a hard limit.
-** The actual value used may be configured using LSM_CONFIG_MAX_FREELIST.
-*/
-
-/*
-** The argument to this macro must be of type u32. On a little-endian
-** architecture, it returns the u32 value that results from interpreting
-** the 4 bytes as a big-endian value. On a big-endian architecture, it
-** returns the value that would be produced by interpreting the 4 bytes
-** of the input value as a little-endian integer.
-*/
-#define BYTESWAP32(x) ( \
- (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \
- + (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \
-)
-
-/* LSM_LITTLE_ENDIAN reads the first byte of the int constant 1, so it is
-** non-zero on a little-endian host and zero on a big-endian host. */
-static const int one = 1;
-#define LSM_LITTLE_ENDIAN (*(u8 *)(&one))
-
-/* Sizes, in integers, of various parts of the checkpoint. */
-#define CKPT_HDR_SIZE 9
-#define CKPT_LOGPTR_SIZE 4
-#define CKPT_APPENDLIST_SIZE (LSM_APPLIST_SZ * 2)
-
-/* A #define to describe each integer in the checkpoint header. */
-#define CKPT_HDR_ID_MSW 0
-#define CKPT_HDR_ID_LSW 1
-#define CKPT_HDR_NCKPT 2
-#define CKPT_HDR_CMPID 3
-#define CKPT_HDR_NBLOCK 4
-#define CKPT_HDR_BLKSZ 5
-#define CKPT_HDR_NLEVEL 6
-#define CKPT_HDR_PGSZ 7
-#define CKPT_HDR_NWRITE 8
-
-#define CKPT_HDR_LO_MSW 9
-#define CKPT_HDR_LO_LSW 10
-#define CKPT_HDR_LO_CKSUM1 11
-#define CKPT_HDR_LO_CKSUM2 12
-
-typedef struct CkptBuffer CkptBuffer;
-
-/*
-** Dynamic buffer used to accumulate data for a checkpoint.
-*/
-struct CkptBuffer {
- lsm_env *pEnv; /* Environment handed to the allocator when aCkpt[] grows */
- int nAlloc; /* Number of u32 slots currently allocated at aCkpt[] */
- u32 *aCkpt; /* Array of 32-bit checkpoint values */
-};
-
-/*
-** Calculate the checksum of the checkpoint specified by arguments aCkpt and
-** nCkpt. Store the checksum in *piCksum1 and *piCksum2 before returning.
-**
-** The value of the nCkpt parameter includes the two checksum values at
-** the end of the checkpoint. They are not used as inputs to the checksum
-** calculation. The checksum is based on the array of (nCkpt-2) integers
-** at aCkpt[].
-*/
-static void ckptChecksum(u32 *aCkpt, u32 nCkpt, u32 *piCksum1, u32 *piCksum2){
- u32 i;
- u32 cksum1 = 1;
- u32 cksum2 = 2;
-
- if( nCkpt % 2 ){
- cksum1 += aCkpt[nCkpt-3] & 0x0000FFFF;
- cksum2 += aCkpt[nCkpt-3] & 0xFFFF0000;
- }
-
- for(i=0; (i+3)=p->nAlloc ){
- int nNew = LSM_MAX(8, iIdx*2);
- p->aCkpt = (u32 *)lsmReallocOrFree(p->pEnv, p->aCkpt, nNew*sizeof(u32));
- if( !p->aCkpt ){
- *pRc = LSM_NOMEM_BKPT;
- return;
- }
- p->nAlloc = nNew;
- }
- p->aCkpt[iIdx] = iVal;
-}
-
-/*
-** Argument aInt points to an array nInt elements in size. Switch the
-** endian-ness of each element of the array.
-*/
-static void ckptChangeEndianness(u32 *aInt, int nInt){
- if( LSM_LITTLE_ENDIAN ){
- int i;
- for(i=0; iaCkpt, nCkpt+2, &aCksum[0], &aCksum[1]);
- ckptSetValue(p, nCkpt, aCksum[0], pRc);
- ckptSetValue(p, nCkpt+1, aCksum[1], pRc);
- }
-}
-
-/*
-** Store the 64-bit value iVal in checkpoint buffer p as two consecutive
-** 32-bit values, most-significant word first, beginning at index *piOut.
-** *piOut is advanced by two. Any error is reported through *pRc by
-** ckptSetValue().
-*/
-static void ckptAppend64(CkptBuffer *p, int *piOut, i64 iVal, int *pRc){
-  const int iMsw = *piOut;        /* Slot for the most-significant word */
-  const int iLsw = iMsw + 1;      /* Slot for the least-significant word */
-
-  ckptSetValue(p, iMsw, (iVal >> 32) & 0xFFFFFFFF, pRc);
-  ckptSetValue(p, iLsw, (iVal & 0xFFFFFFFF), pRc);
-  *piOut = iMsw + 2;
-}
-
-/*
-** Combine a[0] (most-significant word) and a[1] (least-significant word)
-** into a single 64-bit integer and return it.
-**
-** The two words are assembled as unsigned 64-bit quantities and only then
-** converted to i64. Left-shifting a signed 64-bit value so that its top bit
-** becomes set (as happens for the -1 stored with a freelist delete entry)
-** is undefined behaviour in C.
-*/
-static i64 ckptRead64(u32 *a){
-  return (i64)((((u64)a[0]) << 32) | (u64)a[1]);
-}
-
-/*
-** Read the 64-bit value stored in a[*piIn] and a[*piIn+1] (see
-** ckptRead64()) and advance *piIn past both words.
-*/
-static i64 ckptGobble64(u32 *a, int *piIn){
-  u32 *aWord = &a[*piIn];
-  *piIn += 2;
-  return ckptRead64(aWord);
-}
-
-
-/*
-** Append an 8-value segment record corresponding to pSeg to the checkpoint
-** buffer passed as the second argument. Each of the four fields below is
-** written with ckptAppend64() and so occupies two 32-bit values. *piOut is
-** the index of the next free slot and is advanced past the record.
-*/
-static void ckptExportSegment(
- Segment *pSeg,
- CkptBuffer *p,
- int *piOut,
- int *pRc
-){
- ckptAppend64(p, piOut, pSeg->iFirst, pRc);
- ckptAppend64(p, piOut, pSeg->iLastPg, pRc);
- ckptAppend64(p, piOut, pSeg->iRoot, pRc);
- ckptAppend64(p, piOut, pSeg->nSize, pRc);
-}
-
-/*
-** Append a level record for pLevel to checkpoint buffer p, starting at
-** index *piOut. The record consists of:
-**
-**   * one value holding the level age (low 16 bits) and flags (high 16 bits),
-**   * the number of right-hand segments,
-**   * the segment record for the left-hand segment,
-**
-** and, if the level has a Merge object:
-**
-**   * a segment record for each right-hand segment,
-**   * the merge input count and nSkip value,
-**   * a page number / cell number pair for each merge input,
-**   * the split-key page and cell, and the current pointer value.
-**
-** *piOut is updated to the index following the record. Errors are
-** accumulated in *pRc by ckptSetValue().
-*/
-static void ckptExportLevel(
-  Level *pLevel,                  /* Level object to serialize */
-  CkptBuffer *p,                  /* Append new level record to this ckpt */
-  int *piOut,                     /* IN/OUT: Size of checkpoint so far */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  int iOut = *piOut;
-  Merge *pMerge;
-
-  pMerge = pLevel->pMerge;
-
-  /* Convert flags to u32 before shifting so that the shift is performed on
-  ** an unsigned value rather than on a promoted (signed) int. */
-  ckptSetValue(p, iOut++, (u32)pLevel->iAge + (((u32)pLevel->flags)<<16), pRc);
-  ckptSetValue(p, iOut++, pLevel->nRight, pRc);
-  ckptExportSegment(&pLevel->lhs, p, &iOut, pRc);
-
-  assert( (pLevel->nRight>0)==(pMerge!=0) );
-  if( pMerge ){
-    int i;
-    for(i=0; i<pLevel->nRight; i++){
-      ckptExportSegment(&pLevel->aRhs[i], p, &iOut, pRc);
-    }
-    assert( pMerge->nInput==pLevel->nRight
-         || pMerge->nInput==pLevel->nRight+1
-    );
-    ckptSetValue(p, iOut++, pMerge->nInput, pRc);
-    ckptSetValue(p, iOut++, pMerge->nSkip, pRc);
-    for(i=0; i<pMerge->nInput; i++){
-      ckptAppend64(p, &iOut, pMerge->aInput[i].iPg, pRc);
-      ckptSetValue(p, iOut++, pMerge->aInput[i].iCell, pRc);
-    }
-    ckptAppend64(p, &iOut, pMerge->splitkey.iPg, pRc);
-    ckptSetValue(p, iOut++, pMerge->splitkey.iCell, pRc);
-    ckptAppend64(p, &iOut, pMerge->iCurrentPtr, pRc);
-  }
-
-  *piOut = iOut;
-}
-
-/*
-** Write the four log-pointer values of the checkpoint (slots
-** CKPT_HDR_LO_MSW through CKPT_HDR_LO_CKSUM2) into buffer p.
-**
-** If bFlush is true the values come from the tree header: the old log
-** offset followed by the two old checksum values. Otherwise the same four
-** slots are copied from the aSnap2[] snapshot in the shared-memory header.
-** On return *piOut is the index of the slot following the log pointer.
-*/
-static void ckptExportLog(
-  lsm_db *pDb,
-  int bFlush,
-  CkptBuffer *p,
-  int *piOut,
-  int *pRc
-){
-  int iSlot = *piOut;
-
-  assert( iSlot==CKPT_HDR_LO_MSW );
-
-  if( !bFlush ){
-    /* Copy the existing log pointer, slot for slot. */
-    while( iSlot<=CKPT_HDR_LO_CKSUM2 ){
-      ckptSetValue(p, iSlot, pDb->pShmhdr->aSnap2[iSlot], pRc);
-      iSlot++;
-    }
-  }else{
-    ckptAppend64(p, &iSlot, pDb->treehdr.iOldLog, pRc);
-    ckptSetValue(p, iSlot, pDb->treehdr.oldcksum0, pRc);
-    iSlot++;
-    ckptSetValue(p, iSlot, pDb->treehdr.oldcksum1, pRc);
-    iSlot++;
-  }
-
-  assert( *pRc || iSlot==CKPT_HDR_LO_CKSUM2+1 );
-  *piOut = iSlot;
-}
-
-static void ckptExportAppendlist(
- lsm_db *db, /* Database connection */
- CkptBuffer *p, /* Checkpoint buffer to write to */
- int *piOut, /* IN/OUT: Offset within checkpoint buffer */
- int *pRc /* IN/OUT: Error code */
-){
- int i;
- LsmPgno *aiAppend = db->pWorker->aiAppend;
-
- for(i=0; ipFS; /* File system object */
- Snapshot *pSnap = pDb->pWorker; /* Worker snapshot */
- int nLevel = 0; /* Number of levels in checkpoint */
- int iLevel; /* Used to count out nLevel levels */
- int iOut = 0; /* Current offset in aCkpt[] */
- Level *pLevel; /* Level iterator */
- int i; /* Iterator used while serializing freelist */
- CkptBuffer ckpt;
-
- /* Initialize the output buffer */
- memset(&ckpt, 0, sizeof(CkptBuffer));
- ckpt.pEnv = pDb->pEnv;
- iOut = CKPT_HDR_SIZE;
-
- /* Write the log offset into the checkpoint. */
- ckptExportLog(pDb, bLog, &ckpt, &iOut, &rc);
-
- /* Write the append-point list */
- ckptExportAppendlist(pDb, &ckpt, &iOut, &rc);
-
- /* Figure out how many levels will be written to the checkpoint. */
- for(pLevel=lsmDbSnapshotLevel(pSnap); pLevel; pLevel=pLevel->pNext) nLevel++;
-
- /* Serialize nLevel levels. */
- iLevel = 0;
- for(pLevel=lsmDbSnapshotLevel(pSnap); iLevelpNext){
- ckptExportLevel(pLevel, &ckpt, &iOut, &rc);
- iLevel++;
- }
-
- /* Write the block-redirect list */
- ckptSetValue(&ckpt, iOut++, pSnap->redirect.n, &rc);
- for(i=0; iredirect.n; i++){
- ckptSetValue(&ckpt, iOut++, pSnap->redirect.a[i].iFrom, &rc);
- ckptSetValue(&ckpt, iOut++, pSnap->redirect.a[i].iTo, &rc);
- }
-
- /* Write the freelist */
- assert( pSnap->freelist.nEntry<=pDb->nMaxFreelist );
- if( rc==LSM_OK ){
- int nFree = pSnap->freelist.nEntry;
- ckptSetValue(&ckpt, iOut++, nFree, &rc);
- for(i=0; ifreelist.aEntry[i];
- ckptSetValue(&ckpt, iOut++, p->iBlk, &rc);
- ckptSetValue(&ckpt, iOut++, (p->iId >> 32) & 0xFFFFFFFF, &rc);
- ckptSetValue(&ckpt, iOut++, p->iId & 0xFFFFFFFF, &rc);
- }
- }
-
- /* Write the checkpoint header */
- assert( iId>=0 );
- assert( pSnap->iCmpId==pDb->compress.iId
- || pSnap->iCmpId==LSM_COMPRESSION_EMPTY
- );
- ckptSetValue(&ckpt, CKPT_HDR_ID_MSW, (u32)(iId>>32), &rc);
- ckptSetValue(&ckpt, CKPT_HDR_ID_LSW, (u32)(iId&0xFFFFFFFF), &rc);
- ckptSetValue(&ckpt, CKPT_HDR_NCKPT, iOut+2, &rc);
- ckptSetValue(&ckpt, CKPT_HDR_CMPID, pDb->compress.iId, &rc);
- ckptSetValue(&ckpt, CKPT_HDR_NBLOCK, pSnap->nBlock, &rc);
- ckptSetValue(&ckpt, CKPT_HDR_BLKSZ, lsmFsBlockSize(pFS), &rc);
- ckptSetValue(&ckpt, CKPT_HDR_NLEVEL, nLevel, &rc);
- ckptSetValue(&ckpt, CKPT_HDR_PGSZ, lsmFsPageSize(pFS), &rc);
- ckptSetValue(&ckpt, CKPT_HDR_NWRITE, pSnap->nWrite, &rc);
-
- if( bCksum ){
- ckptAddChecksum(&ckpt, iOut, &rc);
- }else{
- ckptSetValue(&ckpt, iOut, 0, &rc);
- ckptSetValue(&ckpt, iOut+1, 0, &rc);
- }
- iOut += 2;
- assert( iOut<=1024 );
-
-#ifdef LSM_LOG_FREELIST
- lsmLogMessage(pDb, rc,
- "ckptExportSnapshot(): id=%lld freelist: %d", iId, pSnap->freelist.nEntry
- );
- for(i=0; ifreelist.nEntry; i++){
- lsmLogMessage(pDb, rc,
- "ckptExportSnapshot(): iBlk=%d id=%lld",
- pSnap->freelist.aEntry[i].iBlk,
- pSnap->freelist.aEntry[i].iId
- );
- }
-#endif
-
- *ppCkpt = (void *)ckpt.aCkpt;
- if( pnCkpt ) *pnCkpt = sizeof(u32)*iOut;
- return rc;
-}
-
-
-/*
-** Helper function for ckptLoadLevels(). Read one segment record (four
-** 64-bit fields, stored as eight 32-bit values) from aIn[] starting at
-** index *piIn and store it in *pSegment. *piIn is advanced past the record.
-** The four fields populated here are asserted to be zero on entry.
-*/
-static void ckptNewSegment(
- u32 *aIn,
- int *piIn,
- Segment *pSegment /* Populate this structure */
-){
- assert( pSegment->iFirst==0 && pSegment->iLastPg==0 );
- assert( pSegment->nSize==0 && pSegment->iRoot==0 );
- pSegment->iFirst = ckptGobble64(aIn, piIn);
- pSegment->iLastPg = ckptGobble64(aIn, piIn);
- pSegment->iRoot = ckptGobble64(aIn, piIn);
- pSegment->nSize = ckptGobble64(aIn, piIn);
- assert( pSegment->iFirst );
-}
-
-/*
-** Allocate a Merge object for pLevel and populate it from the serialized
-** merge state found at aInt[*piIn]. The data read is, in order: the number
-** of merge inputs, the nSkip value, a (64-bit page number, cell number)
-** pair for each input, the split-key page and cell, and the 64-bit current
-** pointer value.
-**
-** The Merge object and its aInput[] array are obtained with a single
-** allocation. On success pLevel->pMerge is set, *piIn is advanced past the
-** data consumed and LSM_OK is returned. If the allocation fails,
-** LSM_NOMEM_BKPT is returned and *piIn is left unchanged.
-*/
-static int ckptSetupMerge(lsm_db *pDb, u32 *aInt, int *piIn, Level *pLevel){
-  Merge *pMerge;                  /* Allocated Merge object */
-  int nInput;                     /* Number of input segments in merge */
-  int iIn = *piIn;                /* Next value to read from aInt[] */
-  int i;                          /* Iterator variable */
-  int nByte;                      /* Number of bytes to allocate */
-
-  /* Allocate the Merge object. If malloc() fails, return LSM_NOMEM. */
-  nInput = (int)aInt[iIn++];
-  nByte = sizeof(Merge) + sizeof(MergeInput) * nInput;
-  pMerge = (Merge *)lsmMallocZero(pDb->pEnv, nByte);
-  if( !pMerge ) return LSM_NOMEM_BKPT;
-  pLevel->pMerge = pMerge;
-
-  /* Populate the Merge object. The aInput[] array lives immediately after
-  ** the Merge structure in the same allocation. */
-  pMerge->aInput = (MergeInput *)&pMerge[1];
-  pMerge->nInput = nInput;
-  pMerge->iOutputOff = -1;
-  pMerge->nSkip = (int)aInt[iIn++];
-  for(i=0; i<nInput; i++){
-    pMerge->aInput[i].iPg = ckptGobble64(aInt, &iIn);
-    pMerge->aInput[i].iCell = (int)aInt[iIn++];
-  }
-  pMerge->splitkey.iPg = ckptGobble64(aInt, &iIn);
-  pMerge->splitkey.iCell = (int)aInt[iIn++];
-  pMerge->iCurrentPtr = ckptGobble64(aInt, &iIn);
-
-  /* Set *piIn and return LSM_OK. */
-  *piIn = iIn;
-  return LSM_OK;
-}
-
-
-/*
-** Deserialize nLevel level records from aIn[], starting at index *piIn,
-** into a newly allocated linked list of Level objects (linked through
-** Level.pNext in the order read).
-**
-** On success *ppLevel is set to the head of the list and LSM_OK is
-** returned. If an error occurs, every Level allocated by this call is
-** freed, *ppLevel is set to 0 and the error code is returned. In both
-** cases *piIn is updated to the index reached in aIn[].
-*/
-static int ckptLoadLevels(
-  lsm_db *pDb,
-  u32 *aIn,
-  int *piIn,
-  int nLevel,
-  Level **ppLevel
-){
-  int i;
-  int rc = LSM_OK;
-  Level *pRet = 0;
-  Level **ppNext;
-  int iIn = *piIn;
-
-  ppNext = &pRet;
-  for(i=0; rc==LSM_OK && i<nLevel; i++){
-    int iRight;
-    Level *pLevel;
-
-    /* Allocate space for the Level structure */
-    pLevel = (Level *)lsmMallocZeroRc(pDb->pEnv, sizeof(Level), &rc);
-    if( rc==LSM_OK ){
-      pLevel->iAge = (u16)(aIn[iIn] & 0x0000FFFF);
-      pLevel->flags = (u16)((aIn[iIn]>>16) & 0x0000FFFF);
-      iIn++;
-      pLevel->nRight = aIn[iIn++];
-      if( pLevel->nRight ){
-        int nByte = sizeof(Segment) * pLevel->nRight;
-        pLevel->aRhs = (Segment *)lsmMallocZeroRc(pDb->pEnv, nByte, &rc);
-      }
-      if( rc==LSM_OK ){
-        *ppNext = pLevel;
-        ppNext = &pLevel->pNext;
-
-        /* Allocate the main segment */
-        ckptNewSegment(aIn, &iIn, &pLevel->lhs);
-
-        /* Allocate each of the right-hand segments, if any */
-        for(iRight=0; iRight<pLevel->nRight; iRight++){
-          ckptNewSegment(aIn, &iIn, &pLevel->aRhs[iRight]);
-        }
-
-        /* Set up the Merge object, if required */
-        if( pLevel->nRight>0 ){
-          rc = ckptSetupMerge(pDb, aIn, &iIn, pLevel);
-        }
-      }else{
-        /* The aRhs[] allocation failed before pLevel was linked into the
-        ** list, so the cleanup below cannot reach it. Free it here. */
-        lsmFree(pDb->pEnv, pLevel);
-      }
-    }
-  }
-
-  if( rc!=LSM_OK ){
-    /* An OOM must have occurred. Free any level structures allocated and
-    ** return the error to the caller. */
-    lsmSortedFreeLevel(pDb->pEnv, pRet);
-    pRet = 0;
-  }
-
-  *ppLevel = pRet;
-  *piIn = iIn;
-  return rc;
-}
-
-
-/*
-** Decode the blob pVal/nVal (nVal bytes), whose first 32-bit value is a
-** level count followed by that many level records, and append the resulting
-** Level objects to the end of the worker snapshot's level list.
-**
-** This is a no-op returning LSM_OK if nVal is zero or negative. The blob is
-** copied and byte-swapped by ckptChangeEndianness() before being parsed;
-** pVal itself is not modified.
-*/
-int lsmCheckpointLoadLevels(lsm_db *pDb, void *pVal, int nVal){
-  int rc = LSM_OK;
-  u32 *aCopy;                     /* Private copy of the blob */
-  Level *pNew = 0;                /* Levels decoded from the blob */
-  int nInt;                       /* Number of 32-bit values in the blob */
-  int nLevel;                     /* Level count read from the blob */
-  int iIn = 1;                    /* Level records start after the count */
-
-  if( nVal<=0 ) return rc;
-
-  aCopy = lsmMallocRc(pDb->pEnv, nVal, &rc);
-  if( !aCopy ) return rc;
-
-  memcpy(aCopy, pVal, nVal);
-  nInt = nVal / sizeof(u32);
-  ckptChangeEndianness(aCopy, nInt);
-
-  nLevel = aCopy[0];
-  rc = ckptLoadLevels(pDb, aCopy, &iIn, nLevel, &pNew);
-  lsmFree(pDb->pEnv, aCopy);
-  assert( rc==LSM_OK || pNew==0 );
-
-  if( rc==LSM_OK ){
-    /* Walk to the last existing level and hang the new list off it. */
-    Level *pTail = lsmDbSnapshotLevel(pDb->pWorker);
-    assert( pTail );
-    while( pTail->pNext ) pTail = pTail->pNext;
-    pTail->pNext = pNew;
-  }
-
-  return rc;
-}
-
-/*
-** Return the data for the LEVELS record.
-**
-** The size of the checkpoint that can be stored in the database header
-** must not exceed 1024 32-bit integers. Normally, it does not. However,
-** if it does, part of the checkpoint must be stored in the LSM. This
-** routine returns that part.
-**
-** The blob consists of the value nLevel followed by the level records of
-** the last nLevel levels in the worker snapshot, with every 32-bit value
-** passed through ckptChangeEndianness(). On success *paVal and *pnVal are
-** set to the blob and its size in bytes. On error both are zeroed and an
-** error code is returned.
-*/
-int lsmCheckpointLevels(
-  lsm_db *pDb,                    /* Database handle */
-  int nLevel,                     /* Number of levels to write to blob */
-  void **paVal,                   /* OUT: Pointer to LEVELS blob */
-  int *pnVal                      /* OUT: Size of LEVELS blob in bytes */
-){
-  Level *p;                       /* Used to iterate through levels */
-  int nAll= 0;
-  /* rc must start out as LSM_OK: it is tested below, and the visible part
-  ** of ckptSetValue() only ever writes to it when an error occurs. */
-  int rc = LSM_OK;
-  int i;
-  int iOut;
-  CkptBuffer ckpt;
-  assert( nLevel>0 );
-
-  /* Count all levels, then skip forward so that p points to the first of
-  ** the final nLevel levels. */
-  for(p=lsmDbSnapshotLevel(pDb->pWorker); p; p=p->pNext) nAll++;
-
-  assert( nAll>nLevel );
-  nAll -= nLevel;
-  for(p=lsmDbSnapshotLevel(pDb->pWorker); p && nAll>0; p=p->pNext) nAll--;
-
-  memset(&ckpt, 0, sizeof(CkptBuffer));
-  ckpt.pEnv = pDb->pEnv;
-
-  ckptSetValue(&ckpt, 0, nLevel, &rc);
-  iOut = 1;
-  for(i=0; rc==LSM_OK && i<nLevel; i++){
-    ckptExportLevel(p, &ckpt, &iOut, &rc);
-    p = p->pNext;
-  }
-  assert( rc!=LSM_OK || p==0 );
-
-  if( rc==LSM_OK ){
-    ckptChangeEndianness(ckpt.aCkpt, iOut);
-    *paVal = (void *)ckpt.aCkpt;
-    *pnVal = iOut * sizeof(u32);
-  }else{
-    *pnVal = 0;
-    *paVal = 0;
-  }
-
-  return rc;
-}
-
-/*
-** Read the checkpoint id from meta-page pPg.
-**
-** The id is stored as two 32-bit values, read with lsmGetU32(), at word
-** offsets CKPT_HDR_ID_MSW and CKPT_HDR_ID_LSW of the page data. Zero is
-** returned if pPg is NULL.
-**
-** The two words are combined as unsigned 64-bit values so that a most
-** significant word with its top bit set (possible if the page holds
-** garbage) does not cause a signed left-shift overflow.
-*/
-static i64 ckptLoadId(MetaPage *pPg){
-  i64 ret = 0;
-  if( pPg ){
-    int nData;
-    u8 *aData = lsmFsMetaPageData(pPg, &nData);
-    u64 iMsw = (u64)lsmGetU32(&aData[CKPT_HDR_ID_MSW*4]);
-    u64 iLsw = (u64)lsmGetU32(&aData[CKPT_HDR_ID_LSW*4]);
-    ret = (i64)((iMsw << 32) | iLsw);
-  }
-  return ret;
-}
-
-/*
-** Return true if the buffer passed as an argument contains a valid
-** checkpoint.
-**
-** The checkpoint size is read from aCkpt[CKPT_HDR_NCKPT]. A size smaller
-** than CKPT_HDR_NCKPT (which would make the aCkpt[nCkpt-2] lookup below
-** underflow) or larger than a meta page can hold is rejected without
-** computing the checksum. Otherwise the checksum of the first (nCkpt-2)
-** values is compared with the two values stored at the end.
-*/
-static int ckptChecksumOk(u32 *aCkpt){
-  u32 nCkpt = aCkpt[CKPT_HDR_NCKPT];
-  u32 cksum1;
-  u32 cksum2;
-
-  if( nCkpt<CKPT_HDR_NCKPT || nCkpt>(LSM_META_RW_PAGE_SIZE)/sizeof(u32) ){
-    return 0;
-  }
-  ckptChecksum(aCkpt, nCkpt, &cksum1, &cksum2);
-  return (cksum1==aCkpt[nCkpt-2] && cksum2==aCkpt[nCkpt-1]);
-}
-
-/*
-** Attempt to load a checkpoint from meta page iMeta.
-**
-** This function is a no-op if *pRc is set to any value other than LSM_OK
-** when it is called. If an error occurs, *pRc is set to an LSM error code
-** before returning.
-**
-** If no error occurs and the checkpoint is successfully loaded, copy it to
-** ShmHeader.aSnap1[], ShmHeader.aSnap2[] and pDb->aSnapshot[], and set
-** ShmHeader.iMetaPage to indicate its origin. In this case return 1. Or,
-** if the checkpoint cannot be loaded (because its size field is out of
-** range or the checksum does not compute), return 0.
-*/
-static int ckptTryLoad(lsm_db *pDb, MetaPage *pPg, u32 iMeta, int *pRc){
- int bLoaded = 0; /* Return value */
- if( *pRc==LSM_OK ){
- int rc = LSM_OK; /* Error code */
- u32 *aCkpt = 0; /* Pointer to buffer containing checkpoint */
- u32 nCkpt; /* Number of elements in aCkpt[] */
- int nData; /* Bytes of data in aData[] */
- u8 *aData; /* Meta page data */
-
- aData = lsmFsMetaPageData(pPg, &nData);
- nCkpt = (u32)lsmGetU32(&aData[CKPT_HDR_NCKPT*sizeof(u32)]);
- /* Only allocate a buffer if the stored size fits within the page data
- ** and is larger than CKPT_HDR_NCKPT. Otherwise aCkpt stays NULL and the
- ** function returns 0 without touching the shared-memory header. */
- if( nCkpt<=nData/sizeof(u32) && nCkpt>CKPT_HDR_NCKPT ){
- aCkpt = (u32 *)lsmMallocRc(pDb->pEnv, nCkpt*sizeof(u32), &rc);
- }
- if( aCkpt ){
- /* Work on a private copy so the page data itself is not byte-swapped */
- memcpy(aCkpt, aData, nCkpt*sizeof(u32));
- ckptChangeEndianness(aCkpt, nCkpt);
- if( ckptChecksumOk(aCkpt) ){
- ShmHeader *pShm = pDb->pShmhdr;
- memcpy(pShm->aSnap1, aCkpt, nCkpt*sizeof(u32));
- memcpy(pShm->aSnap2, aCkpt, nCkpt*sizeof(u32));
- memcpy(pDb->aSnapshot, aCkpt, nCkpt*sizeof(u32));
- pShm->iMetaPage = iMeta;
- bLoaded = 1;
- }
- }
-
- lsmFree(pDb->pEnv, aCkpt);
- *pRc = rc;
- }
- return bLoaded;
-}
-
-/*
-** Initialize the shared-memory header with an empty snapshot. This function
-** is called when no valid snapshot can be found in the database header.
-**
-** The template below is positional: the first nine entries correspond to
-** the CKPT_HDR_XXX header slots. The NCKPT, BLKSZ and PGSZ slots and the
-** two trailing checksum slots are filled in after the array is declared.
-** The finished snapshot is copied to ShmHeader.aSnap1[], ShmHeader.aSnap2[]
-** and pDb->aSnapshot[].
-*/
-static void ckptLoadEmpty(lsm_db *pDb){
- u32 aCkpt[] = {
- 0, /* CKPT_HDR_ID_MSW */
- 10, /* CKPT_HDR_ID_LSW */
- 0, /* CKPT_HDR_NCKPT */
- LSM_COMPRESSION_EMPTY, /* CKPT_HDR_CMPID */
- 0, /* CKPT_HDR_NBLOCK */
- 0, /* CKPT_HDR_BLKSZ */
- 0, /* CKPT_HDR_NLEVEL */
- 0, /* CKPT_HDR_PGSZ */
- 0, /* CKPT_HDR_NWRITE */
- 0, 0, 1234, 5678, /* The log pointer and initial checksum */
- 0,0,0,0, 0,0,0,0, /* The append list */
- 0, /* The redirected block list */
- 0, /* The free block list */
- 0, 0 /* Space for checksum values */
- };
- u32 nCkpt = array_size(aCkpt);
- ShmHeader *pShm = pDb->pShmhdr;
-
- /* Fill in the slots whose values are only known at run time */
- aCkpt[CKPT_HDR_NCKPT] = nCkpt;
- aCkpt[CKPT_HDR_BLKSZ] = pDb->nDfltBlksz;
- aCkpt[CKPT_HDR_PGSZ] = pDb->nDfltPgsz;
- /* The checksum is written into the final two slots of the array */
- ckptChecksum(aCkpt, array_size(aCkpt), &aCkpt[nCkpt-2], &aCkpt[nCkpt-1]);
-
- memcpy(pShm->aSnap1, aCkpt, nCkpt*sizeof(u32));
- memcpy(pShm->aSnap2, aCkpt, nCkpt*sizeof(u32));
- memcpy(pDb->aSnapshot, aCkpt, nCkpt*sizeof(u32));
-}
-
-/*
-** This function is called as part of database recovery to initialize the
-** ShmHeader.aSnap1[] and ShmHeader.aSnap2[] snapshots.
-**
-** Both meta-pages are fetched and their checkpoint ids compared. The page
-** with the larger id is tried first; if it does not hold a loadable
-** checkpoint the other page is tried. If neither loads, an empty
-** checkpoint is installed instead. Returns LSM_OK or the error code from
-** fetching the meta-pages or loading a checkpoint.
-*/
-int lsmCheckpointRecover(lsm_db *pDb){
- int rc = LSM_OK; /* Return Code */
- i64 iId1; /* Id of checkpoint on meta-page 1 */
- i64 iId2; /* Id of checkpoint on meta-page 2 */
- int bLoaded = 0; /* True once checkpoint has been loaded */
- int cmp; /* True if (iId2>iId1) */
- MetaPage *apPg[2] = {0, 0}; /* Meta-pages 1 and 2 */
-
- rc = lsmFsMetaPageGet(pDb->pFS, 0, 1, &apPg[0]);
- if( rc==LSM_OK ) rc = lsmFsMetaPageGet(pDb->pFS, 0, 2, &apPg[1]);
-
- /* ckptLoadId() returns 0 for a NULL page, so these calls are safe even
- ** if one of the fetches above failed. */
- iId1 = ckptLoadId(apPg[0]);
- iId2 = ckptLoadId(apPg[1]);
- cmp = (iId2 > iId1);
- /* Try the page with the larger id first, then fall back to the other */
- bLoaded = ckptTryLoad(pDb, apPg[cmp?1:0], (cmp?2:1), &rc);
- if( bLoaded==0 ){
- bLoaded = ckptTryLoad(pDb, apPg[cmp?0:1], (cmp?1:2), &rc);
- }
-
- /* The database does not contain a valid checkpoint. Initialize the shared
- ** memory header with an empty checkpoint. Note that bLoaded is also 0 if
- ** an error occurred above, as ckptTryLoad() is a no-op once rc is set. */
- if( bLoaded==0 ){
- ckptLoadEmpty(pDb);
- }
-
- lsmFsMetaPageRelease(apPg[0]);
- lsmFsMetaPageRelease(apPg[1]);
-
- return rc;
-}
-
-/*
-** Write the snapshot held in pDb->aSnapshot[] to meta-page iMeta (1 or 2).
-** The integers are copied into the page buffer, passed through
-** ckptChangeEndianness(), and the page is then released. Returns the
-** result of fetching or releasing the page.
-*/
-int lsmCheckpointStore(lsm_db *pDb, int iMeta){
-  MetaPage *pMeta = 0;
-  u8 *aPage;
-  int nPage;
-  int nInt;
-  int rc;
-
-  assert( iMeta==1 || iMeta==2 );
-  rc = lsmFsMetaPageGet(pDb->pFS, 1, iMeta, &pMeta);
-  if( rc!=LSM_OK ) return rc;
-
-  nInt = (int)pDb->aSnapshot[CKPT_HDR_NCKPT];
-  aPage = lsmFsMetaPageData(pMeta, &nPage);
-  memcpy(aPage, pDb->aSnapshot, nInt*sizeof(u32));
-  ckptChangeEndianness((u32 *)aPage, nInt);
-
-  return lsmFsMetaPageRelease(pMeta);
-}
-
-/*
-** Copy the current client snapshot from shared memory into
-** pDb->aSnapshot[]. Slot 1 (aSnap1) is tried before slot 2 (aSnap2). A
-** slot is accepted if its declared size fits a meta-page and its checksum
-** verifies. On success *piRead (if not NULL) is set to 1 or 2 and LSM_OK
-** is returned. After LSM_ATTEMPTS_BEFORE_PROTOCOL failed rounds, each
-** followed by a shared-memory barrier, LSM_PROTOCOL_BKPT is returned.
-*/
-int lsmCheckpointLoad(lsm_db *pDb, int *piRead){
-  ShmHeader *pHdr = pDb->pShmhdr;
-  int nLeft;
-
-  for(nLeft=LSM_ATTEMPTS_BEFORE_PROTOCOL; nLeft>0; nLeft--){
-    int iSlot;
-    for(iSlot=1; iSlot<=2; iSlot++){
-      u32 *aShm = (iSlot==1) ? pHdr->aSnap1 : pHdr->aSnap2;
-      int nInt = aShm[CKPT_HDR_NCKPT];
-
-      if( nInt<=(LSM_META_RW_PAGE_SIZE / sizeof(u32)) ){
-        memcpy(pDb->aSnapshot, aShm, nInt*sizeof(u32));
-        if( ckptChecksumOk(pDb->aSnapshot) ){
-          if( piRead ) *piRead = iSlot;
-          return LSM_OK;
-        }
-      }
-    }
-
-    lsmShmBarrier(pDb);
-  }
-
-  return LSM_PROTOCOL_BKPT;
-}
-
-/*
-** Load the current snapshot from shared memory and, if that succeeds,
-** write its compression-id field to *piCmpId. The connection must hold
-** neither a client nor a worker snapshot. Returns the result of
-** lsmCheckpointLoad().
-*/
-int lsmInfoCompressionId(lsm_db *db, u32 *piCmpId){
-  int rc;
-
-  assert( db->pClient==0 && db->pWorker==0 );
-  rc = lsmCheckpointLoad(db, 0);
-  if( rc!=LSM_OK ) return rc;
-
-  *piCmpId = db->aSnapshot[CKPT_HDR_CMPID];
-  return rc;
-}
-
-/*
-** Return true if the checkpoint id stored in pDb->aSnapshot[] equals the
-** id of shared-memory snapshot slot iSnap (1 for aSnap1, 2 for aSnap2).
-*/
-int lsmCheckpointLoadOk(lsm_db *pDb, int iSnap){
-  u32 *aSlot;
-
-  assert( iSnap==1 || iSnap==2 );
-  if( iSnap==1 ){
-    aSlot = pDb->pShmhdr->aSnap1;
-  }else{
-    aSlot = pDb->pShmhdr->aSnap2;
-  }
-
-  return ( lsmCheckpointId(aSlot, 0)==lsmCheckpointId(pDb->aSnapshot, 0) );
-}
-
-/*
-** Return true if the connection has a client snapshot whose id matches
-** the id in pDb->aSnapshot[] and in both shared-memory snapshot slots.
-** Return false otherwise, including when there is no client snapshot.
-*/
-int lsmCheckpointClientCacheOk(lsm_db *pDb){
-  if( !pDb->pClient ) return 0;
-  if( pDb->pClient->iId!=lsmCheckpointId(pDb->aSnapshot, 0) ) return 0;
-  if( pDb->pClient->iId!=lsmCheckpointId(pDb->pShmhdr->aSnap1, 0) ) return 0;
-  if( pDb->pClient->iId!=lsmCheckpointId(pDb->pShmhdr->aSnap2, 0) ) return 0;
-  return 1;
-}
-
-/*
-** Load the worker snapshot from shared memory into pDb->pWorker.
-**
-** The two shared-memory snapshot slots are first compared. If they differ,
-** whichever one has a valid checksum (aSnap1 is preferred) is copied over
-** the other; if neither verifies, LSM_PROTOCOL_BKPT is returned. aSnap1 is
-** then deserialized (free-list included) and the snapshot's compression id
-** is checked with lsmCheckCompressionId().
-*/
-int lsmCheckpointLoadWorker(lsm_db *pDb){
- int rc;
- ShmHeader *pShm = pDb->pShmhdr;
- int nInt1;
- int nInt2;
-
- /* Must be holding the WORKER lock to do this. Or DMS1 (the lock that
- ** the assert below actually tests). */
- assert(
- lsmShmAssertLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_EXCL)
- || lsmShmAssertLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL)
- );
-
- /* Check that the two snapshots match. If not, repair them. */
- nInt1 = pShm->aSnap1[CKPT_HDR_NCKPT];
- nInt2 = pShm->aSnap2[CKPT_HDR_NCKPT];
- /* NOTE(review): nInt1/nInt2 come straight from shared memory and are not
- ** range-checked here before being used as lengths - confirm that callers
- ** guarantee they are sane. */
- if( nInt1!=nInt2 || memcmp(pShm->aSnap1, pShm->aSnap2, nInt2*sizeof(u32)) ){
- if( ckptChecksumOk(pShm->aSnap1) ){
- memcpy(pShm->aSnap2, pShm->aSnap1, sizeof(u32)*nInt1);
- }else if( ckptChecksumOk(pShm->aSnap2) ){
- memcpy(pShm->aSnap1, pShm->aSnap2, sizeof(u32)*nInt2);
- }else{
- return LSM_PROTOCOL_BKPT;
- }
- }
-
- rc = lsmCheckpointDeserialize(pDb, 1, pShm->aSnap1, &pDb->pWorker);
- /* pWorker is NULL if deserialization failed, hence the guard. */
- if( pDb->pWorker ) pDb->pWorker->pDatabase = pDb->pDatabase;
-
- if( rc==LSM_OK ){
- rc = lsmCheckCompressionId(pDb, pDb->pWorker->iCmpId);
- }
-
-#if 0
- assert( rc!=LSM_OK || lsmFsIntegrityCheck(pDb) );
-#endif
- return rc;
-}
-
-/*
-** Build a heap-allocated Snapshot object from the checkpoint integers in
-** aCkpt[] (machine byte order).
-**
-** The header fields, level list, append-list and block-redirect list are
-** always read. The free-list is read only if bInclFreelist is true.
-**
-** On success *ppSnap is set to the new object and LSM_OK is returned. On
-** failure the partially built object is freed, *ppSnap is set to NULL and
-** an LSM error code is returned.
-*/
-int lsmCheckpointDeserialize(
-  lsm_db *pDb,
-  int bInclFreelist,              /* If true, deserialize free-list */
-  u32 *aCkpt,
-  Snapshot **ppSnap
-){
-  int rc = LSM_OK;
-  Snapshot *pNew;
-
-  pNew = (Snapshot *)lsmMallocZeroRc(pDb->pEnv, sizeof(Snapshot), &rc);
-  if( rc==LSM_OK ){
-    Level *pLvl;
-    int nFree;
-    int i;
-    int nLevel = (int)aCkpt[CKPT_HDR_NLEVEL];
-    /* The variable-length part follows the header, log pointer and
-    ** append-list. */
-    int iIn = CKPT_HDR_SIZE + CKPT_APPENDLIST_SIZE + CKPT_LOGPTR_SIZE;
-
-    pNew->iId = lsmCheckpointId(aCkpt, 0);
-    pNew->nBlock = aCkpt[CKPT_HDR_NBLOCK];
-    pNew->nWrite = aCkpt[CKPT_HDR_NWRITE];
-    rc = ckptLoadLevels(pDb, aCkpt, &iIn, nLevel, &pNew->pLevel);
-    pNew->iLogOff = lsmCheckpointLogOffset(aCkpt);
-    pNew->iCmpId = aCkpt[CKPT_HDR_CMPID];
-
-    /* Make a copy of the append-list. Each entry is a 64-bit value stored
-    ** as two consecutive u32 slots directly after the log pointer. */
-    for(i=0; i<LSM_APPLIST_SZ; i++){
-      u32 *a = &aCkpt[CKPT_HDR_SIZE + CKPT_LOGPTR_SIZE + i*2];
-      pNew->aiAppend[i] = ckptRead64(a);
-    }
-
-    /* Read the block-redirect list */
-    pNew->redirect.n = aCkpt[iIn++];
-    if( pNew->redirect.n ){
-      pNew->redirect.a = lsmMallocZeroRc(pDb->pEnv,
-          (sizeof(struct RedirectEntry) * LSM_MAX_BLOCK_REDIRECTS), &rc
-      );
-      if( rc==LSM_OK ){
-        for(i=0; i<pNew->redirect.n; i++){
-          pNew->redirect.a[i].iFrom = aCkpt[iIn++];
-          pNew->redirect.a[i].iTo = aCkpt[iIn++];
-        }
-      }
-      /* Attach the redirect list to the last segment of the final level.
-      ** NOTE(review): this walk assumes pNew->pLevel is non-NULL whenever
-      ** redirect.n is non-zero - confirm for corrupt input. */
-      for(pLvl=pNew->pLevel; pLvl->pNext; pLvl=pLvl->pNext);
-      if( pLvl->nRight ){
-        pLvl->aRhs[pLvl->nRight-1].pRedirect = &pNew->redirect;
-      }else{
-        pLvl->lhs.pRedirect = &pNew->redirect;
-      }
-    }
-
-    /* Copy the free-list. Each entry is three u32 slots: the block number
-    ** followed by the most and least significant halves of the 64-bit id. */
-    if( rc==LSM_OK && bInclFreelist ){
-      nFree = aCkpt[iIn++];
-      if( nFree ){
-        pNew->freelist.aEntry = (FreelistEntry *)lsmMallocZeroRc(
-            pDb->pEnv, sizeof(FreelistEntry)*nFree, &rc
-        );
-        if( rc==LSM_OK ){
-          int j;
-          for(j=0; j<nFree; j++){
-            FreelistEntry *p = &pNew->freelist.aEntry[j];
-            p->iBlk = aCkpt[iIn++];
-            p->iId = ((i64)(aCkpt[iIn])<<32) + aCkpt[iIn+1];
-            iIn += 2;
-          }
-          pNew->freelist.nEntry = pNew->freelist.nAlloc = nFree;
-        }
-      }
-    }
-  }
-
-  if( rc!=LSM_OK ){
-    lsmFreeSnapshot(pDb->pEnv, pNew);
-    pNew = 0;
-  }
-
-  *ppSnap = pNew;
-  return rc;
-}
-
-/*
-** Return true if the worker snapshot already holds the maximum number of
-** segments, false otherwise. The caller must be the worker connection.
-**
-** Each level counts as nRight segments, or as one segment if nRight is
-** zero. The database is full once the total reaches LSM_MAX_RHS_SEGMENTS.
-**
-** Used when flushing the in-memory tree to disk: if the database is full
-** the caller should invoke lsm_work() or similar until it is not, before
-** creating a new level. Limiting the number of levels ensures that the
-** records describing them always fit within the checkpoint blob.
-*/
-int lsmDatabaseFull(lsm_db *pDb){
-  Level *pLevel;
-  int nSeg = 0;
-
-  assert( lsmShmAssertLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_EXCL) );
-  assert( pDb->pWorker );
-
-  pLevel = pDb->pWorker->pLevel;
-  while( pLevel ){
-    if( pLevel->nRight ){
-      nSeg += pLevel->nRight;
-    }else{
-      nSeg += 1;
-    }
-    pLevel = pLevel->pNext;
-  }
-
-  return (nSeg >= LSM_MAX_RHS_SEGMENTS);
-}
-
-/*
-** The connection passed as the only argument is currently the worker
-** connection. Some work has been performed on the database by the connection,
-** but no new snapshot has been written into shared memory.
-**
-** This function updates the shared-memory worker and client snapshots with
-** the new snapshot produced by the work performed by pDb. The worker
-** snapshot id is incremented, the snapshot is serialized with
-** ckptExportSnapshot() (bFlush is passed through unchanged), and the
-** resulting blob is copied into ShmHeader.aSnap2[] and then aSnap1[].
-**
-** If successful, LSM_OK is returned. Otherwise, if an error occurs, an LSM
-** error code is returned.
-*/
-int lsmCheckpointSaveWorker(lsm_db *pDb, int bFlush){
- Snapshot *pSnap = pDb->pWorker;
- ShmHeader *pShm = pDb->pShmhdr;
- void *p = 0;
- int n = 0;
- int rc;
-
- /* NOTE(review): the id is bumped before the export, so it stays
- ** incremented even if ckptExportSnapshot() fails - confirm intended. */
- pSnap->iId++;
- rc = ckptExportSnapshot(pDb, bFlush, pSnap->iId, 1, &p, &n);
- if( rc!=LSM_OK ) return rc;
- assert( ckptChecksumOk((u32 *)p) );
-
- /* Write slot 2 first, issue a barrier, then write slot 1. Presumably this
- ** keeps at least one slot checksum-valid for concurrent readers, which try
- ** aSnap1 before aSnap2 in lsmCheckpointLoad() - confirm. */
- assert( n<=LSM_META_RW_PAGE_SIZE );
- memcpy(pShm->aSnap2, p, n);
- lsmShmBarrier(pDb);
- memcpy(pShm->aSnap1, p, n);
- lsmFree(pDb->pEnv, p);
-
- /* assert( lsmFsIntegrityCheck(pDb) ); */
- return LSM_OK;
-}
-
-/*
-** This function is used to determine the snapshot-id of the most recently
-** checkpointed snapshot. Variable ShmHeader.iMetaPage indicates which of
-** the two meta-pages said snapshot resides on (if any).
-**
-** If successful, this function loads the snapshot from the meta-page,
-** verifies its checksum and sets the output variables before returning
-** LSM_OK:
-**
-**   *piId     snapshot-id
-**   *piLog    log offset field, shifted right by one bit
-**   *pnWrite  value of the CKPT_HDR_NWRITE field
-**
-** Any of the three output pointers may be NULL.
-**
-** If there is no checkpointed snapshot, if its declared size is out of
-** range, if its checksum does not verify, or if ShmHeader.iMetaPage changed
-** while the page was being read, all outputs are set to zero and LSM_OK is
-** returned. If an error occurs, the outputs are set to zero and an LSM
-** error code is returned.
-*/
-int lsmCheckpointSynced(lsm_db *pDb, i64 *piId, i64 *piLog, u32 *pnWrite){
-  int rc = LSM_OK;
-  int bValid = 0;                 /* True once outputs hold verified data */
-  MetaPage *pPg;
-  u32 iMeta;
-
-  iMeta = pDb->pShmhdr->iMetaPage;
-  if( iMeta==1 || iMeta==2 ){
-    rc = lsmFsMetaPageGet(pDb->pFS, 0, iMeta, &pPg);
-    if( rc==LSM_OK ){
-      int nCkpt;
-      int nData;
-      u8 *aData;
-
-      aData = lsmFsMetaPageData(pPg, &nData);
-      assert( nData==LSM_META_RW_PAGE_SIZE );
-      nCkpt = lsmGetU32(&aData[CKPT_HDR_NCKPT*sizeof(u32)]);
-
-      /* Reject sizes too small to contain the NCKPT field itself (the same
-      ** lower bound ckptTryLoad() applies) as well as sizes that do not fit
-      ** within a meta-page. */
-      if( nCkpt>CKPT_HDR_NCKPT
-       && nCkpt<(LSM_META_RW_PAGE_SIZE/sizeof(u32))
-      ){
-        u32 *aCopy = lsmMallocRc(pDb->pEnv, sizeof(u32) * nCkpt, &rc);
-        if( aCopy ){
-          memcpy(aCopy, aData, nCkpt*sizeof(u32));
-          ckptChangeEndianness(aCopy, nCkpt);
-          if( ckptChecksumOk(aCopy) ){
-            if( piId ) *piId = lsmCheckpointId(aCopy, 0);
-            if( piLog ) *piLog = (lsmCheckpointLogOffset(aCopy) >> 1);
-            if( pnWrite ) *pnWrite = aCopy[CKPT_HDR_NWRITE];
-            bValid = 1;
-          }
-          lsmFree(pDb->pEnv, aCopy);
-        }
-      }
-      lsmFsMetaPageRelease(pPg);
-    }
-  }
-
-  /* Zero the outputs unless a checksum-verified snapshot was read and the
-  ** meta-page selector did not change in the meantime. Without the bValid
-  ** test a failed checksum would leave the caller's variables unset. */
-  if( bValid==0 || rc!=LSM_OK || pDb->pShmhdr->iMetaPage!=iMeta ){
-    if( piId ) *piId = 0;
-    if( piLog ) *piLog = 0;
-    if( pnWrite ) *pnWrite = 0;
-  }
-  return rc;
-}
-
-/*
-** Return the 64-bit checkpoint-id of checkpoint array aCkpt[]. The id is
-** assembled from the CKPT_HDR_ID_MSW and CKPT_HDR_ID_LSW slots. If bDisk
-** is true the slots are decoded with lsmGetU32() (32-bit big-endian
-** integers); otherwise they are read in machine byte order.
-*/
-i64 lsmCheckpointId(u32 *aCkpt, int bDisk){
-  i64 iHigh;
-  i64 iLow;
-
-  if( bDisk ){
-    u8 *aByte = (u8 *)aCkpt;
-    iHigh = (i64)lsmGetU32(&aByte[CKPT_HDR_ID_MSW*4]);
-    iLow = (i64)lsmGetU32(&aByte[CKPT_HDR_ID_LSW*4]);
-  }else{
-    iHigh = (i64)aCkpt[CKPT_HDR_ID_MSW];
-    iLow = (i64)aCkpt[CKPT_HDR_ID_LSW];
-  }
-
-  return (iHigh << 32) + iLow;
-}
-
-/*
-** Return the CKPT_HDR_NBLOCK field of checkpoint aCkpt[] (machine byte
-** order).
-*/
-u32 lsmCheckpointNBlock(u32 *aCkpt){
-  u32 nBlock = aCkpt[CKPT_HDR_NBLOCK];
-  return nBlock;
-}
-
-/*
-** Return the CKPT_HDR_NWRITE field of checkpoint aCkpt[]. If bDisk is
-** true the field is decoded with lsmGetU32(); otherwise it is read in
-** machine byte order.
-*/
-u32 lsmCheckpointNWrite(u32 *aCkpt, int bDisk){
-  u32 *pField = &aCkpt[CKPT_HDR_NWRITE];
-  return bDisk ? lsmGetU32((u8 *)pField) : *pField;
-}
-
-/*
-** Return the 64-bit log-offset field of checkpoint aCkpt[] (machine byte
-** order), assembled from the CKPT_HDR_LO_MSW and CKPT_HDR_LO_LSW slots.
-*/
-i64 lsmCheckpointLogOffset(u32 *aCkpt){
-  i64 iHigh = (i64)aCkpt[CKPT_HDR_LO_MSW];
-  i64 iLow = (i64)aCkpt[CKPT_HDR_LO_LSW];
-  return (iHigh << 32) + iLow;
-}
-
-/* Return the CKPT_HDR_PGSZ field of checkpoint aCkpt[] as an int. */
-int lsmCheckpointPgsz(u32 *aCkpt){
-  u32 nPgsz = aCkpt[CKPT_HDR_PGSZ];
-  return (int)nPgsz;
-}
-
-/* Return the CKPT_HDR_BLKSZ field of checkpoint aCkpt[] as an int. */
-int lsmCheckpointBlksz(u32 *aCkpt){
-  u32 nBlksz = aCkpt[CKPT_HDR_BLKSZ];
-  return (int)nBlksz;
-}
-
-/*
-** Initialize the log-related fields of *pLog from checkpoint aCkpt[]:
-** the start of region 2 (the checkpoint log offset shifted right by one
-** bit), the two running checksum values and the snapshot id.
-*/
-void lsmCheckpointLogoffset(
-  u32 *aCkpt,
-  DbLog *pLog
-){
-  i64 iLogOff = lsmCheckpointLogOffset(aCkpt);
-
-  pLog->aRegion[2].iStart = (iLogOff >> 1);
-  pLog->cksum0 = aCkpt[CKPT_HDR_LO_CKSUM1];
-  pLog->cksum1 = aCkpt[CKPT_HDR_LO_CKSUM2];
-  pLog->iSnapshotId = lsmCheckpointId(aCkpt, 0);
-}
-
-/*
-** Zero the log-offset field of the snapshot in pDb->aSnapshot[],
-** recompute its checksum, and copy the result into both shared-memory
-** snapshot slots. On entry the private snapshot must be identical to both
-** shared-memory slots.
-*/
-void lsmCheckpointZeroLogoffset(lsm_db *pDb){
-  ShmHeader *pHdr = pDb->pShmhdr;
-  u32 *aSnap = pDb->aSnapshot;
-  u32 nInt = aSnap[CKPT_HDR_NCKPT];
-
-  assert( nInt>CKPT_HDR_NCKPT );
-  assert( nInt==pHdr->aSnap1[CKPT_HDR_NCKPT] );
-  assert( 0==memcmp(aSnap, pHdr->aSnap1, nInt*sizeof(u32)) );
-  assert( 0==memcmp(aSnap, pHdr->aSnap2, nInt*sizeof(u32)) );
-
-  aSnap[CKPT_HDR_LO_MSW] = 0;
-  aSnap[CKPT_HDR_LO_LSW] = 0;
-
-  /* The checksum lives in the final two integers of the snapshot. */
-  ckptChecksum(aSnap, nInt, &aSnap[nInt-2], &aSnap[nInt-1]);
-
-  memcpy(pHdr->aSnap1, aSnap, nInt*sizeof(u32));
-  memcpy(pHdr->aSnap2, aSnap, nInt*sizeof(u32));
-}
-
-/*
-** Set *pnKB to the number of KB of data written into the database file
-** since the most recent checkpoint. The result is the difference between
-** the write counter in shared-memory snapshot 1 and the counter reported
-** by lsmCheckpointSynced(), multiplied by the page size and rounded up to
-** a whole KB. *pnKB is left untouched if an error code is returned.
-*/
-int lsmCheckpointSize(lsm_db *db, int *pnKB){
-  u32 nSynced;                    /* Pages written at last checkpoint */
-  u32 nPgsz;
-  u32 nWrite;
-  i64 nByte;
-  int rc;
-
-  rc = lsmCheckpointSynced(db, 0, 0, &nSynced);
-  if( rc!=LSM_OK ) return rc;
-
-  nPgsz = db->pShmhdr->aSnap1[CKPT_HDR_PGSZ];
-  nWrite = db->pShmhdr->aSnap1[CKPT_HDR_NWRITE];
-  nByte = (i64)(nWrite - nSynced) * nPgsz;
-  *pnKB = (int)((nByte + 1023) / 1024);
-
-  return rc;
-}
diff --git a/ext/lsm1/lsm_file.c b/ext/lsm1/lsm_file.c
deleted file mode 100644
index 9f4144618a..0000000000
--- a/ext/lsm1/lsm_file.c
+++ /dev/null
@@ -1,3311 +0,0 @@
-/*
-** 2011-08-26
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** NORMAL DATABASE FILE FORMAT
-**
-** The following database file format concepts are used by the code in
-** this file to read and write the database file.
-**
-** Pages:
-**
-** A database file is divided into pages. The first 8KB of the file consists
-** of two 4KB meta-pages. The meta-page size is not configurable. The
-** remainder of the file is made up of database pages. The default database
-** page size is 4KB. Database pages are aligned to page-size boundaries,
-** so if the database page size is larger than 8KB there is a gap between
-** the end of the meta pages and the start of the database pages.
-**
-** Database pages are numbered based on their position in the file. Page N
-** begins at byte offset ((N-1)*pgsz). This means that page 1 does not
-** exist - since it would always overlap with the meta pages. If the
-** page-size is (say) 512 bytes, then the first usable page in the database
-** is page 33.
-**
-** It is assumed that the first two meta pages and the data that follows
-** them are located on different disk sectors, so that if a power failure
-** occurs while writing to a meta page there is no risk of damage to the
-** other meta page or any other part of the database file. TODO: This may
-** need to be revisited.
-**
-** Blocks:
-**
-** The database file is also divided into blocks. The default block size is
-** 1MB. When writing to the database file, an attempt is made to write data
-** in contiguous block-sized chunks.
-**
-** The first and last page on each block are special in that they are 4
-** bytes smaller than all other pages. This is because the last four bytes
-** of space on the first and last pages of each block are reserved for
-** pointers to other blocks (i.e. a 32-bit block number).
-**
-** Runs:
-**
-** A run is a sequence of pages that the upper layer uses to store a
-** sorted array of database keys (and accompanying data - values, FC
-** pointers and so on). Given a page within a run, it is possible to
-** navigate to the next page in the run as follows:
-**
-** a) if the current page is not the last in a block, the next page
-** in the run is located immediately after the current page, OR
-**
-** b) if the current page is the last page in a block, the next page
-** in the run is the first page on the block identified by the
-** block pointer stored in the last 4 bytes of the current block.
-**
-** It is possible to navigate to the previous page in a similar fashion,
-** using the block pointer embedded in the last 4 bytes of the first page
-** of each block as required.
-**
-** The upper layer is responsible for identifying by page number the
-** first and last page of any run that it needs to navigate - there are
-** no "end-of-run" markers stored or identified by this layer. This is
-** necessary as clients reading different database snapshots may access
-** different subsets of a run.
-**
-** THE LOG FILE
-**
-** This file opens and closes the log file. But it does not contain any
-** logic related to the log file format. Instead, it exports the following
-** functions that are used by the code in lsm_log.c to read and write the
-** log file:
-**
-** lsmFsOpenLog
-** lsmFsWriteLog
-** lsmFsSyncLog
-** lsmFsReadLog
-** lsmFsTruncateLog
-** lsmFsCloseAndDeleteLog
-**
-** COMPRESSED DATABASE FILE FORMAT
-**
-** The compressed database file format is very similar to the normal format.
-** The file still begins with two 4KB meta-pages (which are never compressed).
-** It is still divided into blocks.
-**
-** The first and last four bytes of each block are reserved for 32-bit
-** pointer values. Similar to the way four bytes are carved from the end of
-** the first and last page of each block in uncompressed databases. From
-** the point of view of the upper layer, all pages are the same size - this
-** is different from the uncompressed format where the first and last pages
-** on each block are 4 bytes smaller than the others.
-**
-** Pages are stored in variable length compressed form, as follows:
-**
-** * 3-byte size field containing the size of the compressed page image
-** in bytes. The most significant bit of each byte of the size field
-** is always set. The remaining 7 bits are used to store a 21-bit
-** integer value (in big-endian order - the first byte in the field
-** contains the most significant 7 bits). Since the maximum allowed
-** size of a compressed page image is (2^17 - 1) bytes, there are
-** actually 4 unused bits in the size field.
-**
-** In other words, if the size of the compressed page image is nSz,
-** the header can be serialized as follows:
-**
-** u8 aHdr[3]
-** aHdr[0] = 0x80 | (u8)(nSz >> 14);
-** aHdr[1] = 0x80 | (u8)(nSz >> 7);
-** aHdr[2] = 0x80 | (u8)(nSz >> 0);
-**
-** * Compressed page image.
-**
-** * A second copy of the 3-byte record header.
-**
-** A page number is a byte offset into the database file. So the smallest
-** possible page number is 8192 (immediately after the two meta-pages).
-** The first and root page of a segment are identified by a page number
-** corresponding to the byte offset of the first byte in the corresponding
-** page record. The last page of a segment is identified by the byte offset
-** of the last byte in its record.
-**
-** Unlike uncompressed pages, compressed page records may span blocks.
-**
-** Sometimes, in order to avoid touching sectors that contain synced data
-** when writing, it is necessary to insert unused space between compressed
-** page records. This can be done as follows:
-**
-** * For less than 6 bytes of empty space, the first and last byte
-** of the free space contain the total number of free bytes. For
-** example:
-**
-** Block of 4 free bytes: 0x04 0x?? 0x?? 0x04
-** Block of 2 free bytes: 0x02 0x02
-** A single free byte: 0x01
-**
-** * For 6 or more bytes of empty space, a record similar to a
-** compressed page record is added to the segment. A padding record
-** is distinguished from a compressed page record by the most
-** significant bit of the second byte of the size field, which is
-** cleared instead of set.
-*/
-#include "lsmInt.h"
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-
-/*
-** File-system object. Each database connection allocates a single instance
-** of the following structure. It is used for all access to the database and
-** log files.
-**
-** The database file may be accessed via two methods - using mmap() or using
-** read() and write() calls. In the general case both methods are used - a
-** prefix of the file is mapped into memory and the remainder accessed using
-** read() and write(). This is helpful when accessing very large files (or
-** files that may grow very large during the lifetime of a database
-** connection) on systems with 32-bit address spaces. However, it also requires
-** that this object manage two distinct types of Page objects simultaneously -
-** those that carry pointers to the mapped file and those that carry arrays
-** populated by read() calls.
-**
-** pFree:
-** The head of a singly-linked list containing currently unused Page
-** structures suitable for use as mmap-page handles. Connected by the
-** Page.pFreeNext pointers.
-**
-** pMapped:
-** The head of a singly-linked list that contains all pages that currently
-** carry pointers to the mapped region. This is used if the region is
-** ever remapped - the pointers carried by existing pages can be adjusted
-** to account for the remapping. Connected by the Page.pMappedNext pointers.
-**
-** pWaiting:
-** When the upper layer wishes to append a new b-tree page to a segment,
-** it allocates a Page object that carries a malloc'd block of memory -
-** regardless of the mmap-related configuration. The page is not assigned
-** a page number at first. When the upper layer has finished constructing
-** the page contents, it calls lsmFsPagePersist() to assign a page number
-** to it. At this point it is likely that N pages have been written to the
-** segment, the (N+1)th page is still outstanding and the b-tree page is
-** assigned page number (N+2). To avoid writing page (N+2) before page
-** (N+1), the recently completed b-tree page is held in the singly linked
-** list headed by pWaiting until page (N+1) has been written.
-**
-** Function lsmFsFlushWaiting() is responsible for eventually writing
-** waiting pages to disk.
-**
-** apHash/nHash:
-** Hash table used to store all Page objects that carry malloc'd arrays,
-** except those b-tree pages that have not yet been assigned page numbers.
-** Once they have been assigned page numbers - they are added to this
-** hash table.
-**
-** Hash table overflow chains are connected using the Page.pHashNext
-** pointers.
-**
-** pLruFirst, pLruLast:
-** The first and last entries in a doubly-linked list of pages. This
-** list contains all pages with malloc'd data that are present in the
-** hash table and have a ref-count of zero.
-*/
-struct FileSystem {
- lsm_db *pDb; /* Database handle that owns this object */
- lsm_env *pEnv; /* Environment pointer */
- char *zDb; /* Database file name */
- char *zLog; /* Log file name (zDb with "-log" appended) */
- int nMetasize; /* Size of meta pages in bytes */
- int nMetaRwSize; /* Read/written size of meta pages in bytes */
- i64 nPagesize; /* Database page-size in bytes */
- i64 nBlocksize; /* Database block-size in bytes */
-
- /* r/w file descriptors for both files. */
- LsmFile *pLsmFile; /* Used after lsm_close() to link into list */
- lsm_file *fdDb; /* Database file */
- lsm_file *fdLog; /* Log file (NULL while the log is not open) */
- int szSector; /* Database file sector size */
-
- /* If this is a compressed database, a pointer to the compression methods.
- ** For an uncompressed database, a NULL pointer. */
- lsm_compress *pCompress;
- u8 *aIBuffer; /* Buffer to compress to */
- u8 *aOBuffer; /* Buffer to uncompress from */
- int nBuffer; /* Allocated size of above buffers in bytes */
-
- /* mmap() page related things */
- i64 nMapLimit; /* Maximum bytes of file to map */
- void *pMap; /* Current mapping of database file */
- i64 nMap; /* Bytes mapped at pMap */
- Page *pFree; /* Unused Page structures */
- Page *pMapped; /* List of Page structs that point to pMap */
-
- /* Page cache parameters for non-mmap() pages */
- int nCacheMax; /* Configured cache size (in pages) */
- int nCacheAlloc; /* Current cache size (in pages) */
- Page *pLruFirst; /* Head of the LRU list */
- Page *pLruLast; /* Tail of the LRU list */
- int nHash; /* Number of hash slots in hash table */
- Page **apHash; /* nHash Hash slots */
- Page *pWaiting; /* b-tree pages waiting to be written */
-
- /* Statistics */
- int nOut; /* Number of outstanding pages */
- int nWrite; /* Total number of pages written */
- int nRead; /* Total number of pages read */
-};
-
-/*
-** Database page handle.
-**
-** The flags field is a combination of the PAGE_DIRTY, PAGE_FREE and
-** PAGE_HASPREV bits. The three trailing "Next" pointers link the page into
-** the FileSystem.pWaiting, FileSystem.pFree and FileSystem.pMapped lists
-** respectively.
-**
-** pSeg:
-** When lsmFsSortedAppend() is called on a compressed database, the new
-** page is not assigned a page number or location in the database file
-** immediately. Instead, these are assigned by the lsmFsPagePersist() call
-** right before it writes the compressed page image to disk.
-**
-** The lsmFsSortedAppend() function sets the pSeg pointer to point to the
-** segment that the new page will be a part of. It is unset by
-** lsmFsPagePersist() after the page is written to disk.
-*/
-struct Page {
- u8 *aData; /* Buffer containing page data */
- int nData; /* Bytes of usable data at aData[] */
- LsmPgno iPg; /* Page number */
- int nRef; /* Number of outstanding references */
- int flags; /* Combination of PAGE_XXX flags */
- Page *pHashNext; /* Next page in hash table slot */
- Page *pLruNext; /* Next page in LRU list */
- Page *pLruPrev; /* Previous page in LRU list */
- FileSystem *pFS; /* File system that owns this page */
-
- /* Only used in compressed database mode: */
- int nCompress; /* Compressed size (or 0 for uncomp. db) */
- int nCompressPrev; /* Compressed size of prev page */
- Segment *pSeg; /* Segment this page will be written to */
-
- /* Pointers for singly linked lists */
- Page *pWaitingNext; /* Next page in FileSystem.pWaiting list */
- Page *pFreeNext; /* Next page in FileSystem.pFree list */
- Page *pMappedNext; /* Next page in FileSystem.pMapped list */
-};
-
-/*
-** Meta-data page handle. There are two meta-data pages at the start of
-** the database file, each FileSystem.nMetasize bytes in size. Handles are
-** obtained with lsmFsMetaPageGet() and returned with
-** lsmFsMetaPageRelease().
-*/
-struct MetaPage {
- int iPg; /* Either 1 or 2 */
- int bWrite; /* Write back to db file on release */
- u8 *aData; /* Pointer to buffer holding the page content */
- FileSystem *pFS; /* FileSystem that owns this page */
-};
-
-/*
-** Values for Page.flags. The values are distinct bits and may be OR-ed
-** together.
-*/
-#define PAGE_DIRTY 0x00000001 /* Set if page is dirty */
-#define PAGE_FREE 0x00000002 /* Set if Page.aData requires lsmFree() */
-#define PAGE_HASPREV 0x00000004 /* Set if page is first on uncomp. block */
-
-/*
-** Number of pgsz byte pages omitted from the start of block 1. The start
-** of block 1 contains two 4096 byte meta pages (8192 bytes in total).
-** The result is never less than 1, even when pgsz is larger than 8192.
-*/
-#define BLOCK1_HDR_SIZE(pgsz) LSM_MAX(1, 8192/(pgsz))
-
-/*
-** IOERR_WRAPPER(rc) evaluates to rc unchanged. In builds without NDEBUG it
-** is a function that first calls lsmIoerrBkpt() whenever rc is not LSM_OK,
-** so a debugger breakpoint on lsmIoerrBkpt() catches every IO error (any
-** error returned by a VFS method). With NDEBUG it is a plain macro.
-*/
-#ifdef NDEBUG
-# define IOERR_WRAPPER(rc) (rc)
-#else
-static void lsmIoerrBkpt(void){
-  static int nErr = 0;
-  nErr++;
-}
-static int IOERR_WRAPPER(int rc){
-  if( rc==LSM_OK ) return rc;
-  lsmIoerrBkpt();
-  return rc;
-}
-#endif
-
-/*
-** Debugging aid that checks the invariants of the LRU list of pFS. When
-** NDEBUG is defined it compiles away to nothing. In debug builds it is a
-** real function, but the checks themselves are currently disabled by the
-** "#if 0" below, so the function body is empty.
-*/
-#ifdef NDEBUG
-# define assert_lists_are_ok(x)
-#else
-static Page *fsPageFindInHash(FileSystem *pFS, LsmPgno iPg, int *piHash);
-
-static void assert_lists_are_ok(FileSystem *pFS){
-#if 0
- Page *p;
-
- assert( pFS->nMapLimit>=0 );
-
- /* Check that all pages in the LRU list have nRef==0, pointers to buffers
- ** in heap memory, and corresponding entries in the hash table. */
- for(p=pFS->pLruFirst; p; p=p->pLruNext){
- assert( p==pFS->pLruFirst || p->pLruPrev!=0 );
- assert( p==pFS->pLruLast || p->pLruNext!=0 );
- assert( p->pLruPrev==0 || p->pLruPrev->pLruNext==p );
- assert( p->pLruNext==0 || p->pLruNext->pLruPrev==p );
- assert( p->nRef==0 );
- assert( p->flags & PAGE_FREE );
- assert( p==fsPageFindInHash(pFS, p->iPg, 0) );
- }
-#endif
-}
-#endif
-
-/*
-** Wrappers around the VFS methods of the lsm_env object:
-**
-** lsmEnvOpen()
-** lsmEnvRead()
-** lsmEnvWrite()
-** lsmEnvSync()
-** lsmEnvSectorSize()
-** lsmEnvClose()
-** lsmEnvTruncate()
-** lsmEnvUnlink()
-** lsmEnvRemap()
-*/
-/* Open file zFile via the xOpen method of pEnv; handle goes to *ppNew. */
-int lsmEnvOpen(lsm_env *pEnv, const char *zFile, int flags, lsm_file **ppNew){
-  int rc = pEnv->xOpen(pEnv, zFile, flags, ppNew);
-  return rc;
-}
-
-/*
-** Read nRead bytes at offset iOff of pFile into pRead using the xRead
-** method of pEnv. The result passes through IOERR_WRAPPER().
-*/
-static int lsmEnvRead(
-  lsm_env *pEnv,
-  lsm_file *pFile,
-  lsm_i64 iOff,
-  void *pRead,
-  int nRead
-){
-  int rc = pEnv->xRead(pFile, iOff, pRead, nRead);
-  return IOERR_WRAPPER( rc );
-}
-
-/*
-** Write nWrite bytes from pWrite to offset iOff of pFile using the xWrite
-** method of pEnv. The const qualifier is cast away because xWrite takes a
-** non-const pointer. The result passes through IOERR_WRAPPER().
-*/
-static int lsmEnvWrite(
-  lsm_env *pEnv,
-  lsm_file *pFile,
-  lsm_i64 iOff,
-  const void *pWrite,
-  int nWrite
-){
-  int rc = pEnv->xWrite(pFile, iOff, (void *)pWrite, nWrite);
-  return IOERR_WRAPPER( rc );
-}
-
-/* Invoke the xSync method of pEnv on pFile, via IOERR_WRAPPER(). */
-static int lsmEnvSync(lsm_env *pEnv, lsm_file *pFile){
-  int rc = pEnv->xSync(pFile);
-  return IOERR_WRAPPER( rc );
-}
-
-/* Return the value reported by the xSectorSize method of pEnv for pFile. */
-static int lsmEnvSectorSize(lsm_env *pEnv, lsm_file *pFile){
-  int szSector = pEnv->xSectorSize(pFile);
-  return szSector;
-}
-
-/* Close pFile using the xClose method of pEnv, via IOERR_WRAPPER(). */
-int lsmEnvClose(lsm_env *pEnv, lsm_file *pFile){
-  int rc = pEnv->xClose(pFile);
-  return IOERR_WRAPPER( rc );
-}
-
-/* Truncate pFile to nByte bytes with xTruncate, via IOERR_WRAPPER(). */
-static int lsmEnvTruncate(lsm_env *pEnv, lsm_file *pFile, lsm_i64 nByte){
-  int rc = pEnv->xTruncate(pFile, nByte);
-  return IOERR_WRAPPER( rc );
-}
-
-/* Delete file zDel using the xUnlink method of pEnv, via IOERR_WRAPPER(). */
-static int lsmEnvUnlink(lsm_env *pEnv, const char *zDel){
-  int rc = pEnv->xUnlink(pEnv, zDel);
-  return IOERR_WRAPPER( rc );
-}
-
-/*
-** Forward to the xRemap method of pEnv. All arguments other than pEnv are
-** passed through unchanged and the method's return value is returned.
-*/
-static int lsmEnvRemap(
-  lsm_env *pEnv,
-  lsm_file *pFile,
-  i64 szMin,
-  void **ppMap,
-  i64 *pszMap
-){
-  int rc = pEnv->xRemap(pFile, szMin, ppMap, pszMap);
-  return rc;
-}
-
-/*
-** Forward to the xLock method of pEnv. If pFile is NULL, nothing is done
-** and LSM_OK is returned.
-*/
-int lsmEnvLock(lsm_env *pEnv, lsm_file *pFile, int iLock, int eLock){
-  int rc = LSM_OK;
-  if( pFile!=0 ){
-    rc = pEnv->xLock(pFile, iLock, eLock);
-  }
-  return rc;
-}
-
-/*
-** Forward to the xTestLock method of pEnv. All arguments other than pEnv
-** are passed through unchanged.
-*/
-int lsmEnvTestLock(
-  lsm_env *pEnv,
-  lsm_file *pFile,
-  int iLock,
-  int nLock,
-  int eLock
-){
-  int rc = pEnv->xTestLock(pFile, iLock, nLock, eLock);
-  return rc;
-}
-
-/*
-** Forward to the xShmMap method of pEnv. All arguments other than pEnv
-** are passed through unchanged.
-*/
-int lsmEnvShmMap(
-  lsm_env *pEnv,
-  lsm_file *pFile,
-  int iChunk,
-  int sz,
-  void **ppOut
-){
-  int rc = pEnv->xShmMap(pFile, iChunk, sz, ppOut);
-  return rc;
-}
-
-/* Invoke the xShmBarrier method of pEnv. */
-void lsmEnvShmBarrier(lsm_env *pEnv){
-  (pEnv->xShmBarrier)();
-}
-
-/* Invoke the xShmUnmap method of pEnv on pFile, forwarding flag bDel. */
-void lsmEnvShmUnmap(lsm_env *pEnv, lsm_file *pFile, int bDel){
-  (pEnv->xShmUnmap)(pFile, bDel);
-}
-
-/* Invoke the xSleep method of pEnv, forwarding argument nUs. */
-void lsmEnvSleep(lsm_env *pEnv, int nUs){
-  (pEnv->xSleep)(pEnv, nUs);
-}
-
-
-/*
-** Write the pStr->n bytes held in string buffer pStr to the log file,
-** starting at byte offset iOff. The log file must already be open.
-*/
-int lsmFsWriteLog(FileSystem *pFS, i64 iOff, LsmString *pStr){
-  int rc;
-  assert( pFS->fdLog );
-  rc = lsmEnvWrite(pFS->pEnv, pFS->fdLog, iOff, pStr->z, pStr->n);
-  return rc;
-}
-
-/*
-** Sync the log file to disk. The log file must already be open.
-*/
-int lsmFsSyncLog(FileSystem *pFS){
-  int rc;
-  assert( pFS->fdLog );
-  rc = lsmEnvSync(pFS->pEnv, pFS->fdLog);
-  return rc;
-}
-
-/*
-** Read nRead bytes from the log file, starting at offset iOff, and append
-** them to string buffer pStr. The buffer is first extended by nRead
-** bytes; if that fails its error code is returned and nothing is read.
-** Otherwise pStr->n is advanced by nRead and the result of the read is
-** returned.
-*/
-int lsmFsReadLog(FileSystem *pFS, i64 iOff, int nRead, LsmString *pStr){
-  int rc;                         /* Return code */
-
-  assert( pFS->fdLog );
-  rc = lsmStringExtend(pStr, nRead);
-  if( rc!=LSM_OK ) return rc;
-
-  rc = lsmEnvRead(pFS->pEnv, pFS->fdLog, iOff, &pStr->z[pStr->n], nRead);
-  pStr->n += nRead;
-  return rc;
-}
-
-/*
-** Truncate the log file to nByte bytes in size. If the log file is not
-** open this is a no-op that returns LSM_OK.
-*/
-int lsmFsTruncateLog(FileSystem *pFS, i64 nByte){
-  int rc = LSM_OK;
-  if( pFS->fdLog ){
-    rc = lsmEnvTruncate(pFS->pEnv, pFS->fdLog, nByte);
-  }
-  return rc;
-}
-
-/*
-** Truncate the database file to nByte bytes in size. If the database file
-** is not open this is a no-op that returns LSM_OK.
-*/
-int lsmFsTruncateDb(FileSystem *pFS, i64 nByte){
-  int rc = LSM_OK;
-  if( pFS->fdDb ){
-    rc = lsmEnvTruncate(pFS->pEnv, pFS->fdDb, nByte);
-  }
-  return rc;
-}
-
-/*
-** Close the log file if it is open, then unlink "<db>-log" from the
-** file-system. Called during database shutdown only. Errors from close,
-** unlink and the name allocation are ignored; LSM_OK is always returned.
-*/
-int lsmFsCloseAndDeleteLog(FileSystem *pFS){
-  char *zLogName;
-
-  if( pFS->fdLog!=0 ){
-    lsmEnvClose(pFS->pEnv, pFS->fdLog );
-    pFS->fdLog = 0;
-  }
-
-  zLogName = lsmMallocPrintf(pFS->pEnv, "%s-log", pFS->zDb);
-  if( zLogName!=0 ){
-    lsmEnvUnlink(pFS->pEnv, zLogName);
-    lsmFree(pFS->pEnv, zLogName);
-  }
-
-  return LSM_OK;
-}
-
-/*
-** Return true if page iReal of the database should be accessed using
-** mmap, false otherwise. A page qualifies when iReal multiplied by the
-** page size does not exceed FileSystem.nMapLimit.
-*/
-static int fsMmapPage(FileSystem *pFS, LsmPgno iReal){
-  i64 iEnd = (i64)iReal*pFS->nPagesize;
-  return (iEnd <= pFS->nMapLimit);
-}
-
-/*
-** Given that there are currently nHash slots in the hash table, return
-** the hash key (slot index) for page iPg: iPg modulo nHash.
-*/
-static int fsHashKey(int nHash, LsmPgno iPg){
-  int iSlot = (iPg % nHash);
-  return iSlot;
-}
-
-/*
-** Helper for opening a single file on disk - the log file if bLog is
-** true, the database file otherwise.
-**
-** If *pRc is not LSM_OK on entry, nothing is opened and NULL is returned.
-** Otherwise the result of lsmEnvOpen() is stored in *pRc and the file
-** handle it produced is returned.
-*/
-static lsm_file *fsOpenFile(
-  FileSystem *pFS,                /* File system object */
-  int bReadonly,                  /* True to open this file read-only */
-  int bLog,                       /* True for log, false for db */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  lsm_file *pFd = 0;
-  const char *zName;
-  int openFlags;
-
-  if( *pRc!=LSM_OK ) return pFd;
-
-  openFlags = 0;
-  if( bReadonly ) openFlags = LSM_OPEN_READONLY;
-  zName = pFS->zDb;
-  if( bLog ) zName = pFS->zLog;
-
-  *pRc = lsmEnvOpen(pFS->pEnv, zName, openFlags, &pFd);
-  return pFd;
-}
-
-/*
-** Open the log file if it is not already open. Returns LSM_OK if
-** successful (or if the log file was already open) or an LSM error code
-** otherwise. On a read-only connection a missing log file
-** (LSM_IOERR_NOENT) is not an error.
-**
-** If pbOpen is not NULL, *pbOpen is set to true if the log file is open
-** when this function returns, or false otherwise.
-**
-** The log file must be opened before any of the following may be called:
-**
-**     lsmFsWriteLog
-**     lsmFsSyncLog
-**     lsmFsReadLog
-*/
-int lsmFsOpenLog(lsm_db *db, int *pbOpen){
-  FileSystem *pFS = db->pFS;
-  int rc = LSM_OK;
-
-  if( pFS->fdLog==0 ){
-    pFS->fdLog = fsOpenFile(pFS, db->bReadonly, 1, &rc);
-    if( db->bReadonly && rc==LSM_IOERR_NOENT ){
-      rc = LSM_OK;
-    }
-  }
-
-  if( pbOpen ){
-    *pbOpen = (pFS->fdLog!=0);
-  }
-  return rc;
-}
-
-/*
-** If the log file is open, close it and clear the handle. Otherwise
-** this is a no-op.
-*/
-void lsmFsCloseLog(lsm_db *db){
-  FileSystem *pFS = db->pFS;
-  lsm_file *pLog = pFS->fdLog;
-
-  if( pLog==0 ) return;
-  lsmEnvClose(pFS->pEnv, pLog);
-  pFS->fdLog = 0;
-}
-
-/*
-** Open a connection to a database stored within the file-system.
-**
-** If parameter bReadonly is true, then open a read-only file-descriptor
-** on the database file. It is possible that bReadonly will be false even
-** if the user requested that pDb be opened read-only. This is because the
-** file-descriptor may later on be recycled by a read-write connection.
-** If the db file can be opened for read-write access, it always is. Parameter
-** bReadonly is only ever true if it has already been determined that the
-** db can only be opened for read-only access.
-**
-** The FileSystem object and its private copies of the database and log
-** file names are carved out of a single allocation. If a file handle can
-** be obtained from lsmDbRecycleFd() it is reused; otherwise the database
-** file is opened with fsOpenFile().
-**
-** Before returning, pDb->pFS is set to point to the new FileSystem
-** object, or to NULL if an error occurred (in which case everything
-** allocated here has been released via lsmFsClose()).
-**
-** Return LSM_OK if successful or an lsm error code otherwise.
-*/
-int lsmFsOpen(
- lsm_db *pDb, /* Database connection to open fd for */
- const char *zDb, /* Full path to database file */
- int bReadonly /* True to open db file read-only */
-){
- FileSystem *pFS;
- int rc = LSM_OK;
- int nDb = strlen(zDb);
- int nByte;
-
- assert( pDb->pFS==0 );
- assert( pDb->pWorker==0 && pDb->pClient==0 );
-
- nByte = sizeof(FileSystem) + nDb+1 + nDb+4+1; /* struct + zDb + zLog (zDb plus "-log"), each nul-terminated */
- pFS = (FileSystem *)lsmMallocZeroRc(pDb->pEnv, nByte, &rc);
- if( pFS ){
- LsmFile *pLsmFile;
- pFS->zDb = (char *)&pFS[1]; /* Name buffers follow the struct itself */
- pFS->zLog = &pFS->zDb[nDb+1];
- pFS->nPagesize = LSM_DFLT_PAGE_SIZE;
- pFS->nBlocksize = LSM_DFLT_BLOCK_SIZE;
- pFS->nMetasize = LSM_META_PAGE_SIZE;
- pFS->nMetaRwSize = LSM_META_RW_PAGE_SIZE;
- pFS->pDb = pDb;
- pFS->pEnv = pDb->pEnv;
-
- /* Make a copy of the database and log file names. The log file name
- ** is the database file name with "-log" appended. */
- memcpy(pFS->zDb, zDb, nDb+1);
- memcpy(pFS->zLog, zDb, nDb);
- memcpy(&pFS->zLog[nDb], "-log", 5);
-
- /* Allocate the hash-table here. At some point, it should be changed
- ** so that it can grow dynamically. */
- pFS->nCacheMax = 2048*1024 / pFS->nPagesize;
- pFS->nHash = 4096;
- pFS->apHash = lsmMallocZeroRc(pDb->pEnv, sizeof(Page *) * pFS->nHash, &rc);
-
- /* Open the database file. Prefer a handle recycled by lsmDbRecycleFd();
- ** only if there is none is a new LsmFile allocated and the file opened. */
- pLsmFile = lsmDbRecycleFd(pDb);
- if( pLsmFile ){
- pFS->pLsmFile = pLsmFile;
- pFS->fdDb = pLsmFile->pFile;
- memset(pLsmFile, 0, sizeof(LsmFile)); /* fdDb now holds the handle */
- }else{
- pFS->pLsmFile = lsmMallocZeroRc(pDb->pEnv, sizeof(LsmFile), &rc);
- if( rc==LSM_OK ){
- pFS->fdDb = fsOpenFile(pFS, bReadonly, 0, &rc);
- }
- }
-
- if( rc!=LSM_OK ){
- lsmFsClose(pFS); /* Releases everything set up above */
- pFS = 0;
- }else{
- pFS->szSector = lsmEnvSectorSize(pFS->pEnv, pFS->fdDb);
- }
- }
-
- pDb->pFS = pFS;
- return rc;
-}
-
-/*
-** Configure the file-system object according to the current values of
-** the LSM_CONFIG_MMAP and LSM_CONFIG_SET_COMPRESSION options.
-**
-** All cached state is discarded first: the compression buffers are freed,
-** the database file is unmapped (if it is mapped), and every Page
-** structure on the LRU and free lists is released. There must be no
-** outstanding page references when this function is called.
-**
-** Then, if a compression function is configured, compression is enabled
-** and memory mapping disabled. Otherwise compression is disabled and
-** nMapLimit is set from db->iMmap (1 means unlimited, any other value is
-** a limit in KB).
-**
-** This is a no-op if db->pFS is NULL. Always returns LSM_OK.
-*/
-int lsmFsConfigure(lsm_db *db){
-  FileSystem *pFS = db->pFS;
-  if( pFS ){
-    lsm_env *pEnv = pFS->pEnv;
-    Page *pPg;
-
-    assert( pFS->nOut==0 );
-    assert( pFS->pWaiting==0 );
-    assert( pFS->pMapped==0 );
-
-    /* Reset any compression/decompression buffers already allocated. The
-    ** pointers must be zeroed as well as freed: fsAllocateBuffer() only
-    ** allocates a buffer when its pointer is NULL, and lsmFsClose() frees
-    ** both pointers again. Leaving them dangling leads to a use-after-free
-    ** and a double free. */
-    lsmFree(pEnv, pFS->aIBuffer);
-    lsmFree(pEnv, pFS->aOBuffer);
-    pFS->aIBuffer = 0;
-    pFS->aOBuffer = 0;
-    pFS->nBuffer = 0;
-
-    /* Unmap the file, if it is currently mapped */
-    if( pFS->pMap ){
-      lsmEnvRemap(pEnv, pFS->fdDb, -1, &pFS->pMap, &pFS->nMap);
-      pFS->nMapLimit = 0;
-    }
-
-    /* Free all allocated page structures */
-    pPg = pFS->pLruFirst;
-    while( pPg ){
-      Page *pNext = pPg->pLruNext;
-      assert( pPg->flags & PAGE_FREE );
-      lsmFree(pEnv, pPg->aData);
-      lsmFree(pEnv, pPg);
-      pPg = pNext;
-    }
-
-    pPg = pFS->pFree;
-    while( pPg ){
-      Page *pNext = pPg->pFreeNext;
-      lsmFree(pEnv, pPg);
-      pPg = pNext;
-    }
-
-    /* Zero pointers that point to deleted page objects */
-    pFS->nCacheAlloc = 0;
-    pFS->pLruFirst = 0;
-    pFS->pLruLast = 0;
-    pFS->pFree = 0;
-    if( pFS->apHash ){
-      memset(pFS->apHash, 0, pFS->nHash*sizeof(pFS->apHash[0]));
-    }
-
-    /* Configure the FileSystem object */
-    if( db->compress.xCompress ){
-      pFS->pCompress = &db->compress;
-      pFS->nMapLimit = 0;
-    }else{
-      pFS->pCompress = 0;
-      if( db->iMmap==1 ){
-        /* Unlimited */
-        pFS->nMapLimit = (i64)1 << 60;
-      }else{
-        /* iMmap is a limit in KB. Set nMapLimit to the same value in bytes. */
-        pFS->nMapLimit = (i64)db->iMmap * 1024;
-      }
-    }
-  }
-
-  return LSM_OK;
-}
-
-/*
-** Close and destroy a FileSystem object. This releases the Page
-** structures on the LRU and free lists, closes the database and log file
-** handles (if open) and frees the object itself along with the buffers
-** it owns. Passing NULL is a harmless no-op.
-*/
-void lsmFsClose(FileSystem *pFS){
-  lsm_env *pEnv;
-  Page *pPg;
-  Page *pNext;
-
-  if( pFS==0 ) return;
-  pEnv = pFS->pEnv;
-  assert( pFS->nOut==0 );
-
-  /* Pages on the LRU list. Only PAGE_FREE pages own their aData buffer. */
-  for(pPg=pFS->pLruFirst; pPg; pPg=pNext){
-    pNext = pPg->pLruNext;
-    if( pPg->flags & PAGE_FREE ) lsmFree(pEnv, pPg->aData);
-    lsmFree(pEnv, pPg);
-  }
-
-  /* Pages on the free list. */
-  for(pPg=pFS->pFree; pPg; pPg=pNext){
-    pNext = pPg->pFreeNext;
-    if( pPg->flags & PAGE_FREE ) lsmFree(pEnv, pPg->aData);
-    lsmFree(pEnv, pPg);
-  }
-
-  if( pFS->fdDb ) lsmEnvClose(pEnv, pFS->fdDb);
-  if( pFS->fdLog ) lsmEnvClose(pEnv, pFS->fdLog);
-
-  lsmFree(pEnv, pFS->pLsmFile);
-  lsmFree(pEnv, pFS->apHash);
-  lsmFree(pEnv, pFS->aIBuffer);
-  lsmFree(pEnv, pFS->aOBuffer);
-  lsmFree(pEnv, pFS);
-}
-
-/*
-** This function is called when closing a database handle (i.e. lsm_close())
-** while other connections to the same database exist within this process.
-** In that situation the file-descriptor open on the database file must not
-** be closed when the FileSystem object is destroyed, because doing so
-** would silently drop any POSIX locks held by the other connections (see
-** "man close" for details). Instead, the descriptor is kept in a list by
-** the lsm_shared.c module until it is either closed or reused.
-**
-** The return value is a pointer to an object that can be linked into that
-** list. The returned object takes over ownership of the database file
-** descriptor, so destroying the FileSystem object afterwards does not
-** close it.
-**
-** This function may be called at most once in the life-time of a
-** FileSystem object. The results of any operations involving the database
-** file descriptor are undefined once this function has been called.
-**
-** None of this is necessary on non-POSIX systems. It is done anyway so
-** that the code is as similar as possible on all platforms.
-*/
-LsmFile *lsmFsDeferClose(FileSystem *pFS){
-  LsmFile *pDeferred = pFS->pLsmFile;
-  assert( pDeferred->pNext==0 );
-
-  /* Move the descriptor into the LsmFile and detach both from pFS. */
-  pDeferred->pFile = pFS->fdDb;
-  pFS->pLsmFile = 0;
-  pFS->fdDb = 0;
-
-  return pDeferred;
-}
-
-/*
-** Allocate a buffer and fill it with the output of the xFileid() method
-** for the database file handle. xFileid() is called twice: once with a
-** NULL buffer to obtain the required size, then again to populate the
-** newly allocated buffer.
-**
-** On success, *ppId is set to point to the buffer, *pnId to its size in
-** bytes, and LSM_OK is returned. Otherwise *ppId and *pnId are both set
-** to zero and an LSM error code is returned.
-*/
-int lsmFsFileid(lsm_db *pDb, void **ppId, int *pnId){
-  lsm_env *pEnv = pDb->pEnv;
-  FileSystem *pFS = pDb->pFS;
-  int nBuf = 0;
-  void *pBuf;
-  int rc;
-
-  rc = pEnv->xFileid(pFS->fdDb, 0, &nBuf);
-  pBuf = lsmMallocZeroRc(pEnv, nBuf, &rc);
-  if( rc==LSM_OK ){
-    rc = pEnv->xFileid(pFS->fdDb, pBuf, &nBuf);
-  }
-
-  if( rc==LSM_OK ){
-    *ppId = pBuf;
-    *pnId = nBuf;
-  }else{
-    lsmFree(pEnv, pBuf);
-    *ppId = 0;
-    *pnId = 0;
-  }
-  return rc;
-}
-
-/*
-** Return the nominal page-size (FileSystem.nPagesize) used by this
-** file-system. Actual pages may be smaller or larger than this value.
-*/
-int lsmFsPageSize(FileSystem *pFS){
-  int nPgsz = pFS->nPagesize;
-  return nPgsz;
-}
-
-/*
-** Return the block-size (FileSystem.nBlocksize) used by this file-system.
-*/
-int lsmFsBlockSize(FileSystem *pFS){
-  int nBlksz = pFS->nBlocksize;
-  return nBlksz;
-}
-
-/*
-** Set the nominal page-size used by this file-system to nPgsz. Actual
-** pages may be smaller or larger than this value. The page-cache limit
-** nCacheMax is recomputed as the number of pages that fit in 2MB.
-*/
-void lsmFsSetPageSize(FileSystem *pFS, int nPgsz){
-  pFS->nCacheMax = (2048*1024) / nPgsz;
-  pFS->nPagesize = nPgsz;
-}
-
-/*
-** Set the block-size used by this file-system to nBlocksize.
-*/
-void lsmFsSetBlockSize(FileSystem *pFS, int nBlocksize){
-  FileSystem *p = pFS;
-  p->nBlocksize = nBlocksize;
-}
-
-/*
-** Return the page number of the first page on block iBlock. Blocks are
-** numbered starting from 1.
-**
-** For a compressed database, page numbers are byte offsets. The first
-** page on each block is the byte offset immediately following the 4-byte
-** "previous block" pointer at the start of each block. On block 1 the
-** offset is instead 4 bytes past the two meta pages.
-**
-** For an uncompressed database, the first page of block 1 is the first
-** whole page following the two meta pages.
-*/
-static LsmPgno fsFirstPageOnBlock(FileSystem *pFS, int iBlock){
-  if( pFS->pCompress ){
-    if( iBlock==1 ) return pFS->nMetasize * 2 + 4;
-    return pFS->nBlocksize * (LsmPgno)(iBlock-1) + 4;
-  }else{
-    const i64 nPerBlock = (pFS->nBlocksize / pFS->nPagesize);
-    if( iBlock!=1 ){
-      return 1 + (iBlock-1) * nPerBlock;
-    }
-    return 1 + ((pFS->nMetasize*2 + pFS->nPagesize - 1) / pFS->nPagesize);
-  }
-}
-
-/*
-** Return the page number of the last page on block iBlock. Blocks are
-** numbered starting from 1.
-**
-** For a compressed database, page numbers are byte offsets. The last
-** page on each block is the byte offset of the byte immediately before
-** the 4-byte "next block" pointer at the end of each block.
-**
-** For an uncompressed database the result is iBlock multiplied by the
-** number of pages per block. The product is computed using 64-bit
-** arithmetic, as in fsFirstPageOnBlock(), so that it cannot overflow a
-** 32-bit int for large block numbers.
-*/
-static LsmPgno fsLastPageOnBlock(FileSystem *pFS, int iBlock){
-  if( pFS->pCompress ){
-    return pFS->nBlocksize * (LsmPgno)iBlock - 1 - 4;
-  }else{
-    const i64 nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
-    return (LsmPgno)(iBlock * nPagePerBlock);
-  }
-}
-
-/*
-** Return the number of the block on which page iPg is located. Blocks
-** are numbered starting from 1. In compressed mode iPg is divided by the
-** block size; otherwise (iPg-1) is divided by the number of pages per
-** block.
-*/
-static int fsPageToBlock(FileSystem *pFS, LsmPgno iPg){
-  LsmPgno iZeroBased;            /* Block index, counting from zero */
-  if( pFS->pCompress ){
-    iZeroBased = iPg / pFS->nBlocksize;
-  }else{
-    iZeroBased = (iPg-1) / (pFS->nBlocksize / pFS->nPagesize);
-  }
-  return (int)(iZeroBased + 1);
-}
-
-/*
-** Return true if page iPg is the last page on its block. That is, if
-** iPg is non-zero and an exact multiple of the number of pages per block.
-**
-** This function is only called in non-compressed database mode.
-*/
-static int fsIsLast(FileSystem *pFS, LsmPgno iPg){
-  const int nPerBlock = (pFS->nBlocksize / pFS->nPagesize);
-  assert( !pFS->pCompress );
-  if( iPg==0 ) return 0;
-  return ((iPg % nPerBlock)==0);
-}
-
-/*
-** Return true if page iPg is the first page on its block.
-**
-** This function is only called in non-compressed database mode.
-**
-** NOTE(review): the text of this function is damaged in this copy. The
-** return expression breaks off in the middle of its second operand, and
-** the lines that follow it (ending in "return pPage->aData") reference a
-** pPage variable that is not declared here, so they presumably belong to
-** a separate function whose header and comment were lost. Restore both
-** definitions from a pristine copy of the file; do not rely on the text
-** below.
-*/
-static int fsIsFirst(FileSystem *pFS, LsmPgno iPg){
- const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
- assert( !pFS->pCompress );
- return ( (iPg % nPagePerBlock)==1
- || (iPgnData;
- }
- return pPage->aData;
-}
-
-/*
-** Return the page number of page pPage, or 0 if pPage is NULL.
-*/
-LsmPgno lsmFsPageNumber(Page *pPage){
-  /* assert( (pPage->flags & PAGE_DIRTY)==0 ); */
-  if( pPage==0 ) return 0;
-  return pPage->iPg;
-}
-
-/*
-** Page pPg is currently part of the LRU list belonging to pFS. Unlink it
-** from the list, updating pFS->pLruFirst and pFS->pLruLast if pPg was at
-** either end. pPg->pLruNext and pPg->pLruPrev are both cleared.
-*/
-static void fsPageRemoveFromLru(FileSystem *pFS, Page *pPg){
-  Page *pBefore = pPg->pLruPrev;
-  Page *pAfter = pPg->pLruNext;
-
-  assert( pAfter || pPg==pFS->pLruLast );
-  assert( pBefore || pPg==pFS->pLruFirst );
-
-  if( pAfter==0 ){
-    pFS->pLruLast = pBefore;
-  }else{
-    pAfter->pLruPrev = pBefore;
-  }
-
-  if( pBefore==0 ){
-    pFS->pLruFirst = pAfter;
-  }else{
-    pBefore->pLruNext = pAfter;
-  }
-
-  pPg->pLruNext = 0;
-  pPg->pLruPrev = 0;
-}
-
-/*
-** Page pPg is not currently part of the LRU list belonging to pFS.
-** Append it to the end of the list (it becomes pFS->pLruLast).
-*/
-static void fsPageAddToLru(FileSystem *pFS, Page *pPg){
-  Page *pTail = pFS->pLruLast;
-
-  assert( pPg->pLruNext==0 && pPg->pLruPrev==0 );
-  pPg->pLruPrev = pTail;
-  if( pTail==0 ){
-    /* The list was empty, so pPg is also the first entry. */
-    pFS->pLruFirst = pPg;
-  }else{
-    pTail->pLruNext = pPg;
-  }
-  pFS->pLruLast = pPg;
-}
-
-/*
-** Page pPg is currently stored in the apHash/nHash hash table. Unlink it
-** from its hash chain and clear pPg->pHashNext.
-*/
-static void fsPageRemoveFromHash(FileSystem *pFS, Page *pPg){
-  Page **ppLink = &pFS->apHash[fsHashKey(pFS->nHash, pPg->iPg)];
-
-  /* Walk the chain until ppLink is the pointer that refers to pPg. */
-  while( *ppLink!=pPg ){
-    ppLink = &(*ppLink)->pHashNext;
-  }
-  *ppLink = pPg->pHashNext;
-  pPg->pHashNext = 0;
-}
-
-/*
-** Free a Page object allocated by fsPageBuffer(), along with its aData
-** buffer, and decrement the count of allocated cache pages.
-*/
-static void fsPageBufferFree(Page *pPg){
-  FileSystem *pFS = pPg->pFS;
-  lsm_env *pEnv = pFS->pEnv;
-
-  pFS->nCacheAlloc--;
-  lsmFree(pEnv, pPg->aData);
-  lsmFree(pEnv, pPg);
-}
-
-
-/*
-** Purge the cache of all non-mmap pages with nRef==0. Every page on the
-** LRU list is removed from the hash table and freed, leaving the LRU
-** list empty.
-*/
-void lsmFsPurgeCache(FileSystem *pFS){
-  Page *pPg;
-  Page *pNext;
-
-  for(pPg=pFS->pLruFirst; pPg; pPg=pNext){
-    pNext = pPg->pLruNext;
-    assert( pPg->flags & PAGE_FREE );
-    fsPageRemoveFromHash(pFS, pPg);
-    fsPageBufferFree(pPg);
-  }
-  pFS->pLruLast = 0;
-  pFS->pLruFirst = 0;
-
-  assert( pFS->nCacheAlloc<=pFS->nOut && pFS->nCacheAlloc>=0 );
-}
-
-/*
-** Search the hash-table for page iPg. If an entry is found, return a
-** pointer to it. Otherwise, return NULL.
-**
-** Either way, if argument piHash is not NULL, *piHash is set to the number
-** of the hash slot that page iPg would be stored in.
-*/
-static Page *fsPageFindInHash(FileSystem *pFS, LsmPgno iPg, int *piHash){
-  int iSlot = fsHashKey(pFS->nHash, iPg);
-  Page *pHit;                     /* Return value */
-
-  if( piHash ) *piHash = iSlot;
-
-  pHit = pFS->apHash[iSlot];
-  while( pHit && pHit->iPg!=iPg ){
-    pHit = pHit->pHashNext;
-  }
-  return pHit;
-}
-
-/*
-** Allocate and return a non-mmap Page object. If there are already
-** nCacheMax such Page objects outstanding, try to recycle an existing
-** Page instead.
-**
-** A new Page (and an nPagesize byte data buffer) is allocated if the LRU
-** list is empty or if fewer than nCacheMax pages are currently allocated.
-** Otherwise the page at the head of the LRU list is removed from the LRU
-** list and the hash table, zeroed, and reused with its existing buffer.
-**
-** If successful, *ppOut is set to point to the page, which has only the
-** PAGE_FREE flag set, and LSM_OK is returned. If a memory allocation
-** fails, *ppOut is set to NULL and LSM_NOMEM_BKPT is returned.
-*/
-static int fsPageBuffer(
-  FileSystem *pFS,
-  Page **ppOut
-){
-  int rc = LSM_OK;
-  Page *pPage = 0;
-  if( pFS->pLruFirst==0 || pFS->nCacheAlloc<pFS->nCacheMax ){
-    /* Allocate a new Page object */
-    pPage = lsmMallocZero(pFS->pEnv, sizeof(Page));
-    if( !pPage ){
-      rc = LSM_NOMEM_BKPT;
-    }else{
-      pPage->aData = (u8 *)lsmMalloc(pFS->pEnv, pFS->nPagesize);
-      if( !pPage->aData ){
-        lsmFree(pFS->pEnv, pPage);
-        rc = LSM_NOMEM_BKPT;
-        pPage = 0;
-      }else{
-        pFS->nCacheAlloc++;
-      }
-    }
-  }else{
-    /* Reuse an existing Page object. The data buffer is kept; every other
-    ** field of the structure is reset. */
-    u8 *aData;
-    pPage = pFS->pLruFirst;
-    aData = pPage->aData;
-    fsPageRemoveFromLru(pFS, pPage);
-    fsPageRemoveFromHash(pFS, pPage);
-
-    memset(pPage, 0, sizeof(Page));
-    pPage->aData = aData;
-  }
-
-  if( pPage ){
-    pPage->flags = PAGE_FREE;
-  }
-  *ppOut = pPage;
-  return rc;
-}
-
-/*
-** Assuming *pRc is initially LSM_OK, attempt to ensure that the
-** memory-mapped region is at least iSz bytes in size. If it is not
-** already that large, extend it. If extending it moves the mapping, the
-** aData pointers of all pages on the FileSystem.pMapped list are adjusted
-** and lsmSortedRemap() is invoked.
-**
-** If *pRc is not LSM_OK when this function is called, or if the mapping
-** is already large enough, it is a no-op. Otherwise, *pRc is set to the
-** value returned by the remap operation.
-**
-** This function is never called in compressed database mode.
-*/
-static void fsGrowMapping(
-  FileSystem *pFS,                /* File system object */
-  i64 iSz,                        /* Minimum size to extend mapping to */
-  int *pRc                        /* IN/OUT: Error code */
-){
-  u8 *aPrev;
-  int rc;
-
-  assert( PAGE_HASPREV==4 );
-  if( *pRc!=LSM_OK || iSz<=pFS->nMap ) return;
-
-  aPrev = pFS->pMap;
-  rc = lsmEnvRemap(pFS->pEnv, pFS->fdDb, iSz, &pFS->pMap, &pFS->nMap);
-  if( rc==LSM_OK && pFS->pMap!=aPrev ){
-    /* The mapping moved. Shift every pointer into it by the same amount. */
-    i64 nShift = (u8 *)pFS->pMap - aPrev;
-    Page *pPg = pFS->pMapped;
-    while( pPg ){
-      pPg->aData += nShift;
-      pPg = pPg->pMappedNext;
-    }
-    lsmSortedRemap(pFS->pDb);
-  }
-  *pRc = rc;
-}
-
-/*
-** Unmap the database file by requesting a mapping size of -1 from
-** lsmEnvRemap(). Returns the result of that call, or LSM_OK if pFS is
-** NULL.
-*/
-int lsmFsUnmap(FileSystem *pFS){
-  if( pFS==0 ) return LSM_OK;
-  return lsmEnvRemap(pFS->pEnv, pFS->fdDb, -1, &pFS->pMap, &pFS->nMap);
-}
-
-/*
-** Sync the database file to disk via lsmEnvSync(). Parameter nBlock is
-** not used by this implementation.
-*/
-int lsmFsSyncDb(FileSystem *pFS, int nBlock){
-  int rc = lsmEnvSync(pFS->pEnv, pFS->fdDb);
-  return rc;
-}
-
-/*
-** If block iBlk has been redirected according to the redirections in the
-** object passed as the first argument, return the destination block to
-** which it is redirected. Otherwise, return a copy of iBlk.
-**
-** Argument p may be NULL, in which case no redirection takes place.
-*/
-static int fsRedirectBlock(Redirect *p, int iBlk){
-  if( p ){
-    int i;
-    /* Scan all p->n redirect entries for one that maps from iBlk. */
-    for(i=0; i<p->n; i++){
-      if( iBlk==p->a[i].iFrom ) return p->a[i].iTo;
-    }
-  }
-  assert( iBlk!=0 );
-  return iBlk;
-}
-
-/*
-** If page iPg has been redirected according to the redirections in the
-** object passed as the second argument, return the destination page to
-** which it is redirected. Otherwise, return a copy of iPg.
-**
-** Argument pRedir may be NULL, in which case iPg is returned unchanged.
-** The scan of the redirect array stops at the first entry with an iFrom
-** value larger than the block containing iPg.
-*/
-LsmPgno lsmFsRedirectPage(FileSystem *pFS, Redirect *pRedir, LsmPgno iPg){
-  LsmPgno iReal = iPg;
-
-  if( pRedir ){
-    /* In compressed mode page numbers are byte offsets, so the distance
-    ** between corresponding pages of adjacent blocks is the block size. */
-    const int nPagePerBlock = (
-        pFS->pCompress ? pFS->nBlocksize : (pFS->nBlocksize / pFS->nPagesize)
-    );
-    int iBlk = fsPageToBlock(pFS, iPg);
-    int i;
-    for(i=0; i<pRedir->n; i++){
-      int iFrom = pRedir->a[i].iFrom;
-      if( iFrom>iBlk ) break;
-      if( iFrom==iBlk ){
-        int iTo = pRedir->a[i].iTo;
-        iReal = iPg - (LsmPgno)(iFrom - iTo) * nPagePerBlock;
-        if( iTo==1 ){
-          /* Block 1 begins with the meta pages, so its first usable page
-          ** is offset relative to other blocks. */
-          iReal += (fsFirstPageOnBlock(pFS, 1)-1);
-        }
-        break;
-      }
-    }
-  }
-
-  assert( iReal!=0 );
-  return iReal;
-}
-
-/* Required by the circular fsBlockNext<->fsPageGet dependency. */
-static int fsPageGet(FileSystem *, Segment *, LsmPgno, int, Page **, int *);
-
-/*
-** Parameter iBlock is a database file block. This function reads the value
-** stored in the blocks "next block" pointer and stores it in *piNext.
-** LSM_OK is returned if everything is successful, or an LSM error code
-** otherwise.
-**
-** If pSeg is not NULL, pSeg->pRedirect is applied both to iBlock before
-** the pointer is read and to the value read before it is returned.
-**
-** If an error occurs, *piNext is neither read nor written. Callers are
-** free to pass a pointer to an uninitialized variable.
-*/
-static int fsBlockNext(
-  FileSystem *pFS,                /* File-system object handle */
-  Segment *pSeg,                  /* Use this segment for block redirects */
-  int iBlock,                     /* Read field from this block */
-  int *piNext                     /* OUT: Next block in linked list */
-){
-  int rc;
-  int iRead;                      /* Read block from here */
-
-  if( pSeg ){
-    iRead = fsRedirectBlock(pSeg->pRedirect, iBlock);
-  }else{
-    iRead = iBlock;
-  }
-
-  assert( pFS->nMapLimit==0 || pFS->pCompress==0 );
-  if( pFS->pCompress ){
-    i64 iOff;                     /* File offset to read data from */
-    u8 aNext[4];                  /* 4-byte pointer read from db file */
-
-    /* The pointer occupies the last 4 bytes of the block. */
-    iOff = (i64)iRead * pFS->nBlocksize - sizeof(aNext);
-    rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aNext, sizeof(aNext));
-    if( rc==LSM_OK ){
-      *piNext = (int)lsmGetU32(aNext);
-    }
-  }else{
-    /* The pointer occupies the last 4 bytes of the last page of the
-    ** block. The page number is computed in 64-bit arithmetic so that it
-    ** cannot overflow for large block numbers. */
-    const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
-    Page *pLast;
-    rc = fsPageGet(pFS, 0, (LsmPgno)iRead*nPagePerBlock, 0, &pLast, 0);
-    if( rc==LSM_OK ){
-      *piNext = lsmGetU32(&pLast->aData[pFS->nPagesize-4]);
-      lsmFsPageRelease(pLast);
-    }
-  }
-
-  /* Only redirect a value that was actually read. On error *piNext may be
-  ** uninitialized. */
-  if( rc==LSM_OK && pSeg ){
-    *piNext = fsRedirectBlock(pSeg->pRedirect, *piNext);
-  }
-  return rc;
-}
-
-/*
-** Return the page number of the last page on the block that contains
-** page iPg.
-*/
-LsmPgno fsLastPageOnPagesBlock(FileSystem *pFS, LsmPgno iPg){
-  int iBlk = fsPageToBlock(pFS, iPg);
-  return fsLastPageOnBlock(pFS, iBlk);
-}
-
-/*
-** Read nData bytes of data from offset iOff of the database file into
-** buffer aData. If the requested range extends past the end of the block
-** containing iOff, the remainder is read from the start of the next block,
-** found by following the block pointer.
-**
-** Offset iOff is an absolute offset - not subject to any block redirection.
-** However any block pointer followed is. Use pSeg->pRedirect in this case.
-**
-** This function is only called in compressed database mode.
-*/
-static int fsReadData(
-  FileSystem *pFS,                /* File-system handle */
-  Segment *pSeg,                  /* Block redirection */
-  i64 iOff,                       /* Read data from this offset */
-  u8 *aData,                      /* Buffer to read data into */
-  int nData                       /* Number of bytes to read */
-){
-  i64 iBlockEnd;                  /* Offset one past the last usable byte */
-  int nFirst;                     /* Bytes available on the current block */
-  int iNextBlk;                   /* Block following the current one */
-  int rc;
-
-  assert( pFS->pCompress );
-
-  iBlockEnd = fsLastPageOnPagesBlock(pFS, iOff) + 1;
-  nFirst = (int)LSM_MIN(iBlockEnd - iOff, nData);
-
-  rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aData, nFirst);
-  if( rc!=LSM_OK || nFirst==nData ) return rc;
-
-  /* The tail of the request lives at the start of the next block. */
-  rc = fsBlockNext(pFS, pSeg, fsPageToBlock(pFS, iOff), &iNextBlk);
-  if( rc==LSM_OK ){
-    i64 iOff2 = fsFirstPageOnBlock(pFS, iNextBlk);
-    rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff2, &aData[nFirst], nData-nFirst);
-  }
-  return rc;
-}
-
-/*
-** Parameter iBlock is a database file block. This function reads the value
-** stored in the blocks "previous block" pointer (the 4 bytes immediately
-** before the first page offset of the block), applies any redirection
-** from pSeg->pRedirect to it, and stores the result in *piPrev.
-**
-** LSM_OK is returned if everything is successful, or an LSM error code
-** otherwise. Only compressed databases are supported; the uncompressed
-** branch is an assert(0).
-*/
-static int fsBlockPrev(
-  FileSystem *pFS,                /* File-system object handle */
-  Segment *pSeg,                  /* Use this segment for block redirects */
-  int iBlock,                     /* Read field from this block */
-  int *piPrev                     /* OUT: Previous block in linked list */
-){
-  int rc = LSM_OK;                /* Return code */
-
-  assert( pFS->nMapLimit==0 || pFS->pCompress==0 );
-  assert( iBlock>0 );
-
-  if( pFS->pCompress==0 ){
-    assert( 0 );
-  }else{
-    u8 aPtr[4];                   /* 4-byte pointer read from db file */
-    i64 iOff = fsFirstPageOnBlock(pFS, iBlock) - 4;
-
-    rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aPtr, sizeof(aPtr));
-    if( rc==LSM_OK ){
-      Redirect *pRedir = 0;
-      if( pSeg ) pRedir = pSeg->pRedirect;
-      *piPrev = fsRedirectBlock(pRedir, (int)lsmGetU32(aPtr));
-    }
-  }
-  return rc;
-}
-
-/*
-** Encode routine for 3-byte record size fields. Each byte carries 7 bits
-** of nByte, most significant first. The top bit of the first and third
-** bytes is always set. The top bit of the second byte is clear if bFree
-** is true, and set otherwise.
-*/
-static void putRecordSize(u8 *aBuf, int nByte, int bFree){
-  u8 flag = (bFree ? 0x00 : 0x80);
-  aBuf[0] = 0x80 | (u8)(nByte >> 14);
-  aBuf[1] = flag | ((u8)(nByte >> 7) & 0x7F);
-  aBuf[2] = 0x80 | (u8)nByte;
-}
-/*
-** Decode a 3-byte record size field. Returns the size assembled from the
-** low 7 bits of each byte, and sets *pbFree to true if the top bit of the
-** second byte is clear.
-*/
-static int getRecordSize(u8 *aBuf, int *pbFree){
-  int nHi = (aBuf[0] & 0x7F) << 14;
-  int nMid = (aBuf[1] & 0x7F) << 7;
-  int nLo = (aBuf[2] & 0x7F);
-
-  *pbFree = ((aBuf[1] & 0x80)==0);
-  return (nHi | nMid | nLo);
-}
-
-/*
-** Subtract iSub from database file offset iOff and set *piRes to the
-** result. If the result would lie before the first usable offset of the
-** block containing iOff, follow the "previous block" pointer stored at
-** the start of the block and continue counting backwards from the end of
-** that block.
-**
-** Offset iOff is an absolute offset - not subject to any block redirection.
-** However any block pointer followed is. Use pSeg->pRedirect in this case.
-**
-** Return LSM_OK if successful or an lsm error code if an error occurs.
-*/
-static int fsSubtractOffset(
-  FileSystem *pFS,
-  Segment *pSeg,
-  i64 iOff,
-  int iSub,
-  i64 *piRes
-){
-  i64 iBlockStart;                /* First usable offset on iOff's block */
-  i64 iTarget;                    /* Result if no block boundary crossed */
-  int iPrevBlk = 0;
-  int rc;
-
-  assert( pFS->pCompress );
-
-  iBlockStart = fsFirstPageOnBlock(pFS, fsPageToBlock(pFS, iOff));
-  iTarget = iOff - iSub;
-  if( iTarget>=iBlockStart ){
-    *piRes = iTarget;
-    return LSM_OK;
-  }
-
-  rc = fsBlockPrev(pFS, pSeg, fsPageToBlock(pFS, iOff), &iPrevBlk);
-  *piRes = fsLastPageOnBlock(pFS, iPrevBlk) - iSub + (iOff - iBlockStart + 1);
-  return rc;
-}
-
-/*
-** Add iAdd to database file offset iOff and set *piRes to the
-** result. If doing so means passing the end of a block, follow the
-** block pointer stored in the last 4 bytes of the block.
-**
-** Offset iOff is an absolute offset - not subject to any block redirection.
-** However any block pointer followed is. Use pSeg->pRedirect in this case.
-**
-** Return LSM_OK if successful or an lsm error code if an error occurs.
-** If an error is returned, the value written to *piRes is not meaningful.
-*/
-static int fsAddOffset(
-  FileSystem *pFS,
-  Segment *pSeg,
-  i64 iOff,
-  int iAdd,
-  i64 *piRes
-){
-  i64 iEob;
-  int iBlk = 0;                   /* Initialized as in fsSubtractOffset(), so
-                                  ** no indeterminate value is read if
-                                  ** fsBlockNext() fails */
-  int rc;
-
-  assert( pFS->pCompress );
-
-  iEob = fsLastPageOnPagesBlock(pFS, iOff);
-  if( (iOff+iAdd)<=iEob ){
-    *piRes = (iOff+iAdd);
-    return LSM_OK;
-  }
-
-  rc = fsBlockNext(pFS, pSeg, fsPageToBlock(pFS, iOff), &iBlk);
-  *piRes = fsFirstPageOnBlock(pFS, iBlk) + iAdd - (iEob - iOff + 1);
-  return rc;
-}
-
-/*
-** If it is not already allocated, allocate either the FileSystem.aOBuffer
-** (if bWrite is true) or the FileSystem.aIBuffer (if bWrite is false).
-** Return LSM_OK if the buffer is available when this function returns,
-** or LSM_NOMEM_BKPT if the attempt to allocate memory fails.
-**
-** The first time this is called, FileSystem.nBuffer is set to the larger
-** of the compression bound for one page and (szSector+6).
-*/
-static int fsAllocateBuffer(FileSystem *pFS, int bWrite){
-  u8 **ppBuf;                     /* Pointer to either aIBuffer or aOBuffer */
-
-  assert( pFS->pCompress );
-
-  /* If neither buffer has been allocated yet, work out how large they
-  ** should be and store the result in FileSystem.nBuffer. */
-  if( pFS->nBuffer==0 ){
-    lsm_compress *pCmp = pFS->pCompress;
-    assert( pFS->aIBuffer==0 && pFS->aOBuffer==0 );
-    pFS->nBuffer = pCmp->xBound(pCmp->pCtx, pFS->nPagesize);
-    pFS->nBuffer = LSM_MAX(pFS->nBuffer, pFS->szSector+6);
-  }
-
-  ppBuf = &pFS->aIBuffer;
-  if( bWrite ) ppBuf = &pFS->aOBuffer;
-
-  if( *ppBuf ) return LSM_OK;
-  *ppBuf = lsmMalloc(pFS->pEnv, LSM_MAX(pFS->nBuffer, pFS->nPagesize));
-  return (*ppBuf ? LSM_OK : LSM_NOMEM_BKPT);
-}
-
-/*
-** This function is only called in compressed database mode. It reads and
-** uncompresses the compressed data for page pPg from the database and
-** populates the pPg->aData[] buffer and pPg->nCompress field.
-**
-** A 3-byte size field is read from offset pPg->iPg first. If it describes
-** a page record, the compressed payload that follows it is read into
-** FileSystem.aIBuffer and passed to the xUncompress() callback.
-**
-** It is possible that instead of a page record, there is free space
-** at offset pPg->iPg. In this case no page data is read from the file, but
-** output variable *pnSpace is set to the total number of free bytes. If
-** pnSpace is NULL in this case, LSM_CORRUPT_BKPT is returned.
-**
-** LSM_CORRUPT_BKPT is also returned if the compressed size is larger than
-** FileSystem.nBuffer, or if the uncompressed data is not exactly one page
-** in size.
-**
-** LSM_OK is returned if successful, or an LSM error code otherwise.
-*/
-static int fsReadPagedata(
- FileSystem *pFS, /* File-system handle */
- Segment *pSeg, /* pPg is part of this segment */
- Page *pPg, /* Page to read and uncompress data for */
- int *pnSpace /* OUT: Total bytes of free space */
-){
- lsm_compress *p = pFS->pCompress;
- i64 iOff = pPg->iPg; /* In compressed mode the page number is a file offset */
- u8 aSz[3]; /* Size field at the start of the record */
- int rc;
-
- assert( p && pPg->nCompress==0 );
-
- if( fsAllocateBuffer(pFS, 0) ) return LSM_NOMEM;
-
- rc = fsReadData(pFS, pSeg, iOff, aSz, sizeof(aSz));
-
- if( rc==LSM_OK ){
- int bFree;
- if( aSz[0] & 0x80 ){
- pPg->nCompress = (int)getRecordSize(aSz, &bFree);
- }else{
- pPg->nCompress = (int)aSz[0] - sizeof(aSz)*2; /* Top bit clear: treated as free space, aSz[0] being the total size */
- bFree = 1;
- }
- if( bFree ){
- if( pnSpace ){
- *pnSpace = pPg->nCompress + sizeof(aSz)*2; /* Includes two 3-byte size fields */
- }else{
- rc = LSM_CORRUPT_BKPT;
- }
- }else{
- rc = fsAddOffset(pFS, pSeg, iOff, 3, &iOff); /* Step over the size field */
- if( rc==LSM_OK ){
- if( pPg->nCompress>pFS->nBuffer ){
- rc = LSM_CORRUPT_BKPT;
- }else{
- rc = fsReadData(pFS, pSeg, iOff, pFS->aIBuffer, pPg->nCompress);
- }
- if( rc==LSM_OK ){
- int n = pFS->nPagesize; /* IN: buffer size. Compared to the page size after the call */
- rc = p->xUncompress(p->pCtx,
- (char *)pPg->aData, &n,
- (const char *)pFS->aIBuffer, pPg->nCompress
- );
- if( rc==LSM_OK && n!=pPg->pFS->nPagesize ){
- rc = LSM_CORRUPT_BKPT;
- }
- }
- }
- }
- }
- return rc;
-}
-
-/*
-** Return a handle for a database page.
-**
-** The page number is first passed through lsmFsRedirectPage() using
-** pSeg->pRedirect (if pSeg is not NULL). The page cache hash table is
-** then searched. If the page is not found there, it is either attached to
-** the memory mapping (if fsMmapPage() says so) or loaded into a buffer
-** obtained from fsPageBuffer(). If noContent is true the buffer is not
-** populated from disk.
-**
-** If this file-system object is accessing a compressed database it may be
-** that there is no page record at database file offset iPg. Instead, there
-** may be a free space record. In this case, set *ppPg to NULL and *pnSpace
-** to the total number of free bytes before returning.
-**
-** In non-compressed mode, the first and last pages of each block have 4
-** fewer bytes of usable data (Page.nData). For the first page of a block,
-** Page.aData is also advanced by 4 bytes and PAGE_HASPREV set.
-**
-** On success the reference count of the returned page is incremented.
-**
-** If no error occurs, LSM_OK is returned. Otherwise, an lsm error code.
-*/
-static int fsPageGet(
- FileSystem *pFS, /* File-system handle */
- Segment *pSeg, /* Block redirection to use (or NULL) */
- LsmPgno iPg, /* Page id */
- int noContent, /* True to not load content from disk */
- Page **ppPg, /* OUT: New page handle */
- int *pnSpace /* OUT: Bytes of free space */
-){
- Page *p;
- int iHash; /* Hash slot for iReal, set by fsPageFindInHash() */
- int rc = LSM_OK;
-
- /* In most cases iReal is the same as iPg. Except, if pSeg->pRedirect is
- ** not NULL, and the block containing iPg has been redirected, then iReal
- ** is the page number after redirection. */
- LsmPgno iReal = lsmFsRedirectPage(pFS, (pSeg ? pSeg->pRedirect : 0), iPg);
-
- assert_lists_are_ok(pFS);
- assert( iPg>=fsFirstPageOnBlock(pFS, 1) );
- assert( iReal>=fsFirstPageOnBlock(pFS, 1) );
- *ppPg = 0;
-
- /* Search the hash-table for the page */
- p = fsPageFindInHash(pFS, iReal, &iHash);
-
- if( p ){
- assert( p->flags & PAGE_FREE );
- if( p->nRef==0 ) fsPageRemoveFromLru(pFS, p); /* About to be referenced again */
- }else{
-
- if( fsMmapPage(pFS, iReal) ){
- i64 iEnd = (i64)iReal * pFS->nPagesize; /* Mapping must extend to the end of the page */
- fsGrowMapping(pFS, iEnd, &rc);
- if( rc!=LSM_OK ) return rc;
-
- if( pFS->pFree ){
- p = pFS->pFree; /* Reuse a Page structure from the free list */
- pFS->pFree = p->pFreeNext;
- assert( p->nRef==0 );
- }else{
- p = lsmMallocZeroRc(pFS->pEnv, sizeof(Page), &rc);
- if( rc ) return rc;
- p->pFS = pFS;
- }
- p->aData = &((u8 *)pFS->pMap)[pFS->nPagesize * (iReal-1)];
- p->iPg = iReal;
-
- /* This page now carries a pointer to the mapping. Link it in to
- ** the FileSystem.pMapped list. */
- assert( p->pMappedNext==0 );
- p->pMappedNext = pFS->pMapped;
- pFS->pMapped = p;
-
- assert( pFS->pCompress==0 );
- assert( (p->flags & PAGE_FREE)==0 );
- }else{
- rc = fsPageBuffer(pFS, &p);
- if( rc==LSM_OK ){
- int nSpace = 0; /* Non-zero if a free-space record was found */
- p->iPg = iReal;
- p->nRef = 0;
- p->pFS = pFS;
- assert( p->flags==0 || p->flags==PAGE_FREE );
-
-#ifdef LSM_DEBUG
- memset(p->aData, 0x56, pFS->nPagesize);
-#endif
- assert( p->pLruNext==0 && p->pLruPrev==0 );
- if( noContent==0 ){
- if( pFS->pCompress ){
- rc = fsReadPagedata(pFS, pSeg, p, &nSpace);
- }else{
- int nByte = pFS->nPagesize;
- i64 iOff = (i64)(iReal-1) * pFS->nPagesize; /* Page numbers start at 1 */
- rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, p->aData, nByte);
- }
- pFS->nRead++;
- }
-
- /* If the xRead() call was successful (or not attempted), link the
- ** page into the page-cache hash-table. Otherwise, if it failed (or
- ** if a free-space record was found instead of a page), free the
- ** buffer. */
- if( rc==LSM_OK && nSpace==0 ){
- p->pHashNext = pFS->apHash[iHash];
- pFS->apHash[iHash] = p;
- }else{
- fsPageBufferFree(p);
- p = 0;
- if( pnSpace ) *pnSpace = nSpace;
- }
- }
- }
-
- assert( (rc==LSM_OK && (p || (pnSpace && *pnSpace)))
- || (rc!=LSM_OK && p==0)
- );
- }
-
- if( rc==LSM_OK && p ){
- if( pFS->pCompress==0 && (fsIsLast(pFS, iReal) || fsIsFirst(pFS, iReal)) ){
- p->nData = pFS->nPagesize - 4; /* 4 bytes are used by a block pointer */
- if( fsIsFirst(pFS, iReal) && p->nRef==0 ){
- p->aData += 4; /* Skip the 4 bytes at the start of the page */
- p->flags |= PAGE_HASPREV;
- }
- }else{
- p->nData = pFS->nPagesize;
- }
- pFS->nOut += (p->nRef==0); /* Count pages with at least one reference */
- p->nRef++;
- }
- *ppPg = p;
- return rc;
-}
-
-/*
-** Read the 64-bit checkpoint id of the checkpoint currently stored on meta
-** page iMeta (1 or 2) of the database file. If no error occurs, store the
-** id value in *piVal and return LSM_OK. Otherwise, return an LSM error
-** code and leave *piVal unmodified.
-**
-** If memory mapping is enabled (nMapLimit>0) the value is read directly
-** from the mapping. Otherwise it is read via lsmFsMetaPageGet().
-**
-** If a checkpointer connection is currently updating meta-page iMeta, or an
-** earlier checkpointer crashed while doing so, the value read into *piVal
-** may be garbage. It is the callers responsibility to deal with this.
-*/
-int lsmFsReadSyncedId(lsm_db *db, int iMeta, i64 *piVal){
-  FileSystem *pFS = db->pFS;
-  int rc = LSM_OK;
-
-  assert( iMeta==1 || iMeta==2 );
-
-  if( pFS->nMapLimit<=0 ){
-    /* No memory mapping. Go through the meta-page interface. */
-    MetaPage *pMeta = 0;
-    rc = lsmFsMetaPageGet(pFS, 0, iMeta, &pMeta);
-    if( rc==LSM_OK ){
-      *piVal = (i64)lsmGetU64(pMeta->aData);
-      lsmFsMetaPageRelease(pMeta);
-    }
-    return rc;
-  }
-
-  fsGrowMapping(pFS, iMeta*LSM_META_PAGE_SIZE, &rc);
-  if( rc==LSM_OK ){
-    u8 *aMap = (u8 *)pFS->pMap;
-    *piVal = (i64)lsmGetU64(&aMap[(iMeta-1)*LSM_META_PAGE_SIZE]);
-  }
-  return rc;
-}
-
-
-/*
-** Return true if pRun is not the same segment as pIgnore and either its
-** first page (iFirst) or its last page (iLastPg) lies in the range
-** iFirst to iLast, inclusive.
-*/
-static int fsRunEndsBetween(
-  Segment *pRun,
-  Segment *pIgnore,
-  LsmPgno iFirst,
-  LsmPgno iLast
-){
-  int bFirstIn;                   /* True if pRun->iFirst is in range */
-  int bLastIn;                    /* True if pRun->iLastPg is in range */
-
-  if( pRun==pIgnore ) return 0;
-  bFirstIn = (pRun->iFirst>=iFirst && pRun->iFirst<=iLast);
-  bLastIn = (pRun->iLastPg>=iFirst && pRun->iLastPg<=iLast);
-  return (bFirstIn || bLastIn);
-}
-
-/*
-** Return true if level pLevel contains a segment other than pIgnore for
-** which the first or last page is between iFirst and iLast, inclusive.
-** Both the left-hand-side segment (pLevel->lhs) and all pLevel->nRight
-** right-hand-side segments (pLevel->aRhs[]) are checked.
-*/
-static int fsLevelEndsBetween(
-  Level *pLevel,
-  Segment *pIgnore,
-  LsmPgno iFirst,
-  LsmPgno iLast
-){
-  int i;
-
-  if( fsRunEndsBetween(&pLevel->lhs, pIgnore, iFirst, iLast) ){
-    return 1;
-  }
-  for(i=0; i<pLevel->nRight; i++){
-    if( fsRunEndsBetween(&pLevel->aRhs[i], pIgnore, iFirst, iLast) ){
-      return 1;
-    }
-  }
-
-  return 0;
-}
-
-/*
-** Block iBlk is no longer in use by segment pIgnore. If it is not in use
-** by any other segment, move it to the free block list.
-**
-** A block counts as "in use" if any segment in the snapshot other than
-** pIgnore has its first or last page on the block. In that case LSM_OK is
-** returned without modifying anything.
-**
-** NOTE(review): the tail of this function is damaged in this copy. The
-** header of the loop over the append-list and the statement following the
-** final "while(" have both lost text (the loop condition and the call that
-** receives "pDb, iBlk" are incomplete). Restore these lines from a
-** pristine copy of the file before relying on them.
-*/
-static int fsFreeBlock(
- FileSystem *pFS, /* File system object */
- Snapshot *pSnapshot, /* Worker snapshot */
- Segment *pIgnore, /* Ignore this run when searching */
- int iBlk /* Block number of block to free */
-){
- int rc = LSM_OK; /* Return code */
- LsmPgno iFirst; /* First page on block iBlk */
- LsmPgno iLast; /* Last page on block iBlk */
- Level *pLevel; /* Used to iterate through levels */
-
- int iIn; /* Used to iterate through append points */
- int iOut = 0; /* Used to output append points */
- LsmPgno *aApp = pSnapshot->aiAppend;
-
- iFirst = fsFirstPageOnBlock(pFS, iBlk);
- iLast = fsLastPageOnBlock(pFS, iBlk);
-
- /* Check if any other run in the snapshot has a start or end page
- ** within this block. If there is such a run, return early. */
- for(pLevel=lsmDbSnapshotLevel(pSnapshot); pLevel; pLevel=pLevel->pNext){
- if( fsLevelEndsBetween(pLevel, pIgnore, iFirst, iLast) ){
- return LSM_OK;
- }
- }
-
- /* Remove any entries that lie on this block from the append-list. */
- for(iIn=0; iIniLast ){
- aApp[iOut++] = aApp[iIn];
- }
- }
- while( iOutpDb, iBlk);
- }
- return rc;
-}
-
-/*
-** Delete or otherwise recycle the blocks currently occupied by run pDel.
-**
-** Starting with the block containing pDel->iFirst, the chain of "next
-** block" pointers is followed as far as the block containing
-** pDel->iLastPg, passing each block to fsFreeBlock(). If bZero is false
-** and the run does not end on the last page of its final block, that
-** final block is not freed.
-**
-** If the segment has a redirect array, it is emptied. If bZero is true,
-** the Segment structure is zeroed before returning. This function is a
-** no-op if pDel->iFirst is zero.
-**
-** NOTE(review): the error code returned by fsBlockNext() is overwritten
-** by the call to fsFreeBlock() that follows it, and this function returns
-** LSM_OK regardless of the final value of rc. That looks like deliberate
-** best-effort behaviour - confirm with callers before changing it.
-*/
-int lsmFsSortedDelete(
- FileSystem *pFS,
- Snapshot *pSnapshot,
- int bZero, /* True to zero the Segment structure */
- Segment *pDel
-){
- if( pDel->iFirst ){
- int rc = LSM_OK;
-
- int iBlk; /* Block currently being freed */
- int iLastBlk; /* Block containing the last page of the run */
-
- iBlk = fsPageToBlock(pFS, pDel->iFirst);
- iLastBlk = fsPageToBlock(pFS, pDel->iLastPg);
-
- /* Mark all blocks currently used by this sorted run as free */
- while( iBlk && rc==LSM_OK ){
- int iNext = 0; /* Stays 0 (ending the loop) on the final block */
- if( iBlk!=iLastBlk ){
- rc = fsBlockNext(pFS, pDel, iBlk, &iNext);
- }else if( bZero==0 && pDel->iLastPg!=fsLastPageOnBlock(pFS, iLastBlk) ){
- break;
- }
- rc = fsFreeBlock(pFS, pSnapshot, pDel, iBlk);
- iBlk = iNext;
- }
-
- if( pDel->pRedirect ){
- assert( pDel->pRedirect==&pSnapshot->redirect );
- pSnapshot->redirect.n = 0;
- }
-
- if( bZero ) memset(pDel, 0, sizeof(Segment));
- }
- return LSM_OK;
-}
-
-/*
-** aPgno is an array containing nPgno page numbers. Return the smallest page
-** number from the array that falls on block iBlk. Or, if none of the pages
-** in aPgno[] fall on block iBlk, return 0.
-*/
-static LsmPgno firstOnBlock(
- FileSystem *pFS,
- int iBlk,
- LsmPgno *aPgno,
- int nPgno
-){
- LsmPgno iRet = 0;
- int i;
- for(i=0; ipRedirect, iPg));
-}
-
-/*
-** Report whether segment p touches a redirected block. The result is 1 if
-** p is not NULL and fsPageRedirects() is true for its first page, its root
-** page or its last page (tested in that order); otherwise it is 0.
-*/
-static int fsSegmentRedirects(FileSystem *pFS, Segment *p){
-  if( p==0 ) return 0;
-  if( fsPageRedirects(pFS, p, p->iFirst) ) return 1;
-  if( fsPageRedirects(pFS, p, p->iRoot) ) return 1;
-  return fsPageRedirects(pFS, p, p->iLastPg) ? 1 : 0;
-}
-#endif
-
-/*
-** Argument aPgno is an array of nPgno page numbers. All pages belong to
-** the segment pRun. This function gobbles from the start of the run to the
-** first page that appears in aPgno[] (i.e. so that the aPgno[] entry is
-** the new first page of the run).
-**
-** Starting with the block that holds pRun->iFirst, each block on which
-** firstOnBlock() finds no aPgno[] entry is handed to fsFreeBlock() and the
-** walk moves on via fsBlockNext(). The loop stops at the first block that
-** does hold an aPgno[] entry (which becomes pRun->iFirst) or when either
-** helper returns an error. The function returns void, so such an error is
-** not reported to the caller.
-**
-** pRun->nSize is first increased by the offset of the old first page within
-** its block, reduced by one whole block for every block visited without a
-** match, and finally reduced by the offset of the new first page within
-** its block.
-*/
-void lsmFsGobble(
- lsm_db *pDb,
- Segment *pRun,
- LsmPgno *aPgno,
- int nPgno
-){
- int rc = LSM_OK;
- FileSystem *pFS = pDb->pFS;
- Snapshot *pSnapshot = pDb->pWorker;
- int iBlk;
-
- assert( pRun->nSize>0 );
- assert( 0==fsSegmentRedirects(pFS, pRun) );
- assert( nPgno>0 && 0==fsPageRedirects(pFS, pRun, aPgno[0]) );
-
- iBlk = fsPageToBlock(pFS, pRun->iFirst);
- pRun->nSize += (pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
-
- while( rc==LSM_OK ){
- int iNext = 0;
- LsmPgno iFirst = firstOnBlock(pFS, iBlk, aPgno, nPgno);
- if( iFirst ){
- pRun->iFirst = iFirst;
- break;
- }
- rc = fsBlockNext(pFS, pRun, iBlk, &iNext);
- if( rc==LSM_OK ) rc = fsFreeBlock(pFS, pSnapshot, pRun, iBlk);
- pRun->nSize -= (
- 1 + fsLastPageOnBlock(pFS, iBlk) - fsFirstPageOnBlock(pFS, iBlk)
- );
- iBlk = iNext;
- }
-
- pRun->nSize -= (pRun->iFirst - fsFirstPageOnBlock(pFS, iBlk));
- assert( pRun->nSize>0 );
-}
-
-/*
-** Compressed-database helper.
-**
-** iPg is the byte offset of a page record inside segment pSeg and nByte is
-** the full size of that record, headers included. On return *piNext holds
-** the offset of the record that follows, or zero if the record at iPg is
-** the final one in the segment. Conceptually this is
-**
-**     *piNext = iPg + nByte;
-**
-** except that the addition is done by fsAddOffset(), which accounts for
-** block overflow and redirection. The offset is advanced to the last byte
-** of the record first so that it can be compared with pSeg->iLastPg, and
-** only then stepped one further byte.
-**
-** Returns LSM_OK or the error code produced by fsAddOffset().
-*/
-static int fsNextPageOffset(
-  FileSystem *pFS,                /* File system object */
-  Segment *pSeg,                  /* Segment to move within */
-  LsmPgno iPg,                    /* Offset of current page */
-  int nByte,                      /* Size of current page including headers */
-  LsmPgno *piNext                 /* OUT: Offset of next page. Or zero (EOF) */
-){
-  LsmPgno iOff;
-  int rc;
-
-  assert( pFS->pCompress );
-
-  /* Move to the final byte of the current record. */
-  rc = fsAddOffset(pFS, pSeg, iPg, nByte-1, &iOff);
-
-  if( pSeg!=0 && pSeg->iLastPg==iOff ){
-    /* That byte is the end of the segment: there is no next record. */
-    iOff = 0;
-  }else{
-    if( rc==LSM_OK ){
-      rc = fsAddOffset(pFS, pSeg, iOff, 1, &iOff);
-    }
-  }
-
-  *piNext = iOff;
-  return rc;
-}
-
-/*
-** Compressed-database helper.
-**
-** iPg is the page number of a page that appears in segment pSeg. The page
-** number of the page preceding it in the same run is written to *piPrev.
-**
-** The 3 bytes immediately before iPg are read. If the top bit of the third
-** byte is set they are decoded with getRecordSize() and the two 3-byte size
-** fields are added to obtain the distance to step back; otherwise the low
-** 7 bits of the third byte are that distance.
-**
-** Returns LSM_OK on success, otherwise an lsm error code, in which case
-** the final value of *piPrev is undefined.
-*/
-static int fsGetPageBefore(
-  FileSystem *pFS,
-  Segment *pSeg,
-  LsmPgno iPg,
-  LsmPgno *piPrev
-){
-  u8 aHdr[3];                     /* Size field preceding offset iPg */
-  i64 iHdrOff;                    /* Offset that aHdr[] is read from */
-  int bPad;                       /* Set by the decoding step; not used */
-  int nBack;                      /* Number of bytes to step back */
-  int rc;
-
-  assert( pFS->pCompress );
-
-  rc = fsSubtractOffset(pFS, pSeg, iPg, sizeof(aHdr), &iHdrOff);
-  if( rc!=LSM_OK ) return rc;
-
-  rc = fsReadData(pFS, pSeg, iHdrOff, aHdr, sizeof(aHdr));
-  if( rc!=LSM_OK ) return rc;
-
-  if( (aHdr[2] & 0x80)==0 ){
-    nBack = (int)(aHdr[2] & 0x7F);
-    bPad = 1;
-  }else{
-    nBack = getRecordSize(aHdr, &bPad) + sizeof(aHdr)*2;
-  }
-
-  return fsSubtractOffset(pFS, pSeg, iPg, nBack, piPrev);
-}
-
-/*
-** The first argument to this function is a valid reference to a database
-** file page that is part of a sorted run. If parameter eDir is -1, this
-** function attempts to locate and load the previous page in the same run.
-** Or, if eDir is +1, it attempts to find the next page in the same run.
-** The results of passing an eDir value other than positive or negative one
-** are undefined.
-**
-** If parameter pRun is not NULL then it must point to the run that page
-** pPg belongs to. In this case, if pPg is the first or last page of the
-** run, and the request is for the previous or next page, respectively,
-** *ppNext is set to NULL before returning LSM_OK. If pRun is NULL, then it
-** is assumed that the next or previous page, as requested, exists.
-**
-** If the previous/next page does exist and is successfully loaded, *ppNext
-** is set to point to it and LSM_OK is returned. Otherwise, if an error
-** occurs, *ppNext is set to NULL and an lsm error code returned.
-**
-** Page references returned by this function should be released by the
-** caller using lsmFsPageRelease().
-*/
-int lsmFsDbPageNext(Segment *pRun, Page *pPg, int eDir, Page **ppNext){
-  int rc = LSM_OK;
-  FileSystem *pFS = pPg->pFS;
-  LsmPgno iPg = pPg->iPg;
-
-  assert( 0==fsSegmentRedirects(pFS, pRun) );
-  if( pFS->pCompress ){
-    /* Size of the current record: payload plus two 3-byte size fields. */
-    int nSpace = pPg->nCompress + 2*3;
-
-    /* fsPageGet() reports a non-zero nSpace when the record at iPg is not
-    ** a page; keep stepping in direction eDir until a page is found. */
-    do {
-      if( eDir>0 ){
-        rc = fsNextPageOffset(pFS, pRun, iPg, nSpace, &iPg);
-      }else{
-        /* pRun may be NULL (see above), so test it before dereferencing,
-        ** as the uncompressed branch below does. */
-        if( pRun && iPg==pRun->iFirst ){
-          iPg = 0;
-        }else{
-          rc = fsGetPageBefore(pFS, pRun, iPg, &iPg);
-        }
-      }
-
-      nSpace = 0;
-      if( rc!=LSM_OK ){
-        /* The offset helpers leave iPg undefined on failure. Do not load
-        ** a page from it, and do not let fsPageGet() mask the error. */
-        *ppNext = 0;
-      }else if( iPg!=0 ){
-        rc = fsPageGet(pFS, pRun, iPg, 0, ppNext, &nSpace);
-        assert( (*ppNext==0)==(rc!=LSM_OK || nSpace>0) );
-      }else{
-        *ppNext = 0;
-      }
-    }while( nSpace>0 && rc==LSM_OK );
-
-  }else{
-    Redirect *pRedir = pRun ? pRun->pRedirect : 0;
-    assert( eDir==1 || eDir==-1 );
-    if( eDir<0 ){
-      if( pRun && iPg==pRun->iFirst ){
-        *ppNext = 0;
-        return LSM_OK;
-      }else if( fsIsFirst(pFS, iPg) ){
-        /* First page of a block: the previous block number is stored in
-        ** the 4 bytes that precede aData. */
-        assert( pPg->flags & PAGE_HASPREV );
-        iPg = fsLastPageOnBlock(pFS, lsmGetU32(&pPg->aData[-4]));
-      }else{
-        iPg--;
-      }
-    }else{
-      if( pRun ){
-        if( iPg==pRun->iLastPg ){
-          *ppNext = 0;
-          return LSM_OK;
-        }
-      }
-
-      if( fsIsLast(pFS, iPg) ){
-        /* Last page of a block: the next block number is stored in the
-        ** final 4 bytes of the page and may be subject to redirection. */
-        int iBlk = fsRedirectBlock(
-            pRedir, lsmGetU32(&pPg->aData[pFS->nPagesize-4])
-        );
-        iPg = fsFirstPageOnBlock(pFS, iBlk);
-      }else{
-        iPg++;
-      }
-    }
-    rc = fsPageGet(pFS, pRun, iPg, 0, ppNext, 0);
-  }
-
-  return rc;
-}
-
-/*
-** This function is called when creating a new segment to determine if the
-** first part of it can be written following an existing segment on an
-** already allocated block. If it is possible, the page number of the first
-** page to use for the new segment is returned. Otherwise zero.
-**
-** If argument pLvl is not NULL, then this function will not attempt to
-** start the new segment immediately following any segment that is part
-** of the right-hand-side of pLvl.
-**
-** The worker snapshot's aiAppend[] list is scanned from its last entry to
-** its first. The entry that is returned is also cleared from the list.
-*/
-static LsmPgno findAppendPoint(FileSystem *pFS, Level *pLvl){
-  int i;
-  LsmPgno *aiAppend = pFS->pDb->pWorker->aiAppend;
-  LsmPgno iRet = 0;
-
-  for(i=LSM_APPLIST_SZ-1; iRet==0 && i>=0; i--){
-    if( (iRet = aiAppend[i]) ){
-      if( pLvl ){
-        /* Reject the candidate if any right-hand segment of pLvl ends on
-        ** the same block. */
-        int iBlk = fsPageToBlock(pFS, iRet);
-        int j;
-        for(j=0; iRet && j<pLvl->nRight; j++){
-          if( fsPageToBlock(pFS, pLvl->aRhs[j].iLastPg)==iBlk ){
-            iRet = 0;
-          }
-        }
-      }
-      if( iRet ) aiAppend[i] = 0;
-    }
-  }
-  return iRet;
-}
-
-/*
-** Append a page to the left-hand-side of pLvl. Set the ref-count to 1 and
-** return a pointer to it. The page is writable until either
-** lsmFsPagePersist() is called on it or the ref-count drops to zero.
-**
-** If the database is compressed (pFS->pCompress) or bDefer is true, the
-** page is only a buffer obtained from fsPageBuffer() with iPg==0; no page
-** number is chosen here. Otherwise the page number is chosen as follows:
-**
-**   * if the segment is empty, an append-point from findAppendPoint() or,
-**     failing that, the first page of a newly allocated block;
-**   * if the previous last page ends its block, the first page of the
-**     block returned by fsBlockNext();
-**   * otherwise the page following the previous last page.
-**
-** When the chosen page is the last on its block, the next block is
-** allocated immediately and its number is stored in the final 4 bytes of
-** the page. When it is the first on its block, the block number of the
-** previous last page is stored in the 4 bytes preceding aData.
-**
-** *ppOut is set to NULL on entry and to the new page (or NULL if the page
-** could not be obtained) on the normal exit path. pSnapshot is not
-** referenced by this function.
-*/
-int lsmFsSortedAppend(
- FileSystem *pFS,
- Snapshot *pSnapshot,
- Level *pLvl,
- int bDefer,
- Page **ppOut
-){
- int rc = LSM_OK;
- Page *pPg = 0;
- LsmPgno iApp = 0;
- LsmPgno iNext = 0;
- Segment *p = &pLvl->lhs;
- LsmPgno iPrev = p->iLastPg;
-
- *ppOut = 0;
- assert( p->pRedirect==0 );
-
- if( pFS->pCompress || bDefer ){
- /* In compressed database mode the page is not assigned a page number
- ** or location in the database file at this point. This will be done
- ** by the lsmFsPagePersist() call. */
- rc = fsPageBuffer(pFS, &pPg);
- if( rc==LSM_OK ){
- pPg->pFS = pFS;
- pPg->pSeg = p;
- pPg->iPg = 0;
- pPg->flags |= PAGE_DIRTY;
- pPg->nData = pFS->nPagesize;
- assert( pPg->aData );
- if( pFS->pCompress==0 ) pPg->nData -= 4;
-
- pPg->nRef = 1;
- pFS->nOut++;
- }
- }else{
- if( iPrev==0 ){
- iApp = findAppendPoint(pFS, pLvl);
- }else if( fsIsLast(pFS, iPrev) ){
- int iNext2;
- rc = fsBlockNext(pFS, 0, fsPageToBlock(pFS, iPrev), &iNext2);
- if( rc!=LSM_OK ) return rc;
- iApp = fsFirstPageOnBlock(pFS, iNext2);
- }else{
- iApp = iPrev + 1;
- }
-
- /* If this is the first page allocated, or if the page allocated is the
- ** last in the block, also allocate the next block here. */
- if( iApp==0 || fsIsLast(pFS, iApp) ){
- int iNew; /* New block number */
-
- rc = lsmBlockAllocate(pFS->pDb, 0, &iNew);
- if( rc!=LSM_OK ) return rc;
- if( iApp==0 ){
- iApp = fsFirstPageOnBlock(pFS, iNew);
- }else{
- iNext = fsFirstPageOnBlock(pFS, iNew);
- }
- }
-
- /* Grab the new page. */
- pPg = 0;
- rc = fsPageGet(pFS, 0, iApp, 1, &pPg, 0);
- assert( rc==LSM_OK || pPg==0 );
-
- /* If this is the first or last page of a block, fill in the pointer
- ** value at the end of the new page. */
- if( rc==LSM_OK ){
- p->nSize++;
- p->iLastPg = iApp;
- if( p->iFirst==0 ) p->iFirst = iApp;
- pPg->flags |= PAGE_DIRTY;
-
- if( fsIsLast(pFS, iApp) ){
- lsmPutU32(&pPg->aData[pFS->nPagesize-4], fsPageToBlock(pFS, iNext));
- }else if( fsIsFirst(pFS, iApp) ){
- lsmPutU32(&pPg->aData[-4], fsPageToBlock(pFS, iPrev));
- }
- }
- }
-
- *ppOut = pPg;
- return rc;
-}
-
-/*
-** Mark the segment passed as the second argument as finished. Once a segment
-** is marked as finished it is not possible to append any further pages to
-** it.
-**
-** A NULL segment, or one with iLastPg==0, is left untouched.
-**
-** Return LSM_OK if successful or an lsm error code if an error occurs.
-*/
-int lsmFsSortedFinish(FileSystem *pFS, Segment *p){
-  int rc = LSM_OK;
-  if( p && p->iLastPg ){
-    assert( p->pRedirect==0 );
-
-    /* Check if the last page of this run happens to be the last of a block.
-    ** If it is, then an extra block has already been allocated for this run.
-    ** Shift this extra block back to the free-block list.
-    **
-    ** Otherwise, add the first free page in the last block used by the run
-    ** to the lAppend list.
-    */
-    if( fsLastPageOnPagesBlock(pFS, p->iLastPg)!=p->iLastPg ){
-      int i;
-      LsmPgno *aiAppend = pFS->pDb->pWorker->aiAppend;
-      /* Store the append point in the first unused slot. If every slot is
-      ** already occupied the append point is simply not recorded. */
-      for(i=0; i<LSM_APPLIST_SZ; i++){
-        if( aiAppend[i]==0 ){
-          aiAppend[i] = p->iLastPg+1;
-          break;
-        }
-      }
-    }else if( pFS->pCompress==0 ){
-      /* Uncompressed: the next-block pointer is the last 4 bytes of the
-      ** final page. */
-      Page *pLast;
-      rc = fsPageGet(pFS, 0, p->iLastPg, 0, &pLast, 0);
-      if( rc==LSM_OK ){
-        int iBlk = (int)lsmGetU32(&pLast->aData[pFS->nPagesize-4]);
-        lsmBlockRefree(pFS->pDb, iBlk);
-        lsmFsPageRelease(pLast);
-      }
-    }else{
-      int iBlk = 0;
-      rc = fsBlockNext(pFS, p, fsPageToBlock(pFS, p->iLastPg), &iBlk);
-      if( rc==LSM_OK ){
-        lsmBlockRefree(pFS->pDb, iBlk);
-      }
-    }
-  }
-  return rc;
-}
-
-/*
-** Fetch a reference to page iPg of segment pSeg, storing it in *ppPg.
-** This is a thin public wrapper around fsPageGet() with its "no-content"
-** argument and its nSpace output both passed as zero.
-**
-** Returns LSM_OK on success, or the lsm error code from fsPageGet().
-*/
-int lsmFsDbPageGet(FileSystem *pFS, Segment *pSeg, LsmPgno iPg, Page **ppPg){
-  int rc;
-  rc = fsPageGet(pFS, pSeg, iPg, 0, ppPg, 0);
-  return rc;
-}
-
-/*
-** Load the final page of segment pSeg into *ppPg.
-**
-** For an uncompressed database this is simply page pSeg->iLastPg. For a
-** compressed database the search starts one byte past pSeg->iLastPg and
-** walks backwards with fsGetPageBefore() until fsPageGet() returns a record
-** for which it reports no nSpace value.
-**
-** Returns LSM_OK on success, or an lsm error code if an error occurs.
-*/
-int lsmFsDbPageLast(FileSystem *pFS, Segment *pSeg, Page **ppPg){
-  LsmPgno iOff = pSeg->iLastPg;
-  int nSkip;
-  int rc;
-
-  if( pFS->pCompress==0 ){
-    return fsPageGet(pFS, pSeg, iOff, 0, ppPg, 0);
-  }
-
-  iOff += 1;
-  for(;;){
-    nSkip = 0;
-    rc = fsGetPageBefore(pFS, pSeg, iOff, &iOff);
-    if( rc!=LSM_OK ) break;
-    rc = fsPageGet(pFS, pSeg, iOff, 0, ppPg, &nSkip);
-    if( rc!=LSM_OK || nSkip<=0 ) break;
-  }
-  return rc;
-}
-
-/*
-** Return a reference to meta-page iPg. If successful, LSM_OK is returned
-** and *ppPg populated with the new page reference. The reference should
-** be released by the caller using lsmFsMetaPageRelease().
-**
-** When memory mapping is in use (pFS->nMapLimit>0) the page data points
-** directly into the mapping. Otherwise a buffer of pFS->nMetasize bytes is
-** allocated; for read access (bWrite==0) it is filled with nMetaRwSize
-** bytes read from the database file.
-**
-** Otherwise, if an error occurs, *ppPg is set to NULL and an LSM error
-** code is returned.
-*/
-int lsmFsMetaPageGet(
-  FileSystem *pFS,                /* File-system connection */
-  int bWrite,                     /* True for write access, false for read */
-  int iPg,                        /* Either 1 or 2 */
-  MetaPage **ppPg                 /* OUT: Pointer to MetaPage object */
-){
-  int rc = LSM_OK;
-  MetaPage *pPg;
-  assert( iPg==1 || iPg==2 );
-
-  /* Size the allocation from the object actually being created (a
-  ** MetaPage), not from the unrelated Page structure. */
-  pPg = lsmMallocZeroRc(pFS->pEnv, sizeof(*pPg), &rc);
-
-  if( pPg ){
-    i64 iOff = (iPg-1) * pFS->nMetasize;
-    if( pFS->nMapLimit>0 ){
-      fsGrowMapping(pFS, 2*pFS->nMetasize, &rc);
-      pPg->aData = (u8 *)(pFS->pMap) + iOff;
-    }else{
-      pPg->aData = lsmMallocRc(pFS->pEnv, pFS->nMetasize, &rc);
-      if( rc==LSM_OK && bWrite==0 ){
-        rc = lsmEnvRead(
-            pFS->pEnv, pFS->fdDb, iOff, pPg->aData, pFS->nMetaRwSize
-        );
-      }
-#ifndef NDEBUG
-      /* pPg->aData causes an uninitialized access via a downstream write().
-         After discussion on this list, this memory should not, for
-         performance reasons, be memset. However, tracking down "real" misuse
-         is more difficult with this "false" positive, so the buffer is
-         filled only when NDEBUG is not defined (i.e. in debug builds).
-      */
-      else if( rc==LSM_OK ){
-        memset( pPg->aData, 0x77, pFS->nMetasize );
-      }
-#endif
-    }
-
-    if( rc!=LSM_OK ){
-      /* aData is heap memory only when the mapping is not in use. */
-      if( pFS->nMapLimit==0 ) lsmFree(pFS->pEnv, pPg->aData);
-      lsmFree(pFS->pEnv, pPg);
-      pPg = 0;
-    }else{
-      pPg->iPg = iPg;
-      pPg->bWrite = bWrite;
-      pPg->pFS = pFS;
-    }
-  }
-
-  *ppPg = pPg;
-  return rc;
-}
-
-/*
-** Drop a meta-page reference previously returned by lsmFsMetaPageGet().
-** Passing NULL is harmless.
-**
-** When pFS->nMapLimit is zero the page owns a heap buffer: if the page was
-** opened for writing, nMetaRwSize bytes of it are first written back to the
-** database file (at offset 0 for page 1, nMetasize for page 2), and the
-** buffer is then freed. The MetaPage object itself is always freed.
-**
-** Returns LSM_OK, or the error code from lsmEnvWrite().
-*/
-int lsmFsMetaPageRelease(MetaPage *pPg){
-  FileSystem *pFS;
-  int rc = LSM_OK;
-
-  if( pPg==0 ) return LSM_OK;
-  pFS = pPg->pFS;
-
-  if( pFS->nMapLimit==0 ){
-    if( pPg->bWrite ){
-      int nWrite = pFS->nMetaRwSize;
-      i64 iOff = 0;
-      if( pPg->iPg==2 ) iOff = pFS->nMetasize;
-      rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, pPg->aData, nWrite);
-    }
-    lsmFree(pFS->pEnv, pPg->aData);
-  }
-
-  lsmFree(pFS->pEnv, pPg);
-  return rc;
-}
-
-/*
-** Give the caller access to the data buffer of meta-page pPg. The buffer
-** pointer is the return value. If pnData is not NULL, *pnData receives the
-** file-system's nMetaRwSize value as the size of the meta-page in bytes.
-*/
-u8 *lsmFsMetaPageData(MetaPage *pPg, int *pnData){
-  if( pnData!=0 ){
-    FileSystem *pFS = pPg->pFS;
-    *pnData = pFS->nMetaRwSize;
-  }
-  return pPg->aData;
-}
-
-/*
-** Debug-only predicate for use inside assert() statements: returns 1 if
-** the PAGE_DIRTY flag is set on the page (i.e. it may currently be
-** written), otherwise 0.
-*/
-#ifndef NDEBUG
-int lsmFsPageWritable(Page *pPg){
-  if( pPg->flags & PAGE_DIRTY ){
-    return 1;
-  }
-  return 0;
-}
-#endif
-
-/*
-** Helper used while block iFrom is being redirected to block iTo. If the
-** page number stored in *piPg is on block iFrom, it is replaced by the
-** page number at the same position on block iTo. Page numbers on any other
-** block are left alone.
-**
-** The number of page numbers per block is nBlocksize for a compressed
-** database, and nBlocksize/nPagesize otherwise.
-*/
-static void fsMovePage(
-  FileSystem *pFS,                /* File system object */
-  int iTo,                        /* Destination block */
-  int iFrom,                      /* Source block */
-  LsmPgno *piPg                   /* IN/OUT: Page number */
-){
-  LsmPgno iOld = *piPg;
-  int nPerBlock;
-
-  if( fsPageToBlock(pFS, iOld)!=iFrom ) return;
-
-  if( pFS->pCompress ){
-    nPerBlock = pFS->nBlocksize;
-  }else{
-    nPerBlock = pFS->nBlocksize / pFS->nPagesize;
-  }
-  *piPg = iOld - (LsmPgno)(iFrom - iTo) * nPerBlock;
-}
-
-/*
-** Copy the contents of block iFrom to block iTo.
-**
-** It is safe to assume that there are no outstanding references to pages
-** on block iTo. And that block iFrom is not currently being written. In
-** other words, the data can be read and written directly.
-**
-** The block is copied one page at a time. Each page is read from the
-** mapping when it lies entirely below pFS->nMapLimit, and through
-** lsmEnvRead() into a temporary buffer otherwise; it is written the same
-** way. Afterwards the page cache is purged and every append-point of the
-** worker snapshot, plus the iFirst/iLastPg/iRoot fields of pSeg, that
-** referred to block iFrom is adjusted to refer to block iTo.
-**
-** Returns LSM_OK or an lsm error code.
-*/
-int lsmFsMoveBlock(FileSystem *pFS, Segment *pSeg, int iTo, int iFrom){
-  Snapshot *p = pFS->pDb->pWorker;
-  int rc = LSM_OK;
-  int i;
-  i64 nMap;
-
-  i64 iFromOff = (i64)(iFrom-1) * pFS->nBlocksize;
-  i64 iToOff = (i64)(iTo-1) * pFS->nBlocksize;
-
-  assert( iTo!=1 );
-  assert( iFrom>iTo );
-
-  /* Grow the mapping as required. */
-  nMap = LSM_MIN(pFS->nMapLimit, (i64)iFrom * pFS->nBlocksize);
-  fsGrowMapping(pFS, nMap, &rc);
-
-  if( rc==LSM_OK ){
-    const int nPagePerBlock = (pFS->nBlocksize / pFS->nPagesize);
-    int nSz = pFS->nPagesize;
-    u8 *aBuf = 0;
-    u8 *aData = 0;
-
-    for(i=0; rc==LSM_OK && i<nPagePerBlock; i++){
-      i64 iOff = iFromOff + i*nSz;
-
-      /* Set aData to point to a buffer containing the from page */
-      if( (iOff+nSz)<=pFS->nMapLimit ){
-        u8 *aMap = (u8 *)(pFS->pMap);
-        aData = &aMap[iOff];
-      }else{
-        /* The bounce buffer is allocated lazily and reused for each page. */
-        if( aBuf==0 ){
-          aBuf = (u8 *)lsmMallocRc(pFS->pEnv, nSz, &rc);
-          if( aBuf==0 ) break;
-        }
-        aData = aBuf;
-        rc = lsmEnvRead(pFS->pEnv, pFS->fdDb, iOff, aData, nSz);
-      }
-
-      /* Copy aData to the to page */
-      if( rc==LSM_OK ){
-        iOff = iToOff + i*nSz;
-        if( (iOff+nSz)<=pFS->nMapLimit ){
-          u8 *aMap = (u8 *)(pFS->pMap);
-          memcpy(&aMap[iOff], aData, nSz);
-        }else{
-          rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, nSz);
-        }
-      }
-    }
-    lsmFree(pFS->pEnv, aBuf);
-    lsmFsPurgeCache(pFS);
-  }
-
-  /* Update append-point list if necessary */
-  for(i=0; i<LSM_APPLIST_SZ; i++){
-    fsMovePage(pFS, iTo, iFrom, &p->aiAppend[i]);
-  }
-
-  /* Update the Segment structure itself */
-  fsMovePage(pFS, iTo, iFrom, &pSeg->iFirst);
-  fsMovePage(pFS, iTo, iFrom, &pSeg->iLastPg);
-  fsMovePage(pFS, iTo, iFrom, &pSeg->iRoot);
-
-  return rc;
-}
-
-/*
-** Append raw data to a segment. Return the database file offset that the
-** data is written to (this may be used as the page number if the data
-** being appended is a new page record).
-**
-** This function is only used in compressed database mode.
-**
-** If *pRc is not LSM_OK on entry this function does nothing and returns 0.
-** Otherwise *pRc is set to the result of the operation and pSeg->iLastPg
-** is set to one less than the offset following the data written.
-**
-** Data that does not fit in the current block is continued in the next
-** block. If at least one byte was written at the append point, a new block
-** is allocated and linked to the old one with 4-byte next/prev block
-** pointers. If nothing could be written there (nWrite==0), the next block
-** is expected to be allocated already; it is located with fsBlockNext() and
-** the return value becomes the first offset on that block.
-*/
-static LsmPgno fsAppendData(
- FileSystem *pFS, /* File-system handle */
- Segment *pSeg, /* Segment to append to */
- const u8 *aData, /* Buffer containing data to write */
- int nData, /* Size of buffer aData[] in bytes */
- int *pRc /* IN/OUT: Error code */
-){
- LsmPgno iRet = 0;
- int rc = *pRc;
- assert( pFS->pCompress );
- if( rc==LSM_OK ){
- int nRem = 0;
- int nWrite = 0;
- LsmPgno iLastOnBlock;
- LsmPgno iApp = pSeg->iLastPg+1;
-
- /* If this is the first data written into the segment, find an append-point
- ** or allocate a new block. */
- if( iApp==1 ){
- pSeg->iFirst = iApp = findAppendPoint(pFS, 0);
- if( iApp==0 ){
- int iBlk;
- rc = lsmBlockAllocate(pFS->pDb, 0, &iBlk);
- pSeg->iFirst = iApp = fsFirstPageOnBlock(pFS, iBlk);
- }
- }
- iRet = iApp;
-
- /* Write as much data as is possible at iApp (usually all of it). */
- iLastOnBlock = fsLastPageOnPagesBlock(pFS, iApp);
- if( rc==LSM_OK ){
- int nSpace = (int)(iLastOnBlock - iApp + 1);
- nWrite = LSM_MIN(nData, nSpace);
- nRem = nData - nWrite;
- assert( nWrite>=0 );
- if( nWrite!=0 ){
- rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iApp, aData, nWrite);
- }
- iApp += nWrite;
- }
-
- /* If required, allocate a new block and write the rest of the data
- ** into it. Set the next and previous block pointers to link the new
- ** block to the old. */
- assert( nRem<=0 || (iApp-1)==iLastOnBlock );
- if( rc==LSM_OK && (iApp-1)==iLastOnBlock ){
- u8 aPtr[4]; /* Space to serialize a u32 */
- int iBlk; /* New block number */
-
- if( nWrite>0 ){
- /* Allocate a new block. */
- rc = lsmBlockAllocate(pFS->pDb, 0, &iBlk);
-
- /* Set the "next" pointer on the old block */
- if( rc==LSM_OK ){
- assert( iApp==(fsPageToBlock(pFS, iApp)*pFS->nBlocksize)-4 );
- lsmPutU32(aPtr, iBlk);
- rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iApp, aPtr, sizeof(aPtr));
- }
-
- /* Set the "prev" pointer on the new block */
- if( rc==LSM_OK ){
- LsmPgno iWrite;
- lsmPutU32(aPtr, fsPageToBlock(pFS, iApp));
- iWrite = fsFirstPageOnBlock(pFS, iBlk);
- rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iWrite-4, aPtr, sizeof(aPtr));
- if( nRem>0 ) iApp = iWrite;
- }
- }else{
- /* The next block is already allocated. */
- assert( nRem>0 );
- assert( pSeg->pRedirect==0 );
- rc = fsBlockNext(pFS, 0, fsPageToBlock(pFS, iApp), &iBlk);
- iRet = iApp = fsFirstPageOnBlock(pFS, iBlk);
- }
-
- /* Write the remaining data into the new block */
- if( rc==LSM_OK && nRem>0 ){
- rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iApp, &aData[nWrite], nRem);
- iApp += nRem;
- }
- }
-
- pSeg->iLastPg = iApp-1;
- *pRc = rc;
- }
-
- return iRet;
-}
-
-/*
-** Compressed-database helper. Runs the configured xCompress() hook over
-** the contents of page pPg, placing the output in pFS->aOBuffer and the
-** output size in pPg->nCompress (which is primed with pFS->nBuffer, the
-** space available, before the call).
-**
-** The output buffer is set up by fsAllocateBuffer() first; if that call
-** reports failure LSM_NOMEM is returned. Otherwise the return value is
-** whatever xCompress() returns.
-*/
-static int fsCompressIntoBuffer(FileSystem *pFS, Page *pPg){
-  lsm_compress *pCmp = pFS->pCompress;
-  int rc;
-
-  if( fsAllocateBuffer(pFS, 1)!=0 ){
-    return LSM_NOMEM;
-  }
-  assert( pPg->nData==pFS->nPagesize );
-
-  pPg->nCompress = pFS->nBuffer;
-  rc = pCmp->xCompress(
-      pCmp->pCtx,
-      (char *)pFS->aOBuffer, &pPg->nCompress,
-      (const char *)pPg->aData, pPg->nData
-  );
-  return rc;
-}
-
-/*
-** Extend segment pSeg by one page and report its page number in *piNew.
-**
-** Two block-boundary cases set an additional output:
-**
-**   * The new page is the last on its block. The block that the segment
-**     will continue on is allocated immediately and its block number is
-**     written to *piNext.
-**
-**   * The previous last page ended its block, so the new page is the first
-**     page of the next block (which has already been allocated). The block
-**     number of the previous block is written to *piPrev so that the caller
-**     can store it in the first 4 bytes of the page.
-**
-** Both *piPrev and *piNext are zero when neither case applies.
-**
-** Returns LSM_OK on success or an lsm error code otherwise, in which case
-** the final values of all output variables are undefined.
-*/
-static int fsAppendPage(
-  FileSystem *pFS,
-  Segment *pSeg,
-  LsmPgno *piNew,
-  int *piPrev,
-  int *piNext
-){
-  LsmPgno iLast = pSeg->iLastPg;
-  int rc;
-  assert( iLast!=0 );
-
-  *piPrev = 0;
-  *piNext = 0;
-
-  if( !fsIsLast(pFS, iLast) ){
-    *piNew = iLast+1;
-    if( fsIsLast(pFS, *piNew) ){
-      /* The new page fills its block: reserve the following block now. */
-      int iNewBlk;
-      rc = lsmBlockAllocate(pFS->pDb, 0, &iNewBlk);
-      if( rc!=LSM_OK ) return rc;
-      *piNext = iNewBlk;
-    }
-  }else{
-    /* Continue on the block that was reserved when iLast was appended. */
-    int iCurBlk = fsPageToBlock(pFS, iLast);
-    int iNextBlk;
-    assert( pSeg->pRedirect==0 );
-    rc = fsBlockNext(pFS, 0, iCurBlk, &iNextBlk);
-    if( rc!=LSM_OK ) return rc;
-    *piNew = fsFirstPageOnBlock(pFS, iNextBlk);
-    *piPrev = iCurBlk;
-  }
-
-  pSeg->nSize += 1;
-  pSeg->iLastPg = *piNew;
-  return LSM_OK;
-}
-
-/*
-** Write out every page on the FileSystem.pWaiting list.
-**
-** The list is detached from pFS before it is walked. Each page is
-** persisted with lsmFsPagePersist() - unless an error has already been
-** recorded, in which case persisting is skipped - and then released.
-** *pRc is both the incoming error state and the place the final result
-** is stored.
-*/
-void lsmFsFlushWaiting(FileSystem *pFS, int *pRc){
-  Page *pCur;
-  Page *pFollow;
-  int rc = *pRc;
-
-  pCur = pFS->pWaiting;
-  pFS->pWaiting = 0;
-
-  for( ; pCur!=0; pCur=pFollow ){
-    pFollow = pCur->pWaitingNext;
-    if( rc==LSM_OK ){
-      rc = lsmFsPagePersist(pCur);
-    }
-    assert( pCur->nRef==1 );
-    lsmFsPageRelease(pCur);
-  }
-  *pRc = rc;
-}
-
-/*
-** Evict page number iPg from the page hash table, if it is present.
-**
-** The matching Page object is not discarded: its page number is reset to
-** zero and it is re-linked at the head of the hash bucket for key 0.
-*/
-static void fsRemoveHashEntry(FileSystem *pFS, LsmPgno iPg){
-  int iSlot = fsHashKey(pFS->nHash, iPg);
-  Page *pHit = pFS->apHash[iSlot];
-
-  /* Walk the bucket chain looking for page iPg. */
-  while( pHit!=0 && pHit->iPg!=iPg ){
-    pHit = pHit->pHashNext;
-  }
-  if( pHit==0 ) return;
-
-  assert( pHit->nRef==0 || (pHit->flags & PAGE_FREE)==0 );
-  fsPageRemoveFromHash(pFS, pHit);
-  pHit->iPg = 0;
-  iSlot = fsHashKey(pFS->nHash, 0);
-  pHit->pHashNext = pFS->apHash[iSlot];
-  pFS->apHash[iSlot] = pHit;
-}
-
-/*
-** If the page passed as an argument is dirty, update the database file
-** (or mapping of the database file) with its current contents and mark
-** the page as clean.
-**
-** Three cases are handled:
-**
-**   * Compressed database: the page is compressed and appended to its
-**     segment as a record framed by a 3-byte size field on each side. The
-**     page number is the value returned by the first fsAppendData() call,
-**     and the page is then inserted into the hash table.
-**
-**   * Uncompressed, iPg==0: a page number is assigned with fsAppendPage(),
-**     the block pointer (if any) is written into the page, and the page
-**     is added to the end of the pFS->pWaiting list with an extra
-**     reference. PAGE_DIRTY is left set in this case.
-**
-**   * Uncompressed, iPg!=0: the page is written with lsmEnvWrite(), or,
-**     if fsMmapPage() is true and the page has PAGE_FREE set, copied into
-**     the mapping and its old buffer freed. The waiting list is then
-**     flushed with lsmFsFlushWaiting().
-**
-** A NULL pPg, or a page without PAGE_DIRTY set, is a no-op.
-**
-** Return LSM_OK if the operation is a success, or an LSM error code
-** otherwise.
-*/
-int lsmFsPagePersist(Page *pPg){
- int rc = LSM_OK;
- if( pPg && (pPg->flags & PAGE_DIRTY) ){
- FileSystem *pFS = pPg->pFS;
-
- if( pFS->pCompress ){
- int iHash; /* Hash key of assigned page number */
- u8 aSz[3]; /* pPg->nCompress as a 24-bit big-endian */
- assert( pPg->pSeg && pPg->iPg==0 && pPg->nCompress==0 );
-
- /* Compress the page image. */
- rc = fsCompressIntoBuffer(pFS, pPg);
-
- /* Serialize the compressed size into buffer aSz[] */
- putRecordSize(aSz, pPg->nCompress, 0);
-
- /* Write the serialized page record into the database file. */
- pPg->iPg = fsAppendData(pFS, pPg->pSeg, aSz, sizeof(aSz), &rc);
- fsAppendData(pFS, pPg->pSeg, pFS->aOBuffer, pPg->nCompress, &rc);
- fsAppendData(pFS, pPg->pSeg, aSz, sizeof(aSz), &rc);
-
- /* Now that it has a page number, insert the page into the hash table */
- iHash = fsHashKey(pFS->nHash, pPg->iPg);
- pPg->pHashNext = pFS->apHash[iHash];
- pFS->apHash[iHash] = pPg;
-
- pPg->pSeg->nSize += (sizeof(aSz) * 2) + pPg->nCompress;
-
- pPg->flags &= ~PAGE_DIRTY;
- pFS->nWrite++;
- }else{
-
- if( pPg->iPg==0 ){
- /* No page number has been assigned yet. This occurs with pages used
- ** in the b-tree hierarchy. They were not assigned page numbers when
- ** they were created as doing so would cause this call to
- ** lsmFsPagePersist() to write an out-of-order page. Instead a page
- ** number is assigned here so that the page data will be appended
- ** to the current segment.
- */
- Page **pp;
- int iPrev = 0;
- int iNext = 0;
- int iHash;
-
- assert( pPg->pSeg->iFirst );
- assert( pPg->flags & PAGE_FREE );
- assert( (pPg->flags & PAGE_HASPREV)==0 );
- assert( pPg->nData==pFS->nPagesize-4 );
-
- rc = fsAppendPage(pFS, pPg->pSeg, &pPg->iPg, &iPrev, &iNext);
- if( rc!=LSM_OK ) return rc;
-
- assert( pPg->flags & PAGE_FREE );
- iHash = fsHashKey(pFS->nHash, pPg->iPg);
- fsRemoveHashEntry(pFS, pPg->iPg);
- pPg->pHashNext = pFS->apHash[iHash];
- pFS->apHash[iHash] = pPg;
- assert( pPg->pHashNext==0 || pPg->pHashNext->iPg!=pPg->iPg );
-
- if( iPrev ){
- assert( iNext==0 );
- memmove(&pPg->aData[4], pPg->aData, pPg->nData);
- lsmPutU32(pPg->aData, iPrev);
- pPg->flags |= PAGE_HASPREV;
- pPg->aData += 4;
- }else if( iNext ){
- assert( iPrev==0 );
- lsmPutU32(&pPg->aData[pPg->nData], iNext);
- }else{
- int nData = pPg->nData;
- pPg->nData += 4;
- lsmSortedExpandBtreePage(pPg, nData);
- }
-
- pPg->nRef++;
- for(pp=&pFS->pWaiting; *pp; pp=&(*pp)->pWaitingNext);
- *pp = pPg;
- assert( pPg->pWaitingNext==0 );
-
- }else{
- i64 iOff; /* Offset to write within database file */
-
- iOff = (i64)pFS->nPagesize * (i64)(pPg->iPg-1);
- if( fsMmapPage(pFS, pPg->iPg)==0 ){
- u8 *aData = pPg->aData - (pPg->flags & PAGE_HASPREV);
- rc = lsmEnvWrite(pFS->pEnv, pFS->fdDb, iOff, aData, pFS->nPagesize);
- }else if( pPg->flags & PAGE_FREE ){
- fsGrowMapping(pFS, iOff + pFS->nPagesize, &rc);
- if( rc==LSM_OK ){
- u8 *aTo = &((u8 *)(pFS->pMap))[iOff];
- u8 *aFrom = pPg->aData - (pPg->flags & PAGE_HASPREV);
- memcpy(aTo, aFrom, pFS->nPagesize);
- lsmFree(pFS->pEnv, aFrom);
- pFS->nCacheAlloc--;
- pPg->aData = aTo + (pPg->flags & PAGE_HASPREV);
- pPg->flags &= ~PAGE_FREE;
- fsPageRemoveFromHash(pFS, pPg);
- pPg->pMappedNext = pFS->pMapped;
- pFS->pMapped = pPg;
- }
- }
-
- lsmFsFlushWaiting(pFS, &rc);
- pPg->flags &= ~PAGE_DIRTY;
- pFS->nWrite++;
- }
- }
- }
-
- return rc;
-}
-
-/*
-** For non-compressed databases, this function is a no-op. For compressed
-** databases, it adds a padding record to the segment passed as the third
-** argument.
-**
-** The size of the padding records is selected so that the last byte
-** written is the last byte of a disk sector. This means that if a
-** snapshot is taken and checkpointed, subsequent worker processes will
-** not write to any sector that contains checkpointed data.
-**
-** Padding of 6 bytes or more is written as a run of zero bytes framed by
-** two 3-byte size fields produced by putRecordSize(). Padding of 1 to 5
-** bytes is written as a short record whose first and last bytes hold the
-** padding size. A segment with iFirst==0 is left untouched. pSnapshot is
-** not referenced by this function.
-**
-** NOTE(review): pFS->aOBuffer is used here without a visible allocation
-** check - presumably a page has already been compressed; confirm.
-*/
-int lsmFsSortedPadding(
- FileSystem *pFS,
- Snapshot *pSnapshot,
- Segment *pSeg
-){
- int rc = LSM_OK;
- if( pFS->pCompress && pSeg->iFirst ){
- LsmPgno iLast2;
- LsmPgno iLast = pSeg->iLastPg; /* Current last page of segment */
- int nPad; /* Bytes of padding required */
- u8 aSz[3];
-
- iLast2 = (1 + iLast/pFS->szSector) * pFS->szSector - 1;
- assert( fsPageToBlock(pFS, iLast)==fsPageToBlock(pFS, iLast2) );
- nPad = (int)(iLast2 - iLast);
-
- if( iLast2>fsLastPageOnPagesBlock(pFS, iLast) ){
- nPad -= 4;
- }
- assert( nPad>=0 );
-
- if( nPad>=6 ){
- pSeg->nSize += nPad;
- nPad -= 6;
- putRecordSize(aSz, nPad, 1);
- fsAppendData(pFS, pSeg, aSz, sizeof(aSz), &rc);
- memset(pFS->aOBuffer, 0, nPad);
- fsAppendData(pFS, pSeg, pFS->aOBuffer, nPad, &rc);
- fsAppendData(pFS, pSeg, aSz, sizeof(aSz), &rc);
- }else if( nPad>0 ){
- u8 aBuf[5] = {0,0,0,0,0};
- aBuf[0] = (u8)nPad;
- aBuf[nPad-1] = (u8)nPad;
- fsAppendData(pFS, pSeg, aBuf, nPad, &rc);
- }
-
- assert( rc!=LSM_OK
- || pSeg->iLastPg==fsLastPageOnPagesBlock(pFS, pSeg->iLastPg)
- || ((pSeg->iLastPg + 1) % pFS->szSector)==0
- );
- }
-
- return rc;
-}
-
-
-/*
-** Take an additional reference to page pPg by adding one to its
-** reference count. A NULL argument is ignored.
-*/
-void lsmFsPageRef(Page *pPg){
-  if( pPg==0 ) return;
-  pPg->nRef += 1;
-}
-
-/*
-** Release a page-reference obtained using fsPageGet().
-**
-** A NULL pPg is ignored. When the reference count drops to zero the page
-** is persisted with lsmFsPagePersist() (whose return code is returned),
-** pFS->nOut is decremented and any PAGE_HASPREV adjustment of aData is
-** undone. A page without PAGE_FREE set is then unlinked from pFS->pMapped
-** and pushed onto pFS->pFree; otherwise it is passed to fsPageAddToLru().
-*/
-int lsmFsPageRelease(Page *pPg){
- int rc = LSM_OK;
- if( pPg ){
- assert( pPg->nRef>0 );
- pPg->nRef--;
- if( pPg->nRef==0 ){
- FileSystem *pFS = pPg->pFS;
- rc = lsmFsPagePersist(pPg);
- pFS->nOut--;
-
- assert( pPg->pFS->pCompress
- || fsIsFirst(pPg->pFS, pPg->iPg)==0
- || (pPg->flags & PAGE_HASPREV)
- );
- pPg->aData -= (pPg->flags & PAGE_HASPREV);
- pPg->flags &= ~PAGE_HASPREV;
-
- if( (pPg->flags & PAGE_FREE)==0 ){
- /* Removed from mapped list */
- Page **pp;
- for(pp=&pFS->pMapped; (*pp)!=pPg; pp=&(*pp)->pMappedNext);
- *pp = pPg->pMappedNext;
- pPg->pMappedNext = 0;
-
- /* Add to free list */
- pPg->pFreeNext = pFS->pFree;
- pFS->pFree = pPg;
- }else{
- fsPageAddToLru(pFS, pPg);
- }
- }
- }
-
- return rc;
-}
-
-/*
-** Accessor for the file-system's nRead counter: the total number of pages
-** read from the database file.
-*/
-int lsmFsNRead(FileSystem *pFS){
-  return pFS->nRead;
-}
-
-/*
-** Accessor for the file-system's nWrite counter: the total number of pages
-** written to the database file.
-*/
-int lsmFsNWrite(FileSystem *pFS){
-  return pFS->nWrite;
-}
-
-/*
-** Accessor: the environment pointer (pEnv) that file-system object pFS
-** was configured with.
-*/
-lsm_env *lsmFsEnv(FileSystem *pFS){
-  lsm_env *pEnv = pFS->pEnv;
-  return pEnv;
-}
-
-/*
-** Accessor: the environment pointer of the file-system object that owns
-** page pPg.
-*/
-lsm_env *lsmPageEnv(Page *pPg) {
-  FileSystem *pFS = pPg->pFS;
-  return pFS->pEnv;
-}
-
-/*
-** Accessor: the file-system object that page pPg belongs to.
-*/
-FileSystem *lsmPageFS(Page *pPg){
-  FileSystem *pFS = pPg->pFS;
-  return pFS;
-}
-
-/*
-** Accessor for pFS->szSector, the sector-size as reported by the log file
-** handle.
-*/
-int lsmFsSectorSize(FileSystem *pFS){
-  int szSector = pFS->szSector;
-  return szSector;
-}
-
-/*
-** Helper function for lsmInfoArrayStructure(). Returns pRun if its first
-** page is iFirst, or NULL otherwise.
-*/
-static Segment *startsWith(Segment *pRun, LsmPgno iFirst){
-  if( pRun->iFirst==iFirst ){
-    return pRun;
-  }
-  return 0;
-}
-
-/*
-** Return the segment that starts with page iFirst, if any. If no such segment
-** can be found, return NULL.
-**
-** Every level of snapshot pWorker is searched in order: the left-hand
-** segment first, then each of the nRight right-hand segments. The search
-** stops at the first match.
-*/
-static Segment *findSegment(Snapshot *pWorker, LsmPgno iFirst){
-  Level *pLvl;                    /* Used to iterate through db levels */
-  Segment *pSeg = 0;              /* Pointer to segment to return */
-
-  for(pLvl=lsmDbSnapshotLevel(pWorker); pLvl && pSeg==0; pLvl=pLvl->pNext){
-    if( 0==(pSeg = startsWith(&pLvl->lhs, iFirst)) ){
-      int i;
-      for(i=0; i<pLvl->nRight; i++){
-        if( (pSeg = startsWith(&pLvl->aRhs[i], iFirst)) ) break;
-      }
-    }
-  }
-
-  return pSeg;
-}
-
-/*
-** This function implements the lsm_info(LSM_INFO_ARRAY_STRUCTURE) request.
-** If successful, *pzOut is set to point to a nul-terminated string
-** containing the array structure and LSM_OK is returned. The caller should
-** eventually free the string using lsmFree().
-**
-** With bBlock set the string lists the block numbers of the array in
-** order. Otherwise it lists the first page, then the last/first page
-** pair at each block boundary, then the last page.
-**
-** If no worker snapshot is held on entry, one is obtained with
-** lsmBeginWork() and released with lsmFinishWork() before returning.
-** LSM_ERROR is returned if iFirst is zero or no segment starts there.
-**
-** NOTE(review): return codes from fsBlockNext() are ignored inside the
-** loops, and page numbers of type LsmPgno are formatted with "%d" -
-** confirm both are safe for the LsmPgno width in use.
-**
-** If an error occurs, *pzOut is set to NULL and an LSM error code returned.
-*/
-int lsmInfoArrayStructure(
- lsm_db *pDb,
- int bBlock, /* True for block numbers only */
- LsmPgno iFirst,
- char **pzOut
-){
- int rc = LSM_OK;
- Snapshot *pWorker; /* Worker snapshot */
- Segment *pArray = 0; /* Array to report on */
- int bUnlock = 0;
-
- *pzOut = 0;
- if( iFirst==0 ) return LSM_ERROR;
-
- /* Obtain the worker snapshot */
- pWorker = pDb->pWorker;
- if( !pWorker ){
- rc = lsmBeginWork(pDb);
- if( rc!=LSM_OK ) return rc;
- pWorker = pDb->pWorker;
- bUnlock = 1;
- }
-
- /* Search for the array that starts on page iFirst */
- pArray = findSegment(pWorker, iFirst);
-
- if( pArray==0 ){
- /* Could not find the requested array. This is an error. */
- rc = LSM_ERROR;
- }else{
- FileSystem *pFS = pDb->pFS;
- LsmString str;
- int iBlk;
- int iLastBlk;
-
- iBlk = fsPageToBlock(pFS, pArray->iFirst);
- iLastBlk = fsPageToBlock(pFS, pArray->iLastPg);
-
- lsmStringInit(&str, pDb->pEnv);
- if( bBlock ){
- lsmStringAppendf(&str, "%d", iBlk);
- while( iBlk!=iLastBlk ){
- fsBlockNext(pFS, pArray, iBlk, &iBlk);
- lsmStringAppendf(&str, " %d", iBlk);
- }
- }else{
- lsmStringAppendf(&str, "%d", pArray->iFirst);
- while( iBlk!=iLastBlk ){
- lsmStringAppendf(&str, " %d", fsLastPageOnBlock(pFS, iBlk));
- fsBlockNext(pFS, pArray, iBlk, &iBlk);
- lsmStringAppendf(&str, " %d", fsFirstPageOnBlock(pFS, iBlk));
- }
- lsmStringAppendf(&str, " %d", pArray->iLastPg);
- }
-
- *pzOut = str.z;
- }
-
- if( bUnlock ){
- int rcwork = LSM_BUSY;
- lsmFinishWork(pDb, 0, &rcwork);
- }
- return rc;
-}
-
-/*
-** Determine whether or not page iPg lies on one of the blocks that make
-** up segment pSeg. The blocks of the segment are walked from the block
-** holding pSeg->iFirst towards the block holding pSeg->iLastPg (both
-** passed through the segment's redirect table) until either the block
-** containing iPg or the final block is reached.
-**
-** *pbRes is set to true if the walk stopped on the block that contains
-** iPg, or false otherwise. LSM_OK is returned unless fsBlockNext() fails,
-** in which case its error code is returned.
-*/
-int lsmFsSegmentContainsPg(
-  FileSystem *pFS,
-  Segment *pSeg,
-  LsmPgno iPg,
-  int *pbRes
-){
-  Redirect *pRedir = pSeg->pRedirect;
-  int rc = LSM_OK;
-  int iBlk;
-  int iLastBlk;
-  int iPgBlock;                   /* Block containing page iPg */
-
-  /* The block to look for is the one holding iPg. Previously this was
-  ** computed from pSeg->iFirst, which left parameter iPg unused and made
-  ** the test below trivially true for any segment without redirects. */
-  iPgBlock = fsPageToBlock(pFS, iPg);
-  iBlk = fsRedirectBlock(pRedir, fsPageToBlock(pFS, pSeg->iFirst));
-  iLastBlk = fsRedirectBlock(pRedir, fsPageToBlock(pFS, pSeg->iLastPg));
-
-  while( iBlk!=iLastBlk && iBlk!=iPgBlock && rc==LSM_OK ){
-    rc = fsBlockNext(pFS, pSeg, iBlk, &iBlk);
-  }
-
-  *pbRes = (iBlk==iPgBlock);
-  return rc;
-}
-
-/*
-** This function implements the lsm_info(LSM_INFO_ARRAY_PAGES) request.
-** If successful, *pzOut is set to point to a nul-terminated string
-** containing the page number of each page visited while walking the array
-** that starts at page iFirst (each number is preceded by a single space)
-** and LSM_OK is returned. The caller should eventually free the string
-** using lsmFree().
-**
-** If an error occurs, *pzOut is set to NULL and an LSM error code returned.
-** LSM_ERROR is returned if iFirst is zero or if no array starts on page
-** iFirst.
-*/
-int lsmInfoArrayPages(lsm_db *pDb, LsmPgno iFirst, char **pzOut){
- int rc = LSM_OK;
- Snapshot *pWorker; /* Worker snapshot */
- Segment *pSeg = 0; /* Array to report on */
- int bUnlock = 0; /* True if lsmBeginWork() was called by this function */
-
- *pzOut = 0;
- if( iFirst==0 ) return LSM_ERROR;
-
- /* Obtain the worker snapshot */
- pWorker = pDb->pWorker;
- if( !pWorker ){
- rc = lsmBeginWork(pDb);
- if( rc!=LSM_OK ) return rc;
- pWorker = pDb->pWorker;
- bUnlock = 1;
- }
-
- /* Search for the array that starts on page iFirst */
- pSeg = findSegment(pWorker, iFirst);
-
- if( pSeg==0 ){
- /* Could not find the requested array. This is an error. */
- rc = LSM_ERROR;
- }else{
- Page *pPg = 0;
- FileSystem *pFS = pDb->pFS;
- LsmString str;
-
- /* Load the first page, then step forwards one page at a time. Each page
- ** is released as soon as its successor has been requested. The loop ends
- ** when lsmFsDbPageNext() sets pNext to NULL or returns an error. */
- lsmStringInit(&str, pDb->pEnv);
- rc = lsmFsDbPageGet(pFS, pSeg, iFirst, &pPg);
- while( rc==LSM_OK && pPg ){
- Page *pNext = 0;
- lsmStringAppendf(&str, " %lld", lsmFsPageNumber(pPg));
- rc = lsmFsDbPageNext(pSeg, pPg, 1, &pNext);
- lsmFsPageRelease(pPg);
- pPg = pNext;
- }
-
- /* On error discard the partial string so that *pzOut remains NULL. */
- if( rc!=LSM_OK ){
- lsmFree(pDb->pEnv, str.z);
- }else{
- *pzOut = str.z;
- }
- }
-
- /* If this function started the work session, finish it again. The value
- ** left in rcwork by lsmFinishWork() is not propagated to the caller. */
- if( bUnlock ){
- int rcwork = LSM_BUSY;
- lsmFinishWork(pDb, 0, &rcwork);
- }
- return rc;
-}
-
-/*
-** The following macros are used by the integrity-check code. Associated with
-** each block in the database is an 8-bit bit mask (the entry in the aUsed[]
-** array). As the integrity-check meanders through the database, it sets the
-** following bits to indicate how each block is used.
-**
-** INTEGRITY_CHECK_FIRST_PG:
-** First page of block is in use by sorted run.
-**
-** INTEGRITY_CHECK_LAST_PG:
-** Last page of block is in use by sorted run.
-**
-** INTEGRITY_CHECK_USED:
-** At least one page of the block is in use by a sorted run.
-**
-** INTEGRITY_CHECK_FREE:
-** The free block list contains an entry corresponding to this block.
-*/
-#define INTEGRITY_CHECK_FIRST_PG 0x01
-#define INTEGRITY_CHECK_LAST_PG 0x02
-#define INTEGRITY_CHECK_USED 0x04
-#define INTEGRITY_CHECK_FREE 0x08
-
-/*
-** Helper function for lsmFsIntegrityCheck()
-*/
-static void checkBlocks(
- FileSystem *pFS, /* File-system object */
- Segment *pSeg, /* Segment whose blocks are to be marked (may be NULL) */
- int bExtra, /* If true, count the "next" block if any */
- int nUsed, /* Not referenced by the body of this function */
- u8 *aUsed /* IN/OUT: Per-block INTEGRITY_CHECK_XXX masks, indexed by iBlk-1 */
-){
- /* Nothing to do for a NULL or empty (nSize<=0) segment. The inner test of
- ** pSeg repeats the outer one. */
- if( pSeg ){
- if( pSeg && pSeg->nSize>0 ){
- int rc; /* Only inspected by assert() statements */
- int iBlk; /* Current block (during iteration) */
- int iLastBlk; /* Last block of segment */
- int iFirstBlk; /* First block of segment */
- int bLastIsLastOnBlock; /* True iLast is the last on its block */
-
- assert( 0==fsSegmentRedirects(pFS, pSeg) );
- iBlk = iFirstBlk = fsPageToBlock(pFS, pSeg->iFirst);
- iLastBlk = fsPageToBlock(pFS, pSeg->iLastPg);
-
- bLastIsLastOnBlock = (fsLastPageOnBlock(pFS, iLastBlk)==pSeg->iLastPg);
- assert( iBlk>0 );
-
- do {
- /* iBlk is a part of this sorted run. */
- aUsed[iBlk-1] |= INTEGRITY_CHECK_USED;
-
- /* If the first page of this block is also part of the segment,
- ** set the flag to indicate that the first page of iBlk is in use.
- ** This is the case for every block other than the first, and for
- ** the first block if the segment begins on its first page. */
- if( fsFirstPageOnBlock(pFS, iBlk)==pSeg->iFirst || iBlk!=iFirstBlk ){
- assert( (aUsed[iBlk-1] & INTEGRITY_CHECK_FIRST_PG)==0 );
- aUsed[iBlk-1] |= INTEGRITY_CHECK_FIRST_PG;
- }
-
- /* Unless the sorted run finishes before the last page on this block,
- ** the last page of this block is also in use. */
- if( iBlk!=iLastBlk || bLastIsLastOnBlock ){
- assert( (aUsed[iBlk-1] & INTEGRITY_CHECK_LAST_PG)==0 );
- aUsed[iBlk-1] |= INTEGRITY_CHECK_LAST_PG;
- }
-
- /* Special case. The sorted run being scanned is the output run of
- ** a level currently undergoing an incremental merge. The sorted
- ** run ends on the last page of iBlk, but the next block has already
- ** been allocated. So mark it as in use as well. */
- if( iBlk==iLastBlk && bLastIsLastOnBlock && bExtra ){
- int iExtra = 0;
- rc = fsBlockNext(pFS, pSeg, iBlk, &iExtra);
- assert( rc==LSM_OK );
-
- assert( aUsed[iExtra-1]==0 );
- aUsed[iExtra-1] |= INTEGRITY_CHECK_USED;
- aUsed[iExtra-1] |= INTEGRITY_CHECK_FIRST_PG;
- aUsed[iExtra-1] |= INTEGRITY_CHECK_LAST_PG;
- }
-
- /* Move on to the next block in the sorted run. Or set iBlk to zero
- ** in order to break out of the loop if this was the last block in
- ** the run. */
- if( iBlk==iLastBlk ){
- iBlk = 0;
- }else{
- rc = fsBlockNext(pFS, pSeg, iBlk, &iBlk);
- assert( rc==LSM_OK );
- }
- }while( iBlk );
- }
- }
-}
-
-/*
-** Context object handed to checkFreelistCb() through its void* argument
-** by lsmFsIntegrityCheck().
-*/
-typedef struct CheckFreelistCtx CheckFreelistCtx;
-struct CheckFreelistCtx {
- u8 *aUsed; /* Per-block usage masks; entry for block iBlk is aUsed[iBlk-1] */
- int nBlock; /* Largest block number the callback accepts */
-};
-/*
-** Free-list walker callback used by lsmFsIntegrityCheck(). Records block
-** iBlk as being on the free-list by storing INTEGRITY_CHECK_FREE in its
-** aUsed[] slot. The block number must be in range and the slot must not
-** have been marked already. Parameter iSnapshot is unused. Always
-** returns 0.
-*/
-static int checkFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){
-  CheckFreelistCtx *pCheck = (CheckFreelistCtx *)pCtx;
-  u8 *pSlot;                      /* aUsed[] entry for block iBlk */
-
-  assert( iBlk>=1 );
-  assert( iBlk<=pCheck->nBlock );
-  pSlot = &pCheck->aUsed[iBlk-1];
-  assert( *pSlot==0 );
-  *pSlot = INTEGRITY_CHECK_FREE;
-  return 0;
-}
-
-/*
-** This function checks that all blocks in the database file are accounted
-** for. For each block, exactly one of the following must be true:
-**
-**   + the block is part of a sorted run, or
-**   + the block is on the free-block list
-**
-** This function also checks that there are no references to blocks with
-** out-of-range block numbers.
-**
-** If no errors are found, non-zero is returned. If an error is found, an
-** assert() fails. Non-zero is also returned, without any checking, if the
-** aUsed[] scratch array cannot be allocated.
-**
-** The worker snapshot (pDb->pWorker) is dereferenced unconditionally, so
-** it must be non-NULL when this function is called.
-*/
-int lsmFsIntegrityCheck(lsm_db *pDb){
-  CheckFreelistCtx ctx;
-  FileSystem *pFS = pDb->pFS;
-  int i;
-  int rc;
-  Freelist freelist = {0, 0, 0};
-  u8 *aUsed;
-  Level *pLevel;
-  Snapshot *pWorker = pDb->pWorker;
-  int nBlock = pWorker->nBlock;
-
-#if 0
-  static int nCall = 0;
-  nCall++;
-  printf("%d calls\n", nCall);
-#endif
-
-  aUsed = lsmMallocZero(pDb->pEnv, nBlock);
-  if( aUsed==0 ){
-    /* Malloc has failed. Since this function is only called within debug
-    ** builds, this probably means the user is running an OOM injection test.
-    ** Regardless, it will not be possible to run the integrity-check at this
-    ** time, so assume the database is Ok and return non-zero. */
-    return 1;
-  }
-
-  /* Mark the blocks used by every segment of every level. The "extra"
-  ** block is only counted for the lhs of a level that also has rhs
-  ** segments. */
-  for(pLevel=pWorker->pLevel; pLevel; pLevel=pLevel->pNext){
-    int j;
-    checkBlocks(pFS, &pLevel->lhs, (pLevel->nRight!=0), nBlock, aUsed);
-    for(j=0; j<pLevel->nRight; j++){
-      checkBlocks(pFS, &pLevel->aRhs[j], 0, nBlock, aUsed);
-    }
-  }
-
-  /* Mark all blocks in the free-list as used */
-  ctx.aUsed = aUsed;
-  ctx.nBlock = nBlock;
-  rc = lsmWalkFreelist(pDb, 0, checkFreelistCb, (void *)&ctx);
-
-  /* Every block must now have been claimed by a segment or the free-list. */
-  if( rc==LSM_OK ){
-    for(i=0; i<nBlock; i++) assert( aUsed[i]!=0 );
-  }
-
-  lsmFree(pDb->pEnv, aUsed);
-  lsmFree(pDb->pEnv, freelist.aEntry);
-
-  return 1;
-}
-
-#ifndef NDEBUG
-/*
-** Debugging helper, used only inside assert() conditions. Returns true if
-** page pPg is the final page of segment pSeg and false otherwise.
-**
-** When pFS->pCompress is not set, the test is a comparison of the page
-** number with pSeg->iLastPg. Otherwise the page is treated as last if
-** fsNextPageOffset() fails or reports a next offset of zero.
-*/
-int lsmFsDbPageIsLast(Segment *pSeg, Page *pPg){
-  FileSystem *pFS = pPg->pFS;
-
-  if( !pFS->pCompress ){
-    return (pPg->iPg==pSeg->iLastPg);
-  }else{
-    LsmPgno iNext = 0;
-    int rc = fsNextPageOffset(pFS, pSeg, pPg->iPg, pPg->nCompress+6, &iNext);
-    return (rc!=LSM_OK || iNext==0);
-  }
-}
-#endif
diff --git a/ext/lsm1/lsm_log.c b/ext/lsm1/lsm_log.c
deleted file mode 100644
index 3dcef42f70..0000000000
--- a/ext/lsm1/lsm_log.c
+++ /dev/null
@@ -1,1156 +0,0 @@
-/*
-** 2011-08-13
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** This file contains the implementation of LSM database logging. Logging
-** has one purpose in LSM - to make transactions durable.
-**
-** When data is written to an LSM database, it is initially stored in an
-** in-memory tree structure. Since this structure is in volatile memory,
-** if a power failure or application crash occurs it may be lost. To
-** prevent loss of data in this case, each time a record is written to the
-** in-memory tree an equivalent record is appended to the log on disk.
-** If a power failure or application crash does occur, data can be recovered
-** by reading the log.
-**
-** A log file consists of the following types of records representing data
-** written into the database:
-**
-** LOG_WRITE: A key-value pair written to the database.
-** LOG_DELETE: A delete key issued to the database.
-** LOG_COMMIT: A transaction commit.
-**
-** And the following types of records for ancillary purposes:
-**
-** LOG_EOF: A record indicating the end of a log file.
-** LOG_PAD1: A single byte padding record.
-** LOG_PAD2: An N byte padding record (N>1).
-** LOG_JUMP: A pointer to another offset within the log file.
-**
-** Each transaction written to the log contains one or more LOG_WRITE and/or
-** LOG_DELETE records, followed by a LOG_COMMIT record. The LOG_COMMIT record
-** contains an 8-byte checksum based on all previous data written to the
-** log file.
-**
-** LOG CHECKSUMS & RECOVERY
-**
-** Checksums are found in two types of log records: LOG_COMMIT and
-** LOG_CKSUM records. In order to recover content from a log, a client
-** reads each record from the start of the log, calculating a checksum as
-** it does. Each time a LOG_COMMIT or LOG_CKSUM is encountered, the
-** recovery process verifies that the checksum stored in the log
-** matches the calculated checksum. If it does not, the recovery process
-** can stop reading the log.
-**
-** If a recovery process reads records (other than COMMIT or CKSUM)
-** consisting of at least LSM_CKSUM_MAXDATA bytes, then the next record in
-** the log must be either a LOG_CKSUM or LOG_COMMIT record. If it is
-** not, the recovery process also stops reading the log.
-**
-** To recover the log file, it must be read twice. The first time to
-** determine the location of the last valid commit record. And the second
-** time to load data into the in-memory tree.
-**
-** Todo: Surely there is a better way...
-**
-** LOG WRAPPING
-**
-** If the log file were never deleted or wrapped, it would be possible to
-** read it from start to end each time recovery is required (i.e. each time
-** the number of database clients changes from 0 to 1). Effectively reading
-** the entire history of the database each time. This would quickly become
-** inefficient. Additionally, since the log file would grow without bound,
-** it wastes storage space.
-**
-** Instead, part of each checkpoint written into the database file contains
-** a log offset (and other information required to read the log starting
-** at this offset) at which to begin recovery. Offset $O.
-**
-** Once a checkpoint has been written and synced into the database file, it
-** is guaranteed that no recovery process will need to read any data before
-** offset $O of the log file. It is therefore safe to begin overwriting
-** any data that occurs before offset $O.
-**
-** This implementation separates the log into three regions mapped into
-** the log file - regions 0, 1 and 2. During recovery, regions are read
-** in ascending order (i.e. 0, then 1, then 2). Each region is zero or
-** more bytes in size.
-**
-** |---1---|..|--0--|.|--2--|....
-**
-** New records are always appended to the end of region 2.
-**
-** Initially (when it is empty), all three regions are zero bytes in size.
-** Each of them are located at the beginning of the file. As records are
-** added to the log, region 2 grows, so that the log consists of a zero
-** byte region 1, followed by a zero byte region 0, followed by an N byte
-** region 2. After one or more checkpoints have been written to disk,
-** the start point of region 2 is moved to $O. For example:
-**
-** A) ||.........|--2--|....
-**
-** (both regions 0 and 1 are 0 bytes in size at offset 0).
-**
-** Eventually, the log wraps around to write new records into the start.
-** At this point, region 2 is renamed to region 0. Region 0 is renamed
-** to region 2. After appending a few records to the new region 2, the
-** log file looks like this:
-**
-** B) ||--2--|...|--0--|....
-**
-** (region 1 is still 0 bytes in size, located at offset 0).
-**
-** Any checkpoints made at this point may reduce the size of region 0.
-** However, if they do not, and region 2 expands so that it is about to
-** overwrite the start of region 0, then region 2 is renamed to region 1,
-** and a new region 2 created at the end of the file following the existing
-** region 0.
-**
-** C) |---1---|..|--0--|.|-2-|
-**
-** In this state records are appended to region 2 until checkpoints have
-** contracted regions 0 and 1 until they are both zero bytes in size. They
-** are then shifted to the start of the log file, leaving the system in
-** the equivalent of state A above.
-**
-** Alternatively, state B may transition directly to state A if the size
-** of region 0 is reduced to zero bytes before region 2 threatens to
-** encroach upon it.
-**
-** LOG_PAD1 & LOG_PAD2 RECORDS
-**
-** PAD1 and PAD2 records may appear in a log file at any point. They allow
-** a process writing the log file to align the beginning of transactions with
-** the beginning of disk sectors, which increases robustness.
-**
-** RECORD FORMATS:
-**
-** LOG_EOF: * A single 0x00 byte.
-**
-** LOG_PAD1: * A single 0x01 byte.
-**
-** LOG_PAD2: * A single 0x02 byte, followed by
-** * The number of unused bytes (N) as a varint,
-** * An N byte block of unused space.
-**
-** LOG_COMMIT: * A single 0x03 byte.
-** * An 8-byte checksum.
-**
-** LOG_JUMP: * A single 0x04 byte.
-** * Absolute file offset to jump to, encoded as a varint.
-**
-** LOG_WRITE: * A single 0x06 or 0x07 byte,
-** * The number of bytes in the key, encoded as a varint,
-** * The number of bytes in the value, encoded as a varint,
-** * If the first byte was 0x07, an 8 byte checksum.
-** * The key data,
-** * The value data.
-**
-** LOG_DELETE: * A single 0x08 or 0x09 byte,
-** * The number of bytes in the key, encoded as a varint,
-** * If the first byte was 0x09, an 8 byte checksum.
-** * The key data.
-**
-** Varints are as described in lsm_varint.c (SQLite 4 format).
-**
-** CHECKSUMS:
-**
-** The checksum is calculated using two 32-bit unsigned integers, s0 and
-** s1. The initial value for both is 42. It is updated each time a record
-** is written into the log file by treating the encoded (binary) record as
-** an array of 32-bit little-endian integers. Then, if x[] is the integer
-** array, updating the checksum accumulators as follows:
-**
-** for i from 0 to n-1 step 2:
-** s0 += x[i] + s1;
-** s1 += x[i+1] + s0;
-** endfor
-**
-** If the record is not an even multiple of 8-bytes in size it is padded
-** with zeroes to make it so before the checksum is updated.
-**
-** The checksum stored in a COMMIT, WRITE or DELETE is based on all bytes
-** up to the start of the 8-byte checksum itself, including the COMMIT,
-** WRITE or DELETE fields that appear before the checksum in the record.
-**
-** VARINT FORMAT
-**
-** See lsm_varint.c.
-*/
-
-#ifndef _LSM_INT_H
-# include "lsmInt.h"
-#endif
-
-/* Log record types */
-#define LSM_LOG_EOF 0x00
-#define LSM_LOG_PAD1 0x01
-#define LSM_LOG_PAD2 0x02
-#define LSM_LOG_COMMIT 0x03
-#define LSM_LOG_JUMP 0x04
-
-#define LSM_LOG_WRITE 0x06
-#define LSM_LOG_WRITE_CKSUM 0x07
-
-#define LSM_LOG_DELETE 0x08
-#define LSM_LOG_DELETE_CKSUM 0x09
-
-#define LSM_LOG_DRANGE 0x0A
-#define LSM_LOG_DRANGE_CKSUM 0x0B
-
-/* Require a checksum every 32KB. */
-#define LSM_CKSUM_MAXDATA (32*1024)
-
-/* Do not wrap a log file smaller than this in bytes. */
-#define LSM_MIN_LOGWRAP (128*1024)
-
-/*
-** szSector:
-** Commit records must be aligned to end on szSector boundaries. If
-** the safety-mode is set to NORMAL or OFF, this value is 1. Otherwise,
-** if the safety-mode is set to FULL, it is the size of the file-system
-** sectors as reported by lsmFsSectorSize().
-*/
-struct LogWriter {
- u32 cksum0; /* Checksum 0 at offset iOff */
- u32 cksum1; /* Checksum 1 at offset iOff */
- int iCksumBuf; /* Bytes of buf that have been checksummed */
- i64 iOff; /* Offset at start of buffer buf */
- int szSector; /* Sector size for this transaction */
- LogRegion jump; /* Avoid writing to this region */
- i64 iRegion1End; /* End of first region written by trans */
- i64 iRegion2Start; /* Start of second regions written by trans */
- /* NOTE: buf must remain the final member. When lsmLogBegin() reuses an
- ** existing LogWriter it zeroes every byte that precedes buf with a single
- ** memset() and asserts that nothing follows buf. */
- LsmString buf; /* Buffer containing data not yet written */
-};
-
-/*
-** Decode the 4 bytes at aIn[0..3] as an unsigned 32-bit integer stored
-** least-significant byte first, and return the result.
-*/
-static u32 getU32le(u8 *aIn){
-  u32 v = (u32)aIn[0];
-  v += ((u32)aIn[1] << 8);
-  v += ((u32)aIn[2] << 16);
-  v += ((u32)aIn[3] << 24);
-  return v;
-}
-
-
-/*
-** Fold the n bytes of buffer z into the running checksum pair
-** (*pCksum0, *pCksum1). Buffer z need not be aligned to an 8-byte
-** boundary, and n need not be a multiple of 8.
-**
-** The input is consumed 8 bytes at a time, each group being read as two
-** little-endian 32-bit integers x0 and x1:
-**
-**     cksum0 += x0 + cksum1;
-**     cksum1 += x1 + cksum0;
-**
-** If n is not a multiple of 8, the trailing 1 to 7 bytes are copied into
-** a zero-filled 8-byte scratch buffer and processed as one final group.
-** In other words, the input is treated as if padded with zero bytes.
-**
-** n must be greater than zero.
-*/
-static void logCksumUnaligned(
-  char *z,                        /* Input buffer */
-  int n,                          /* Size of input buffer in bytes */
-  u32 *pCksum0,                   /* IN/OUT: Checksum value 1 */
-  u32 *pCksum1                    /* IN/OUT: Checksum value 2 */
-){
-  u8 *a = (u8 *)z;
-  u32 cksum0 = *pCksum0;
-  u32 cksum1 = *pCksum1;
-  int nIn = (n/8) * 8;            /* Bytes that form complete 8-byte groups */
-  int i;
-
-  assert( n>0 );
-  for(i=0; i<nIn; i+=8){
-    cksum0 += getU32le(&a[i]) + cksum1;
-    cksum1 += getU32le(&a[i+4]) + cksum0;
-  }
-
-  if( nIn!=n ){
-    /* Process the final partial group, padded with zeroes. */
-    u8 aBuf[8] = {0, 0, 0, 0, 0, 0, 0, 0};
-    assert( (n-nIn)<8 && n>nIn );
-    memcpy(aBuf, &a[nIn], n-nIn);
-    cksum0 += getU32le(aBuf) + cksum1;
-    cksum1 += getU32le(&aBuf[4]) + cksum0;
-  }
-
-  *pCksum0 = cksum0;
-  *pCksum1 = cksum1;
-}
-
-/*
-** Bring the running checksum in pLog (cksum0/cksum1) up to date so that
-** it covers the first nBuf bytes of the write buffer pLog->buf. Bytes
-** before offset pLog->iCksumBuf are already included; only the remainder
-** is processed. On return pLog->iCksumBuf is set to nBuf.
-**
-** nBuf must either be a multiple of 8 or be equal to the full buffer
-** size (pLog->buf.n).
-*/
-static void logUpdateCksum(LogWriter *pLog, int nBuf){
-  int iDone = pLog->iCksumBuf;    /* Bytes already checksummed */
-
-  assert( (iDone % 8)==0 );
-  assert( iDone<=nBuf );
-  assert( (nBuf % 8)==0 || nBuf==pLog->buf.n );
-  if( iDone<nBuf ){
-    char *zNew = &pLog->buf.z[iDone];
-    logCksumUnaligned(zNew, nBuf-iDone, &pLog->cksum0, &pLog->cksum1);
-  }
-  pLog->iCksumBuf = nBuf;
-}
-
-/*
-** Return iOff rounded down to a multiple of the sector size configured
-** in pLog->szSector.
-*/
-static i64 firstByteOnSector(LogWriter *pLog, i64 iOff){
-  i64 iSector = iOff / pLog->szSector;
-  return iSector * pLog->szSector;
-}
-/*
-** Return the offset of the final byte of the sector that contains offset
-** iOff: the sector's first byte plus pLog->szSector, less one.
-*/
-static i64 lastByteOnSector(LogWriter *pLog, i64 iOff){
-  i64 iFirst = firstByteOnSector(pLog, iOff);
-  return iFirst + pLog->szSector - 1;
-}
-
-/*
-** If possible, reclaim log file space. Log file space is reclaimed after
-** a snapshot that points to the same data in the database file is synced
-** into the db header.
-**
-** Reclaiming space means updating the in-memory log description
-** (pDb->treehdr.log): every region that lies wholly before the synced
-** log offset is set to zero size, and the region that contains the offset
-** has its start moved up to it.
-**
-** Nothing is reclaimed if another connection holds a read-only
-** transaction open, or if pDb->pShmhdr->iMetaPage is neither 1 nor 2.
-** Returns LSM_OK or an LSM error code.
-*/
-static int logReclaimSpace(lsm_db *pDb){
-  int rc;
-  int iMeta;
-  int bRotrans;                   /* True if there exists some ro-trans */
-
-  /* Test if there exists some other connection with a read-only transaction
-  ** open. If there does, then log file space may not be reclaimed.  */
-  rc = lsmDetectRoTrans(pDb, &bRotrans);
-  if( rc!=LSM_OK || bRotrans ) return rc;
-
-  iMeta = (int)pDb->pShmhdr->iMetaPage;
-  if( iMeta==1 || iMeta==2 ){
-    DbLog *pLog = &pDb->treehdr.log;
-    i64 iSyncedId;
-
-    /* Read the snapshot-id of the snapshot stored on meta-page iMeta. Note
-    ** that in theory, the value read is untrustworthy (due to a race
-    ** condition - see comments above lsmFsReadSyncedId()). So it is only
-    ** ever used to conclude that no log space can be reclaimed. If it seems
-    ** to indicate that it may be possible to reclaim log space, a
-    ** second call to lsmCheckpointSynced() (which does return trustworthy
-    ** values) is made below to confirm.  */
-    rc = lsmFsReadSyncedId(pDb, iMeta, &iSyncedId);
-
-    if( rc==LSM_OK && pLog->iSnapshotId!=iSyncedId ){
-      i64 iSnapshotId = 0;
-      i64 iOff = 0;
-      rc = lsmCheckpointSynced(pDb, &iSnapshotId, &iOff, 0);
-      if( rc==LSM_OK && pLog->iSnapshotId<iSnapshotId ){
-        int iRegion;
-        /* Regions are examined in read order (0, 1, 2). Each region that
-        ** does not contain iOff is emptied. The loop stops at the one that
-        ** does contain it. */
-        for(iRegion=0; iRegion<3; iRegion++){
-          LogRegion *p = &pLog->aRegion[iRegion];
-          if( iOff>=p->iStart && iOff<=p->iEnd ) break;
-          p->iStart = 0;
-          p->iEnd = 0;
-        }
-        assert( iRegion<3 );
-        pLog->aRegion[iRegion].iStart = iOff;
-        pLog->iSnapshotId = iSnapshotId;
-      }
-    }
-  }
-  return rc;
-}
-
-/*
-** This function is called when a write-transaction is first opened. It
-** is assumed that the caller is holding the client-mutex when it is
-** called.
-**
-** Before returning, this function allocates the LogWriter object that
-** will be used to write to the log file during the write transaction.
-** If pDb->pLogWriter is already set, the existing object is reset and
-** reused instead. LSM_OK is returned if no error occurs, otherwise an
-** LSM error code.
-**
-** If logging is disabled (pDb->bUseLog==0) this function is a no-op that
-** returns LSM_OK.
-*/
-int lsmLogBegin(lsm_db *pDb){
-  int rc = LSM_OK;
-  LogWriter *pNew;
-  LogRegion *aReg;
-
-  if( pDb->bUseLog==0 ) return LSM_OK;
-
-  /* If the log file has not yet been opened, open it now. Also allocate
-  ** the LogWriter structure, if it has not already been allocated.  */
-  rc = lsmFsOpenLog(pDb, 0);
-  if( pDb->pLogWriter==0 ){
-    pNew = lsmMallocZeroRc(pDb->pEnv, sizeof(LogWriter), &rc);
-    if( pNew ){
-      lsmStringInit(&pNew->buf, pDb->pEnv);
-      rc = lsmStringExtend(&pNew->buf, 2);
-    }
-    pDb->pLogWriter = pNew;
-  }else{
-    /* Reuse the existing object. Zero every field that precedes the write
-    ** buffer, but keep the buffer's allocation. The assert() checks that
-    ** buf is the last member of the structure. */
-    pNew = pDb->pLogWriter;
-    assert( (u8 *)(&pNew[1])==(u8 *)(&((&pNew->buf)[1])) );
-    memset(pNew, 0, ((u8 *)&pNew->buf) - (u8 *)pNew);
-    pNew->buf.n = 0;
-  }
-
-  if( rc==LSM_OK ){
-    /* The following call detects whether or not a new snapshot has been
-    ** synced into the database file. If so, it updates the contents of
-    ** the pDb->treehdr.log structure to reclaim any space in the log
-    ** file that is no longer required.
-    **
-    ** TODO: Calling this every transaction is overkill. And since the
-    ** call has to read and checksum a snapshot from the database file,
-    ** it is expensive. It would be better to figure out a way so that
-    ** this is only called occasionally - say for every 32KB written to
-    ** the log file.
-    */
-    rc = logReclaimSpace(pDb);
-  }
-  if( rc!=LSM_OK ){
-    lsmLogClose(pDb);
-    return rc;
-  }
-
-  /* Set the effective sector-size for this transaction. Sectors are assumed
-  ** to be one byte in size if the safety-mode is OFF or NORMAL, or as
-  ** reported by lsmFsSectorSize if it is FULL.  */
-  if( pDb->eSafety==LSM_SAFETY_FULL ){
-    pNew->szSector = lsmFsSectorSize(pDb->pFS);
-    assert( pNew->szSector>0 );
-  }else{
-    pNew->szSector = 1;
-  }
-
-  /* There are now three scenarios:
-  **
-  **   1) Regions 0 and 1 are both zero bytes in size and region 2 begins
-  **      at a file offset greater than LSM_MIN_LOGWRAP. In this case, wrap
-  **      around to the start and write data into the start of the log file.
-  **
-  **   2) Region 1 is zero bytes in size and region 2 occurs earlier in the
-  **      file than region 0. In this case, append data to region 2, but
-  **      remember to jump over region 1 if required.
-  **
-  **   3) Region 2 is the last in the file. Append to it.
-  */
-  aReg = &pDb->treehdr.log.aRegion[0];
-
-  assert( aReg[0].iEnd==0 || aReg[0].iEnd>aReg[0].iStart );
-  assert( aReg[1].iEnd==0 || aReg[1].iEnd>aReg[1].iStart );
-
-  pNew->cksum0 = pDb->treehdr.log.cksum0;
-  pNew->cksum1 = pDb->treehdr.log.cksum1;
-
-  if( aReg[0].iEnd==0 && aReg[1].iEnd==0 && aReg[2].iStart>=LSM_MIN_LOGWRAP ){
-    /* Case 1. Wrap around to the start of the file. Write an LSM_LOG_JUMP
-    ** into the log file in this case. Pad it out to 8 bytes using a PAD2
-    ** record so that the checksums can be updated immediately.  */
-    u8 aJump[] = {
-      LSM_LOG_PAD2, 0x04, 0x00, 0x00, 0x00, 0x00, LSM_LOG_JUMP, 0x00
-    };
-
-    lsmStringBinAppend(&pNew->buf, aJump, sizeof(aJump));
-    logUpdateCksum(pNew, pNew->buf.n);
-    rc = lsmFsWriteLog(pDb->pFS, aReg[2].iEnd, &pNew->buf);
-    pNew->iCksumBuf = pNew->buf.n = 0;
-
-    /* The old region 2 (now including the 8 bytes just written) becomes
-    ** region 0 and the region to jump over. The new region 2 starts
-    ** empty at offset 0, as does pNew->iOff (zeroed above). */
-    aReg[2].iEnd += 8;
-    pNew->jump = aReg[0] = aReg[2];
-    aReg[2].iStart = aReg[2].iEnd = 0;
-  }else if( aReg[1].iEnd==0 && aReg[2].iEnd<aReg[0].iEnd ){
-    /* Case 2. */
-    pNew->iOff = aReg[2].iEnd;
-    pNew->jump = aReg[0];
-  }else{
-    /* Case 3. */
-    assert( aReg[2].iStart>=aReg[0].iEnd && aReg[2].iStart>=aReg[1].iEnd );
-    pNew->iOff = aReg[2].iEnd;
-  }
-
-  /* If there is a region to jump over, widen it to sector boundaries. The
-  ** start is rounded down only if that leaves it after the current write
-  ** offset. The end is always rounded up to the last byte of its sector. */
-  if( pNew->jump.iStart ){
-    i64 iRound;
-    assert( pNew->jump.iStart>pNew->iOff );
-
-    iRound = firstByteOnSector(pNew, pNew->jump.iStart);
-    if( iRound>pNew->iOff ) pNew->jump.iStart = iRound;
-    pNew->jump.iEnd = lastByteOnSector(pNew, pNew->jump.iEnd);
-  }
-
-  assert( pDb->pLogWriter==pNew );
-  return rc;
-}
-
-/*
-** This function is called when a write-transaction is being closed.
-** Parameter bCommit is true if the transaction is being committed,
-** or false otherwise. The caller must hold the client-mutex to call
-** this function.
-**
-** If the transaction is being committed, the shared log state in
-** pDb->treehdr.log is updated from the LogWriter before returning: the
-** end of region 2 and the running checksum are copied over, and if the
-** transaction jumped over part of the log the regions are re-labelled.
-** If bCommit is false, or if there is no LogWriter, nothing is changed.
-**
-** The LogWriter object itself is not freed by this function.
-*/
-void lsmLogEnd(lsm_db *pDb, int bCommit){
-  DbLog *pLog;
-  LogWriter *p;
-  p = pDb->pLogWriter;
-
-  if( p==0 ) return;
-  pLog = &pDb->treehdr.log;
-
-  if( bCommit ){
-    pLog->aRegion[2].iEnd = p->iOff;
-    pLog->cksum0 = p->cksum0;
-    pLog->cksum1 = p->cksum1;
-    if( p->iRegion1End ){
-      /* This happens when the transaction had to jump over some other
-      ** part of the log. The data written before the jump becomes region 1
-      ** and region 2 restarts at the jump destination.  */
-      assert( pLog->aRegion[1].iEnd==0 );
-      assert( pLog->aRegion[2].iStart<p->iRegion1End );
-      pLog->aRegion[1].iStart = pLog->aRegion[2].iStart;
-      pLog->aRegion[1].iEnd = p->iRegion1End;
-      pLog->aRegion[2].iStart = p->iRegion2Start;
-    }
-  }
-}
-
-/*
-** Check whether a record of nReq bytes can be appended at the current
-** write position without running into the jump region (pLog->jump). If it
-** cannot, append an LSM_LOG_JUMP record (preceded by padding), write the
-** buffer to disk and move the write position to just past the jump region.
-**
-** If a jump is written and pbJump is not NULL, *pbJump is set to 1. It is
-** left untouched otherwise. Returns LSM_OK if no jump was needed or the
-** jump was written successfully, or an LSM error code otherwise.
-*/
-static int jumpIfRequired(
- lsm_db *pDb, /* Database handle */
- LogWriter *pLog, /* Log writer for the current transaction */
- int nReq, /* Size in bytes of the record about to be written */
- int *pbJump /* OUT: Set to 1 if a jump is written (may be NULL) */
-){
- /* Determine if it is necessary to add an LSM_LOG_JUMP to jump over the
- ** jump region before writing the LSM_LOG_WRITE or DELETE record. This
- ** is necessary if there is insufficient room between the current offset
- ** and the jump region to fit the new WRITE/DELETE record and the largest
- ** possible JUMP record with up to 7 bytes of padding (a total of 17
- ** bytes). */
- if( (pLog->jump.iStart > (pLog->iOff + pLog->buf.n))
- && (pLog->jump.iStart < (pLog->iOff + pLog->buf.n + (nReq + 17)))
- ){
- int rc; /* Return code */
- i64 iJump; /* Offset to jump to */
- u8 aJump[10]; /* Encoded jump record */
- int nJump; /* Valid bytes in aJump[] */
- int nPad; /* Bytes of padding required */
-
- /* Serialize the JUMP record */
- iJump = pLog->jump.iEnd+1;
- aJump[0] = LSM_LOG_JUMP;
- nJump = 1 + lsmVarintPut64(&aJump[1], iJump);
-
- /* Adding padding to the contents of the buffer so that it will be a
- ** multiple of 8 bytes in size after the JUMP record is appended. This
- ** is not strictly required, it just makes the keeping the running
- ** checksum up to date in this file a little simpler. */
- nPad = (pLog->buf.n + nJump) % 8;
- if( nPad ){
- u8 aPad[7] = {0,0,0,0,0,0,0};
- nPad = 8-nPad;
- /* One byte of padding is a PAD1 record. Anything larger is a PAD2
- ** record whose second byte is the count of unused bytes that follow. */
- if( nPad==1 ){
- aPad[0] = LSM_LOG_PAD1;
- }else{
- aPad[0] = LSM_LOG_PAD2;
- aPad[1] = (u8)(nPad-2);
- }
- rc = lsmStringBinAppend(&pLog->buf, aPad, nPad);
- if( rc!=LSM_OK ) return rc;
- }
-
- /* Append the JUMP record to the buffer. Then flush the buffer to disk
- ** and update the checksums. The next write to the log file (assuming
- ** there is no transaction rollback) will be to offset iJump (just past
- ** the jump region). */
- rc = lsmStringBinAppend(&pLog->buf, aJump, nJump);
- if( rc!=LSM_OK ) return rc;
- assert( (pLog->buf.n % 8)==0 );
- rc = lsmFsWriteLog(pDb->pFS, pLog->iOff, &pLog->buf);
- if( rc!=LSM_OK ) return rc;
- logUpdateCksum(pLog, pLog->buf.n);
- /* Record where the first region written by this transaction ends and
- ** where the second begins. lsmLogEnd() reads these fields on commit. */
- pLog->iRegion1End = (pLog->iOff + pLog->buf.n);
- pLog->iRegion2Start = iJump;
- pLog->iOff = iJump;
- pLog->iCksumBuf = pLog->buf.n = 0;
- if( pbJump ) *pbJump = 1;
- }
-
- return LSM_OK;
-}
-
-/*
-** Finish the record at the end of the log-buffer by appending the 8-byte
-** running checksum (cksum0 followed by cksum1, each written with
-** lsmPutU32()), then write the whole buffer to the log file at offset
-** pLog->iOff. Afterwards the write offset is advanced past the data and
-** the buffer is reset to empty. This happens even if the write fails.
-**
-** The caller must already have reserved 8 bytes of space at the end of
-** the buffer. Returns the result of lsmFsWriteLog().
-*/
-static int logCksumAndFlush(lsm_db *pDb){
-  LogWriter *pLog = pDb->pLogWriter;
-  u8 *aCksum;                     /* Where the checksum is stored */
-  int rc;                         /* Return code */
-
-  /* Bring the checksum up to date, then serialize it after the data. */
-  logUpdateCksum(pLog, pLog->buf.n);
-  aCksum = (u8 *)&pLog->buf.z[pLog->buf.n];
-  lsmPutU32(aCksum, pLog->cksum0);
-  lsmPutU32(&aCksum[4], pLog->cksum1);
-  pLog->buf.n += 8;
-
-  /* Flush the buffer to disk and start over with an empty one. */
-  rc = lsmFsWriteLog(pDb->pFS, pLog->iOff, &pLog->buf);
-  pLog->iOff += pLog->buf.n;
-  pLog->buf.n = 0;
-  pLog->iCksumBuf = 0;
-
-  return rc;
-}
-
-/*
-** Write the contents of the log-buffer to disk, followed by a record of
-** type eType. The only type currently accepted is LSM_LOG_COMMIT (this
-** is enforced by an assert()).
-**
-** If the sector size in use is greater than 1, padding records are added
-** first so that the COMMIT record ends exactly on a sector boundary. If
-** the safety mode is LSM_SAFETY_FULL, the log file is synced after the
-** record is written.
-**
-** Returns LSM_OK if successful, or an LSM error code otherwise.
-*/
-static int logFlush(lsm_db *pDb, int eType){
-  int rc;
-  int nReq;
-  LogWriter *pLog = pDb->pLogWriter;
-
-  assert( eType==LSM_LOG_COMMIT );
-  assert( pLog );
-
-  /* Commit record is always 9 bytes in size. */
-  nReq = 9;
-  if( eType==LSM_LOG_COMMIT && pLog->szSector>1 ) nReq += pLog->szSector + 17;
-  rc = jumpIfRequired(pDb, pLog, nReq, 0);
-  /* Do not go on to write the COMMIT record if the jump could not be
-  ** written. Without this check the error code would be overwritten by
-  ** the calls below. */
-  if( rc!=LSM_OK ) return rc;
-
-  /* If this is a COMMIT, add padding to the log so that the COMMIT record
-  ** is aligned against the end of a disk sector. In other words, add padding
-  ** so that the first byte following the COMMIT record lies on a different
-  ** sector.  */
-  if( eType==LSM_LOG_COMMIT && pLog->szSector>1 ){
-    int nPad;                     /* Bytes of padding to add */
-
-    /* Determine the value of nPad. */
-    nPad = ((pLog->iOff + pLog->buf.n + 9) % pLog->szSector);
-    if( nPad ) nPad = pLog->szSector - nPad;
-    rc = lsmStringExtend(&pLog->buf, nPad);
-    if( rc!=LSM_OK ) return rc;
-
-    /* Emit padding records until nPad bytes have been consumed. A single
-    ** leftover byte is a PAD1 record. Otherwise a PAD2 record is written
-    ** with at most 200 bytes of filler (0x2B) after its 2-byte header. */
-    while( nPad ){
-      if( nPad==1 ){
-        pLog->buf.z[pLog->buf.n++] = LSM_LOG_PAD1;
-        nPad = 0;
-      }else{
-        int n = LSM_MIN(200, nPad-2);
-        pLog->buf.z[pLog->buf.n++] = LSM_LOG_PAD2;
-        pLog->buf.z[pLog->buf.n++] = (char)n;
-        nPad -= 2;
-        memset(&pLog->buf.z[pLog->buf.n], 0x2B, n);
-        pLog->buf.n += n;
-        nPad -= n;
-      }
-    }
-  }
-
-  /* Make sure there is room in the log-buffer to add the CKSUM or COMMIT
-  ** record. Then add the first byte of it. The 8 bytes that follow are
-  ** zeroed here and overwritten with the checksum by logCksumAndFlush(). */
-  rc = lsmStringExtend(&pLog->buf, 9);
-  if( rc!=LSM_OK ) return rc;
-  pLog->buf.z[pLog->buf.n++] = (char)eType;
-  memset(&pLog->buf.z[pLog->buf.n], 0, 8);
-
-  rc = logCksumAndFlush(pDb);
-
-  /* If this is a commit and synchronous=full, sync the log to disk. */
-  if( rc==LSM_OK && eType==LSM_LOG_COMMIT && pDb->eSafety==LSM_SAFETY_FULL ){
-    rc = lsmFsSyncLog(pDb->pFS);
-  }
-  return rc;
-}
-
-/*
-** Append a record of type eType (LSM_WRITE, LSM_DELETE or LSM_DRANGE) to
-** the database log. For LSM_DELETE, nVal must be negative and no value is
-** written. For the other two types nVal must be non-negative and the
-** value follows the key.
-**
-** The record is added to the in-memory log buffer. The buffer is only
-** written to disk here if a checksum is embedded in the record or a jump
-** is taken. If logging is disabled (pDb->bUseLog==0) this function is a
-** no-op that returns LSM_OK.
-**
-** Returns LSM_OK if successful, or an LSM error code otherwise.
-*/
-int lsmLogWrite(
- lsm_db *pDb, /* Database handle */
- int eType, /* LSM_WRITE, LSM_DELETE or LSM_DRANGE */
- void *pKey, int nKey, /* Database key to write to log */
- void *pVal, int nVal /* Database value (or nVal<0) to write */
-){
- int rc = LSM_OK;
- LogWriter *pLog; /* Log object to write to */
- int nReq; /* Bytes of space required in log */
- int bCksum = 0; /* True to embed a checksum in this record */
-
- /* The public LSM_XXX operation codes double as the on-disk record type
- ** bytes, so eType can be stored in the log unchanged. */
- assert( eType==LSM_WRITE || eType==LSM_DELETE || eType==LSM_DRANGE );
- assert( LSM_LOG_WRITE==LSM_WRITE );
- assert( LSM_LOG_DELETE==LSM_DELETE );
- assert( LSM_LOG_DRANGE==LSM_DRANGE );
- assert( (eType==LSM_LOG_DELETE)==(nVal<0) );
-
- if( pDb->bUseLog==0 ) return LSM_OK;
- pLog = pDb->pLogWriter;
-
- /* Determine how many bytes of space are required, assuming that a checksum
- ** will be embedded in this record (even though it may not be). */
- nReq = 1 + lsmVarintLen32(nKey) + 8 + nKey;
- if( eType!=LSM_LOG_DELETE ) nReq += lsmVarintLen32(nVal) + nVal;
-
- /* Jump over the jump region if required. Set bCksum to true to tell the
- ** code below to include a checksum in the record if either (a) writing
- ** this record would mean that more than LSM_CKSUM_MAXDATA bytes of data
- ** have been written to the log since the last checksum, or (b) the jump
- ** is taken. */
- rc = jumpIfRequired(pDb, pLog, nReq, &bCksum);
- if( (pLog->buf.n+nReq) > LSM_CKSUM_MAXDATA ) bCksum = 1;
-
- if( rc==LSM_OK ){
- rc = lsmStringExtend(&pLog->buf, nReq);
- }
- if( rc==LSM_OK ){
- u8 *a = (u8 *)&pLog->buf.z[pLog->buf.n];
-
- /* Write the record header - the type byte followed by either 1 (for
- ** DELETE) or 2 (for WRITE) varints. The asserts confirm that each
- ** XXX_CKSUM type is the plain type with the low bit set, which is what
- ** OR-ing in bCksum relies on. */
- assert( LSM_LOG_WRITE_CKSUM == (LSM_LOG_WRITE | 0x0001) );
- assert( LSM_LOG_DELETE_CKSUM == (LSM_LOG_DELETE | 0x0001) );
- assert( LSM_LOG_DRANGE_CKSUM == (LSM_LOG_DRANGE | 0x0001) );
- *(a++) = (u8)eType | (u8)bCksum;
- a += lsmVarintPut32(a, nKey);
- if( eType!=LSM_LOG_DELETE ) a += lsmVarintPut32(a, nVal);
-
- /* If a checksum is required, logCksumAndFlush() appends it after the
- ** header, writes the buffer to disk and empties it. Pointer a is then
- ** reset to the new end of the buffer. Execution continues with the
- ** memcpy() calls below even if the flush returns an error; the error
- ** code is still returned to the caller. */
- if( bCksum ){
- pLog->buf.n = (a - (u8 *)pLog->buf.z);
- rc = logCksumAndFlush(pDb);
- a = (u8 *)&pLog->buf.z[pLog->buf.n];
- }
-
- memcpy(a, pKey, nKey);
- a += nKey;
- if( eType!=LSM_LOG_DELETE ){
- memcpy(a, pVal, nVal);
- a += nVal;
- }
- pLog->buf.n = a - (u8 *)pLog->buf.z;
- assert( pLog->buf.n<=pLog->buf.nAlloc );
- }
-
- return rc;
-}
-
-/*
-** Write an LSM_LOG_COMMIT record to the log. A no-op if logging is disabled.
-*/
-int lsmLogCommit(lsm_db *pDb){
-  int bLogging = (pDb->bUseLog!=0);
-  return bLogging ? logFlush(pDb, LSM_LOG_COMMIT) : LSM_OK;
-}
-
-/*
-** Store the current offset and other checksum related information in the
-** structure *pMark. Later, *pMark can be passed to lsmLogSeek() to "rewind"
-** the LogWriter object to the current log file offset. This is used when
-** rolling back savepoint transactions. A no-op if pDb->bUseLog is zero.
-*/
-void lsmLogTell(
- lsm_db *pDb, /* Database handle */
- LogMark *pMark /* Populate this object with current offset */
-){
- LogWriter *pLog; /* Log writer of pDb */
- int nCksum; /* pLog->buf.n rounded down to a multiple of 8 */
-
- if( pDb->bUseLog==0 ) return;
- pLog = pDb->pLogWriter;
- nCksum = pLog->buf.n & 0xFFFFFFF8;
- logUpdateCksum(pLog, nCksum);
- assert( pLog->iCksumBuf==nCksum );
- pMark->nBuf = pLog->buf.n - nCksum; /* 0 to 7 trailing bytes */
- memcpy(pMark->aBuf, &pLog->buf.z[nCksum], pMark->nBuf);
- /* Save the absolute log offset and the current running checksum values. */
- pMark->iOff = pLog->iOff + pLog->buf.n;
- pMark->cksum0 = pLog->cksum0;
- pMark->cksum1 = pLog->cksum1;
-}
-
-/*
-** Seek (rewind) back to the log file offset stored by an earlier call to
-** lsmLogTell() in *pMark. Does nothing if pDb->bUseLog is zero.
-*/
-void lsmLogSeek(
-  lsm_db *pDb,                    /* Database handle */
-  LogMark *pMark                  /* Object containing log offset to seek to */
-){
-  LogWriter *pLog;                /* Log writer to rewind */
-
-  if( pDb->bUseLog==0 ) return;
-  pLog = pDb->pLogWriter;
-  assert( pMark->iOff<=pLog->iOff+pLog->buf.n );
-  /* 64-bit mask: the 32-bit literal 0xFFFFFFF8 truncates offsets past 4GB. */
-  if( (pMark->iOff & ~(i64)7)>=pLog->iOff ){
-    pLog->buf.n = (int)(pMark->iOff - pLog->iOff);
-    pLog->iCksumBuf = (pLog->buf.n & 0xFFFFFFF8);
-  }else{
-    pLog->buf.n = pMark->nBuf;
-    memcpy(pLog->buf.z, pMark->aBuf, pMark->nBuf);
-    pLog->iCksumBuf = 0;
-    pLog->iOff = pMark->iOff - pMark->nBuf;
-  }
-  pLog->cksum0 = pMark->cksum0;
-  pLog->cksum1 = pMark->cksum1;
-
-  if( pMark->iOff > pLog->iRegion1End ) pLog->iRegion1End = 0;
-  if( pMark->iOff > pLog->iRegion2Start ) pLog->iRegion2Start = 0;
-}
-
-/*
-** Service an lsm_info(LOG_STRUCTURE) request. *pzVal is set to a string of
-** six integers (iStart and iEnd of log regions 0, 1 and 2), or to NULL if
-** an error occurs. An LSM error code is returned.
-*/
-int lsmInfoLogStructure(lsm_db *pDb, char **pzVal){
-  char *zOut = 0;                 /* Formatted result */
-  int rc = LSM_OK;                /* Return code */
-  int bNoTrans = (pDb->pCsr==0 && pDb->nTransOpen==0);
-
-  /* When no read or write transaction is open, load the latest tree-header
-  ** from shared-memory and, if necessary, update it based on the contents
-  ** of the database header. These are passive reads - no locks are taken. */
-  if( bNoTrans ){
-    rc = lsmTreeLoadHeader(pDb, 0);
-    if( rc==LSM_OK ){
-      rc = logReclaimSpace(pDb);
-    }
-  }
-
-  if( rc==LSM_OK ){
-    DbLog *p = &pDb->treehdr.log;
-    zOut = lsmMallocPrintf(pDb->pEnv, "%d %d %d %d %d %d",
-        (int)p->aRegion[0].iStart, (int)p->aRegion[0].iEnd,
-        (int)p->aRegion[1].iStart, (int)p->aRegion[1].iEnd,
-        (int)p->aRegion[2].iStart, (int)p->aRegion[2].iEnd
-    );
-    if( zOut==0 ) rc = LSM_NOMEM_BKPT;
-  }
-  *pzVal = zOut;
-  return rc;
-}
-
-/*************************************************************************
-** Begin code for log recovery.
-*/
-/* Cursor used during recovery to read the log and track its checksums. */
-typedef struct LogReader LogReader;
-struct LogReader {
- FileSystem *pFS; /* File system to read from */
- i64 iOff; /* File offset at end of buf content */
- int iBuf; /* Current read offset in buf */
- LsmString buf; /* Buffer containing file content */
-
- int iCksumBuf; /* Offset in buf corresponding to cksum[01] */
- u32 cksum0; /* Running checksum 0, valid at offset iCksumBuf */
- u32 cksum1; /* Running checksum 1, valid at offset iCksumBuf */
-};
-/* Read the next nBlob bytes from the log, refilling p->buf as required. */
-static void logReaderBlob(
- LogReader *p, /* Log reader object */
- LsmString *pBuf, /* Dynamic storage, if required */
- int nBlob, /* Number of bytes to read */
- u8 **ppBlob, /* OUT: Pointer to blob read (may be NULL) */
- int *pRc /* IN/OUT: Error code */
-){
- static const int LOG_READ_SIZE = 512; /* Bytes requested per file read */
- int rc = *pRc; /* Return code */
- int nReq = nBlob; /* Bytes required */
-
- while( rc==LSM_OK && nReq>0 ){
- int nAvail; /* Bytes of data available in p->buf */
- if( p->buf.n==p->iBuf ){
- int nCksum; /* Total bytes requiring checksum */
- int nCarry = 0; /* Trailing bytes (nCksum%8) carried over */
-
- nCksum = p->iBuf - p->iCksumBuf;
- if( nCksum>0 ){
- nCarry = nCksum % 8;
- nCksum = ((nCksum / 8) * 8);
- if( nCksum>0 ){
- logCksumUnaligned(
- &p->buf.z[p->iCksumBuf], nCksum, &p->cksum0, &p->cksum1
- );
- }
- }
- if( nCarry>0 ) memcpy(p->buf.z, &p->buf.z[p->iBuf-nCarry], nCarry);
- p->buf.n = nCarry;
- p->iBuf = nCarry;
- /* Refill the buffer; the carried bytes stay at its start. */
- rc = lsmFsReadLog(p->pFS, p->iOff, LOG_READ_SIZE, &p->buf);
- if( rc!=LSM_OK ) break;
- p->iCksumBuf = 0;
- p->iOff += LOG_READ_SIZE;
- }
-
- nAvail = p->buf.n - p->iBuf;
- if( ppBlob && nReq==nBlob && nBlob<=nAvail ){
- *ppBlob = (u8 *)&p->buf.z[p->iBuf]; /* Zero-copy: point into p->buf */
- p->iBuf += nBlob;
- nReq = 0;
- }else{ /* NOTE(review): dereferences pBuf, yet some callers pass pBuf==0 */
- int nCopy = LSM_MIN(nAvail, nReq);
- if( nBlob==nReq ){
- pBuf->n = 0; /* First chunk of this blob: reset pBuf */
- }
- rc = lsmStringBinAppend(pBuf, (u8 *)&p->buf.z[p->iBuf], nCopy);
- nReq -= nCopy;
- p->iBuf += nCopy;
- if( nReq==0 && ppBlob ){
- *ppBlob = (u8*)pBuf->z;
- }
- }
- }
-
- *pRc = rc;
-}
-/* Read a varint from the log and store its value in *piVal. */
-static void logReaderVarint(
- LogReader *p, /* Log reader object */
- LsmString *pBuf, /* Dynamic storage, if required */
- int *piVal, /* OUT: Value read from log */
- int *pRc /* IN/OUT: Error code */
-){
- if( *pRc==LSM_OK ){
- u8 *aVarint; /* Points at the encoded varint */
- if( p->buf.n==p->iBuf ){ /* Buffer exhausted: over-read 10 bytes... */
- logReaderBlob(p, 0, 10, &aVarint, pRc);
- if( LSM_OK==*pRc ) p->iBuf -= (10 - lsmVarintGet32(aVarint, piVal));
- }else{ /* ...else read the size implied by the varint's first byte */
- logReaderBlob(p, pBuf, lsmVarintSize(p->buf.z[p->iBuf]), &aVarint, pRc);
- if( LSM_OK==*pRc ) lsmVarintGet32(aVarint, piVal);
- }
- }
-}
-/* Read one byte from the log into *pByte (unchanged if nothing is read). */
-static void logReaderByte(LogReader *p, u8 *pByte, int *pRc){
-  u8 *aOne = 0;                   /* Set to point at the byte read */
-  logReaderBlob(p, 0, 1, &aOne, pRc);
-  if( aOne!=0 ) pByte[0] = aOne[0];
-}
-/* Compare the 8-byte checksum stored in the log with the running one. */
-static void logReaderCksum(LogReader *p, LsmString *pBuf, int *pbEof, int *pRc){
- if( *pRc==LSM_OK ){
- u8 *pPtr = 0; /* The 8 checksum bytes read from the log */
- u32 cksum0, cksum1; /* Checksum values stored in the log */
- int nCksum = p->iBuf - p->iCksumBuf; /* Bytes not yet checksummed */
-
- /* Update in-memory (expected) checksums */
- assert( nCksum>=0 );
- logCksumUnaligned(&p->buf.z[p->iCksumBuf], nCksum, &p->cksum0, &p->cksum1);
- p->iCksumBuf = p->iBuf + 8; /* Skip the 8 stored checksum bytes */
- logReaderBlob(p, pBuf, 8, &pPtr, pRc);
- assert( pPtr || *pRc );
-
- /* Read the checksums from the log file. Set *pbEof if they do not match. */
- if( pPtr ){
- cksum0 = lsmGetU32(pPtr);
- cksum1 = lsmGetU32(&pPtr[4]);
- *pbEof = (cksum0!=p->cksum0 || cksum1!=p->cksum1);
- p->iCksumBuf = p->iBuf;
- }
- }
-}
-/* Point reader *p at the start of log region 2, with an empty buffer. */
-static void logReaderInit(
-  lsm_db *pDb,                    /* Database handle */
-  DbLog *pLog,                    /* Log object associated with pDb */
-  int bInitBuf,                   /* True if p->buf is uninitialized */
-  LogReader *p                    /* Initialize this LogReader object */
-){
-  /* Buffer: initialize it if requested, then discard any content. */
-  if( bInitBuf!=0 ) lsmStringInit(&p->buf, pDb->pEnv);
-  p->buf.n = 0;
-  p->iBuf = p->iCksumBuf = 0;
-  p->pFS = pDb->pFS;
-  p->iOff = pLog->aRegion[2].iStart;
-  p->cksum0 = pLog->cksum0;
-  p->cksum1 = pLog->cksum1;
-}
-
-/*
-** Called once the header of a LOG_WRITE or LOG_DELETE record has been read;
-** nByte is the total size of the key and value that follow. Returns true
-** if a record of this size and position should contain a checksum.
-*/
-static int logRequireCksum(LogReader *p, int nByte){
-  int nUnsummed = p->iBuf + nByte - p->iCksumBuf;
-  return nUnsummed > LSM_CKSUM_MAXDATA;
-}
-
-/*
-** Recover the contents of the log file into the in-memory tree of pDb.
-*/
-int lsmLogRecover(lsm_db *pDb){
- LsmString buf1; /* Key buffer */
- LsmString buf2; /* Value buffer */
- LogReader reader; /* Log reader object */
- int rc = LSM_OK; /* Return code */
- int nCommit = 0; /* Number of transactions to recover */
- int iPass; /* 0 while counting commits, 1 while replaying */
- int nJump = 0; /* Number of LSM_LOG_JUMP records in pass 0 */
- DbLog *pLog; /* Points to pDb->treehdr.log */
- int bOpen; /* Output of lsmFsOpenLog(); log is read only if set */
-
- rc = lsmFsOpenLog(pDb, &bOpen);
- if( rc!=LSM_OK ) return rc;
-
- rc = lsmTreeInit(pDb);
- if( rc!=LSM_OK ) return rc;
-
- pLog = &pDb->treehdr.log;
- lsmCheckpointLogoffset(pDb->pShmhdr->aSnap2, pLog);
-
- logReaderInit(pDb, pLog, 1, &reader);
- lsmStringInit(&buf1, pDb->pEnv);
- lsmStringInit(&buf2, pDb->pEnv);
-
- /* The outer for() loop normally runs twice. Pass 0 counts the committed
- ** transactions in the log; pass 1 replays them into the in-memory tree.
- ** If pass 0 finds no commits and did not start at offset 0, it is re-run
- ** once from offset 0 (iPass is reset to -1 below). */
- if( bOpen ){
- for(iPass=0; iPass<2 && rc==LSM_OK; iPass++){
- int bEof = 0;
-
- while( rc==LSM_OK && !bEof ){
- u8 eType = 0;
- logReaderByte(&reader, &eType, &rc);
-
- switch( eType ){
- case LSM_LOG_PAD1:
- break;
-
- case LSM_LOG_PAD2: {
- int nPad;
- logReaderVarint(&reader, &buf1, &nPad, &rc);
- logReaderBlob(&reader, &buf1, nPad, 0, &rc);
- break;
- }
-
- case LSM_LOG_DRANGE:
- case LSM_LOG_DRANGE_CKSUM:
- case LSM_LOG_WRITE:
- case LSM_LOG_WRITE_CKSUM: {
- int nKey;
- int nVal;
- u8 *aVal;
- logReaderVarint(&reader, &buf1, &nKey, &rc);
- logReaderVarint(&reader, &buf2, &nVal, &rc);
-
- if( eType==LSM_LOG_WRITE_CKSUM || eType==LSM_LOG_DRANGE_CKSUM ){
- logReaderCksum(&reader, &buf1, &bEof, &rc);
- }else{
- bEof = logRequireCksum(&reader, nKey+nVal);
- }
- if( bEof ) break;
-
- logReaderBlob(&reader, &buf1, nKey, 0, &rc); /* Key is copied to buf1 */
- logReaderBlob(&reader, &buf2, nVal, &aVal, &rc);
- if( iPass==1 && rc==LSM_OK ){
- if( eType==LSM_LOG_WRITE || eType==LSM_LOG_WRITE_CKSUM ){
- rc = lsmTreeInsert(pDb, (u8 *)buf1.z, nKey, aVal, nVal);
- }else{
- rc = lsmTreeDelete(pDb, (u8 *)buf1.z, nKey, aVal, nVal);
- }
- }
- break;
- }
-
- case LSM_LOG_DELETE:
- case LSM_LOG_DELETE_CKSUM: {
- int nKey; u8 *aKey;
- logReaderVarint(&reader, &buf1, &nKey, &rc);
-
- if( eType==LSM_LOG_DELETE_CKSUM ){
- logReaderCksum(&reader, &buf1, &bEof, &rc);
- }else{
- bEof = logRequireCksum(&reader, nKey);
- }
- if( bEof ) break;
-
- logReaderBlob(&reader, &buf1, nKey, &aKey, &rc);
- if( iPass==1 && rc==LSM_OK ){
- rc = lsmTreeInsert(pDb, aKey, nKey, NULL, -1);
- }
- break;
- }
-
- case LSM_LOG_COMMIT:
- logReaderCksum(&reader, &buf1, &bEof, &rc);
- if( bEof==0 ){
- nCommit++;
- assert( nCommit>0 || iPass==1 );
- if( nCommit==0 ) bEof = 1; /* Pass 1: last counted commit replayed */
- }
- break;
-
- case LSM_LOG_JUMP: {
- int iOff = 0;
- logReaderVarint(&reader, &buf1, &iOff, &rc);
- if( rc==LSM_OK ){
- if( iPass==1 ){
- if( pLog->aRegion[2].iStart==0 ){
- assert( pLog->aRegion[1].iStart==0 );
- pLog->aRegion[1].iEnd = reader.iOff;
- }else{
- assert( pLog->aRegion[0].iStart==0 );
- pLog->aRegion[0].iStart = pLog->aRegion[2].iStart;
- pLog->aRegion[0].iEnd = reader.iOff-reader.buf.n+reader.iBuf;
- }
- pLog->aRegion[2].iStart = iOff;
- }else{
- if( (nJump++)==2 ){ /* Third jump seen in pass 0: stop reading */
- bEof = 1;
- }
- }
-
- reader.iOff = iOff;
- reader.buf.n = reader.iBuf; /* Drop the unread remainder of the buffer */
- }
- break;
- }
-
- default:
- /* Including LSM_LOG_EOF */
- bEof = 1;
- break;
- }
- }
-
- if( rc==LSM_OK && iPass==0 ){
- if( nCommit==0 ){
- if( pLog->aRegion[2].iStart==0 ){
- iPass = 1; /* Nothing to replay: skip pass 1 */
- }else{
- pLog->aRegion[2].iStart = 0;
- iPass = -1; /* Repeat pass 0, this time from offset 0 */
- lsmCheckpointZeroLogoffset(pDb);
- }
- }
- logReaderInit(pDb, pLog, 0, &reader);
- nCommit = nCommit * -1; /* Pass 1 counts back up to zero */
- }
- }
- }
-
- /* Initialize DbLog object */
- if( rc==LSM_OK ){
- pLog->aRegion[2].iEnd = reader.iOff - reader.buf.n + reader.iBuf;
- pLog->cksum0 = reader.cksum0;
- pLog->cksum1 = reader.cksum1;
- }
-
- if( rc==LSM_OK ){
- rc = lsmFinishRecovery(pDb);
- }else{
- lsmFinishRecovery(pDb); /* Keep the earlier error code */
- }
-
- if( pDb->bRoTrans ){
- lsmFsCloseLog(pDb);
- }
-
- lsmStringClear(&buf1);
- lsmStringClear(&buf2);
- lsmStringClear(&reader.buf);
- return rc;
-}
-/* Free the LogWriter object attached to db, if any, and its buffer. */
-void lsmLogClose(lsm_db *db){
-  LogWriter *pWriter = db->pLogWriter;
-  if( pWriter==0 ) return;
-  lsmFree(db->pEnv, pWriter->buf.z);
-  lsmFree(db->pEnv, pWriter);
-  db->pLogWriter = 0;
-}
diff --git a/ext/lsm1/lsm_main.c b/ext/lsm1/lsm_main.c
deleted file mode 100644
index f2b353105a..0000000000
--- a/ext/lsm1/lsm_main.c
+++ /dev/null
@@ -1,1008 +0,0 @@
-/*
-** 2011-08-18
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** The main interface to the LSM module.
-*/
-#include "lsmInt.h"
-
-
-#ifdef LSM_DEBUG
-/*
-** This function returns a copy of its only argument, unchanged.
-**
-** When the library is built with LSM_DEBUG defined, this function is called
-** whenever an error code is generated (not propagated - generated). So
-** if the library is mysteriously returning (say) LSM_IOERR, a breakpoint
-** may be set in this function to determine why.
-*/
-int lsmErrorBkpt(int rc){
- /* Set breakpoint here! */
- return rc;
-}
-
-/*
-** This function contains various assert() statements that test that the
-** lsm_db structure passed as an argument is internally consistent.
-*/
-static void assert_db_state(lsm_db *pDb){
-
- /* If there is at least one cursor or a write transaction open, the database
- ** handle must be holding a pointer to a client snapshot. And the reverse
- ** - if there are no open cursors and no write transactions then there must
- ** not be a client snapshot. */
-
- assert( (pDb->pCsr!=0||pDb->nTransOpen>0)==(pDb->iReader>=0||pDb->bRoTrans) );
- /* If iReader>=0 or bRoTrans is set, pClient must be non-NULL. */
- assert( (pDb->iReader<0 && pDb->bRoTrans==0) || pDb->pClient!=0 );
- /* The count of open transactions is never negative. */
- assert( pDb->nTransOpen>=0 );
-}
-#else
-# define assert_db_state(x)
-#endif
-
-/*
-** The default key-compare function. Keys are compared with memcmp() over
-** their common length; if that is a tie, the shorter key sorts first.
-*/
-static int xCmp(void *p1, int n1, void *p2, int n2){
-  int nCommon = LSM_MIN(n1, n2);
-  int c = memcmp(p1, p2, nCommon);
-  return (c!=0) ? c : (n1-n2);
-}
-/* Default logging callback: write message z, plus a newline, to stderr. */
-static void xLog(void *pCtx, int rc, const char *z){
-  FILE *pOut = stderr;            /* Destination stream */
-  (void)pCtx, (void)rc;           /* Unused parameters */
-  fprintf(pOut, "%s\n", z);
-  fflush(pOut);
-}
-
-/*
-** Allocate a new db handle and store it in *ppDb (NULL if out of memory).
-*/
-int lsm_new(lsm_env *pEnv, lsm_db **ppDb){
-  lsm_env *pUse;                  /* Environment the handle will use */
-  lsm_db *pNew;                   /* The new handle */
-
-  /* Fall back to the default environment if the caller supplied none. */
-  pUse = (pEnv!=0) ? pEnv : lsm_default_env();
-  assert( pUse );
-  pNew = (lsm_db *)lsmMallocZero(pUse, sizeof(lsm_db));
-  *ppDb = pNew;
-  if( pNew==0 ) return LSM_NOMEM_BKPT;
-
-  /* Callbacks and sentinels first, then the compile-time defaults. */
-  pNew->pEnv = pUse;
-  pNew->xCmp = xCmp;
-  pNew->xLog = xLog;
-  pNew->iReader = -1;
-  pNew->iRwclient = -1;
-  pNew->compress.iId = LSM_COMPRESSION_NONE;
-  pNew->nTreeLimit = LSM_DFLT_AUTOFLUSH;
-  pNew->nAutockpt = LSM_DFLT_AUTOCHECKPOINT;
-  pNew->bAutowork = LSM_DFLT_AUTOWORK;
-  pNew->eSafety = LSM_DFLT_SAFETY;
-  pNew->nDfltPgsz = LSM_DFLT_PAGE_SIZE;
-  pNew->nDfltBlksz = LSM_DFLT_BLOCK_SIZE;
-  pNew->nMerge = LSM_DFLT_AUTOMERGE;
-  pNew->nMaxFreelist = LSM_MAX_FREELIST_ENTRIES;
-  pNew->bUseLog = LSM_DFLT_USE_LOG;
-  pNew->bMultiProc = LSM_DFLT_MULTIPLE_PROCESSES;
-  pNew->iMmap = LSM_DFLT_MMAP;
-  return LSM_OK;
-}
-/* Return the environment handle stored in pDb->pEnv (asserted non-NULL). */
-lsm_env *lsm_get_env(lsm_db *pDb){
- assert( pDb->pEnv );
- return pDb->pEnv;
-}
-
-/*
-** Release the client snapshot held by database handle pDb, provided that
-** the handle has no open cursors and no open write transactions.
-*/
-static void dbReleaseClientSnapshot(lsm_db *pDb){
-  int bInUse = (pDb->nTransOpen!=0 || pDb->pCsr!=0);
-  if( bInUse ) return;
-  lsmFinishReadTrans(pDb);
-}
-/* Set *pzAbs to a heap-allocated absolute form of zRel, or NULL on error. */
-static int getFullpathname(
- lsm_env *pEnv, /* Environment supplying xFullpath() */
- const char *zRel, /* Possibly relative input path */
- char **pzAbs /* OUT: Absolute path; caller frees with lsmFree() */
-){
- int nAlloc = 0; /* Size passed to the latest xFullpath() call */
- char *zAlloc = 0; /* Output buffer */
- int nReq = 0; /* Size requested by xFullpath() */
- int rc;
- /* Call xFullpath() until the buffer is as large as it asks for. */
- do{
- nAlloc = nReq;
- rc = pEnv->xFullpath(pEnv, zRel, zAlloc, &nReq);
- if( nReq>nAlloc ){
- zAlloc = lsmReallocOrFreeRc(pEnv, zAlloc, nReq, &rc);
- }
- }while( nReq>nAlloc && rc==LSM_OK );
-
- if( rc!=LSM_OK ){
- lsmFree(pEnv, zAlloc);
- zAlloc = 0;
- }
- *pzAbs = zAlloc;
- return rc;
-}
-
-/*
-** Check that the bits in the db->mLock mask are consistent with the value
-** stored in db->iRwclient. An assert shall fail otherwise. No-op if NDEBUG.
-*/
-static void assertRwclientLockValue(lsm_db *db){
-#ifndef NDEBUG
- u64 msk; /* Mask of mLock bits for RWCLIENT locks */
- u64 rwclient = 0; /* Bit corresponding to db->iRwclient */
-
- if( db->iRwclient>=0 ){
- rwclient = ((u64)1 << (LSM_LOCK_RWCLIENT(db->iRwclient)-1));
- }
- msk = ((u64)1 << (LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT)-1)) - 1;
- msk -= (((u64)1 << (LSM_LOCK_RWCLIENT(0)-1)) - 1);
- /* Exactly the bit for iRwclient (or none, if iRwclient<0) may be set. */
- assert( (db->mLock & msk)==rwclient );
-#endif
-}
-
-/*
-** Open a new connection to database zFilename. LSM_MISUSE if already open.
-*/
-int lsm_open(lsm_db *pDb, const char *zFilename){
- int rc; /* Return code */
-
- if( pDb->pDatabase ){
- rc = LSM_MISUSE;
- }else{
- char *zFull; /* Absolute version of zFilename */
-
- /* Translate the possibly relative pathname supplied by the user into
- ** an absolute pathname. This is required because the supplied path
- ** is used (either directly or with "-log" appended to it) for more
- ** than one purpose - to open both the database and log files, and
- ** perhaps to unlink the log file during disconnection. An absolute
- ** path is required to ensure that the correct files are operated
- ** on even if the application changes the cwd. */
- rc = getFullpathname(pDb->pEnv, zFilename, &zFull);
- assert( rc==LSM_OK || zFull==0 );
-
- /* Connect to the database. */
- if( rc==LSM_OK ){
- rc = lsmDbDatabaseConnect(pDb, zFull);
- }
-
- if( pDb->bReadonly==0 ){
- /* Configure the file-system connection with the page-size and block-size
- ** of this database. Even if the database file is zero bytes in size
- ** on disk, these values have been set in shared-memory by now, and so
- ** are guaranteed not to change during the lifetime of this connection.
- */
- if( rc==LSM_OK && LSM_OK==(rc = lsmCheckpointLoad(pDb, 0)) ){
- lsmFsSetPageSize(pDb->pFS, lsmCheckpointPgsz(pDb->aSnapshot));
- lsmFsSetBlockSize(pDb->pFS, lsmCheckpointBlksz(pDb->aSnapshot));
- }
- }
-
- lsmFree(pDb->pEnv, zFull); /* zFull is NULL here if getFullpathname() failed */
- assertRwclientLockValue(pDb);
- }
-
- assert( pDb->bReadonly==0 || pDb->bReadonly==1 );
- assert( rc!=LSM_OK || (pDb->pShmhdr==0)==(pDb->bReadonly==1) );
-
- return rc;
-}
-/* Close handle pDb and free it, unless cursors or transactions are open. */
-int lsm_close(lsm_db *pDb){
-  if( pDb==0 ) return LSM_OK;
-  assert_db_state(pDb);
-  if( pDb->pCsr!=0 || pDb->nTransOpen!=0 ){
-    /* Still in use: leave the handle untouched and report misuse. */
-    return LSM_MISUSE_BKPT;
-  }
-
-  /* Release the cursor cache and the client snapshot. */
-  lsmMCursorFreeCache(pDb);
-  lsmFreeSnapshot(pDb->pEnv, pDb->pClient);
-  pDb->pClient = 0;
-  assertRwclientLockValue(pDb);
-
-  /* Release the database connection, the log writer and the file-system. */
-  lsmDbDatabaseRelease(pDb);
-  lsmLogClose(pDb);
-  lsmFsClose(pDb->pFS);
-
-  /* Invoke any destructors registered for the compression or
-  ** compression factory callbacks. */
-  if( pDb->factory.xFree!=0 ) pDb->factory.xFree(pDb->factory.pCtx);
-  if( pDb->compress.xFree!=0 ) pDb->compress.xFree(pDb->compress.pCtx);
-
-  /* Finally free the arrays owned by the handle, then the handle itself. */
-  lsmFree(pDb->pEnv, pDb->rollback.aArray);
-  lsmFree(pDb->pEnv, pDb->aTrans);
-  lsmFree(pDb->pEnv, pDb->apShm);
-  lsmFree(pDb->pEnv, pDb);
-  return LSM_OK;
-}
-/* Query or set configuration parameter eParam; varargs depend on eParam. */
-int lsm_config(lsm_db *pDb, int eParam, ...){
- int rc = LSM_OK; /* Return code */
- va_list ap;
- va_start(ap, eParam);
-
- switch( eParam ){
- case LSM_CONFIG_AUTOFLUSH: {
- /* This parameter is read and written in KB. But all internal
- ** processing is done in bytes. */
- int *piVal = va_arg(ap, int *);
- int iVal = *piVal;
- if( iVal>=0 && iVal<=(1024*1024) ){ /* Other values only query */
- pDb->nTreeLimit = iVal*1024;
- }
- *piVal = (pDb->nTreeLimit / 1024);
- break;
- }
-
- case LSM_CONFIG_AUTOWORK: {
- int *piVal = va_arg(ap, int *);
- if( *piVal>=0 ){
- pDb->bAutowork = *piVal;
- }
- *piVal = pDb->bAutowork;
- break;
- }
-
- case LSM_CONFIG_AUTOCHECKPOINT: {
- /* This parameter is read and written in KB. But all internal processing
- ** (including the lsm_db.nAutockpt variable) is done in bytes. */
- int *piVal = va_arg(ap, int *);
- if( *piVal>=0 ){
- int iVal = *piVal;
- pDb->nAutockpt = (i64)iVal * 1024;
- }
- *piVal = (int)(pDb->nAutockpt / 1024);
- break;
- }
-
- case LSM_CONFIG_PAGE_SIZE: {
- int *piVal = va_arg(ap, int *);
- if( pDb->pDatabase ){
- /* If lsm_open() has been called, this is a read-only parameter.
- ** Set the output variable to the page-size according to the
- ** FileSystem object. */
- *piVal = lsmFsPageSize(pDb->pFS);
- }else{
- if( *piVal>=256 && *piVal<=65536 && ((*piVal-1) & *piVal)==0 ){
- pDb->nDfltPgsz = *piVal; /* A power of two from 256 to 65536 */
- }else{
- *piVal = pDb->nDfltPgsz;
- }
- }
- break;
- }
-
- case LSM_CONFIG_BLOCK_SIZE: {
- /* This parameter is read and written in KB. But all internal
- ** processing is done in bytes. */
- int *piVal = va_arg(ap, int *);
- if( pDb->pDatabase ){
- /* If lsm_open() has been called, this is a read-only parameter.
- ** Set the output variable to the block-size in KB according to the
- ** FileSystem object. */
- *piVal = lsmFsBlockSize(pDb->pFS) / 1024;
- }else{
- int iVal = *piVal;
- if( iVal>=64 && iVal<=65536 && ((iVal-1) & iVal)==0 ){
- pDb->nDfltBlksz = iVal * 1024; /* A power of two from 64 to 65536 KB */
- }else{
- *piVal = pDb->nDfltBlksz / 1024;
- }
- }
- break;
- }
-
- case LSM_CONFIG_SAFETY: {
- int *piVal = va_arg(ap, int *);
- if( *piVal>=0 && *piVal<=2 ){
- pDb->eSafety = *piVal;
- }
- *piVal = pDb->eSafety;
- break;
- }
-
- case LSM_CONFIG_MMAP: {
- int *piVal = va_arg(ap, int *);
- if( pDb->iReader<0 && *piVal>=0 ){
- pDb->iMmap = *piVal;
- rc = lsmFsConfigure(pDb);
- }
- *piVal = pDb->iMmap;
- break;
- }
-
- case LSM_CONFIG_USE_LOG: {
- int *piVal = va_arg(ap, int *);
- if( pDb->nTransOpen==0 && (*piVal==0 || *piVal==1) ){
- pDb->bUseLog = *piVal;
- }
- *piVal = pDb->bUseLog;
- break;
- }
-
- case LSM_CONFIG_AUTOMERGE: {
- int *piVal = va_arg(ap, int *);
- if( *piVal>1 ) pDb->nMerge = *piVal;
- *piVal = pDb->nMerge;
- break;
- }
-
- case LSM_CONFIG_MAX_FREELIST: {
- int *piVal = va_arg(ap, int *);
- if( *piVal>=2 && *piVal<=LSM_MAX_FREELIST_ENTRIES ){
- pDb->nMaxFreelist = *piVal;
- }
- *piVal = pDb->nMaxFreelist;
- break;
- }
-
- case LSM_CONFIG_MULTIPLE_PROCESSES: {
- int *piVal = va_arg(ap, int *);
- if( pDb->pDatabase ){
- /* If lsm_open() has been called, this is a read-only parameter.
- ** Set the output variable to true if this connection is currently
- ** in multi-process mode. */
- *piVal = lsmDbMultiProc(pDb);
- }else{
- pDb->bMultiProc = *piVal = (*piVal!=0);
- }
- break;
- }
-
- case LSM_CONFIG_READONLY: {
- int *piVal = va_arg(ap, int *);
- /* If lsm_open() has been called, this is a read-only parameter. */
- if( pDb->pDatabase==0 && *piVal>=0 ){
- pDb->bReadonly = *piVal = (*piVal!=0);
- }
- *piVal = pDb->bReadonly;
- break;
- }
-
- case LSM_CONFIG_SET_COMPRESSION: {
- lsm_compress *p = va_arg(ap, lsm_compress *);
- if( pDb->iReader>=0 && pDb->bInFactory==0 ){
- /* May not change compression schemes with an open transaction */
- rc = LSM_MISUSE_BKPT;
- }else{
- if( pDb->compress.xFree ){
- /* Invoke any destructor belonging to the current compression. */
- pDb->compress.xFree(pDb->compress.pCtx);
- }
- if( p->xBound==0 ){ /* No xBound method: revert to no compression */
- memset(&pDb->compress, 0, sizeof(lsm_compress));
- pDb->compress.iId = LSM_COMPRESSION_NONE;
- }else{
- memcpy(&pDb->compress, p, sizeof(lsm_compress));
- }
- rc = lsmFsConfigure(pDb);
- }
- break;
- }
-
- case LSM_CONFIG_SET_COMPRESSION_FACTORY: {
- lsm_compress_factory *p = va_arg(ap, lsm_compress_factory *);
- if( pDb->factory.xFree ){
- /* Invoke any destructor belonging to the current factory. */
- pDb->factory.xFree(pDb->factory.pCtx);
- }
- memcpy(&pDb->factory, p, sizeof(lsm_compress_factory));
- break;
- }
-
- case LSM_CONFIG_GET_COMPRESSION: {
- lsm_compress *p = va_arg(ap, lsm_compress *);
- memcpy(p, &pDb->compress, sizeof(lsm_compress));
- break;
- }
-
- default:
- rc = LSM_MISUSE; /* Unrecognized eParam */
- break;
- }
-
- va_end(ap);
- return rc;
-}
-/* Append zPre and "{iFirst iLastPg iRoot nSize}" for segment pSeg to pStr. */
-void lsmAppendSegmentList(LsmString *pStr, char *zPre, Segment *pSeg){
- lsmStringAppendf(pStr, "%s{%lld %lld %lld %lld}", zPre,
- pSeg->iFirst, pSeg->iLastPg, pSeg->iRoot, pSeg->nSize
- ); /* NOTE(review): %lld assumes all four fields are 64-bit - confirm */
-}
-/* Make sure pDb has a worker snapshot; *pbUnlock is set if it is new. */
-static int infoGetWorker(lsm_db *pDb, Snapshot **pp, int *pbUnlock){
-  assert( *pbUnlock==0 );
-  if( pDb->pWorker==0 ){
-    int rc = lsmBeginWork(pDb);
-    if( rc!=LSM_OK ) return rc;
-    *pbUnlock = 1;
-  }
-  if( pp!=0 ){
-    *pp = pDb->pWorker;
-  }
-  return LSM_OK;
-}
-/* Counterpart of infoGetWorker(): finish the work if bUnlock is set. */
-static void infoFreeWorker(lsm_db *pDb, int bUnlock){
-  int rcdummy = LSM_BUSY;         /* Error code passed to lsmFinishWork() */
-
-  if( bUnlock==0 ) return;
-  lsmFinishWork(pDb, 0, &rcdummy);
-}
-/* Set *pzOut to a list holding one "{age lhs rhs...}" entry per level. */
-int lsmStructList(
-  lsm_db *pDb,                    /* Database handle */
-  char **pzOut                    /* OUT: Nul-terminated string (tcl list) */
-){
-  Level *pTopLevel = 0;           /* Top level of snapshot to report on */
-  int rc = LSM_OK;                /* Return code */
-  Level *p;                       /* Iterator over the levels */
-  LsmString s;                    /* Accumulates the output text */
-  Snapshot *pWorker;              /* Worker snapshot */
-  int bUnlock = 0;                /* True to release the worker on exit */
-
-  /* Obtain the worker snapshot */
-  rc = infoGetWorker(pDb, &pWorker, &bUnlock);
-  if( rc!=LSM_OK ) return rc;
-
-  /* Format the contents of the snapshot as text */
-  pTopLevel = lsmDbSnapshotLevel(pWorker);
-  lsmStringInit(&s, pDb->pEnv);
-  for(p=pTopLevel; rc==LSM_OK && p; p=p->pNext){
-    int i;                        /* Index into p->aRhs[] */
-    lsmStringAppendf(&s, "%s{%d", (s.n ? " " : ""), (int)p->iAge);
-    lsmAppendSegmentList(&s, " ", &p->lhs);
-    for(i=0; rc==LSM_OK && i<p->nRight; i++){
-      lsmAppendSegmentList(&s, " ", &p->aRhs[i]);
-    }
-    lsmStringAppend(&s, "}", 1);
-  }
-  rc = s.n>=0 ? LSM_OK : LSM_NOMEM;  /* s.n<0 is reported as LSM_NOMEM */
-
-  /* Release the snapshot and return */
-  infoFreeWorker(pDb, bUnlock);
-  *pzOut = s.z;
-  return rc;
-}
-/* Freelist walker callback: append "{iBlk iSnapshot}" to LsmString pCtx. */
-static int infoFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){
-  LsmString *pOut = (LsmString *)pCtx;
-  lsmStringAppendf(pOut, "%s{%d %lld}", (pOut->n ? " " : ""), iBlk, iSnapshot);
-  return 0;
-}
-
-int lsmInfoFreelist(lsm_db *pDb, char **pzOut){
- Snapshot *pWorker; /* Worker snapshot */
- int bUnlock = 0;
- LsmString s;
- int rc;
-
- /* Obtain the worker snapshot */
- rc = infoGetWorker(pDb, &pWorker, &bUnlock);
- if( rc!=LSM_OK ) return rc;
-
- lsmStringInit(&s, pDb->pEnv);
- rc = lsmWalkFreelist(pDb, 0, infoFreelistCb, &s);
- if( rc!=LSM_OK ){
- lsmFree(pDb->pEnv, s.z);
- }else{
- *pzOut = s.z;
- }
-
- /* Release the snapshot and return */
- infoFreeWorker(pDb, bUnlock);
- return rc;
-}
-/* Report the sizes, in KB, of the "old" and "new" in-memory trees. */
-static int infoTreeSize(lsm_db *db, int *pnOldKB, int *pnNewKB){
-  ShmHeader *pShm = db->pShmhdr;
-  TreeHeader *pHdr = &pShm->hdr1;
-  int nOldKB = 0;                 /* Value to store in *pnOldKB */
-
-  /* This code reads shared memory and trusts it without verifying any
-  ** checksums, which exposes it to two race conditions:
-  **
-  **   * The two sizes read - TreeHeader.root.nByte and oldroot.nByte - are
-  **     32-bit fields. Reading one of them is assumed to be atomic, so
-  **     that a partially written garbage value is never seen. The two
-  **     values may however be mutually inconsistent.
-  **
-  **   * TreeHeader.iOldLog is a 64-bit value, and lsmCheckpointLogOffset()
-  **     reads a 64-bit value from a snapshot stored in shared memory. In
-  **     each case it is assumed possible to read a partially written
-  **     garbage value. If that happens, the size reported for the "old"
-  **     tree may be that of an "old" tree that was recently flushed to
-  **     disk.
-  **
-  ** Given the context in which this function is called (as a result of an
-  ** lsm_info(LSM_INFO_TREE_SIZE) request), neither of these are considered to
-  ** be problems.
-  */
-  *pnNewKB = ((int)pHdr->root.nByte + 1023) / 1024;
-
-  /* The old tree is reported as empty unless one exists (iOldShmid!=0) and
-  ** its log offset differs from that of the checkpoint in aSnap1. */
-  if( pHdr->iOldShmid!=0
-   && pHdr->iOldLog!=lsmCheckpointLogOffset(pShm->aSnap1)
-  ){
-    nOldKB = ((int)pHdr->oldroot.nByte + 1023) / 1024;
-  }
-  *pnOldKB = nOldKB;
-  return LSM_OK;
-}
-/* Report information item eParam; the varargs expected depend on eParam. */
-int lsm_info(lsm_db *pDb, int eParam, ...){
- int rc = LSM_OK; /* Return code */
- va_list ap;
- va_start(ap, eParam);
-
- switch( eParam ){
- case LSM_INFO_NWRITE: {
- int *piVal = va_arg(ap, int *);
- *piVal = lsmFsNWrite(pDb->pFS);
- break;
- }
-
- case LSM_INFO_NREAD: {
- int *piVal = va_arg(ap, int *);
- *piVal = lsmFsNRead(pDb->pFS);
- break;
- }
-
- case LSM_INFO_DB_STRUCTURE: {
- char **pzVal = va_arg(ap, char **);
- rc = lsmStructList(pDb, pzVal);
- break;
- }
-
- case LSM_INFO_ARRAY_STRUCTURE: {
- LsmPgno pgno = va_arg(ap, LsmPgno);
- char **pzVal = va_arg(ap, char **);
- rc = lsmInfoArrayStructure(pDb, 0, pgno, pzVal);
- break;
- }
-
- case LSM_INFO_ARRAY_PAGES: {
- LsmPgno pgno = va_arg(ap, LsmPgno);
- char **pzVal = va_arg(ap, char **);
- rc = lsmInfoArrayPages(pDb, pgno, pzVal);
- break;
- }
-
- case LSM_INFO_PAGE_HEX_DUMP:
- case LSM_INFO_PAGE_ASCII_DUMP: {
- LsmPgno pgno = va_arg(ap, LsmPgno);
- char **pzVal = va_arg(ap, char **);
- int bUnlock = 0; /* True if infoGetWorker() obtained the worker */
- rc = infoGetWorker(pDb, 0, &bUnlock);
- if( rc==LSM_OK ){
- int bHex = (eParam==LSM_INFO_PAGE_HEX_DUMP);
- rc = lsmInfoPageDump(pDb, pgno, bHex, pzVal);
- }
- infoFreeWorker(pDb, bUnlock);
- break;
- }
-
- case LSM_INFO_LOG_STRUCTURE: {
- char **pzVal = va_arg(ap, char **);
- rc = lsmInfoLogStructure(pDb, pzVal);
- break;
- }
-
- case LSM_INFO_FREELIST: {
- char **pzVal = va_arg(ap, char **);
- rc = lsmInfoFreelist(pDb, pzVal);
- break;
- }
-
- case LSM_INFO_CHECKPOINT_SIZE: {
- int *pnKB = va_arg(ap, int *);
- rc = lsmCheckpointSize(pDb, pnKB);
- break;
- }
-
- case LSM_INFO_TREE_SIZE: {
- int *pnOld = va_arg(ap, int *); /* First vararg: old tree size in KB */
- int *pnNew = va_arg(ap, int *); /* Second vararg: new tree size in KB */
- rc = infoTreeSize(pDb, pnOld, pnNew);
- break;
- }
-
- case LSM_INFO_COMPRESSION_ID: {
- unsigned int *piOut = va_arg(ap, unsigned int *);
- if( pDb->pClient ){
- *piOut = pDb->pClient->iCmpId;
- }else{
- rc = lsmInfoCompressionId(pDb, piOut);
- }
- break;
- }
-
- default:
- rc = LSM_MISUSE; /* Unrecognized eParam */
- break;
- }
-
- va_end(ap);
- return rc;
-}
-/* Shared implementation of lsm_insert/lsm_delete/lsm_delete_range. */
-static int doWriteOp(
- lsm_db *pDb,
- int bDeleteRange, /* True for a range delete (pVal/nVal is key 2) */
- const void *pKey, int nKey, /* Key to write or delete */
- const void *pVal, int nVal /* Value to write. Or nVal==-1 for a delete */
-){
- int rc = LSM_OK; /* Return code */
- int bCommit = 0; /* True to commit before returning */
-
- if( pDb->nTransOpen==0 ){
- bCommit = 1;
- rc = lsm_begin(pDb, 1);
- }
-
- if( rc==LSM_OK ){
- int eType = (bDeleteRange ? LSM_DRANGE : (nVal>=0?LSM_WRITE:LSM_DELETE));
- rc = lsmLogWrite(pDb, eType, (void *)pKey, nKey, (void *)pVal, nVal);
- }
-
- lsmSortedSaveTreeCursors(pDb);
-
- if( rc==LSM_OK ){
- int pgsz = lsmFsPageSize(pDb->pFS);
- int nQuant = LSM_AUTOWORK_QUANT * pgsz; /* Tree growth per unit of auto-work */
- int nBefore; /* Tree size before the write */
- int nAfter; /* Tree size after the write */
- int nDiff; /* Number of nQuant boundaries crossed */
-
- if( nQuant>pDb->nTreeLimit ){
- nQuant = LSM_MAX(pDb->nTreeLimit, pgsz);
- }
-
- nBefore = lsmTreeSize(pDb);
- if( bDeleteRange ){
- rc = lsmTreeDelete(pDb, (void *)pKey, nKey, (void *)pVal, nVal);
- }else{
- rc = lsmTreeInsert(pDb, (void *)pKey, nKey, (void *)pVal, nVal);
- }
-
- nAfter = lsmTreeSize(pDb);
- nDiff = (nAfter/nQuant) - (nBefore/nQuant);
- if( rc==LSM_OK && pDb->bAutowork && nDiff!=0 ){
- rc = lsmSortedAutoWork(pDb, nDiff * LSM_AUTOWORK_QUANT);
- }
- }
-
- /* If a transaction was opened at the start of this function, commit it.
- ** Or, if an error has occurred, roll it back. */
- if( bCommit ){
- if( rc==LSM_OK ){
- rc = lsm_commit(pDb, 0);
- }else{
- lsm_rollback(pDb, 0);
- }
- }
-
- return rc;
-}
-
-/* Write the value (pVal/nVal) for key (pKey/nKey) into the database. */
-int lsm_insert(
-  lsm_db *db,                     /* Database connection */
-  const void *pKey, int nKey,     /* Key to write or delete */
-  const void *pVal, int nVal      /* Value to write. Or nVal==-1 for a delete */
-){
-  const int bDeleteRange = 0;     /* This is not a range delete */
-  int rc = doWriteOp(db, bDeleteRange, pKey, nKey, pVal, nVal);
-  return rc;
-}
-
-/* Delete the value stored under key (pKey/nKey) from the database. */
-int lsm_delete(lsm_db *db, const void *pKey, int nKey){
-  const void *pNoVal = 0;         /* A delete carries no value... */
-  const int nNoVal = -1;          /* ...which is signalled by nVal==-1 */
-  return doWriteOp(db, 0, pKey, nKey, pNoVal, nNoVal);
-}
-
-/*
-** Delete a range of database keys.
-*/
-int lsm_delete_range(
- lsm_db *db, /* Database handle */
- const void *pKey1, int nKey1, /* Lower bound of range to delete */
- const void *pKey2, int nKey2 /* Upper bound of range to delete */
-){
- int rc = LSM_OK;
- if( db->xCmp((void *)pKey1, nKey1, (void *)pKey2, nKey2)<0 ){
- rc = doWriteOp(db, 1, pKey1, nKey1, pKey2, nKey2);
- }
- return rc;
-}
-
-/*
-** Open a new cursor handle.
-**
-** If there are currently no other open cursor handles, and no open write
-** transaction, open a read transaction here.
-*/
-int lsm_csr_open(lsm_db *pDb, lsm_cursor **ppCsr){
- int rc = LSM_OK; /* Return code */
- MultiCursor *pCsr = 0; /* New cursor object */
-
- /* Open a read transaction if one is not already open. */
- assert_db_state(pDb);
-
- if( pDb->pShmhdr==0 ){
- assert( pDb->bReadonly );
- rc = lsmBeginRoTrans(pDb);
- }else if( pDb->iReader<0 ){
- rc = lsmBeginReadTrans(pDb);
- }
-
- /* Allocate the multi-cursor. */
- if( rc==LSM_OK ){
- rc = lsmMCursorNew(pDb, &pCsr);
- }
-
- /* If an error has occurred, set the output to NULL and delete any partially
- ** allocated cursor. If this means there are no open cursors, release the
- ** client snapshot. */
- if( rc!=LSM_OK ){
- lsmMCursorClose(pCsr, 0);
- dbReleaseClientSnapshot(pDb);
- }
-
- assert_db_state(pDb);
- *ppCsr = (lsm_cursor *)pCsr;
- return rc;
-}
-
-/*
-** Close a cursor opened using lsm_csr_open().
-*/
-int lsm_csr_close(lsm_cursor *p){
- if( p ){
- lsm_db *pDb = lsmMCursorDb((MultiCursor *)p);
- assert_db_state(pDb);
- lsmMCursorClose((MultiCursor *)p, 1);
- dbReleaseClientSnapshot(pDb);
- assert_db_state(pDb);
- }
- return LSM_OK;
-}
-
-/*
-** Attempt to seek the cursor to the database entry specified by pKey/nKey.
-** If an error occurs (e.g. an OOM or IO error), return an LSM error code.
-** Otherwise, return LSM_OK.
-*/
-int lsm_csr_seek(lsm_cursor *pCsr, const void *pKey, int nKey, int eSeek){
- return lsmMCursorSeek((MultiCursor *)pCsr, 0, (void *)pKey, nKey, eSeek);
-}
-
-int lsm_csr_next(lsm_cursor *pCsr){
- return lsmMCursorNext((MultiCursor *)pCsr);
-}
-
-int lsm_csr_prev(lsm_cursor *pCsr){
- return lsmMCursorPrev((MultiCursor *)pCsr);
-}
-
-int lsm_csr_first(lsm_cursor *pCsr){
- return lsmMCursorFirst((MultiCursor *)pCsr);
-}
-
-int lsm_csr_last(lsm_cursor *pCsr){
- return lsmMCursorLast((MultiCursor *)pCsr);
-}
-
-int lsm_csr_valid(lsm_cursor *pCsr){
- return lsmMCursorValid((MultiCursor *)pCsr);
-}
-
-int lsm_csr_key(lsm_cursor *pCsr, const void **ppKey, int *pnKey){
- return lsmMCursorKey((MultiCursor *)pCsr, (void **)ppKey, pnKey);
-}
-
-int lsm_csr_value(lsm_cursor *pCsr, const void **ppVal, int *pnVal){
- return lsmMCursorValue((MultiCursor *)pCsr, (void **)ppVal, pnVal);
-}
-
-void lsm_config_log(
- lsm_db *pDb,
- void (*xLog)(void *, int, const char *),
- void *pCtx
-){
- pDb->xLog = xLog;
- pDb->pLogCtx = pCtx;
-}
-
-void lsm_config_work_hook(
- lsm_db *pDb,
- void (*xWork)(lsm_db *, void *),
- void *pCtx
-){
- pDb->xWork = xWork;
- pDb->pWorkCtx = pCtx;
-}
-
-void lsmLogMessage(lsm_db *pDb, int rc, const char *zFormat, ...){
- if( pDb->xLog ){
- LsmString s;
- va_list ap, ap2;
- lsmStringInit(&s, pDb->pEnv);
- va_start(ap, zFormat);
- va_start(ap2, zFormat);
- lsmStringVAppendf(&s, zFormat, ap, ap2);
- va_end(ap);
- va_end(ap2);
- pDb->xLog(pDb->pLogCtx, rc, s.z);
- lsmStringClear(&s);
- }
-}
-
-int lsm_begin(lsm_db *pDb, int iLevel){
- int rc;
-
- assert_db_state( pDb );
- rc = (pDb->bReadonly ? LSM_READONLY : LSM_OK);
-
- /* A value less than zero means open one more transaction. */
- if( iLevel<0 ) iLevel = pDb->nTransOpen + 1;
- if( iLevel>pDb->nTransOpen ){
- int i;
-
- /* Extend the pDb->aTrans[] array if required. */
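- if( rc==LSM_OK && pDb->nTransAlloc<iLevel ){
- TransMark *aNew; /* New allocation */
- int nByte = sizeof(TransMark) * (iLevel+1);
- aNew = (TransMark *)lsmRealloc(pDb->pEnv, pDb->aTrans, nByte);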
- if( !aNew ){
- rc = LSM_NOMEM;
- }else{
- nByte = sizeof(TransMark) * (iLevel+1 - pDb->nTransAlloc);
- memset(&aNew[pDb->nTransAlloc], 0, nByte);
- pDb->nTransAlloc = iLevel+1;
- pDb->aTrans = aNew;
- }
- }
-
- if( rc==LSM_OK && pDb->nTransOpen==0 ){
- rc = lsmBeginWriteTrans(pDb);
- }
-
- if( rc==LSM_OK ){
- for(i=pDb->nTransOpen; i<iLevel; i++){
- lsmTreeMark(pDb, &pDb->aTrans[i].tree);
- lsmLogTell(pDb, &pDb->aTrans[i].log);
- }
- pDb->nTransOpen = iLevel;
- }
- }
-
- return rc;
-}
-
-int lsm_commit(lsm_db *pDb, int iLevel){
- int rc = LSM_OK;
-
- assert_db_state( pDb );
-
- /* A value less than zero means close the innermost nested transaction. */
- if( iLevel<0 ) iLevel = LSM_MAX(0, pDb->nTransOpen - 1);
-
- if( iLevel<pDb->nTransOpen ){
- if( iLevel==0 ){
- int rc2;
- /* Commit the transaction to disk. */
- if( rc==LSM_OK ) rc = lsmLogCommit(pDb);
- if( rc==LSM_OK && pDb->eSafety==LSM_SAFETY_FULL ){
- rc = lsmFsSyncLog(pDb->pFS);
- }
- rc2 = lsmFinishWriteTrans(pDb, (rc==LSM_OK));
- if( rc==LSM_OK ) rc = rc2;
- }
- pDb->nTransOpen = iLevel;
- }
- dbReleaseClientSnapshot(pDb);
- return rc;
-}
-
-int lsm_rollback(lsm_db *pDb, int iLevel){
- int rc = LSM_OK;
- assert_db_state( pDb );
-
- if( pDb->nTransOpen ){
- /* A value less than zero means close the innermost nested transaction. */
- if( iLevel<0 ) iLevel = LSM_MAX(0, pDb->nTransOpen - 1);
-
- if( iLevel<=pDb->nTransOpen ){
- TransMark *pMark = &pDb->aTrans[(iLevel==0 ? 0 : iLevel-1)];
- lsmTreeRollback(pDb, &pMark->tree);
- if( iLevel ) lsmLogSeek(pDb, &pMark->log);
- pDb->nTransOpen = iLevel;
- }
-
- if( pDb->nTransOpen==0 ){
- lsmFinishWriteTrans(pDb, 0);
- }
- dbReleaseClientSnapshot(pDb);
- }
-
- return rc;
-}
-
-int lsm_get_user_version(lsm_db *pDb, unsigned int *piUsr){
- int rc = LSM_OK; /* Return code */
-
- /* Open a read transaction if one is not already open. */
- assert_db_state(pDb);
- if( pDb->pShmhdr==0 ){
- assert( pDb->bReadonly );
- rc = lsmBeginRoTrans(pDb);
- }else if( pDb->iReader<0 ){
- rc = lsmBeginReadTrans(pDb);
- }
-
- /* Read the user version from the tree header. */
- if( rc==LSM_OK ){
- *piUsr = pDb->treehdr.iUsrVersion;
- }
-
- dbReleaseClientSnapshot(pDb);
- assert_db_state(pDb);
- return rc;
-}
-
-int lsm_set_user_version(lsm_db *pDb, unsigned int iUsr){
- int rc = LSM_OK; /* Return code */
- int bCommit = 0; /* True to commit before returning */
-
- if( pDb->nTransOpen==0 ){
- bCommit = 1;
- rc = lsm_begin(pDb, 1);
- }
-
- if( rc==LSM_OK ){
- pDb->treehdr.iUsrVersion = iUsr;
- }
-
- /* If a transaction was opened at the start of this function, commit it.
- ** Or, if an error has occurred, roll it back. */
- if( bCommit ){
- if( rc==LSM_OK ){
- rc = lsm_commit(pDb, 0);
- }else{
- lsm_rollback(pDb, 0);
- }
- }
-
- return rc;
-}
diff --git a/ext/lsm1/lsm_mem.c b/ext/lsm1/lsm_mem.c
deleted file mode 100644
index 13dd9fe312..0000000000
--- a/ext/lsm1/lsm_mem.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
-** 2011-08-18
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** Helper routines for memory allocation.
-*/
-#include "lsmInt.h"
-
-/*
-** The following routines are called internally by LSM sub-routines. In
-** this case a valid environment pointer must be supplied.
-*/
-void *lsmMalloc(lsm_env *pEnv, size_t N){
- assert( pEnv );
- return pEnv->xMalloc(pEnv, N);
-}
-void lsmFree(lsm_env *pEnv, void *p){
- assert( pEnv );
- pEnv->xFree(pEnv, p);
-}
-void *lsmRealloc(lsm_env *pEnv, void *p, size_t N){
- assert( pEnv );
- return pEnv->xRealloc(pEnv, p, N);
-}
-
-/*
-** Core memory allocation routines for LSM.
-*/
-void *lsm_malloc(lsm_env *pEnv, size_t N){
- return lsmMalloc(pEnv ? pEnv : lsm_default_env(), N);
-}
-void lsm_free(lsm_env *pEnv, void *p){
- lsmFree(pEnv ? pEnv : lsm_default_env(), p);
-}
-void *lsm_realloc(lsm_env *pEnv, void *p, size_t N){
- return lsmRealloc(pEnv ? pEnv : lsm_default_env(), p, N);
-}
-
-void *lsmMallocZero(lsm_env *pEnv, size_t N){
- void *pRet;
- assert( pEnv );
- pRet = lsmMalloc(pEnv, N);
- if( pRet ) memset(pRet, 0, N);
- return pRet;
-}
-
-void *lsmMallocRc(lsm_env *pEnv, size_t N, int *pRc){
- void *pRet = 0;
- if( *pRc==LSM_OK ){
- pRet = lsmMalloc(pEnv, N);
- if( pRet==0 ){
- *pRc = LSM_NOMEM_BKPT;
- }
- }
- return pRet;
-}
-
-void *lsmMallocZeroRc(lsm_env *pEnv, size_t N, int *pRc){
- void *pRet = 0;
- if( *pRc==LSM_OK ){
- pRet = lsmMallocZero(pEnv, N);
- if( pRet==0 ){
- *pRc = LSM_NOMEM_BKPT;
- }
- }
- return pRet;
-}
-
-void *lsmReallocOrFree(lsm_env *pEnv, void *p, size_t N){
- void *pNew;
- pNew = lsm_realloc(pEnv, p, N);
- if( !pNew ) lsm_free(pEnv, p);
- return pNew;
-}
-
-void *lsmReallocOrFreeRc(lsm_env *pEnv, void *p, size_t N, int *pRc){
- void *pRet = 0;
- if( *pRc ){
- lsmFree(pEnv, p);
- }else{
- pRet = lsmReallocOrFree(pEnv, p, N);
- if( !pRet ) *pRc = LSM_NOMEM_BKPT;
- }
- return pRet;
-}
-
-char *lsmMallocStrdup(lsm_env *pEnv, const char *zIn){
- int nByte;
- char *zRet;
- nByte = strlen(zIn);
- zRet = lsmMalloc(pEnv, nByte+1);
- if( zRet ){
- memcpy(zRet, zIn, nByte+1);
- }
- return zRet;
-}
diff --git a/ext/lsm1/lsm_mutex.c b/ext/lsm1/lsm_mutex.c
deleted file mode 100644
index cb99b2a61e..0000000000
--- a/ext/lsm1/lsm_mutex.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
-** 2012-01-30
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** Mutex functions for LSM.
-*/
-#include "lsmInt.h"
-
-/*
-** Allocate a new mutex.
-*/
-int lsmMutexNew(lsm_env *pEnv, lsm_mutex **ppNew){
- return pEnv->xMutexNew(pEnv, ppNew);
-}
-
-/*
-** Return a handle for one of the static mutexes.
-*/
-int lsmMutexStatic(lsm_env *pEnv, int iMutex, lsm_mutex **ppStatic){
- return pEnv->xMutexStatic(pEnv, iMutex, ppStatic);
-}
-
-/*
-** Free a mutex allocated by lsmMutexNew().
-*/
-void lsmMutexDel(lsm_env *pEnv, lsm_mutex *pMutex){
- if( pMutex ) pEnv->xMutexDel(pMutex);
-}
-
-/*
-** Enter a mutex.
-*/
-void lsmMutexEnter(lsm_env *pEnv, lsm_mutex *pMutex){
- pEnv->xMutexEnter(pMutex);
-}
-
-/*
-** Attempt to enter a mutex, but do not block. If successful, return zero.
-** Otherwise, if the mutex is already held by some other thread and is not
-** entered, return non zero.
-**
-** Each successful call to this function must be matched by a call to
-** lsmMutexLeave().
-*/
-int lsmMutexTry(lsm_env *pEnv, lsm_mutex *pMutex){
- return pEnv->xMutexTry(pMutex);
-}
-
-/*
-** Leave a mutex.
-*/
-void lsmMutexLeave(lsm_env *pEnv, lsm_mutex *pMutex){
- pEnv->xMutexLeave(pMutex);
-}
-
-#ifndef NDEBUG
-/*
-** Return non-zero if the mutex passed as the second argument is held
-** by the calling thread, or zero otherwise. If the implementation is not
-** able to tell if the mutex is held by the caller, it should return
-** non-zero.
-**
-** This function is only used as part of assert() statements.
-*/
-int lsmMutexHeld(lsm_env *pEnv, lsm_mutex *pMutex){
- return pEnv->xMutexHeld ? pEnv->xMutexHeld(pMutex) : 1;
-}
-
-/*
-** Return non-zero if the mutex passed as the second argument is not
-** held by the calling thread, or zero otherwise. If the implementation
-** is not able to tell if the mutex is held by the caller, it should
-** return non-zero.
-**
-** This function is only used as part of assert() statements.
-*/
-int lsmMutexNotHeld(lsm_env *pEnv, lsm_mutex *pMutex){
- return pEnv->xMutexNotHeld ? pEnv->xMutexNotHeld(pMutex) : 1;
-}
-#endif
diff --git a/ext/lsm1/lsm_shared.c b/ext/lsm1/lsm_shared.c
deleted file mode 100644
index 09f9338488..0000000000
--- a/ext/lsm1/lsm_shared.c
+++ /dev/null
@@ -1,1994 +0,0 @@
-/*
-** 2012-01-23
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** Utilities used to help multiple LSM clients to coexist within the
-** same process space.
-*/
-#include "lsmInt.h"
-
-/*
-** Global data. All global variables used by code in this file are grouped
-** into the following structure instance.
-**
-** pDatabase:
-** Linked list of all Database objects allocated within this process.
-** This list may not be traversed without holding the global mutex (see
-** functions enterGlobalMutex() and leaveGlobalMutex()).
-*/
-static struct SharedData {
- Database *pDatabase; /* Linked list of all Database objects */
-} gShared;
-
-/*
-** Database structure. There is one such structure for each distinct
-** database accessed by this process. They are stored in the singly linked
-** list starting at global variable gShared.pDatabase. Database objects are
-** reference counted. Once the number of connections to the associated
-** database drops to zero, they are removed from the linked list and deleted.
-**
-** pFile:
-** In multi-process mode, this file descriptor is used to obtain locks
-** and to access shared-memory. In single process mode, its only job is
-** to hold the exclusive lock on the file.
-**
-*/
-struct Database {
- /* Protected by the global mutex (enterGlobalMutex/leaveGlobalMutex): */
- char *zName; /* Canonical path to database file */
- int nName; /* strlen(zName) */
- int nDbRef; /* Number of associated lsm_db handles */
- Database *pDbNext; /* Next Database structure in global list */
-
- /* Protected by the local mutex (pClientMutex) */
- int bReadonly; /* True if Database.pFile is read-only */
- int bMultiProc; /* True if running in multi-process mode */
- lsm_file *pFile; /* Used for locks/shm in multi-proc mode */
- LsmFile *pLsmFile; /* List of deferred closes */
- lsm_mutex *pClientMutex; /* Protects the apShmChunk[] and pConn */
- int nShmChunk; /* Number of entries in apShmChunk[] array */
- void **apShmChunk; /* Array of "shared" memory regions */
- lsm_db *pConn; /* List of connections to this db. */
-};
-
-/*
-** Functions to enter and leave the global mutex. This mutex is used
-** to protect the global linked-list headed at gShared.pDatabase.
-*/
-static int enterGlobalMutex(lsm_env *pEnv){
- lsm_mutex *p;
- int rc = lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p);
- if( rc==LSM_OK ) lsmMutexEnter(pEnv, p);
- return rc;
-}
-static void leaveGlobalMutex(lsm_env *pEnv){
- lsm_mutex *p;
- lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p);
- lsmMutexLeave(pEnv, p);
-}
-
-#ifdef LSM_DEBUG
-static int holdingGlobalMutex(lsm_env *pEnv){
- lsm_mutex *p;
- lsmMutexStatic(pEnv, LSM_MUTEX_GLOBAL, &p);
- return lsmMutexHeld(pEnv, p);
-}
-#endif
-
-#if 0
-static void assertNotInFreelist(Freelist *p, int iBlk){
- int i;
- for(i=0; i<p->nEntry; i++){
- assert( p->aEntry[i].iBlk!=iBlk );
- }
-}
-#else
-# define assertNotInFreelist(x,y)
-#endif
-
-/*
-** Append an entry to the free-list. If (iId==-1), this is a delete.
-*/
-int freelistAppend(lsm_db *db, u32 iBlk, i64 iId){
- lsm_env *pEnv = db->pEnv;
- Freelist *p;
- int i;
-
- assert( iId==-1 || iId>=0 );
- p = db->bUseFreelist ? db->pFreelist : &db->pWorker->freelist;
-
- /* Extend the space allocated for the freelist, if required */
- assert( p->nAlloc>=p->nEntry );
- if( p->nAlloc==p->nEntry ){
- int nNew;
- int nByte;
- FreelistEntry *aNew;
-
- nNew = (p->nAlloc==0 ? 4 : p->nAlloc*2);
- nByte = sizeof(FreelistEntry) * nNew;
- aNew = (FreelistEntry *)lsmRealloc(pEnv, p->aEntry, nByte);
- if( !aNew ) return LSM_NOMEM_BKPT;
- p->nAlloc = nNew;
- p->aEntry = aNew;
- }
-
- for(i=0; i<p->nEntry; i++){
- assert( i==0 || p->aEntry[i].iBlk > p->aEntry[i-1].iBlk );
- if( p->aEntry[i].iBlk>=iBlk ) break;
- }
-
- if( i<p->nEntry && p->aEntry[i].iBlk==iBlk ){
- /* Clobber an existing entry */
- p->aEntry[i].iId = iId;
- }else{
- /* Insert a new entry into the list */
- int nByte = sizeof(FreelistEntry)*(p->nEntry-i);
- memmove(&p->aEntry[i+1], &p->aEntry[i], nByte);
- p->aEntry[i].iBlk = iBlk;
- p->aEntry[i].iId = iId;
- p->nEntry++;
- }
-
- return LSM_OK;
-}
-
-/*
-** This function frees all resources held by the Database structure passed
-** as the only argument.
-*/
-static void freeDatabase(lsm_env *pEnv, Database *p){
- assert( holdingGlobalMutex(pEnv) );
- if( p ){
- /* Free the mutexes */
- lsmMutexDel(pEnv, p->pClientMutex);
-
- if( p->pFile ){
- lsmEnvClose(pEnv, p->pFile);
- }
-
- /* Free the array of shm pointers */
- lsmFree(pEnv, p->apShmChunk);
-
- /* Free the memory allocated for the Database struct itself */
- lsmFree(pEnv, p);
- }
-}
-
-typedef struct DbTruncateCtx DbTruncateCtx;
-struct DbTruncateCtx {
- int nBlock;
- i64 iInUse;
-};
-
-static int dbTruncateCb(void *pCtx, int iBlk, i64 iSnapshot){
- DbTruncateCtx *p = (DbTruncateCtx *)pCtx;
- if( iBlk!=p->nBlock || (p->iInUse>=0 && iSnapshot>=p->iInUse) ) return 1;
- p->nBlock--;
- return 0;
-}
-
-static int dbTruncate(lsm_db *pDb, i64 iInUse){
- int rc = LSM_OK;
-#if 0
- int i;
- DbTruncateCtx ctx;
-
- assert( pDb->pWorker );
- ctx.nBlock = pDb->pWorker->nBlock;
- ctx.iInUse = iInUse;
-
- rc = lsmWalkFreelist(pDb, 1, dbTruncateCb, (void *)&ctx);
- for(i=ctx.nBlock+1; rc==LSM_OK && i<=pDb->pWorker->nBlock; i++){
- rc = freelistAppend(pDb, i, -1);
- }
-
- if( rc==LSM_OK ){
-#ifdef LSM_LOG_FREELIST
- if( ctx.nBlock!=pDb->pWorker->nBlock ){
- lsmLogMessage(pDb, 0,
- "dbTruncate(): truncated db to %d blocks",ctx.nBlock
- );
- }
-#endif
- pDb->pWorker->nBlock = ctx.nBlock;
- }
-#endif
- return rc;
-}
-
-
-/*
-** This function is called during database shutdown (when the number of
-** connections drops from one to zero). It truncates the database file
-** to as small a size as possible without truncating away any blocks that
-** contain data.
-*/
-static int dbTruncateFile(lsm_db *pDb){
- int rc;
-
- assert( pDb->pWorker==0 );
- assert( lsmShmAssertLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL) );
- rc = lsmCheckpointLoadWorker(pDb);
-
- if( rc==LSM_OK ){
- DbTruncateCtx ctx;
-
- /* Walk the database free-block-list in reverse order. Set ctx.nBlock
- ** to the block number of the last block in the database that actually
- ** contains data. */
- ctx.nBlock = pDb->pWorker->nBlock;
- ctx.iInUse = -1;
- rc = lsmWalkFreelist(pDb, 1, dbTruncateCb, (void *)&ctx);
-
- /* If the last block that contains data is not already the last block in
- ** the database file, truncate the database file so that it is. */
- if( rc==LSM_OK ){
- rc = lsmFsTruncateDb(
- pDb->pFS, (i64)ctx.nBlock*lsmFsBlockSize(pDb->pFS)
- );
- }
- }
-
- lsmFreeSnapshot(pDb->pEnv, pDb->pWorker);
- pDb->pWorker = 0;
- return rc;
-}
-
-static void doDbDisconnect(lsm_db *pDb){
- int rc;
-
- if( pDb->bReadonly ){
- lsmShmLock(pDb, LSM_LOCK_DMS3, LSM_LOCK_UNLOCK, 0);
- }else{
- /* Block for an exclusive lock on DMS1. This lock serializes all calls
- ** to doDbConnect() and doDbDisconnect() across all processes. */
- rc = lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL, 1);
- if( rc==LSM_OK ){
-
- lsmShmLock(pDb, LSM_LOCK_DMS2, LSM_LOCK_UNLOCK, 0);
-
- /* Try an exclusive lock on DMS2. If successful, this is the last
- ** connection to the database. In this case flush the contents of the
- ** in-memory tree to disk and write a checkpoint. */
- rc = lsmShmTestLock(pDb, LSM_LOCK_DMS2, 1, LSM_LOCK_EXCL);
- if( rc==LSM_OK ){
- rc = lsmShmTestLock(pDb, LSM_LOCK_CHECKPOINTER, 1, LSM_LOCK_EXCL);
- }
- if( rc==LSM_OK ){
- int bReadonly = 0; /* True if there exist read-only conns. */
-
- /* Flush the in-memory tree, if required. If there is data to flush,
- ** this will create a new client snapshot in Database.pClient. The
- ** checkpoint (serialization) of this snapshot may be written to disk
- ** by the following block.
- **
- ** There is no need to take a WRITER lock here. That there are no
- ** other locks on DMS2 guarantees that there are no other read-write
- ** connections at this time (and the lock on DMS1 guarantees that
- ** no new ones may appear).
- */
- rc = lsmTreeLoadHeader(pDb, 0);
- if( rc==LSM_OK && (lsmTreeHasOld(pDb) || lsmTreeSize(pDb)>0) ){
- rc = lsmFlushTreeToDisk(pDb);
- }
-
- /* Now check if there are any read-only connections. If there are,
- ** then do not truncate the db file or unlink the shared-memory
- ** region. */
- if( rc==LSM_OK ){
- rc = lsmShmTestLock(pDb, LSM_LOCK_DMS3, 1, LSM_LOCK_EXCL);
- if( rc==LSM_BUSY ){
- bReadonly = 1;
- rc = LSM_OK;
- }
- }
-
- /* Write a checkpoint to disk. */
- if( rc==LSM_OK ){
- rc = lsmCheckpointWrite(pDb, 0);
- }
-
- /* If the checkpoint was written successfully, delete the log file
- ** and, if possible, truncate the database file. */
- if( rc==LSM_OK ){
- int bRotrans = 0;
- Database *p = pDb->pDatabase;
-
- /* The log file may only be deleted if there are no read-only
- ** clients running rotrans transactions. */
- rc = lsmDetectRoTrans(pDb, &bRotrans);
- if( rc==LSM_OK && bRotrans==0 ){
- lsmFsCloseAndDeleteLog(pDb->pFS);
- }
-
- /* The database may only be truncated if there exist no read-only
- ** clients - either connected or running rotrans transactions. */
- if( bReadonly==0 && bRotrans==0 ){
- lsmFsUnmap(pDb->pFS);
- dbTruncateFile(pDb);
- if( p->pFile && p->bMultiProc ){
- lsmEnvShmUnmap(pDb->pEnv, p->pFile, 1);
- }
- }
- }
- }
- }
-
- if( pDb->iRwclient>=0 ){
- lsmShmLock(pDb, LSM_LOCK_RWCLIENT(pDb->iRwclient), LSM_LOCK_UNLOCK, 0);
- pDb->iRwclient = -1;
- }
-
- lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0);
- }
- pDb->pShmhdr = 0;
-}
-
-static int doDbConnect(lsm_db *pDb){
- const int nUsMax = 100000; /* Max value for nUs */
- int nUs = 1000; /* us to wait between DMS1 attempts */
- int rc;
-
- /* Obtain a pointer to the shared-memory header */
- assert( pDb->pShmhdr==0 );
- assert( pDb->bReadonly==0 );
-
- /* Block for an exclusive lock on DMS1. This lock serializes all calls
- ** to doDbConnect() and doDbDisconnect() across all processes. */
- while( 1 ){
- rc = lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_EXCL, 1);
- if( rc!=LSM_BUSY ) break;
- lsmEnvSleep(pDb->pEnv, nUs);
- nUs = nUs * 2;
- if( nUs>nUsMax ) nUs = nUsMax;
- }
- if( rc==LSM_OK ){
- rc = lsmShmCacheChunks(pDb, 1);
- }
- if( rc!=LSM_OK ) return rc;
- pDb->pShmhdr = (ShmHeader *)pDb->apShm[0];
-
- /* Try an exclusive lock on DMS2/DMS3. If successful, this is the first
- ** and only connection to the database. In this case initialize the
- ** shared-memory and run log file recovery. */
- assert( LSM_LOCK_DMS3==1+LSM_LOCK_DMS2 );
- rc = lsmShmTestLock(pDb, LSM_LOCK_DMS2, 2, LSM_LOCK_EXCL);
- if( rc==LSM_OK ){
- memset(pDb->pShmhdr, 0, sizeof(ShmHeader));
- rc = lsmCheckpointRecover(pDb);
- if( rc==LSM_OK ){
- rc = lsmLogRecover(pDb);
- }
- if( rc==LSM_OK ){
- ShmHeader *pShm = pDb->pShmhdr;
- pShm->aReader[0].iLsmId = lsmCheckpointId(pShm->aSnap1, 0);
- pShm->aReader[0].iTreeId = pDb->treehdr.iUsedShmid;
- }
- }else if( rc==LSM_BUSY ){
- rc = LSM_OK;
- }
-
- /* Take a shared lock on DMS2. In multi-process mode this lock "cannot"
- ** fail, as connections may only hold an exclusive lock on DMS2 if they
- ** first hold an exclusive lock on DMS1. And this connection is currently
- ** holding the exclusive lock on DMS1.
- **
- ** However, if some other connection has the database open in single-process
- ** mode, this operation will fail. In this case, return the error to the
- ** caller - the attempt to connect to the db has failed.
- */
- if( rc==LSM_OK ){
- rc = lsmShmLock(pDb, LSM_LOCK_DMS2, LSM_LOCK_SHARED, 0);
- }
-
- /* If anything went wrong, unlock DMS2. Otherwise, try to take an exclusive
- ** lock on one of the LSM_LOCK_RWCLIENT() locks. Unlock DMS1 in any case. */
- if( rc!=LSM_OK ){
- pDb->pShmhdr = 0;
- }else{
- int i;
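- for(i=0; i<LSM_LOCK_NRWCLIENT; i++){
- int rc2 = lsmShmLock(pDb, LSM_LOCK_RWCLIENT(i), LSM_LOCK_EXCL, 0);
- if( rc2==LSM_OK ) pDb->iRwclient = i;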
- if( rc2!=LSM_BUSY ){
- rc = rc2;
- break;
- }
- }
- }
- lsmShmLock(pDb, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0);
-
- return rc;
-}
-
-static int dbOpenSharedFd(lsm_env *pEnv, Database *p, int bRoOk){
- int rc;
-
- rc = lsmEnvOpen(pEnv, p->zName, 0, &p->pFile);
- if( rc==LSM_IOERR && bRoOk ){
- rc = lsmEnvOpen(pEnv, p->zName, LSM_OPEN_READONLY, &p->pFile);
- p->bReadonly = 1;
- }
-
- return rc;
-}
-
-/*
-** Return a reference to the shared Database handle for the database
-** identified by canonical path zName. If this is the first connection to
-** the named database, a new Database object is allocated. Otherwise, a
-** pointer to an existing object is returned.
-**
-** If successful, *ppDatabase is set to point to the shared Database
-** structure and LSM_OK returned. Otherwise, *ppDatabase is set to NULL
-** and an LSM error code returned.
-**
-** Each successful call to this function should be (eventually) matched
-** by a call to lsmDbDatabaseRelease().
-*/
-int lsmDbDatabaseConnect(
- lsm_db *pDb, /* Database handle */
- const char *zName /* Full-path to db file */
-){
- lsm_env *pEnv = pDb->pEnv;
- int rc; /* Return code */
- Database *p = 0; /* Pointer returned via *ppDatabase */
- int nName = lsmStrlen(zName);
-
- assert( pDb->pDatabase==0 );
- rc = enterGlobalMutex(pEnv);
- if( rc==LSM_OK ){
-
- /* Search the global list for an existing object. TODO: Need something
- ** better than the memcmp() below to figure out if a given Database
- ** object represents the requested file. */
- for(p=gShared.pDatabase; p; p=p->pDbNext){
- if( nName==p->nName && 0==memcmp(zName, p->zName, nName) ) break;
- }
-
- /* If no suitable Database object was found, allocate a new one. */
- if( p==0 ){
- p = (Database *)lsmMallocZeroRc(pEnv, sizeof(Database)+nName+1, &rc);
-
- /* If the allocation was successful, fill in other fields and
- ** allocate the client mutex. */
- if( rc==LSM_OK ){
- p->bMultiProc = pDb->bMultiProc;
- p->zName = (char *)&p[1];
- p->nName = nName;
- memcpy((void *)p->zName, zName, nName+1);
- rc = lsmMutexNew(pEnv, &p->pClientMutex);
- }
-
- /* If nothing has gone wrong so far, open the shared fd. And if that
- ** succeeds and this connection requested single-process mode,
- ** attempt to take the exclusive lock on DMS2. */
- if( rc==LSM_OK ){
- int bReadonly = (pDb->bReadonly && pDb->bMultiProc);
- rc = dbOpenSharedFd(pDb->pEnv, p, bReadonly);
- }
-
- if( rc==LSM_OK && p->bMultiProc==0 ){
- /* Hold an exclusive lock DMS1 while grabbing DMS2. This ensures
- ** that any ongoing call to doDbDisconnect() (even one in another
- ** process) is finished before proceeding. */
- assert( p->bReadonly==0 );
- rc = lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS1, LSM_LOCK_EXCL);
- if( rc==LSM_OK ){
- rc = lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS2, LSM_LOCK_EXCL);
- lsmEnvLock(pDb->pEnv, p->pFile, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK);
- }
- }
-
- if( rc==LSM_OK ){
- p->pDbNext = gShared.pDatabase;
- gShared.pDatabase = p;
- }else{
- freeDatabase(pEnv, p);
- p = 0;
- }
- }
-
- if( p ){
- p->nDbRef++;
- }
- leaveGlobalMutex(pEnv);
-
- if( p ){
- lsmMutexEnter(pDb->pEnv, p->pClientMutex);
- pDb->pNext = p->pConn;
- p->pConn = pDb;
- lsmMutexLeave(pDb->pEnv, p->pClientMutex);
- }
- }
-
- pDb->pDatabase = p;
- if( rc==LSM_OK ){
- assert( p );
- rc = lsmFsOpen(pDb, zName, p->bReadonly);
- }
-
- /* If the db handle is read-write, then connect to the system now. Run
- ** recovery as necessary. Or, if this is a read-only database handle,
- ** defer attempting to connect to the system until a read-transaction
- ** is opened. */
- if( rc==LSM_OK ){
- rc = lsmFsConfigure(pDb);
- }
- if( rc==LSM_OK && pDb->bReadonly==0 ){
- rc = doDbConnect(pDb);
- }
-
- return rc;
-}
-
-static void dbDeferClose(lsm_db *pDb){
- if( pDb->pFS ){
- LsmFile *pLsmFile;
- Database *p = pDb->pDatabase;
- pLsmFile = lsmFsDeferClose(pDb->pFS);
- pLsmFile->pNext = p->pLsmFile;
- p->pLsmFile = pLsmFile;
- }
-}
-
-LsmFile *lsmDbRecycleFd(lsm_db *db){
- LsmFile *pRet;
- Database *p = db->pDatabase;
- lsmMutexEnter(db->pEnv, p->pClientMutex);
- if( (pRet = p->pLsmFile)!=0 ){
- p->pLsmFile = pRet->pNext;
- }
- lsmMutexLeave(db->pEnv, p->pClientMutex);
- return pRet;
-}
-
-/*
-** Release a reference to a Database object obtained from
-** lsmDbDatabaseConnect(). There should be exactly one call to this function
-** for each successful call to lsmDbDatabaseConnect().
-*/
-void lsmDbDatabaseRelease(lsm_db *pDb){
- Database *p = pDb->pDatabase;
- if( p ){
- lsm_db **ppDb;
-
- if( pDb->pShmhdr ){
- doDbDisconnect(pDb);
- }
-
- lsmFsUnmap(pDb->pFS);
- lsmMutexEnter(pDb->pEnv, p->pClientMutex);
- for(ppDb=&p->pConn; *ppDb!=pDb; ppDb=&((*ppDb)->pNext));
- *ppDb = pDb->pNext;
- dbDeferClose(pDb);
- lsmMutexLeave(pDb->pEnv, p->pClientMutex);
-
- enterGlobalMutex(pDb->pEnv);
- p->nDbRef--;
- if( p->nDbRef==0 ){
- LsmFile *pIter;
- LsmFile *pNext;
- Database **pp;
-
- /* Remove the Database structure from the linked list. */
- for(pp=&gShared.pDatabase; *pp!=p; pp=&((*pp)->pDbNext));
- *pp = p->pDbNext;
-
- /* If they were allocated from the heap, free the shared memory chunks */
- if( p->bMultiProc==0 ){
- int i;
- for(i=0; i<p->nShmChunk; i++){
- lsmFree(pDb->pEnv, p->apShmChunk[i]);
- }
- }
-
- /* Close any outstanding file descriptors */
- for(pIter=p->pLsmFile; pIter; pIter=pNext){
- pNext = pIter->pNext;
- lsmEnvClose(pDb->pEnv, pIter->pFile);
- lsmFree(pDb->pEnv, pIter);
- }
- freeDatabase(pDb->pEnv, p);
- }
- leaveGlobalMutex(pDb->pEnv);
- }
-}
-
-Level *lsmDbSnapshotLevel(Snapshot *pSnapshot){
- return pSnapshot->pLevel;
-}
-
-void lsmDbSnapshotSetLevel(Snapshot *pSnap, Level *pLevel){
- pSnap->pLevel = pLevel;
-}
-
-/* TODO: Shuffle things around to get rid of this */
-static int firstSnapshotInUse(lsm_db *, i64 *);
-
-/*
-** Context object used by the lsmWalkFreelist() utility.
-*/
-typedef struct WalkFreelistCtx WalkFreelistCtx;
-struct WalkFreelistCtx {
- lsm_db *pDb;
- int bReverse;
- Freelist *pFreelist;
- int iFree;
- int (*xUsr)(void *, int, i64); /* User callback function */
- void *pUsrctx; /* User callback context */
- int bDone; /* Set to true after xUsr() returns true */
-};
-
-/*
-** Callback used by lsmWalkFreelist().
-*/
-static int walkFreelistCb(void *pCtx, int iBlk, i64 iSnapshot){
- WalkFreelistCtx *p = (WalkFreelistCtx *)pCtx;
- const int iDir = (p->bReverse ? -1 : 1);
- Freelist *pFree = p->pFreelist;
-
- assert( p->bDone==0 );
- assert( iBlk>=0 );
- if( pFree ){
- while( (p->iFree < pFree->nEntry) && p->iFree>=0 ){
- FreelistEntry *pEntry = &pFree->aEntry[p->iFree];
- if( (p->bReverse==0 && pEntry->iBlk>(u32)iBlk)
- || (p->bReverse!=0 && pEntry->iBlk<(u32)iBlk)
- ){
- break;
- }else{
- p->iFree += iDir;
- if( pEntry->iId>=0
- && p->xUsr(p->pUsrctx, pEntry->iBlk, pEntry->iId)
- ){
- p->bDone = 1;
- return 1;
- }
- if( pEntry->iBlk==(u32)iBlk ) return 0;
- }
- }
- }
-
- if( p->xUsr(p->pUsrctx, iBlk, iSnapshot) ){
- p->bDone = 1;
- return 1;
- }
- return 0;
-}
-
-/*
-** The database handle passed as the first argument must be the worker
-** connection. This function iterates through the contents of the current
-** free block list, invoking the supplied callback once for each list
-** element.
-**
-** The difference between this function and lsmSortedWalkFreelist() is
-** that lsmSortedWalkFreelist() only considers those free-list elements
-** stored within the LSM. This function also merges in any in-memory
-** elements.
-**
-** Two in-memory lists are merged in: the worker snapshot's list
-** (pDb->pWorker->freelist) and, if it is not NULL, pDb->pFreelist. Entries
-** with a negative iId are never passed to the callback. Iteration stops as
-** soon as the callback returns non-zero.
-**
-** The return value is the result of lsmSortedWalkFreelist(), except that
-** LSM_OK is returned if the callback ends the walk while the entries left
-** over in the in-memory lists are being delivered.
-*/
-int lsmWalkFreelist(
-  lsm_db *pDb,                    /* Database handle (must be worker) */
-  int bReverse,                   /* True to iterate from largest to smallest */
-  int (*x)(void *, int, i64),     /* Callback function */
-  void *pCtx                      /* First argument to pass to callback */
-){
-  const int iDir = (bReverse ? -1 : 1);
-  int rc;
-  int iCtx;
-
-  /* The two contexts are chained: ctx[0] merges the worker snapshot's list
-  ** and forwards to walkFreelistCb on ctx[1], which merges pDb->pFreelist
-  ** and forwards to the user callback. */
-  WalkFreelistCtx ctx[2];
-
-  ctx[0].pDb = pDb;
-  ctx[0].bReverse = bReverse;
-  ctx[0].pFreelist = &pDb->pWorker->freelist;
-  if( ctx[0].pFreelist && bReverse ){
-    ctx[0].iFree = ctx[0].pFreelist->nEntry-1;
-  }else{
-    ctx[0].iFree = 0;
-  }
-  ctx[0].xUsr = walkFreelistCb;
-  ctx[0].pUsrctx = (void *)&ctx[1];
-  ctx[0].bDone = 0;
-
-  ctx[1].pDb = pDb;
-  ctx[1].bReverse = bReverse;
-  ctx[1].pFreelist = pDb->pFreelist;
-  if( ctx[1].pFreelist && bReverse ){
-    ctx[1].iFree = ctx[1].pFreelist->nEntry-1;
-  }else{
-    ctx[1].iFree = 0;
-  }
-  ctx[1].xUsr = x;
-  ctx[1].pUsrctx = pCtx;
-  ctx[1].bDone = 0;
-
-  rc = lsmSortedWalkFreelist(pDb, bReverse, walkFreelistCb, (void *)&ctx[0]);
-
-  /* Deliver whatever remains in each in-memory list after the stored
-  ** entries have been exhausted. */
-  if( ctx[0].bDone==0 ){
-    for(iCtx=0; iCtx<2; iCtx++){
-      int i;
-      WalkFreelistCtx *p = &ctx[iCtx];
-      for(i=p->iFree;
-          p->pFreelist && rc==LSM_OK && i<p->pFreelist->nEntry && i>=0;
-          i += iDir
-      ){
-        FreelistEntry *pEntry = &p->pFreelist->aEntry[i];
-        if( pEntry->iId>=0 && p->xUsr(p->pUsrctx, pEntry->iBlk, pEntry->iId) ){
-          return LSM_OK;
-        }
-      }
-    }
-  }
-
-  return rc;
-}
-
-
-/*
-** Context object passed to findFreeblockCb() by findFreeblock().
-*/
-typedef struct FindFreeblockCtx {
-  i64 iInUse;                     /* Only snapshot ids below this qualify */
-  int iRet;                       /* OUT: Block selected by the callback */
-  int bNotOne;                    /* If non-zero, never select block 1 */
-} FindFreeblockCtx;
-
-/*
-** lsmWalkFreelist() callback used by findFreeblock(). The first block whose
-** snapshot id is smaller than p->iInUse is stored in p->iRet and the walk is
-** halted by returning 1. Block 1 is passed over if p->bNotOne is set.
-** Returns 0 to continue the walk.
-*/
-static int findFreeblockCb(void *pCtx, int iBlk, i64 iSnapshot){
-  FindFreeblockCtx *p = (FindFreeblockCtx *)pCtx;
-  if( iSnapshot<p->iInUse && (iBlk!=1 || p->bNotOne==0) ){
-    p->iRet = iBlk;
-    return 1;
-  }
-  return 0;
-}
-
-/*
-** Walk the free-list in ascending order looking for a block that
-** findFreeblockCb() accepts for the given iInUse/bNotOne values. *piRet is
-** set to the block found, or to 0 if there is none. The return value is
-** that of lsmWalkFreelist().
-*/
-static int findFreeblock(lsm_db *pDb, i64 iInUse, int bNotOne, int *piRet){
-  FindFreeblockCtx sSearch;       /* Search parameters and result */
-  int rcWalk;                     /* Result of the free-list walk */
-
-  sSearch.iRet = 0;
-  sSearch.iInUse = iInUse;
-  sSearch.bNotOne = bNotOne;
-
-  rcWalk = lsmWalkFreelist(pDb, 0, findFreeblockCb, (void *)&sSearch);
-  *piRet = sSearch.iRet;
-  return rcWalk;
-}
-
-/*
-** Allocate a new database file block to write data to, either by extending
-** the database file or by recycling a free-list entry. The worker snapshot
-** must be held in order to call this function.
-**
-** If successful, *piBlk is set to the block number allocated and LSM_OK is
-** returned. Otherwise, *piBlk is zeroed and an lsm error code returned.
-**
-** If iBefore is greater than zero the allocation is restricted: block 1 is
-** never selected from the free-list, and a block is only handed out if the
-** free-list search produced one numbered less than iBefore. When no such
-** block is found *piBlk is set to 0 and the file is not extended; rc is
-** left unchanged by this step, so the call can return LSM_OK with
-** *piBlk==0.
-**
-** The free-list is not searched at all while lsmDetectRoTrans() reports an
-** open read-only transaction.
-*/
-int lsmBlockAllocate(lsm_db *pDb, int iBefore, int *piBlk){
- Snapshot *p = pDb->pWorker;
- int iRet = 0; /* Block number of allocated block */
- int rc = LSM_OK;
- i64 iInUse = 0; /* Snapshot id still in use */
- i64 iSynced = 0; /* Snapshot id synced to disk */
-
- assert( p );
-
-#ifdef LSM_LOG_FREELIST
- {
- static int nCall = 0;
- char *zFree = 0;
- nCall++;
- rc = lsmInfoFreelist(pDb, &zFree);
- if( rc!=LSM_OK ) return rc;
- lsmLogMessage(pDb, 0, "lsmBlockAllocate(): %d freelist: %s", nCall, zFree);
- lsmFree(pDb->pEnv, zFree);
- }
-#endif
-
- /* Set iInUse to the smallest snapshot id that is either:
- **
- ** * Currently in use by a database client,
- ** * May be used by a database client in the future, or
- ** * Is the most recently checkpointed snapshot (i.e. the one that will
- ** be used following recovery if a failure occurs at this point).
- */
- rc = lsmCheckpointSynced(pDb, &iSynced, 0, 0);
- if( rc==LSM_OK && iSynced==0 ) iSynced = p->iId; /* still 0: use the worker snapshot id */
- iInUse = iSynced;
- if( rc==LSM_OK && pDb->iReader>=0 ){
- assert( pDb->pClient );
- iInUse = LSM_MIN(iInUse, pDb->pClient->iId);
- }
- if( rc==LSM_OK ) rc = firstSnapshotInUse(pDb, &iInUse);
-
-#ifdef LSM_LOG_FREELIST
- {
- lsmLogMessage(pDb, 0, "lsmBlockAllocate(): "
- "snapshot-in-use: %lld (iSynced=%lld) (client-id=%lld)",
- iInUse, iSynced, (pDb->iReader>=0 ? pDb->pClient->iId : 0)
- );
- }
-#endif
-
-
- /* Unless there exists a read-only transaction (which prevents us from
- ** recycling any blocks regardless), query the free block list for a
- ** suitable block to reuse.
- **
- ** It might seem more natural to check for a read-only transaction at
- ** the start of this function. However, it is better to wait until after
- ** the call to lsmCheckpointSynced() to do so.
- */
- if( rc==LSM_OK ){
- int bRotrans;
- rc = lsmDetectRoTrans(pDb, &bRotrans);
-
- if( rc==LSM_OK && bRotrans==0 ){
- rc = findFreeblock(pDb, iInUse, (iBefore>0), &iRet);
- }
- }
-
- if( iBefore>0 && (iRet<=0 || iRet>=iBefore) ){
- iRet = 0; /* restricted request could not be satisfied */
-
- }else if( rc==LSM_OK ){
- /* If a block was found in the free block list, use it and remove it from
- ** the list. Otherwise, if no suitable block was found, allocate one from
- ** the end of the file. */
- if( iRet>0 ){
-#ifdef LSM_LOG_FREELIST
- lsmLogMessage(pDb, 0,
- "reusing block %d (snapshot-in-use=%lld)", iRet, iInUse);
-#endif
- rc = freelistAppend(pDb, iRet, -1); /* NOTE(review): id -1 presumably marks the block as taken; walkFreelistCb skips negative ids */
- if( rc==LSM_OK ){
- rc = dbTruncate(pDb, iInUse);
- }
- }else{
- iRet = ++(p->nBlock); /* new block number is the incremented block count */
-#ifdef LSM_LOG_FREELIST
- lsmLogMessage(pDb, 0, "extending file to %d blocks", iRet);
-#endif
- }
- }
-
- assert( iBefore>0 || iRet>0 || rc!=LSM_OK );
- *piBlk = iRet;
- return rc;
-}
-
-/*
-** Free a database block. The worker snapshot must be held in order to call
-** this function.
-**
-** If successful, LSM_OK is returned. Otherwise, an lsm error code (e.g.
-** LSM_NOMEM).
-*/
-int lsmBlockFree(lsm_db *pDb, int iBlk){
-  assert( lsmShmAssertWorker(pDb) );
-
-#ifdef LSM_LOG_FREELIST
-  lsmLogMessage(pDb, LSM_OK, "lsmBlockFree(): Free block %d", iBlk);
-#endif
-
-  /* Record the block against the id of the current worker snapshot. */
-  return freelistAppend(pDb, iBlk, pDb->pWorker->iId);
-}
-
-/*
-** Refree a database block. The worker snapshot must be held in order to call
-** this function.
-**
-** Refreeing is required when a block is allocated using lsmBlockAllocate()
-** but then not used. This function is used to push the block back onto
-** the freelist. Refreeing a block is different from freeing it, as a refreed
-** block may be reused immediately. Whereas a freed block can not be reused
-** until (at least) after the next checkpoint.
-*/
-int lsmBlockRefree(lsm_db *pDb, int iBlk){
-#ifdef LSM_LOG_FREELIST
-  lsmLogMessage(pDb, LSM_OK, "lsmBlockRefree(): Refree block %d", iBlk);
-#endif
-
-  /* The block goes back on the free-list with snapshot id 0. */
-  return freelistAppend(pDb, iBlk, 0);
-}
-
-/*
-** If required, copy a database checkpoint from shared memory into the
-** database itself.
-**
-** The WORKER lock must not be held when this is called. This is because
-** this function may indirectly call fsync(). And the WORKER lock should
-** not be held that long (in case it is required by a client flushing an
-** in-memory tree to disk).
-**
-** The CHECKPOINTER lock is taken exclusively for the duration of the call;
-** if it cannot be obtained the lsmShmLock() error code is returned at once.
-**
-** If pnWrite is not NULL and LSM_OK is returned, *pnWrite is set to the
-** local nWrite value. When a checkpoint is stored by this call, that is the
-** lsmCheckpointNWrite() value of the new checkpoint minus the value read
-** from the meta page (or minus zero if no meta page was read).
-*/
-int lsmCheckpointWrite(lsm_db *pDb, u32 *pnWrite){
- int rc; /* Return Code */
- u32 nWrite = 0;
-
- assert( pDb->pWorker==0 );
- assert( 1 || pDb->pClient==0 ); /* always true: the pClient check is disabled by the "1 ||" */
- assert( lsmShmAssertLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_UNLOCK) );
-
- rc = lsmShmLock(pDb, LSM_LOCK_CHECKPOINTER, LSM_LOCK_EXCL, 0);
- if( rc!=LSM_OK ) return rc;
-
- rc = lsmCheckpointLoad(pDb, 0);
- if( rc==LSM_OK ){
- int nBlock = lsmCheckpointNBlock(pDb->aSnapshot);
- ShmHeader *pShm = pDb->pShmhdr;
- int bDone = 0; /* True if checkpoint is already stored */
-
- /* Check if this checkpoint has already been written to the database
- ** file. If so, set variable bDone to true. */
- if( pShm->iMetaPage ){
- MetaPage *pPg; /* Meta page */
- u8 *aData; /* Meta-page data buffer */
- int nData; /* Size of aData[] in bytes */
- i64 iCkpt; /* Id of checkpoint just loaded */
- i64 iDisk = 0; /* Id of checkpoint already stored in db */
- iCkpt = lsmCheckpointId(pDb->aSnapshot, 0);
- rc = lsmFsMetaPageGet(pDb->pFS, 0, pShm->iMetaPage, &pPg);
- if( rc==LSM_OK ){
- aData = lsmFsMetaPageData(pPg, &nData);
- iDisk = lsmCheckpointId((u32 *)aData, 1);
- nWrite = lsmCheckpointNWrite((u32 *)aData, 1);
- lsmFsMetaPageRelease(pPg);
- }
- bDone = (iDisk>=iCkpt);
- }
-
- if( rc==LSM_OK && bDone==0 ){
- int iMeta = (pShm->iMetaPage % 2) + 1; /* 1 -> 2, 2 -> 1, 0 -> 1 */
- if( pDb->eSafety!=LSM_SAFETY_OFF ){
- rc = lsmFsSyncDb(pDb->pFS, nBlock);
- }
- if( rc==LSM_OK ) rc = lsmCheckpointStore(pDb, iMeta);
- if( rc==LSM_OK && pDb->eSafety!=LSM_SAFETY_OFF){
- rc = lsmFsSyncDb(pDb->pFS, 0);
- }
- if( rc==LSM_OK ){
- pShm->iMetaPage = iMeta;
- nWrite = lsmCheckpointNWrite(pDb->aSnapshot, 0) - nWrite;
- }
-#ifdef LSM_LOG_WORK
- lsmLogMessage(pDb, 0, "finish checkpoint %d",
- (int)lsmCheckpointId(pDb->aSnapshot, 0)
- );
-#endif
- }
- }
-
- lsmShmLock(pDb, LSM_LOCK_CHECKPOINTER, LSM_LOCK_UNLOCK, 0);
- if( pnWrite && rc==LSM_OK ) *pnWrite = nWrite;
- return rc;
-}
-
-/*
-** Take the WORKER lock exclusively and, if that succeeds, load the current
-** worker snapshot with lsmCheckpointLoadWorker(). Returns LSM_OK or the
-** error code of whichever step failed.
-*/
-int lsmBeginWork(lsm_db *pDb){
-  int rcLock = lsmShmLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_EXCL, 0);
-  if( rcLock!=LSM_OK ) return rcLock;
-
-  /* Lock obtained. Deserialize the current worker snapshot. */
-  return lsmCheckpointLoadWorker(pDb);
-}
-
-/*
-** Free snapshot object p together with its level list, free-list entry
-** array and redirect array. A NULL p is a no-op.
-*/
-void lsmFreeSnapshot(lsm_env *pEnv, Snapshot *p){
-  if( p==0 ) return;
-
-  lsmSortedFreeLevel(pEnv, p->pLevel);
-  lsmFree(pEnv, p->freelist.aEntry);
-  lsmFree(pEnv, p->redirect.a);
-  lsmFree(pEnv, p);
-}
-
-/*
-** Attempt to populate one of the read-lock slots to contain lock values
-** iLsm/iShm. Or, if such a slot exists already, this function is a no-op.
-**
-** It is not an error if no slot can be populated because the write-lock
-** cannot be obtained. If any other error occurs, return an LSM error code.
-** Otherwise, LSM_OK.
-**
-** This function is called at various points to try to ensure that there
-** always exists at least one read-lock slot that can be used by a read-only
-** client. And so that, in the usual case, there is an "exact match" available
-** whenever a read transaction is opened by any client. At present this
-** function is called when:
-**
-** * A write transaction that called lsmTreeDiscardOld() is committed, and
-** * Whenever the working snapshot is updated (i.e. lsmFinishWork()).
-*/
-static int dbSetReadLock(lsm_db *db, i64 iLsm, u32 iShm){
-  int rc = LSM_OK;
-  ShmHeader *pShm = db->pShmhdr;
-  int i;
-
-  /* Check if there is already a slot containing the required values. */
-  for(i=0; i<LSM_LOCK_NREADER; i++){
-    ShmReader *p = &pShm->aReader[i];
-    if( p->iLsmId==iLsm && p->iTreeId==iShm ) return LSM_OK;
-  }
-
-  /* Iterate through all read-lock slots, attempting to take a write-lock
-  ** on each of them. If a write-lock succeeds, populate the locked slot
-  ** with the required values and break out of the loop. A slot that is
-  ** busy is skipped (LSM_BUSY is not an error here); any other error ends
-  ** the loop and is returned. */
-  for(i=0; rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
-    rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0);
-    if( rc==LSM_BUSY ){
-      rc = LSM_OK;
-    }else{
-      ShmReader *p = &pShm->aReader[i];
-      p->iLsmId = iLsm;
-      p->iTreeId = iShm;
-      lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_UNLOCK, 0);
-      break;
-    }
-  }
-
-  return rc;
-}
-
-/*
-** Release the read-lock currently held by connection db.
-*/
-int dbReleaseReadlock(lsm_db *db){
-  const int iSlot = db->iReader;  /* Read-lock slot held, or negative */
-  int rcUnlock = LSM_OK;
-
-  if( iSlot>=0 ){
-    rcUnlock = lsmShmLock(db, LSM_LOCK_READER(iSlot), LSM_LOCK_UNLOCK, 0);
-    db->iReader = -1;
-  }
-
-  /* The read-only transaction flag is cleared unconditionally. */
-  db->bRoTrans = 0;
-  return rcUnlock;
-}
-
-
-/*
-** Argument bFlush is true if the contents of the in-memory tree has just
-** been flushed to disk. The significance of this is that once the snapshot
-** created to hold the updated state of the database is synced to disk, log
-** file space can be recycled.
-*/
-void lsmFinishWork(lsm_db *pDb, int bFlush, int *pRc){
-  int rcWork = *pRc;              /* Error code accumulated so far */
-
-  assert( rcWork!=0 || pDb->pWorker );
-  if( pDb->pWorker ){
-    /* Provided nothing has failed yet, serialize the worker snapshot
-    ** into shared memory. */
-    if( rcWork==LSM_OK ) rcWork = lsmSaveWorker(pDb, bFlush);
-
-    /* Then try to publish the new snapshot id in a read-lock slot (see
-    ** dbSetReadLock()). The tree header is loaded first if this connection
-    ** holds no read-lock of its own. */
-    if( rcWork==LSM_OK && pDb->iReader<0 ){
-      rcWork = lsmTreeLoadHeader(pDb, 0);
-    }
-    if( rcWork==LSM_OK ){
-      rcWork = dbSetReadLock(
-          pDb, pDb->pWorker->iId, pDb->treehdr.iUsedShmid
-      );
-    }
-
-    /* The snapshot object is discarded whether or not an error occurred. */
-    lsmFreeSnapshot(pDb->pEnv, pDb->pWorker);
-    pDb->pWorker = 0;
-  }
-
-  lsmShmLock(pDb, LSM_LOCK_WORKER, LSM_LOCK_UNLOCK, 0);
-  *pRc = rcWork;
-}
-
-/*
-** Called when recovery is finished.
-*/
-int lsmFinishRecovery(lsm_db *pDb){
-  /* End the tree transaction, passing 1 as the commit flag. This function
-  ** itself always reports success. */
-  lsmTreeEndTransaction(pDb, 1);
-
-  return LSM_OK;
-}
-
-/*
-** Check if the currently configured compression functions
-** (LSM_CONFIG_SET_COMPRESSION) are compatible with a database that has its
-** compression id set to iReq. Compression routines are compatible if iReq
-** is zero (indicating the database is empty), or if it is equal to the
-** compression id of the configured compression routines.
-**
-** If the check shows that the current compression functions are incompatible and there
-** is a compression factory registered, give it a chance to install new
-** compression routines.
-**
-** If, after any registered factory is invoked, the compression functions
-** are still incompatible, return LSM_MISMATCH. Otherwise, LSM_OK.
-*/
-int lsmCheckCompressionId(lsm_db *pDb, u32 iReq){
-  /* An empty database, or one that matches the configured compression
-  ** functions, is compatible. */
-  if( iReq==LSM_COMPRESSION_EMPTY ) return LSM_OK;
-  if( pDb->compress.iId==iReq ) return LSM_OK;
-
-  /* Mismatch. Let the factory, if one is registered, install different
-  ** compression functions. bInFactory is set for the duration of the
-  ** callback. */
-  if( pDb->factory.xFactory ){
-    pDb->bInFactory = 1;
-    pDb->factory.xFactory(pDb->factory.pCtx, pDb, iReq);
-    pDb->bInFactory = 0;
-  }
-
-  return (pDb->compress.iId==iReq) ? LSM_OK : LSM_MISMATCH;
-}
-
-/*
-** Begin a read transaction. This function is a no-op if the connection
-** passed as the only argument already has an open read transaction.
-*/
-int lsmBeginReadTrans(lsm_db *pDb){
-  const int MAX_READLOCK_ATTEMPTS = 10;
-  const int nMaxAttempt = (pDb->bRoTrans ? 1 : MAX_READLOCK_ATTEMPTS);
-
-  int rc = LSM_OK;                /* Return code */
-  int iAttempt = 0;
-
-  assert( pDb->pWorker==0 );
-
-  /* Each iteration loads the tree header and snapshot, takes a read-lock
-  ** and then verifies that neither changed in the meantime. The loop ends
-  ** when a read-lock is held (pDb->iReader>=0), an error occurs, or the
-  ** attempts are used up. Only one attempt is made if bRoTrans is set. */
-  while( rc==LSM_OK && pDb->iReader<0 && (iAttempt++)<nMaxAttempt ){
-    int iTreehdr = 0;
-    int iSnap = 0;
-    assert( pDb->pCsr==0 && pDb->nTransOpen==0 );
-
-    /* Load the in-memory tree header. */
-    rc = lsmTreeLoadHeader(pDb, &iTreehdr);
-
-    /* Load the database snapshot */
-    if( rc==LSM_OK ){
-      if( lsmCheckpointClientCacheOk(pDb)==0 ){
-        lsmFreeSnapshot(pDb->pEnv, pDb->pClient);
-        pDb->pClient = 0;
-        lsmMCursorFreeCache(pDb);
-        lsmFsPurgeCache(pDb->pFS);
-        rc = lsmCheckpointLoad(pDb, &iSnap);
-      }else{
-        iSnap = 1;
-      }
-    }
-
-    /* Take a read-lock on the tree and snapshot just loaded. Then check
-    ** that the shared-memory still contains the same values. If so, proceed.
-    ** Otherwise, relinquish the read-lock and retry the whole procedure
-    ** (starting with loading the in-memory tree header).  */
-    if( rc==LSM_OK ){
-      u32 iShmMax = pDb->treehdr.iUsedShmid;
-      u32 iShmMin = pDb->treehdr.iNextShmid+1-LSM_MAX_SHMCHUNKS;
-      rc = lsmReadlock(
-          pDb, lsmCheckpointId(pDb->aSnapshot, 0), iShmMin, iShmMax
-      );
-      if( rc==LSM_OK ){
-        if( lsmTreeLoadHeaderOk(pDb, iTreehdr)
-         && lsmCheckpointLoadOk(pDb, iSnap)
-        ){
-          /* Read lock has been successfully obtained. Deserialize the
-          ** checkpoint just loaded. TODO: This will be removed after
-          ** lsm_sorted.c is changed to work directly from the serialized
-          ** version of the snapshot.  */
-          if( pDb->pClient==0 ){
-            rc = lsmCheckpointDeserialize(pDb, 0, pDb->aSnapshot,&pDb->pClient);
-          }
-          assert( (rc==LSM_OK)==(pDb->pClient!=0) );
-          assert( pDb->iReader>=0 );
-
-          /* Check that the client has the right compression hooks loaded.
-          ** If not, set rc to LSM_MISMATCH.  */
-          if( rc==LSM_OK ){
-            rc = lsmCheckCompressionId(pDb, pDb->pClient->iCmpId);
-          }
-        }else{
-          rc = dbReleaseReadlock(pDb);
-        }
-      }
-
-      /* Failing to get a read-lock is not fatal: go around again. */
-      if( rc==LSM_BUSY ){
-        rc = LSM_OK;
-      }
-    }
-#if 0
-if( rc==LSM_OK && pDb->pClient ){
-  fprintf(stderr,
-      "reading %p: snapshot:%d used-shmid:%d trans-id:%d iOldShmid=%d\n",
-      (void *)pDb,
-      (int)pDb->pClient->iId, (int)pDb->treehdr.iUsedShmid,
-      (int)pDb->treehdr.root.iTransId,
-      (int)pDb->treehdr.iOldShmid
-  );
-}
-#endif
-  }
-
-  if( rc==LSM_OK ){
-    rc = lsmShmCacheChunks(pDb, pDb->treehdr.nChunk);
-  }
-  if( rc!=LSM_OK ){
-    dbReleaseReadlock(pDb);
-  }
-
-  /* Every attempt failed without a hard error: report LSM_BUSY. */
-  if( pDb->pClient==0 && rc==LSM_OK ) rc = LSM_BUSY;
-  return rc;
-}
-
-/*
-** This function is used by a read-write connection to determine if there
-** are currently one or more read-only transactions open on the database
-** (in this context a read-only transaction is one opened by a read-only
-** connection on a non-live database).
-**
-** If no error occurs, LSM_OK is returned and *pbExists is set to true if
-** some other connection has a read-only transaction open, or false
-** otherwise. If an error occurs an LSM error code is returned and the final
-** value of *pbExist is undefined.
-*/
-int lsmDetectRoTrans(lsm_db *db, int *pbExist){
-  int rcTest;                     /* Result of the lock test */
-
-  /* Only a read-write connection may use this function. */
-  assert( db->bReadonly==0 );
-
-  /* Ask whether an EXCLUSIVE lock on ROTRANS could be taken. If the
-  ** answer is LSM_BUSY, some other connection is holding the lock. */
-  rcTest = lsmShmTestLock(db, LSM_LOCK_ROTRANS, 1, LSM_LOCK_EXCL);
-  if( rcTest!=LSM_BUSY ){
-    *pbExist = 0;
-    return rcTest;
-  }
-
-  *pbExist = 1;
-  return LSM_OK;
-}
-
-/*
-** db is a read-only database handle in the disconnected state. This function
-** attempts to open a read-transaction on the database. This may involve
-** connecting to the database system (opening shared memory etc.).
-**
-** If db->bRoTrans is already set this is a no-op that returns LSM_OK.
-** Otherwise LSM_OK is returned if the read transaction was opened, or an
-** LSM error code if any step failed.
-**
-** Whether the system is "live" is decided by testing for a SHARED lock on
-** the RWCLIENT locks while DMS1 is held in SHARED mode: LSM_OK from the
-** test means not live, LSM_BUSY means live.
-*/
-int lsmBeginRoTrans(lsm_db *db){
- int rc = LSM_OK;
-
- assert( db->bReadonly && db->pShmhdr==0 );
- assert( db->iReader<0 );
-
- if( db->bRoTrans==0 ){
-
- /* Attempt a shared-lock on DMS1. */
- rc = lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_SHARED, 0);
- if( rc!=LSM_OK ) return rc;
-
- rc = lsmShmTestLock(
- db, LSM_LOCK_RWCLIENT(0), LSM_LOCK_NREADER, LSM_LOCK_SHARED
- );
- if( rc==LSM_OK ){
- /* System is not live. Take a SHARED lock on the ROTRANS byte and
- ** release DMS1. Locking ROTRANS tells all read-write clients that they
- ** may not recycle any disk space from within the database or log files,
- ** as a read-only client may be using it. */
- rc = lsmShmLock(db, LSM_LOCK_ROTRANS, LSM_LOCK_SHARED, 0);
- lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0);
-
- if( rc==LSM_OK ){
- db->bRoTrans = 1;
- rc = lsmShmCacheChunks(db, 1);
- if( rc==LSM_OK ){
- db->pShmhdr = (ShmHeader *)db->apShm[0];
- memset(db->pShmhdr, 0, sizeof(ShmHeader)); /* start from a zeroed header, then run recovery */
- rc = lsmCheckpointRecover(db);
- if( rc==LSM_OK ){
- rc = lsmLogRecover(db);
- }
- }
- }
- }else if( rc==LSM_BUSY ){
- /* System is live! */
- rc = lsmShmLock(db, LSM_LOCK_DMS3, LSM_LOCK_SHARED, 0);
- lsmShmLock(db, LSM_LOCK_DMS1, LSM_LOCK_UNLOCK, 0);
- if( rc==LSM_OK ){
- rc = lsmShmCacheChunks(db, 1);
- if( rc==LSM_OK ){
- db->pShmhdr = (ShmHeader *)db->apShm[0];
- }
- }
- }
-
- /* In 'lsm_open()' we don't update the page and block sizes in the
- ** Filesystem for 'readonly' connection. Because member 'db->pShmhdr' is a
- ** nullpointer, this prevents loading a checkpoint. Now that the system is
- ** live this member should be set. So we can update both values in
- ** the Filesystem.
- **
- ** Configure the file-system connection with the page-size and block-size
- ** of this database. Even if the database file is zero bytes in size
- ** on disk, these values have been set in shared-memory by now, and so
- ** are guaranteed not to change during the lifetime of this connection. */
- if( LSM_OK==rc
- && 0==lsmCheckpointClientCacheOk(db)
- && LSM_OK==(rc=lsmCheckpointLoad(db, 0))
- ){
- lsmFsSetPageSize(db->pFS, lsmCheckpointPgsz(db->aSnapshot));
- lsmFsSetBlockSize(db->pFS, lsmCheckpointBlksz(db->aSnapshot));
- }
-
- if( rc==LSM_OK ){
- rc = lsmBeginReadTrans(db);
- }
- }
-
- return rc;
-}
-
-/*
-** Close the currently open read transaction.
-*/
-void lsmFinishReadTrans(lsm_db *pDb){
-
-  /* Worker connections should not be closing read transactions. And
-  ** read transactions should only be closed after all cursors and write
-  ** transactions have been closed. Finally pClient should be non-NULL
-  ** iff pDb->iReader>=0.  */
-  assert( pDb->pWorker==0 );
-  assert( pDb->pCsr==0 && pDb->nTransOpen==0 );
-
-  if( pDb->bRoTrans ){
-    /* For a read-only transaction, free every cached shared-memory chunk
-    ** and the apShm[] array itself, then drop the ROTRANS lock. */
-    int i;
-    for(i=0; i<pDb->nShm; i++){
-      lsmFree(pDb->pEnv, pDb->apShm[i]);
-    }
-    lsmFree(pDb->pEnv, pDb->apShm);
-    pDb->apShm = 0;
-    pDb->nShm = 0;
-    pDb->pShmhdr = 0;
-
-    lsmShmLock(pDb, LSM_LOCK_ROTRANS, LSM_LOCK_UNLOCK, 0);
-  }
-  dbReleaseReadlock(pDb);
-}
-
-/*
-** Open a write transaction.
-**
-** A read transaction is opened first if the connection does not already
-** hold a read-lock. LSM_OK is returned if the write transaction was opened.
-** LSM_BUSY is returned if the tree header cached by this connection differs
-** from the one in shared memory (i.e. the connection is not reading the
-** most recent version of the database). Any other failure returns the
-** error code of the step that failed.
-**
-** On failure the WRITER lock is released and, provided no cursors are
-** open, the read transaction is closed as well.
-*/
-int lsmBeginWriteTrans(lsm_db *pDb){
- int rc = LSM_OK; /* Return code */
- ShmHeader *pShm = pDb->pShmhdr; /* Shared memory header */
-
- assert( pDb->nTransOpen==0 );
- assert( pDb->bDiscardOld==0 );
- assert( pDb->bReadonly==0 );
-
- /* If there is no read-transaction open, open one now. */
- if( pDb->iReader<0 ){
- rc = lsmBeginReadTrans(pDb);
- }
-
- /* Attempt to take the WRITER lock */
- if( rc==LSM_OK ){
- rc = lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_EXCL, 0);
- }
-
- /* If the previous writer failed mid-transaction, run emergency rollback. */
- if( rc==LSM_OK && pShm->bWriter ){
- rc = lsmTreeRepair(pDb);
- if( rc==LSM_OK ) pShm->bWriter = 0;
- }
-
- /* Check that this connection is currently reading from the most recent
- ** version of the database. If not, return LSM_BUSY. */
- if( rc==LSM_OK && memcmp(&pShm->hdr1, &pDb->treehdr, sizeof(TreeHeader)) ){
- rc = LSM_BUSY;
- }
-
- if( rc==LSM_OK ){
- rc = lsmLogBegin(pDb);
- }
-
- /* If everything was successful, set the "transaction-in-progress" flag
- ** and return LSM_OK. Otherwise, if some error occurred, relinquish the
- ** WRITER lock and return an error code. */
- if( rc==LSM_OK ){
- TreeHeader *p = &pDb->treehdr;
- pShm->bWriter = 1;
- p->root.iTransId++;
- if( lsmTreeHasOld(pDb) && p->iOldLog==pDb->pClient->iLogOff ){
- lsmTreeDiscardOld(pDb);
- pDb->bDiscardOld = 1; /* read back by lsmFinishWriteTrans() */
- }
- }else{
- lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_UNLOCK, 0);
- if( pDb->pCsr==0 ) lsmFinishReadTrans(pDb);
- }
- return rc;
-}
-
-/*
-** End the current write transaction. The connection is left with an open
-** read transaction. It is an error to call this if there is no open write
-** transaction.
-**
-** If the transaction was committed, then a commit record has already been
-** written into the log file when this function is called. Or, if the
-** transaction was rolled back, both the log file and in-memory tree
-** structure have already been restored. In either case, this function
-** merely releases locks and other resources held by the write-transaction.
-**
-** LSM_OK is returned if successful, or an LSM error code otherwise.
-*/
-int lsmFinishWriteTrans(lsm_db *pDb, int bCommit){
-  int rc = LSM_OK;                /* Return code */
-  int bFlush;                     /* True if the tree should be flushed */
-
-  lsmLogEnd(pDb, bCommit);
-
-  /* A committed transaction that leaves the in-memory tree larger than
-  ** the configured limit causes the tree to be marked as old. */
-  bFlush = (bCommit && lsmTreeSize(pDb)>pDb->nTreeLimit);
-  if( bFlush ){
-    lsmTreeMakeOld(pDb);
-  }
-  lsmTreeEndTransaction(pDb, bCommit);
-
-  /* Either do the flush work now (auto-work mode) or, if this transaction
-  ** discarded the old tree, try to refresh a read-lock slot. */
-  if( bFlush && pDb->bAutowork ){
-    rc = lsmSortedAutoWork(pDb, 1);
-  }else if( bCommit && pDb->bDiscardOld ){
-    rc = dbSetReadLock(pDb, pDb->pClient->iId, pDb->treehdr.iUsedShmid);
-  }
-
-  pDb->bDiscardOld = 0;
-  lsmShmLock(pDb, LSM_LOCK_WRITER, LSM_LOCK_UNLOCK, 0);
-
-  /* With auto-work disabled, invoke the work hook if one is configured. */
-  if( bFlush && pDb->bAutowork==0 && pDb->xWork ){
-    pDb->xWork(pDb, pDb->pWorkCtx);
-  }
-  return rc;
-}
-
-
-/*
-** Return non-zero if the caller is holding the client mutex.
-*/
-#ifdef LSM_DEBUG
-int lsmHoldingClientMutex(lsm_db *pDb){
-  /* Debug builds only: ask the mutex layer about the client mutex of the
-  ** Database object this connection is attached to. */
-  lsm_env *pEnv = pDb->pEnv;
-  return lsmMutexHeld(pEnv, pDb->pDatabase->pClientMutex);
-}
-#endif
-
-/*
-** Return 1 if read-lock slot p can serve a reader of snapshot iLsm whose
-** tree id must lie in the range iShmMin..iShmMax (as ordered by
-** shm_sequence_ge()), or 0 otherwise. A slot with iLsmId==0 is never
-** usable.
-*/
-static int slotIsUsable(ShmReader *p, i64 iLsm, u32 iShmMin, u32 iShmMax){
-  if( p->iLsmId==0 ) return 0;
-  if( p->iLsmId>iLsm ) return 0;
-  if( !shm_sequence_ge(iShmMax, p->iTreeId) ) return 0;
-  if( !shm_sequence_ge(p->iTreeId, iShmMin) ) return 0;
-  return 1;
-}
-
-/*
-** Obtain a read-lock on database version identified by the combination
-** of snapshot iLsm and tree iTree. Return LSM_OK if successful, or
-** an LSM error code otherwise.
-*/
-int lsmReadlock(lsm_db *db, i64 iLsm, u32 iShmMin, u32 iShmMax){
-  int rc = LSM_OK;
-  ShmHeader *pShm = db->pShmhdr;
-  int i;
-
-  assert( db->iReader<0 );
-  assert( shm_sequence_ge(iShmMax, iShmMin) );
-
-  /* This is a no-op if the read-only transaction flag is set. */
-  if( db->bRoTrans ){
-    db->iReader = 0;
-    return LSM_OK;
-  }
-
-  /* Search for an exact match. The slot contents are checked again once
-  ** the SHARED lock is held, as they may have been changed in between. */
-  for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
-    ShmReader *p = &pShm->aReader[i];
-    if( p->iLsmId==iLsm && p->iTreeId==iShmMax ){
-      rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0);
-      if( rc==LSM_OK && p->iLsmId==iLsm && p->iTreeId==iShmMax ){
-        db->iReader = i;
-      }else if( rc==LSM_BUSY ){
-        rc = LSM_OK;
-      }
-    }
-  }
-
-  /* Try to obtain a write-lock on each slot, in order. If successful, set
-  ** the slot values to iLsm/iShmMax and then take a SHARED lock on it. */
-  for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
-    rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0);
-    if( rc==LSM_BUSY ){
-      rc = LSM_OK;
-    }else{
-      ShmReader *p = &pShm->aReader[i];
-      p->iLsmId = iLsm;
-      p->iTreeId = iShmMax;
-      rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0);
-      assert( rc!=LSM_BUSY );
-      if( rc==LSM_OK ) db->iReader = i;
-    }
-  }
-
-  /* Search for any usable slot */
-  for(i=0; db->iReader<0 && rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
-    ShmReader *p = &pShm->aReader[i];
-    if( slotIsUsable(p, iLsm, iShmMin, iShmMax) ){
-      rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_SHARED, 0);
-      if( rc==LSM_OK && slotIsUsable(p, iLsm, iShmMin, iShmMax) ){
-        db->iReader = i;
-      }else if( rc==LSM_BUSY ){
-        rc = LSM_OK;
-      }
-    }
-  }
-
-  /* No slot could be locked by any of the three strategies. */
-  if( rc==LSM_OK && db->iReader<0 ){
-    rc = LSM_BUSY;
-  }
-  return rc;
-}
-
-/*
-** This is used to check if there exists a read-lock locking a particular
-** version of either the in-memory tree or database file.
-**
-** If iLsmId is non-zero, then it is a snapshot id. If there exists a
-** read-lock using this snapshot or newer, set *pbInUse to true. Or,
-** if there is no such read-lock, set it to false.
-**
-** Or, if iLsmId is zero, then iShmid is a shared-memory sequence id.
-** Search for a read-lock using this sequence id or newer. etc.
-*/
-static int isInUse(lsm_db *db, i64 iLsmId, u32 iShmid, int *pbInUse){
-  ShmHeader *pShm = db->pShmhdr;
-  int i;
-  int rc = LSM_OK;
-
-  /* For each populated slot that matches the query, try to take an
-  ** EXCLUSIVE lock. If that works nobody is using the slot, so it is
-  ** cleared. The loop ends on the first lock attempt that fails. */
-  for(i=0; rc==LSM_OK && i<LSM_LOCK_NREADER; i++){
-    ShmReader *p = &pShm->aReader[i];
-    if( p->iLsmId ){
-      if( (iLsmId!=0 && p->iLsmId!=0 && iLsmId>=p->iLsmId)
-       || (iLsmId==0 && shm_sequence_ge(p->iTreeId, iShmid))
-      ){
-        rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0);
-        if( rc==LSM_OK ){
-          p->iLsmId = 0;
-          lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_UNLOCK, 0);
-        }
-      }
-    }
-  }
-
-  /* LSM_BUSY means a matching slot is locked by some reader. */
-  if( rc==LSM_BUSY ){
-    *pbInUse = 1;
-    return LSM_OK;
-  }
-  *pbInUse = 0;
-  return rc;
-}
-
-/*
-** This function is called by worker connections to determine the smallest
-** snapshot id that is currently in use by a database client. The worker
-** connection uses this result to determine whether or not it is safe to
-** recycle a database block.
-*/
-static int firstSnapshotInUse(
-  lsm_db *db,                     /* Database handle */
-  i64 *piInUse                    /* IN/OUT: Smallest snapshot id in use */
-){
-  ShmHeader *pShm = db->pShmhdr;
-  i64 iInUse = *piInUse;
-  int i;
-
-  assert( iInUse>0 );
-  for(i=0; i<LSM_LOCK_NREADER; i++){
-    ShmReader *p = &pShm->aReader[i];
-    if( p->iLsmId ){
-      i64 iThis = p->iLsmId;
-      if( iThis!=0 && iInUse>iThis ){
-        /* The slot names an older snapshot than the current minimum. If
-        ** an EXCLUSIVE lock can be taken, no reader is using the slot and
-        ** it is cleared. If the lock is busy, the snapshot is in use and
-        ** becomes the new minimum. */
-        int rc = lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_EXCL, 0);
-        if( rc==LSM_OK ){
-          p->iLsmId = 0;
-          lsmShmLock(db, LSM_LOCK_READER(i), LSM_LOCK_UNLOCK, 0);
-        }else if( rc==LSM_BUSY ){
-          iInUse = iThis;
-        }else{
-          /* Some error other than LSM_BUSY. Return the error code to
-          ** the caller in this case. *piInUse is left unmodified. */
-          return rc;
-        }
-      }
-    }
-  }
-
-  *piInUse = iInUse;
-  return LSM_OK;
-}
-
-/*
-** Set *pbInUse to true if shared-memory sequence id iShmid is in use,
-** either by this connection's own tree header or, according to isInUse(),
-** by a read-lock. Returns LSM_OK or an error code from isInUse().
-*/
-int lsmTreeInUse(lsm_db *db, u32 iShmid, int *pbInUse){
-  if( db->treehdr.iUsedShmid!=iShmid ){
-    return isInUse(db, 0, iShmid, pbInUse);
-  }
-
-  /* This connection itself is using iShmid. */
-  *pbInUse = 1;
-  return LSM_OK;
-}
-
-/*
-** Set *pbInUse to true if snapshot iLsmId is in use: either this
-** connection has a client snapshot with an id no greater than iLsmId, or
-** isInUse() finds a matching read-lock. Returns LSM_OK or an error code
-** from isInUse().
-*/
-int lsmLsmInUse(lsm_db *db, i64 iLsmId, int *pbInUse){
-  if( db->pClient==0 || db->pClient->iId>iLsmId ){
-    return isInUse(db, iLsmId, 0, pbInUse);
-  }
-
-  /* This connection's own client snapshot counts as a user. */
-  *pbInUse = 1;
-  return LSM_OK;
-}
-
-/*
-** This function may only be called after a successful call to
-** lsmDbDatabaseConnect(). It returns true if the connection is in
-** multi-process mode, or false otherwise.
-*/
-int lsmDbMultiProc(lsm_db *pDb){
-  /* A connection with no Database object is reported as not multi-process. */
-  if( pDb->pDatabase==0 ) return 0;
-  return pDb->pDatabase->bMultiProc!=0;
-}
-
-
-/*************************************************************************
-**************************************************************************
-**************************************************************************
-**************************************************************************
-**************************************************************************
-*************************************************************************/
-
-/*
-** Ensure that database connection db has cached pointers to at least the
-** first nChunk chunks of shared memory.
-*/
-int lsmShmCacheChunks(lsm_db *db, int nChunk){
-  int rc = LSM_OK;
-  if( nChunk>db->nShm ){
-    static const int NINCR = 16;
-    Database *p = db->pDatabase;
-    lsm_env *pEnv = db->pEnv;
-    int nAlloc;
-    int i;
-
-    /* Ensure that the db->apShm[] array is large enough. If an attempt to
-    ** allocate memory fails, return LSM_NOMEM immediately. The apShm[] array
-    ** is always extended in multiples of 16 entries - so the actual allocated
-    ** size can be inferred from nShm.  */
-    nAlloc = ((db->nShm + NINCR - 1) / NINCR) * NINCR;
-    while( nChunk>=nAlloc ){
-      void **apShm;
-      nAlloc += NINCR;
-      apShm = lsmRealloc(pEnv, db->apShm, sizeof(void*)*nAlloc);
-      if( !apShm ) return LSM_NOMEM_BKPT;
-      db->apShm = apShm;
-    }
-
-    if( db->bRoTrans ){
-      /* Read-only transaction: each chunk is a zeroed heap allocation
-      ** made with lsmMallocZeroRc(). */
-      for(i=db->nShm; rc==LSM_OK && i<nChunk; i++){
-        db->apShm[i] = lsmMallocZeroRc(pEnv, LSM_SHM_CHUNK_SIZE, &rc);
-        db->nShm++;
-      }
-
-    }else{
-
-      /* Enter the client mutex */
-      lsmMutexEnter(pEnv, p->pClientMutex);
-
-      /* Extend the Database objects apShmChunk[] array if necessary. Using the
-      ** same pattern as for the lsm_db.apShm[] array above.  */
-      nAlloc = ((p->nShmChunk + NINCR - 1) / NINCR) * NINCR;
-      while( nChunk>=nAlloc ){
-        void **apShm;
-        nAlloc += NINCR;
-        apShm = lsmRealloc(pEnv, p->apShmChunk, sizeof(void*)*nAlloc);
-        if( !apShm ){
-          rc = LSM_NOMEM_BKPT;
-          break;
-        }
-        p->apShmChunk = apShm;
-      }
-
-      /* Chunks the Database object does not have yet are allocated or
-      ** mapped first; the pointer is then copied into db->apShm[]. */
-      for(i=db->nShm; rc==LSM_OK && i<nChunk; i++){
-        if( i>=p->nShmChunk ){
-          void *pChunk = 0;
-          if( p->bMultiProc==0 ){
-            /* Single process mode */
-            pChunk = lsmMallocZeroRc(pEnv, LSM_SHM_CHUNK_SIZE, &rc);
-          }else{
-            /* Multi-process mode */
-            rc = lsmEnvShmMap(pEnv, p->pFile, i, LSM_SHM_CHUNK_SIZE, &pChunk);
-          }
-          if( rc==LSM_OK ){
-            p->apShmChunk[i] = pChunk;
-            p->nShmChunk++;
-          }
-        }
-        if( rc==LSM_OK ){
-          db->apShm[i] = p->apShmChunk[i];
-          db->nShm++;
-        }
-      }
-
-      /* Release the client mutex */
-      lsmMutexLeave(pEnv, p->pClientMutex);
-    }
-  }
-
-  return rc;
-}
-
-/*
-** Apply lock operation eOp to lock iLock of the file p->pFile using
-** lsmEnvLock(), but only in multi-process mode. In single-process mode
-** this is a no-op that returns LSM_OK.
-*/
-static int lockSharedFile(lsm_env *pEnv, Database *p, int iLock, int eOp){
-  if( p->bMultiProc==0 ) return LSM_OK;
-  return lsmEnvLock(pEnv, p->pFile, iLock, eOp);
-}
-
-/*
-** Test if it would be possible for connection db to obtain a lock of type
-** eType on the nLock locks starting at iLock. If so, return LSM_OK. If it
-** would not be possible to obtain the lock due to a lock held by another
-** connection, return LSM_BUSY. If an IO or other error occurs (i.e. in the
-** lsm_env.xTestLock function), return some other LSM error code.
-**
-** Note that this function never actually locks the database - it merely
-** queries the system to see if there exists a lock that would prevent
-** it from doing so.
-**
-** Locks held by other connections in this process are checked against
-** their mLock masks, covering every lock in the range iLock through
-** iLock+nLock-1. In multi-process mode the file is then queried with
-** lsmEnvTestLock().
-*/
-int lsmShmTestLock(
-  lsm_db *db,
-  int iLock,
-  int nLock,
-  int eOp
-){
-  int rc = LSM_OK;
-  lsm_db *pIter;
-  Database *p = db->pDatabase;
-  int i;
-  u64 mask = 0;
-
-  /* Build the mask of conflicting bits, one lock at a time. For lock i,
-  ** bit (i-1) is set while a connection holds it exclusively and bit
-  ** (i+32-1) while a connection holds any lock on it. A SHARED request
-  ** conflicts only with the former, an EXCLUSIVE request with both. */
-  for(i=iLock; i<(iLock+nLock); i++){
-    mask |= ((u64)1 << (i-1));
-    if( eOp==LSM_LOCK_EXCL ) mask |= ((u64)1 << (i+32-1));
-  }
-
-  lsmMutexEnter(db->pEnv, p->pClientMutex);
-  for(pIter=p->pConn; pIter; pIter=pIter->pNext){
-    if( pIter!=db && (pIter->mLock & mask) ){
-      assert( pIter!=db );
-      break;
-    }
-  }
-
-  if( pIter ){
-    rc = LSM_BUSY;
-  }else if( p->bMultiProc ){
-    rc = lsmEnvTestLock(db->pEnv, p->pFile, iLock, nLock, eOp);
-  }
-
-  lsmMutexLeave(db->pEnv, p->pClientMutex);
-  return rc;
-}
-
-/*
-** Attempt to obtain the lock identified by the iLock and eOp parameters.
-** If successful, return LSM_OK. If the lock cannot be obtained because
-** there exists some other conflicting lock, return LSM_BUSY. If some other
-** error occurs, return an LSM error code.
-**
-** Parameter iLock must be one of LSM_LOCK_WRITER, WORKER or CHECKPOINTER,
-** or else a value returned by the LSM_LOCK_READER macro.
-**
-** The locks held by a connection are recorded in its mLock bitmask. For
-** lock iLock, bit (iLock-1) ("me") is set while the connection holds the
-** lock exclusively and bit (iLock+32-1) ("ms") is set while it holds the
-** lock in either SHARED or EXCLUSIVE mode.
-**
-** Locks held by other connections in the same process are resolved from
-** their mLock masks under the client mutex. lockSharedFile() is called
-** only when no other connection in this process already holds the lock in
-** a way that covers the request.
-**
-** Parameter bBlock is not referenced by this function body.
-*/
-int lsmShmLock(
- lsm_db *db,
- int iLock,
- int eOp, /* One of LSM_LOCK_UNLOCK, SHARED or EXCL */
- int bBlock /* True for a blocking lock */
-){
- lsm_db *pIter;
- const u64 me = ((u64)1 << (iLock-1));
- const u64 ms = ((u64)1 << (iLock+32-1));
- int rc = LSM_OK;
- Database *p = db->pDatabase;
-
- assert( eOp!=LSM_LOCK_EXCL || p->bReadonly==0 );
- assert( iLock>=1 && iLock<=LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1) );
- assert( LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1)<=32 );
- assert( eOp==LSM_LOCK_UNLOCK || eOp==LSM_LOCK_SHARED || eOp==LSM_LOCK_EXCL );
-
- /* Check for a no-op. Proceed only if this is not one of those. */
- if( (eOp==LSM_LOCK_UNLOCK && (db->mLock & (me|ms))!=0)
- || (eOp==LSM_LOCK_SHARED && (db->mLock & (me|ms))!=ms)
- || (eOp==LSM_LOCK_EXCL && (db->mLock & me)==0)
- ){
- int nExcl = 0; /* Number of connections holding EXCLUSIVE */
- int nShared = 0; /* Number of connections holding SHARED */
- lsmMutexEnter(db->pEnv, p->pClientMutex);
-
- /* Figure out the locks currently held by this process on iLock, not
- ** including any held by connection db. */
- for(pIter=p->pConn; pIter; pIter=pIter->pNext){
- assert( (pIter->mLock & me)==0 || (pIter->mLock & ms)!=0 );
- if( pIter!=db ){
- if( pIter->mLock & me ){
- nExcl++;
- }else if( pIter->mLock & ms ){
- nShared++;
- }
- }
- }
- assert( nExcl==0 || nExcl==1 );
- assert( nExcl==0 || nShared==0 );
- assert( nExcl==0 || (db->mLock & (me|ms))==0 );
-
- switch( eOp ){
- case LSM_LOCK_UNLOCK:
- if( nShared==0 ){
- lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_UNLOCK); /* no other local holder: release the file lock too */
- }
- db->mLock &= ~(me|ms);
- break;
-
- case LSM_LOCK_SHARED:
- if( nExcl ){
- rc = LSM_BUSY;
- }else{
- if( nShared==0 ){
- rc = lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_SHARED);
- }
- if( rc==LSM_OK ){
- db->mLock |= ms;
- db->mLock &= ~me; /* clears the exclusive bit, e.g. when downgrading from EXCL */
- }
- }
- break;
-
- default:
- assert( eOp==LSM_LOCK_EXCL );
- if( nExcl || nShared ){
- rc = LSM_BUSY;
- }else{
- rc = lockSharedFile(db->pEnv, p, iLock, LSM_LOCK_EXCL);
- if( rc==LSM_OK ){
- db->mLock |= (me|ms);
- }
- }
- break;
- }
-
- lsmMutexLeave(db->pEnv, p->pClientMutex);
- }
-
- return rc;
-}
-
-#ifdef LSM_DEBUG
-
-/*
-** Return the type of lock that connection db currently records on lock
-** iLock in its mLock bitmask: LSM_LOCK_EXCL if the exclusive bit
-** (iLock-1) is set, else LSM_LOCK_SHARED if the shared bit (iLock+32-1)
-** is set, else LSM_LOCK_UNLOCK.
-*/
-int shmLockType(lsm_db *db, int iLock){
-  const u64 mExcl = ((u64)1 << (iLock-1));
-  const u64 mShared = ((u64)1 << (iLock+32-1));
-  int eType = LSM_LOCK_UNLOCK;
-
-  if( db->mLock & mExcl ){
-    eType = LSM_LOCK_EXCL;
-  }else if( db->mLock & mShared ){
-    eType = LSM_LOCK_SHARED;
-  }
-  return eType;
-}
-
-/*
-** The arguments passed to this function are similar to those passed to
-** the lsmShmLock() function. However, instead of obtaining a new lock
-** this function returns true if the specified connection already holds
-** (or does not hold) such a lock, depending on the value of eOp. As
-** follows:
-**
-**   (eOp==LSM_LOCK_UNLOCK) -> true if db has no lock on iLock
-**   (eOp==LSM_LOCK_SHARED) -> true if db has at least a SHARED lock on iLock.
-**   (eOp==LSM_LOCK_EXCL)   -> true if db has an EXCLUSIVE lock on iLock.
-**
-** Any iLock value accepted by lsmShmLock() is accepted here as well, i.e.
-** 1 through LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1) inclusive. The answer
-** is derived solely from db->mLock (via shmLockType()); no mutex is taken
-** and no lock is acquired or released.
-*/
-int lsmShmAssertLock(lsm_db *db, int iLock, int eOp){
-  int ret = 0;
-  int eHave;
-
-  /* Keep these range checks in step with those in lsmShmLock(), so that
-  ** every lock that can be taken can also be asserted. */
-  assert( iLock>=1 && iLock<=LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1) );
-  assert( LSM_LOCK_RWCLIENT(LSM_LOCK_NRWCLIENT-1)<=32 );
-  assert( eOp==LSM_LOCK_UNLOCK || eOp==LSM_LOCK_SHARED || eOp==LSM_LOCK_EXCL );
-
-  eHave = shmLockType(db, iLock);
-
-  switch( eOp ){
-    case LSM_LOCK_UNLOCK:
-      ret = (eHave==LSM_LOCK_UNLOCK);
-      break;
-    case LSM_LOCK_SHARED:
-      /* An EXCLUSIVE lock also satisfies a SHARED assertion */
-      ret = (eHave!=LSM_LOCK_UNLOCK);
-      break;
-    case LSM_LOCK_EXCL:
-      ret = (eHave==LSM_LOCK_EXCL);
-      break;
-    default:
-      assert( !"bad eOp value passed to lsmShmAssertLock()" );
-      break;
-  }
-
-  return ret;
-}
-
-/*
-** Return true (1) if connection db holds an EXCLUSIVE lock on
-** LSM_LOCK_WORKER and db->pWorker is non-NULL. Otherwise return 0.
-*/
-int lsmShmAssertWorker(lsm_db *db){
-  if( !lsmShmAssertLock(db, LSM_LOCK_WORKER, LSM_LOCK_EXCL) ){
-    return 0;
-  }
-  return db->pWorker!=0;
-}
-
-/*
-** This function does not contribute to library functionality, and is not
-** included in release builds. It is intended to be called from within
-** an interactive debugger.
-**
-** When called, this function prints a single line of human readable output
-** to stdout describing the locks currently held by the connection. For
-** example:
-**
-**     (gdb) call print_db_locks(pDb)
-**     (shared on dms2) (exclusive on writer)
-**
-** Lock numbers run from 1 to 32, matching the range that the mLock
-** bitmask can represent (see shmLockType()). Lock 0 does not exist and
-** must not be passed to shmLockType(), as that would shift by a negative
-** amount. Locks that have no entry in azName[] are printed as "lock<N>".
-*/
-void print_db_locks(lsm_db *db){
-  /* Indexed by lock type. This relies on LSM_LOCK_UNLOCK, LSM_LOCK_SHARED
-  ** and LSM_LOCK_EXCL having the values 0, 1 and 2, as the original code
-  ** did. */
-  static const char *azLock[] = {0, "shared", "exclusive"};
-  static const char *azName[] = {
-    0, "dms1", "dms2", "writer", "worker", "checkpointer",
-    "reader0", "reader1", "reader2", "reader3", "reader4", "reader5"
-  };
-  const int nName = (int)(sizeof(azName)/sizeof(azName[0]));
-  int bOne = 0;                   /* True once one lock has been printed */
-  int iLock;
-
-  for(iLock=1; iLock<=32; iLock++){
-    int eHave = shmLockType(db, iLock);
-    if( azLock[eHave] ){
-      const char *zSep = (bOne ? " " : "");
-      if( iLock<nName ){
-        printf("%s(%s on %s)", zSep, azLock[eHave], azName[iLock]);
-      }else{
-        /* No symbolic name available - print the lock number instead */
-        printf("%s(%s on lock%d)", zSep, azLock[eHave], iLock);
-      }
-      bOne = 1;
-    }
-  }
-  printf("\n");
-}
-/*
-** Debugging aid. For each connection in the db->pDatabase->pConn list,
-** print one line to stdout consisting of the connection pointer followed
-** by the output of print_db_locks() for that connection. The line for
-** connection db itself is prefixed with "*".
-*/
-void print_all_db_locks(lsm_db *db){
-  lsm_db *p;
-  for(p=db->pDatabase->pConn; p; p=p->pNext){
-    /* The %p conversion requires a (void*) argument */
-    printf("%s connection %p ", ((p==db)?"*":""), (void*)p);
-    print_db_locks(p);
-  }
-}
-#endif
-
-void lsmShmBarrier(lsm_db *db){ /* Thin wrapper: forwards to
- ** lsmEnvShmBarrier() using the environment handle (db->pEnv) of the
- ** connection. NOTE(review): presumably a memory barrier for accesses
- ** to shared memory - confirm against lsmEnvShmBarrier(). */
- lsmEnvShmBarrier(db->pEnv);
-}
-
-/*
-** Attempt to write a checkpoint by calling lsmCheckpointWrite() and return
-** its result code.
-**
-** If pnKB is not NULL, *pnKB is always written. It is set to the number
-** of pages reported by lsmCheckpointWrite() multiplied by the page size
-** returned by lsmFsPageSize(), converted to KB and rounded up. It is set
-** to zero if an error occurred or if no pages were reported.
-*/
-int lsm_checkpoint(lsm_db *pDb, int *pnKB){
-  u32 nPage = 0;                  /* Number of pages checkpointed */
-  int rc = lsmCheckpointWrite(pDb, &nPage);
-
-  if( pnKB ){
-    if( rc==LSM_OK && nPage ){
-      /* Do the multiplication in 64 bits, then round up to whole KB */
-      *pnKB = (int)((((i64)nPage * lsmFsPageSize(pDb->pFS)) + 1023) / 1024);
-    }else{
-      *pnKB = 0;
-    }
-  }
-
-  return rc;
-}
diff --git a/ext/lsm1/lsm_sorted.c b/ext/lsm1/lsm_sorted.c
deleted file mode 100644
index a72c8cafb2..0000000000
--- a/ext/lsm1/lsm_sorted.c
+++ /dev/null
@@ -1,6195 +0,0 @@
-/*
-** 2011-08-14
-**
-** The author disclaims copyright to this source code. In place of
-** a legal notice, here is a blessing:
-**
-** May you do good and not evil.
-** May you find forgiveness for yourself and forgive others.
-** May you share freely, never taking more than you give.
-**
-*************************************************************************
-**
-** PAGE FORMAT:
-**
-** The maximum page size is 65536 bytes.
-**
-** Since all records are equal to or larger than 2 bytes in size, and
-** some space within the page is consumed by the page footer, there must
-** be less than 2^15 records on each page.
-**
-** Each page ends with a footer that describes the page's contents. This
-** footer serves a similar purpose to the page header in an SQLite database.
-** A footer is used instead of a header because it makes it easier to
-** populate a new page based on a sorted list of key/value pairs.
-**
-** The footer consists of the following values (starting at the end of
-** the page and continuing backwards towards the start). All values are
-** stored as unsigned big-endian integers.
-**
-** * Number of records on page (2 bytes).
-** * Flags field (2 bytes).
-** * Left-hand pointer value (8 bytes).
-** * The starting offset of each record (2 bytes per record).
-**
-** Records may span pages. Unless it happens to be an exact fit, the part
-** of the final record that starts on page X that does not fit on page X
-** is stored at the start of page (X+1). This means there may be pages where
-** (N==0). And on most pages the first record that starts on the page will
-** not start at byte offset 0. For example:
-**
-** aaaaa bbbbb ccc