]> git.ipfire.org Git - thirdparty/glibc.git/commitdiff
Update. cvs/libc-ud-970827
author Ulrich Drepper <drepper@redhat.com>
Wed, 27 Aug 1997 20:26:10 +0000 (20:26 +0000)
committer Ulrich Drepper <drepper@redhat.com>
Wed, 27 Aug 1997 20:26:10 +0000 (20:26 +0000)
1997-08-10 19:17  Philip Blundell  <Philip.Blundell@pobox.com>

* nss/nss_db/db-XXX.c: Include <db_185.h> not <db.h>.  Somebody
should update this to use the new db API.
* nss/nss_db/db-netgrp.c: Likewise.
* nss/nss_db/db-alias.c: Likewise.
* db2/Makefile: Makefile for db-2.x in glibc.

1997-08-27 21:20  Ulrich Drepper  <drepper@cygnus.com>

* csu/Makefile (before-compile): New goal.  Make sure abi-tag.h
is generated.
[$(elf)=yes] (asm-CPPFLAGS): Make sure abi-tag.h file can be found.

* Makeconfig [$(build-omitfp)=yes] (CFLAGS-.o): Add
-D__USE_STRING_INLINES.
* string/string.h: Move strnlen optimization after inclusion of
<bits/string.h>.  Include <bits/string.h> only if __USE_STRING_INLINES
is defined.
* sysdeps/generic/memcpy.c: Undef memcpy to allow macro of this name
in <bits/string.h>.
* sysdeps/generic/memset.c: Likewise.
* sysdeps/i386/bits/string.h: i386 optimized string functions.
* sysdeps/i386/i486/bits/string.h: i486+ optimized string functions.

* Makefile (subdirs): Change db to db2.
* shlib-versions: Bump libdb version number to 3.
* include/db.h: Include from db2 directory.
* include/db_185.h: New file.
* sysdeps/i386/Makefile [$(subdirs)=db2] (CPPFLAGS): Add macros
to provide spinlock information for db2.
* sysdeps/m68k/m68020/Makefile: New file.  Likewise.
* sysdeps/sparc/Makefile: New file.  Likewise.
* sysdeps/unix/sysv/linux/Makefile [$(subdirs)=db2] (CPPFLAGS):
Add -DHAVE_LLSEEK.
* db2/config.h: Hand-edited config file for db2 in glibc.
* db2/compat.h: New file from db-2.3.4.
* db2/db.h: Likewise.
* db2/db_185.h: Likewise.
* db2/db_int.h: Likewise.
* db2/makedb.c: Likewise.
* db2/btree/bt_close.c: Likewise.
* db2/btree/bt_compare.c: Likewise.
* db2/btree/bt_conv.c: Likewise.
* db2/btree/bt_cursor.c: Likewise.
* db2/btree/bt_delete.c: Likewise.
* db2/btree/bt_open.c: Likewise.
* db2/btree/bt_page.c: Likewise.
* db2/btree/bt_put.c: Likewise.
* db2/btree/bt_rec.c: Likewise.
* db2/btree/bt_recno.c: Likewise.
* db2/btree/btree_auto.c: Likewise.
* db2/btree/bt_rsearch.c: Likewise.
* db2/btree/bt_search.c: Likewise.
* db2/btree/bt_split.c: Likewise.
* db2/btree/bt_stat.c: Likewise.
* db2/btree/btree.src: Likewise.
* db2/common/db_appinit.c: Likewise.
* db2/common/db_err.c: Likewise.
* db2/common/db_byteorder.c: Likewise.
* db2/common/db_apprec.c: Likewise.
* db2/common/db_salloc.c: Likewise.
* db2/common/db_log2.c: Likewise.
* db2/common/db_region.c: Likewise.
* db2/common/db_shash.c: Likewise.
* db2/db/db.c: Likewise.
* db2/db/db.src: Likewise.
* db2/db/db_conv.c: Likewise.
* db2/db/db_dispatch.c: Likewise.
* db2/db/db_dup.c: Likewise.
* db2/db/db_overflow.c: Likewise.
* db2/db/db_pr.c: Likewise.
* db2/db/db_rec.c: Likewise.
* db2/db/db_ret.c: Likewise.
* db2/db/db_thread.c: Likewise.
* db2/db/db_auto.c: Likewise.
* db2/db185/db185.c: Likewise.
* db2/db185/db185_int.h: Likewise.
* db2/dbm/dbm.c: Likewise.
* db2/hash/hash.c: Likewise.
* db2/hash/hash.src: Likewise.
* db2/hash/hash_page.c: Likewise.
* db2/hash/hash_conv.c: Likewise.
* db2/hash/hash_debug.c: Likewise.
* db2/hash/hash_stat.c: Likewise.
* db2/hash/hash_rec.c: Likewise.
* db2/hash/hash_dup.c: Likewise.
* db2/hash/hash_func.c: Likewise.
* db2/hash/hash_auto.c: Likewise.
* db2/include/mp.h: Likewise.
* db2/include/btree.h: Likewise.
* db2/include/db.h.src: Likewise.
* db2/include/db_int.h.src: Likewise.
* db2/include/db_shash.h: Likewise.
* db2/include/db_swap.h: Likewise.
* db2/include/db_185.h.src: Likewise.
* db2/include/txn.h: Likewise.
* db2/include/db_am.h: Likewise.
* db2/include/shqueue.h: Likewise.
* db2/include/hash.h: Likewise.
* db2/include/db_dispatch.h: Likewise.
* db2/include/lock.h: Likewise.
* db2/include/db_page.h: Likewise.
* db2/include/log.h: Likewise.
* db2/include/db_auto.h: Likewise.
* db2/include/btree_auto.h: Likewise.
* db2/include/hash_auto.h: Likewise.
* db2/include/log_auto.h: Likewise.
* db2/include/txn_auto.h: Likewise.
* db2/include/db_ext.h: Likewise.
* db2/include/btree_ext.h: Likewise.
* db2/include/clib_ext.h: Likewise.
* db2/include/common_ext.h: Likewise.
* db2/include/hash_ext.h: Likewise.
* db2/include/lock_ext.h: Likewise.
* db2/include/log_ext.h: Likewise.
* db2/include/mp_ext.h: Likewise.
* db2/include/mutex_ext.h: Likewise.
* db2/include/os_ext.h: Likewise.
* db2/include/txn_ext.h: Likewise.
* db2/include/cxx_int.h: Likewise.
* db2/include/db_cxx.h: Likewise.
* db2/include/queue.h: Likewise.
* db2/lock/lock.c: Likewise.
* db2/lock/lock_conflict.c: Likewise.
* db2/lock/lock_util.c: Likewise.
* db2/lock/lock_deadlock.c: Likewise.
* db2/log/log.c: Likewise.
* db2/log/log_get.c: Likewise.
* db2/log/log.src: Likewise.
* db2/log/log_compare.c: Likewise.
* db2/log/log_put.c: Likewise.
* db2/log/log_rec.c: Likewise.
* db2/log/log_archive.c: Likewise.
* db2/log/log_register.c: Likewise.
* db2/log/log_auto.c: Likewise.
* db2/log/log_findckp.c: Likewise.
* db2/mp/mp_bh.c: Likewise.
* db2/mp/mp_fget.c: Likewise.
* db2/mp/mp_fopen.c: Likewise.
* db2/mp/mp_fput.c: Likewise.
* db2/mp/mp_fset.c: Likewise.
* db2/mp/mp_open.c: Likewise.
* db2/mp/mp_region.c: Likewise.
* db2/mp/mp_pr.c: Likewise.
* db2/mp/mp_sync.c: Likewise.
* db2/mutex/68020.gcc: Likewise.
* db2/mutex/mutex.c: Likewise.
* db2/mutex/README: Likewise.
* db2/mutex/x86.gcc: Likewise.
* db2/mutex/sparc.gcc: Likewise.
* db2/mutex/uts4.cc.s: Likewise.
* db2/mutex/alpha.dec: Likewise.
* db2/mutex/alpha.gcc: Likewise.
* db2/mutex/parisc.gcc: Likewise.
* db2/mutex/parisc.hp: Likewise.
* db2/os/db_os_abs.c: Likewise.
* db2/os/db_os_dir.c: Likewise.
* db2/os/db_os_fid.c: Likewise.
* db2/os/db_os_lseek.c: Likewise.
* db2/os/db_os_mmap.c: Likewise.
* db2/os/db_os_open.c: Likewise.
* db2/os/db_os_rw.c: Likewise.
* db2/os/db_os_sleep.c: Likewise.
* db2/os/db_os_stat.c: Likewise.
* db2/os/db_os_unlink.c: Likewise.
* db2/txn/txn.c: Likewise.
* db2/txn/txn.src: Likewise.
* db2/txn/txn_rec.c: Likewise.
* db2/txn/txn_auto.c: Likewise.
* db2/clib/getlong.c: Likewise.
* db2/progs/db_archive/db_archive.c: Likewise.
* db2/progs/db_checkpoint/db_checkpoint.c: Likewise.
* db2/progs/db_deadlock/db_deadlock.c: Likewise.
* db2/progs/db_dump/db_dump.c: Likewise.
* db2/progs/db_dump185/db_dump185.c: Likewise.
* db2/progs/db_load/db_load.c: Likewise.
* db2/progs/db_printlog/db_printlog.c: Likewise.
* db2/progs/db_recover/db_recover.c: Likewise.
* db2/progs/db_stat/db_stat.c: Likewise.

* libio/stdio.h [__cplusplus] (__STDIO_INLINE): Define as inline.

* po/de.po, po/sv.po: Update from 2.0.5 translations.

* sysdeps/unix/sysv/linux/netinet/tcp.h: Pretty print.

* sunrpc/rpc/xdr.h (XDR): Don't define argument of x_destroy callback
as const.
* sunrpc/xdr_mem.c (xdrmem_destroy): Don't define argument as const.
* sunrpc/xdr_rec.c (xdrrec_destroy): Likewise.
* sunrpc/xdr_stdio.c (xdrstdio_destroy): Likewise.

1997-08-27 18:47  Ulrich Drepper  <drepper@cygnus.com>

* sysdeps/unix/sysv/linux/if_index.c: Include <errno.h>.
Reported by Benjamin Kosnik <bkoz@cygnus.com>.

1997-08-27 02:27  Roland McGrath  <roland@baalperazim.frob.com>

* abi-tags: New file.
* csu/Makefile (distribute): Remove abi-tag.h.
($(objpfx)abi-tag.h): New target.
* Makefile (distribute): Add abi-tags.
* sysdeps/unix/sysv/linux/abi-tag.h: File removed.
* sysdeps/mach/hurd/abi-tag.h: File removed.
* sysdeps/stub/abi-tag.h: File removed.

1997-08-25  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

* sysdeps/unix/make-syscalls.sh: Change output so that it
generates compilation rules only for the currently selected object
suffixes.

1997-08-25  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

* sysdeps/m68k/dl-machine.h (RTLD_START): Switch back to previous
section to avoid confusing the compiler.
* sysdeps/alpha/dl-machine.h (RTLD_START): Likewise.
* sysdeps/i386/dl-machine.h (RTLD_START): Likewise.
* sysdeps/mips/dl-machine.h (RTLD_START): Likewise.
* sysdeps/mips/mips64/dl-machine.h (RTLD_START): Likewise.
* sysdeps/sparc/sparc32/dl-machine.h (RTLD_START): Likewise.

* sysdeps/m68k/dl-machine.h (elf_machine_load_address): Use a GOT
relocation instead of a constant to avoid text relocation.
(ELF_MACHINE_BEFORE_RTLD_RELOC): Removed.
(RTLD_START): Declare global labels as functions and add size
directive.

1997-08-25 17:01  Ulrich Drepper  <drepper@cygnus.com>

* sysdeps/i386/bits/select.h: Correct assembler versions to work even
for descriptors >= 32.

* stdlib/alloca.h: Don't define alloca to __alloca since if gcc
is used __alloca is not defined to __builtin_alloca and so might
not be available.
Reported by Uwe Ohse <uwe@ohse.de>.

* sysdeps/unix/sysv/linux/sys/sysmacros.h: Define macros in a special
way if gcc is not used and so dev_t is an array.
Reported by Uwe Ohse <uwe@ohse.de>.

1997-08-23  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>

* manual/libc.texinfo: Reorder chapters to match logical order.

1997-08-25 12:22  Ulrich Drepper  <drepper@cygnus.com>

* sunrpc/rpc/xdr.h: Change name of parameters in prototypes of
xdr_reference, xdrmem_create, and xdrstdio_create because of clash
with g++ internal symbols.
Patch by Sudish Joseph <sj@eng.mindspring.net>.

* elf/dl-deps.c: Implement handling of DT_FILTER.

192 files changed:
ChangeLog
INSTALL
Makeconfig
Makefile
abi-tags [new file with mode: 0644]
config.guess
csu/Makefile
db2/Makefile [new file with mode: 0644]
db2/btree/bt_close.c [new file with mode: 0644]
db2/btree/bt_compare.c [new file with mode: 0644]
db2/btree/bt_conv.c [new file with mode: 0644]
db2/btree/bt_cursor.c [new file with mode: 0644]
db2/btree/bt_delete.c [new file with mode: 0644]
db2/btree/bt_open.c [new file with mode: 0644]
db2/btree/bt_page.c [new file with mode: 0644]
db2/btree/bt_put.c [new file with mode: 0644]
db2/btree/bt_rec.c [new file with mode: 0644]
db2/btree/bt_recno.c [new file with mode: 0644]
db2/btree/bt_rsearch.c [new file with mode: 0644]
db2/btree/bt_search.c [new file with mode: 0644]
db2/btree/bt_split.c [new file with mode: 0644]
db2/btree/bt_stat.c [new file with mode: 0644]
db2/btree/btree.src [new file with mode: 0644]
db2/btree/btree_auto.c [new file with mode: 0644]
db2/clib/getlong.c [new file with mode: 0644]
db2/common/db_appinit.c [new file with mode: 0644]
db2/common/db_apprec.c [new file with mode: 0644]
db2/common/db_byteorder.c [new file with mode: 0644]
db2/common/db_err.c [new file with mode: 0644]
db2/common/db_log2.c [new file with mode: 0644]
db2/common/db_region.c [new file with mode: 0644]
db2/common/db_salloc.c [new file with mode: 0644]
db2/common/db_shash.c [new file with mode: 0644]
db2/compat.h [new file with mode: 0644]
db2/config.h [new file with mode: 0644]
db2/db.h [new file with mode: 0644]
db2/db/db.c [new file with mode: 0644]
db2/db/db.src [new file with mode: 0644]
db2/db/db_auto.c [new file with mode: 0644]
db2/db/db_conv.c [new file with mode: 0644]
db2/db/db_dispatch.c [new file with mode: 0644]
db2/db/db_dup.c [new file with mode: 0644]
db2/db/db_overflow.c [new file with mode: 0644]
db2/db/db_pr.c [new file with mode: 0644]
db2/db/db_rec.c [new file with mode: 0644]
db2/db/db_ret.c [new file with mode: 0644]
db2/db/db_thread.c [new file with mode: 0644]
db2/db185/db185.c [new file with mode: 0644]
db2/db185/db185_int.h [new file with mode: 0644]
db2/db_185.h [new file with mode: 0644]
db2/db_int.h [new file with mode: 0644]
db2/dbm/dbm.c [new file with mode: 0644]
db2/hash/hash.c [new file with mode: 0644]
db2/hash/hash.src [new file with mode: 0644]
db2/hash/hash_auto.c [new file with mode: 0644]
db2/hash/hash_conv.c [new file with mode: 0644]
db2/hash/hash_debug.c [new file with mode: 0644]
db2/hash/hash_dup.c [new file with mode: 0644]
db2/hash/hash_func.c [new file with mode: 0644]
db2/hash/hash_page.c [new file with mode: 0644]
db2/hash/hash_rec.c [new file with mode: 0644]
db2/hash/hash_stat.c [new file with mode: 0644]
db2/include/btree.h [new file with mode: 0644]
db2/include/btree_auto.h [new file with mode: 0644]
db2/include/btree_ext.h [new file with mode: 0644]
db2/include/clib_ext.h [new file with mode: 0644]
db2/include/common_ext.h [new file with mode: 0644]
db2/include/cxx_int.h [new file with mode: 0644]
db2/include/db.h.src [new file with mode: 0644]
db2/include/db_185.h.src [new file with mode: 0644]
db2/include/db_am.h [new file with mode: 0644]
db2/include/db_auto.h [new file with mode: 0644]
db2/include/db_cxx.h [new file with mode: 0644]
db2/include/db_dispatch.h [new file with mode: 0644]
db2/include/db_ext.h [new file with mode: 0644]
db2/include/db_int.h.src [new file with mode: 0644]
db2/include/db_page.h [new file with mode: 0644]
db2/include/db_shash.h [new file with mode: 0644]
db2/include/db_swap.h [new file with mode: 0644]
db2/include/hash.h [new file with mode: 0644]
db2/include/hash_auto.h [new file with mode: 0644]
db2/include/hash_ext.h [new file with mode: 0644]
db2/include/lock.h [new file with mode: 0644]
db2/include/lock_ext.h [new file with mode: 0644]
db2/include/log.h [new file with mode: 0644]
db2/include/log_auto.h [new file with mode: 0644]
db2/include/log_ext.h [new file with mode: 0644]
db2/include/mp.h [new file with mode: 0644]
db2/include/mp_ext.h [new file with mode: 0644]
db2/include/mutex_ext.h [new file with mode: 0644]
db2/include/os_ext.h [new file with mode: 0644]
db2/include/queue.h [new file with mode: 0644]
db2/include/shqueue.h [new file with mode: 0644]
db2/include/txn.h [new file with mode: 0644]
db2/include/txn_auto.h [new file with mode: 0644]
db2/include/txn_ext.h [new file with mode: 0644]
db2/lock/lock.c [new file with mode: 0644]
db2/lock/lock_conflict.c [new file with mode: 0644]
db2/lock/lock_deadlock.c [new file with mode: 0644]
db2/lock/lock_util.c [new file with mode: 0644]
db2/log/log.c [new file with mode: 0644]
db2/log/log.src [new file with mode: 0644]
db2/log/log_archive.c [new file with mode: 0644]
db2/log/log_auto.c [new file with mode: 0644]
db2/log/log_compare.c [new file with mode: 0644]
db2/log/log_findckp.c [new file with mode: 0644]
db2/log/log_get.c [new file with mode: 0644]
db2/log/log_put.c [new file with mode: 0644]
db2/log/log_rec.c [new file with mode: 0644]
db2/log/log_register.c [new file with mode: 0644]
db2/makedb.c [new file with mode: 0644]
db2/mp/mp_bh.c [new file with mode: 0644]
db2/mp/mp_fget.c [new file with mode: 0644]
db2/mp/mp_fopen.c [new file with mode: 0644]
db2/mp/mp_fput.c [new file with mode: 0644]
db2/mp/mp_fset.c [new file with mode: 0644]
db2/mp/mp_open.c [new file with mode: 0644]
db2/mp/mp_pr.c [new file with mode: 0644]
db2/mp/mp_region.c [new file with mode: 0644]
db2/mp/mp_sync.c [new file with mode: 0644]
db2/mutex/68020.gcc [new file with mode: 0644]
db2/mutex/README [new file with mode: 0644]
db2/mutex/alpha.dec [new file with mode: 0644]
db2/mutex/alpha.gcc [new file with mode: 0644]
db2/mutex/mutex.c [new file with mode: 0644]
db2/mutex/parisc.gcc [new file with mode: 0644]
db2/mutex/parisc.hp [new file with mode: 0644]
db2/mutex/sparc.gcc [new file with mode: 0644]
db2/mutex/uts4.cc.s [new file with mode: 0644]
db2/mutex/x86.gcc [new file with mode: 0644]
db2/os/db_os_abs.c [new file with mode: 0644]
db2/os/db_os_dir.c [new file with mode: 0644]
db2/os/db_os_fid.c [new file with mode: 0644]
db2/os/db_os_lseek.c [new file with mode: 0644]
db2/os/db_os_mmap.c [new file with mode: 0644]
db2/os/db_os_open.c [new file with mode: 0644]
db2/os/db_os_rw.c [new file with mode: 0644]
db2/os/db_os_sleep.c [new file with mode: 0644]
db2/os/db_os_stat.c [new file with mode: 0644]
db2/os/db_os_unlink.c [new file with mode: 0644]
db2/progs/db_archive/db_archive.c [new file with mode: 0644]
db2/progs/db_checkpoint/db_checkpoint.c [new file with mode: 0644]
db2/progs/db_deadlock/db_deadlock.c [new file with mode: 0644]
db2/progs/db_dump/db_dump.c [new file with mode: 0644]
db2/progs/db_dump185/db_dump185.c [new file with mode: 0644]
db2/progs/db_load/db_load.c [new file with mode: 0644]
db2/progs/db_printlog/db_printlog.c [new file with mode: 0644]
db2/progs/db_recover/db_recover.c [new file with mode: 0644]
db2/progs/db_stat/db_stat.c [new file with mode: 0644]
db2/txn/txn.c [new file with mode: 0644]
db2/txn/txn.src [new file with mode: 0644]
db2/txn/txn_auto.c [new file with mode: 0644]
db2/txn/txn_rec.c [new file with mode: 0644]
include/db.h
include/db_185.h [new file with mode: 0644]
libio/stdio.h
manual/libc.texinfo
nss/nss_db/db-XXX.c
nss/nss_db/db-alias.c
nss/nss_db/db-netgrp.c
po/de.po
po/sv.po
shlib-versions
stdlib/alloca.h
string/argz.h
string/string.h
sunrpc/rpc/xdr.h
sunrpc/xdr_mem.c
sunrpc/xdr_rec.c
sunrpc/xdr_stdio.c
sysdeps/alpha/dl-machine.h
sysdeps/generic/memcpy.c
sysdeps/generic/memset.c
sysdeps/i386/Makefile
sysdeps/i386/bits/select.h
sysdeps/i386/bits/string.h [new file with mode: 0644]
sysdeps/i386/dl-machine.h
sysdeps/i386/i486/bits/string.h [new file with mode: 0644]
sysdeps/m68k/dl-machine.h
sysdeps/m68k/m68020/Makefile [new file with mode: 0644]
sysdeps/mach/hurd/abi-tag.h [deleted file]
sysdeps/mips/dl-machine.h
sysdeps/mips/mips64/dl-machine.h
sysdeps/sparc/Makefile [new file with mode: 0644]
sysdeps/sparc/sparc32/dl-machine.h
sysdeps/stub/abi-tag.h [deleted file]
sysdeps/unix/make-syscalls.sh
sysdeps/unix/sysv/linux/Makefile
sysdeps/unix/sysv/linux/abi-tag.h [deleted file]
sysdeps/unix/sysv/linux/if_index.c
sysdeps/unix/sysv/linux/netinet/tcp.h
sysdeps/unix/sysv/linux/sys/sysmacros.h

index 6d1e267c85dc31d1963a9738db527a50b1b36d08..95dde68ad906269cc45ce6bee58801def342a998 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,258 @@
+1997-08-10 19:17  Philip Blundell  <Philip.Blundell@pobox.com>
+
+       * nss/nss_db/db-XXX.c: Include <db_185.h> not <db.h>.  Somebody
+       should update this to use the new db API.
+       * nss/nss_db/db-netgrp.c: Likewise.
+       * nss/nss_db/db-alias.c: Likewise.
+       * db2/Makefile: Makefile for db-2.x in glibc.
+
+1997-08-27 21:20  Ulrich Drepper  <drepper@cygnus.com>
+
+       * csu/Makefile (before-compile): New goal.  Make sure abi-tag.h
+       is generated.
+       [$(elf)=yes] (asm-CPPFLAGS): Make sure abi-tag.h file can be found.
+
+       * Makeconfig [$(build-omitfp)=yes] (CFLAGS-.o): Add
+       -D__USE_STRING_INLINES.
+       * string/string.h: Move strnlen optimization after inclusion of
+       <bits/string.h>.  Include <bits/string.h> only if __USE_STRING_INLINES
+       is defined.
+       * sysdeps/generic/memcpy.c: Undef memcpy to allow macro of this name
+       in <bits/string.h>.
+       * sysdeps/generic/memset.c: Likewise.
+       * sysdeps/i386/bits/string.h: i386 optimized string functions.
+       * sysdeps/i386/i486/bits/string.h: i486+ optimized string functions.
+
+       * Makefile (subdirs): Change db to db2.
+       * shlib-versions: Bump libdb version number to 3.
+       * include/db.h: Include from db2 directory.
+       * include/db_185.h: New file.
+       * sysdeps/i386/Makefile [$(subdirs)=db2] (CPPFLAGS): Add macros
+       to provide spinlock information for db2.
+       * sysdeps/m68k/m68020/Makefile: New file.  Likewise.
+       * sysdeps/sparc/Makefile: New file.  Likewise.
+       * sysdeps/unix/sysv/linux/Makefile [$(subdirs)=db2] (CPPFLAGS):
+       Add -DHAVE_LLSEEK.
+       * db2/config.h: Hand-edited config file for db2 in glibc.
+       * db2/compat.h: New file from db-2.3.4.
+       * db2/db.h: Likewise.
+       * db2/db_185.h: Likewise.
+       * db2/db_int.h: Likewise.
+       * db2/makedb.c: Likewise.
+       * db2/btree/bt_close.c: Likewise.
+       * db2/btree/bt_compare.c: Likewise.
+       * db2/btree/bt_conv.c: Likewise.
+       * db2/btree/bt_cursor.c: Likewise.
+       * db2/btree/bt_delete.c: Likewise.
+       * db2/btree/bt_open.c: Likewise.
+       * db2/btree/bt_page.c: Likewise.
+       * db2/btree/bt_put.c: Likewise.
+       * db2/btree/bt_rec.c: Likewise.
+       * db2/btree/bt_recno.c: Likewise.
+       * db2/btree/btree_auto.c: Likewise.
+       * db2/btree/bt_rsearch.c: Likewise.
+       * db2/btree/bt_search.c: Likewise.
+       * db2/btree/bt_split.c: Likewise.
+       * db2/btree/bt_stat.c: Likewise.
+       * db2/btree/btree.src: Likewise.
+       * db2/common/db_appinit.c: Likewise.
+       * db2/common/db_err.c: Likewise.
+       * db2/common/db_byteorder.c: Likewise.
+       * db2/common/db_apprec.c: Likewise.
+       * db2/common/db_salloc.c: Likewise.
+       * db2/common/db_log2.c: Likewise.
+       * db2/common/db_region.c: Likewise.
+       * db2/common/db_shash.c: Likewise.
+       * db2/db/db.c: Likewise.
+       * db2/db/db.src: Likewise.
+       * db2/db/db_conv.c: Likewise.
+       * db2/db/db_dispatch.c: Likewise.
+       * db2/db/db_dup.c: Likewise.
+       * db2/db/db_overflow.c: Likewise.
+       * db2/db/db_pr.c: Likewise.
+       * db2/db/db_rec.c: Likewise.
+       * db2/db/db_ret.c: Likewise.
+       * db2/db/db_thread.c: Likewise.
+       * db2/db/db_auto.c: Likewise.
+       * db2/db185/db185.c: Likewise.
+       * db2/db185/db185_int.h: Likewise.
+       * db2/dbm/dbm.c: Likewise.
+       * db2/hash/hash.c: Likewise.
+       * db2/hash/hash.src: Likewise.
+       * db2/hash/hash_page.c: Likewise.
+       * db2/hash/hash_conv.c: Likewise.
+       * db2/hash/hash_debug.c: Likewise.
+       * db2/hash/hash_stat.c: Likewise.
+       * db2/hash/hash_rec.c: Likewise.
+       * db2/hash/hash_dup.c: Likewise.
+       * db2/hash/hash_func.c: Likewise.
+       * db2/hash/hash_auto.c: Likewise.
+       * db2/include/mp.h: Likewise.
+       * db2/include/btree.h: Likewise.
+       * db2/include/db.h.src: Likewise.
+       * db2/include/db_int.h.src: Likewise.
+       * db2/include/db_shash.h: Likewise.
+       * db2/include/db_swap.h: Likewise.
+       * db2/include/db_185.h.src: Likewise.
+       * db2/include/txn.h: Likewise.
+       * db2/include/db_am.h: Likewise.
+       * db2/include/shqueue.h: Likewise.
+       * db2/include/hash.h: Likewise.
+       * db2/include/db_dispatch.h: Likewise.
+       * db2/include/lock.h: Likewise.
+       * db2/include/db_page.h: Likewise.
+       * db2/include/log.h: Likewise.
+       * db2/include/db_auto.h: Likewise.
+       * db2/include/btree_auto.h: Likewise.
+       * db2/include/hash_auto.h: Likewise.
+       * db2/include/log_auto.h: Likewise.
+       * db2/include/txn_auto.h: Likewise.
+       * db2/include/db_ext.h: Likewise.
+       * db2/include/btree_ext.h: Likewise.
+       * db2/include/clib_ext.h: Likewise.
+       * db2/include/common_ext.h: Likewise.
+       * db2/include/hash_ext.h: Likewise.
+       * db2/include/lock_ext.h: Likewise.
+       * db2/include/log_ext.h: Likewise.
+       * db2/include/mp_ext.h: Likewise.
+       * db2/include/mutex_ext.h: Likewise.
+       * db2/include/os_ext.h: Likewise.
+       * db2/include/txn_ext.h: Likewise.
+       * db2/include/cxx_int.h: Likewise.
+       * db2/include/db_cxx.h: Likewise.
+       * db2/include/queue.h: Likewise.
+       * db2/lock/lock.c: Likewise.
+       * db2/lock/lock_conflict.c: Likewise.
+       * db2/lock/lock_util.c: Likewise.
+       * db2/lock/lock_deadlock.c: Likewise.
+       * db2/log/log.c: Likewise.
+       * db2/log/log_get.c: Likewise.
+       * db2/log/log.src: Likewise.
+       * db2/log/log_compare.c: Likewise.
+       * db2/log/log_put.c: Likewise.
+       * db2/log/log_rec.c: Likewise.
+       * db2/log/log_archive.c: Likewise.
+       * db2/log/log_register.c: Likewise.
+       * db2/log/log_auto.c: Likewise.
+       * db2/log/log_findckp.c: Likewise.
+       * db2/mp/mp_bh.c: Likewise.
+       * db2/mp/mp_fget.c: Likewise.
+       * db2/mp/mp_fopen.c: Likewise.
+       * db2/mp/mp_fput.c: Likewise.
+       * db2/mp/mp_fset.c: Likewise.
+       * db2/mp/mp_open.c: Likewise.
+       * db2/mp/mp_region.c: Likewise.
+       * db2/mp/mp_pr.c: Likewise.
+       * db2/mp/mp_sync.c: Likewise.
+       * db2/mutex/68020.gcc: Likewise.
+       * db2/mutex/mutex.c: Likewise.
+       * db2/mutex/README: Likewise.
+       * db2/mutex/x86.gcc: Likewise.
+       * db2/mutex/sparc.gcc: Likewise.
+       * db2/mutex/uts4.cc.s: Likewise.
+       * db2/mutex/alpha.dec: Likewise.
+       * db2/mutex/alpha.gcc: Likewise.
+       * db2/mutex/parisc.gcc: Likewise.
+       * db2/mutex/parisc.hp: Likewise.
+       * db2/os/db_os_abs.c: Likewise.
+       * db2/os/db_os_dir.c: Likewise.
+       * db2/os/db_os_fid.c: Likewise.
+       * db2/os/db_os_lseek.c: Likewise.
+       * db2/os/db_os_mmap.c: Likewise.
+       * db2/os/db_os_open.c: Likewise.
+       * db2/os/db_os_rw.c: Likewise.
+       * db2/os/db_os_sleep.c: Likewise.
+       * db2/os/db_os_stat.c: Likewise.
+       * db2/os/db_os_unlink.c: Likewise.
+       * db2/txn/txn.c: Likewise.
+       * db2/txn/txn.src: Likewise.
+       * db2/txn/txn_rec.c: Likewise.
+       * db2/txn/txn_auto.c: Likewise.
+       * db2/clib/getlong.c: Likewise.
+       * db2/progs/db_archive/db_archive.c: Likewise.
+       * db2/progs/db_checkpoint/db_checkpoint.c: Likewise.
+       * db2/progs/db_deadlock/db_deadlock.c: Likewise.
+       * db2/progs/db_dump/db_dump.c: Likewise.
+       * db2/progs/db_dump185/db_dump185.c: Likewise.
+       * db2/progs/db_load/db_load.c: Likewise.
+       * db2/progs/db_printlog/db_printlog.c: Likewise.
+       * db2/progs/db_recover/db_recover.c: Likewise.
+       * db2/progs/db_stat/db_stat.c: Likewise.
+
+       * libio/stdio.h [__cplusplus] (__STDIO_INLINE): Define as inline.
+
+       * po/de.po, po/sv.po: Update from 2.0.5 translations.
+
+       * sysdeps/unix/sysv/linux/netinet/tcp.h: Pretty print.
+
+       * sunrpc/rpc/xdr.h (XDR): Don't define argument of x_destroy callback
+       as const.
+       * sunrpc/xdr_mem.c (xdrmem_destroy): Don't define argument as const.
+       * sunrpc/xdr_rec.c (xdrrec_destroy): Likewise.
+       * sunrpc/xdr_stdio.c (xdrstdio_destroy): Likewise.
+
+1997-08-27 18:47  Ulrich Drepper  <drepper@cygnus.com>
+
+       * sysdeps/unix/sysv/linux/if_index.c: Include <errno.h>.
+       Reported by Benjamin Kosnik <bkoz@cygnus.com>.
+
+1997-08-27 02:27  Roland McGrath  <roland@baalperazim.frob.com>
+
+       * abi-tags: New file.
+       * csu/Makefile (distribute): Remove abi-tag.h.
+       ($(objpfx)abi-tag.h): New target.
+       * Makefile (distribute): Add abi-tags.
+       * sysdeps/unix/sysv/linux/abi-tag.h: File removed.
+       * sysdeps/mach/hurd/abi-tag.h: File removed.
+       * sysdeps/stub/abi-tag.h: File removed.
+
+1997-08-25  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>
+
+       * sysdeps/unix/make-syscalls.sh: Change output so that it
+       generates compilation rules only for the currently selected object
+       suffixes.
+
+1997-08-25  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>
+
+       * sysdeps/m68k/dl-machine.h (RTLD_START): Switch back to previous
+       section to avoid confusing the compiler.
+       * sysdeps/alpha/dl-machine.h (RTLD_START): Likewise.
+       * sysdeps/i386/dl-machine.h (RTLD_START): Likewise.
+       * sysdeps/mips/dl-machine.h (RTLD_START): Likewise.
+       * sysdeps/mips/mips64/dl-machine.h (RTLD_START): Likewise.
+       * sysdeps/sparc/sparc32/dl-machine.h (RTLD_START): Likewise.
+
+       * sysdeps/m68k/dl-machine.h (elf_machine_load_address): Use a GOT
+       relocation instead of a constant to avoid text relocation.
+       (ELF_MACHINE_BEFORE_RTLD_RELOC): Removed.
+       (RTLD_START): Declare global labels as functions and add size
+       directive.
+
+1997-08-25 17:01  Ulrich Drepper  <drepper@cygnus.com>
+
+       * sysdeps/i386/bits/select.h: Correct assembler versions to work even
+       for descriptors >= 32.
+
+       * stdlib/alloca.h: Don't define alloca to __alloca since if gcc
+       is used __alloca is not defined to __builtin_alloca and so might
+       not be available.
+       Reported by Uwe Ohse <uwe@ohse.de>.
+
+       * sysdeps/unix/sysv/linux/sys/sysmacros.h: Define macros in a special
+       way if gcc is not used and so dev_t is an array.
+       Reported by Uwe Ohse <uwe@ohse.de>.
+
+1997-08-23  Andreas Schwab  <schwab@issan.informatik.uni-dortmund.de>
+
+       * manual/libc.texinfo: Reorder chapters to match logical order.
+
+1997-08-25 12:22  Ulrich Drepper  <drepper@cygnus.com>
+
+       * sunrpc/rpc/xdr.h: Change name of parameters in prototypes of
+       xdr_reference, xdrmem_create, and xdrstdio_create because of clash
+       with g++ internal symbols.
+       Patch by Sudish Joseph <sj@eng.mindspring.net>.
+
 1997-08-24  Miles Bader  <miles@gnu.ai.mit.edu>
 
        * string/argz.h: Add missing __END_DECLS.
 
        * csu/Makefile (initfini.s): Disable optimization.
 
-       * elf/dl-deps.c: Implement handling of DL_FILTER.
+       * elf/dl-deps.c: Implement handling of DT_FILTER.
 
        * elf/dl-load.c (_dl_init_paths): Add error check.
 
        * sysdeps/mips/dl-machine.h: Remove extra stuff.
 
 1997-07-06 07:18  Geoff Keating  <geoffk@ozemail.com.au>
-
        * sysdeps/powerpc/bits/endian.h: Handle multiple endianess.
 
        * stdlib/grouping.h: Suppress gcc warning about testing
diff --git a/INSTALL b/INSTALL
index 9968b081b4cb64882b1cac60cebad6740ef878c1..889780433ee0ea65817a2e14518e3d52d648162f 100644 (file)
--- a/INSTALL
+++ b/INSTALL
@@ -994,3 +994,131 @@ parts of the library were contributed or worked on by other people.
        OF SUCH DAMAGE.
 
        If these license terms cause you a real problem, contact the author.
++
+   * The `db' library is taken from the db-2.3.4 distribution by Sleepycat
+     Software, and is covered by the following terms:
+
+       /*-
+        * @(#)LICENSE  10.4 (Sleepycat) 7/24/97
+        */
+
+       The following are the copyrights and redistribution conditions
+       that apply to this copy of the DB software.  For a license to use,
+       redistribute or sell DB software under conditions other than those
+       described here, or to purchase support for this software, please
+       contact Sleepycat Software at one of the following addresses:
+
+               Sleepycat Software              db@sleepycat.com
+               394 E. Riding Dr.               +1-508-287-4781
+               Carlisle, MA 01741
+               USA
+
+       =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+       /*
+        * Copyright (c) 1990, 1993, 1994, 1995, 1996, 1997
+        *      Sleepycat Software.  All rights reserved.
+        *
+        * Redistribution and use in source and binary forms, with or without
+        * modification, are permitted provided that the following conditions
+        * are met:
+        * 1. Redistributions of source code must retain the above copyright
+        *    notice, this list of conditions and the following disclaimer.
+        * 2. Redistributions in binary form must reproduce the above copyright
+        *    notice, this list of conditions and the following disclaimer in
+        *    the documentation and/or other materials provided with the
+        *    distribution.
+        * 3. Redistributions in any form must be accompanied by information on
+        *    how to obtain complete source code for the DB software and any
+        *    accompanying software that uses the DB software.  The source code
+        *    must either be included in the distribution or be available for
+        *    no more than the cost of distribution plus a nominal fee, and
+        *    must be freely redistributable under reasonable conditions.  For
+        *    an executable file, complete source code means the source code
+        *    for all modules it contains.  It does not mean source code for
+        *    modules or files that typically accompany the operating system
+        *    on which the executable file runs, e.g., standard library
+        *    modules or system header files.
+        *
+        * THIS SOFTWARE IS PROVIDED BY SLEEPYCAT SOFTWARE ``AS IS'' AND
+        * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+        * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+        * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL SLEEPYCAT
+        * SOFTWARE BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+        * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+        * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+        * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+        * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+        * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
+        * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+        * SUCH DAMAGE.
+        */
+       /*
+        * Copyright (c) 1990, 1993, 1994, 1995
+        *      The Regents of the University of California.  All rights
+        *      reserved.
+        *
+        * Redistribution and use in source and binary forms, with or without
+        * modification, are permitted provided that the following conditions
+        * are met:
+        * 1. Redistributions of source code must retain the above copyright
+        *    notice, this list of conditions and the following disclaimer.
+        * 2. Redistributions in binary form must reproduce the above copyright
+        *    notice, this list of conditions and the following disclaimer in
+        *    the documentation and/or other materials provided with the
+        *    distribution.
+        * 3. All advertising materials mentioning features or use of this
+        *    software must display the following acknowledgement:
+        *      This product includes software developed by the University of
+        *      California, Berkeley and its contributors.
+        * 4. Neither the name of the University nor the names of its
+        *    contributors may be used to endorse or promote products derived
+        *    from this software without specific prior written permission.
+        *
+        * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
+        * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+        * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+        * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS
+        * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+        * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+        * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+        * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+        * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+        * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+        * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+        * THE POSSIBILITY OF SUCH DAMAGE.
+        */
+       /*
+        * Copyright (c) 1995, 1996
+        *      The President and Fellows of Harvard University.  All rights
+        *      reserved.
+        *
+        * Redistribution and use in source and binary forms, with or without
+        * modification, are permitted provided that the following conditions
+        * are met:
+        * 1. Redistributions of source code must retain the above copyright
+        *    notice, this list of conditions and the following disclaimer.
+        * 2. Redistributions in binary form must reproduce the above copyright
+        *    notice, this list of conditions and the following disclaimer in
+        *    the documentation and/or other materials provided with the
+        *    distribution.
+        * 3. All advertising materials mentioning features or use of this
+        *    software must display the following acknowledgement:
+        *      This product includes software developed by Harvard University
+        *      and its contributors.
+        * 4. Neither the name of the University nor the names of its
+        *    contributors may be used to endorse or promote products derived
+        *    from this software without specific prior written permission.
+        *
+        * THIS SOFTWARE IS PROVIDED BY HARVARD AND ITS CONTRIBUTORS ``AS IS''
+        * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+        * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+        * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL HARVARD OR
+        * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+        * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+        * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+        * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+        * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+        * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+        * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+        * POSSIBILITY OF SUCH DAMAGE.
+        */
index e1103c5ce148ebb3033d61e77376671050869b9f..185c16cf6d0299a8f4016953cdf503cb8abf1909 100644 (file)
@@ -554,7 +554,7 @@ ifeq (yes,$(build-omitfp))
 # library with debugging information.  The debuggable objects are named foo.og.
 object-suffixes += .og
 CFLAGS-.og = -g
-CFLAGS-.o = -g0 -O99 -fomit-frame-pointer
+CFLAGS-.o = -g0 -O99 -fomit-frame-pointer -D__USE_STRING_INLINES
 CFLAGS-.os += $(CFLAGS-.o)
 libtype.og = lib%_g.a
 endif
index 61246db85eb78f0b50a9f451c142287567cf57fa..9034b6fd400145ac8e8fc3ca655a6bab002b3bdd 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -51,7 +51,7 @@ sysdep-subdirs := $(subst $(\n), ,$(sysdep-subdirs))
 endif
 
 # These are the subdirectories containing the library source.
-subdirs = csu assert ctype db locale intl catgets math setjmp signal stdlib \
+subdirs = csu assert ctype db2 locale intl catgets math setjmp signal stdlib \
          stdio-common $(stdio) malloc string wcsmbs time dirent grp pwd \
          posix io termios resource misc socket sysvipc gmon gnulib \
          wctype manual shadow md5-crypt nss $(sysdep-subdirs) po argp \
@@ -304,7 +304,7 @@ distribute  := README INSTALL FAQ NOTES NEWS PROJECTS BUGS          \
               config.h.in config.make.in config-name.in Makefile.in    \
               autolock.sh rellns-sh munch-tmpl.c munch.awk interp.c    \
               sysdep.h set-hooks.h libc-symbols.h version.h shlib-versions \
-              rpm/Makefile rpm/template rpm/rpmrc glibcbug.in
+              rpm/Makefile rpm/template rpm/rpmrc glibcbug.in abi-tags
 
 distribute := $(strip $(distribute))
 generated := $(generated) stubs.h version-info.h
diff --git a/abi-tags b/abi-tags
new file mode 100644 (file)
index 0000000..45d1d1f
--- /dev/null
+++ b/abi-tags
@@ -0,0 +1,24 @@
+# This file defines the ABI tag value we will use in the ELF note included
+# in the startup code to be linked into every program.
+
+# The following lines list regexps matching canonical configurations, and
+# the associated ABI tag values.  The entire list is processed, with
+# earlier entries taking precedence over later entries.  So loose patterns
+# at the end of the list can give defaults.
+
+# The ABI tag values we use are 32-bit quantities stored in machine byte order.
+# Conventionally the high-order byte indicates the OS and the low three
+# bytes form a version number associated with a particular ABI version.
+
+# After the configuration regexp, four integers in C syntax appear
+# surrounded by any whitespace or punctuation, one for each byte, MSB first.
+
+# Configuration                ABI OS  ABI version
+# -------------                ------  -----------
+
+.*-.*-linux.*          0       2.0.0   # earliest compatible kernel version
+
+.*-.*-gnu-gnu.*                1       0.0.0
+
+# There is no catch-all default here because every supported OS that uses
+# ELF must have its own unique ABI tag.
index ee2742b9e0abf32a0c32bf319877706366fbed2b..1448103304ddd12d774d4d372c4bd9617f0f3237 100755 (executable)
@@ -506,6 +506,7 @@ EOF
                ret \$31,(\$26),1
                .end main
 EOF
+               LIBC=""
                ${CC-cc} dummy.s -o dummy 2>/dev/null
                if test "$?" = 0 ; then
                        ./dummy
@@ -516,10 +517,16 @@ EOF
                        2)      
                                        UNAME_MACHINE="alphaev56"
                                ;;      
-               esac    
-         fi    
-         rm -f dummy.s dummy
-         echo ${UNAME_MACHINE}-unknown-linux-gnu ; exit 0
+                       esac    
+
+                       objdump --private-headers dummy | \
+                         grep ld.so.1 > /dev/null
+                       if test "$?" = 0 ; then
+                               LIBC="libc1"
+                       fi
+               fi      
+               rm -f dummy.s dummy
+               echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} ; exit 0
        elif test "${UNAME_MACHINE}" = "mips" ; then
          cat >dummy.c <<EOF
 main(argc, argv)
index 993053fb33d081a6496c21df09311097bab73167..baafac57181bbd1cf91a460e3ad6dacd43914916 100644 (file)
@@ -32,11 +32,12 @@ csu-dummies = $(filter-out $(start-installed-name),crt1.o Mcrt1.o)
 extra-objs = start.o gmon-start.o \
             $(start-installed-name) g$(start-installed-name) \
             $(csu-dummies)
+before-compile = $(objpfx)abi-tag.h
 omit-deps = $(patsubst %.o,%,$(start-installed-name) g$(start-installed-name) \
                             $(csu-dummies))
 install-lib = $(start-installed-name) g$(start-installed-name) \
              $(csu-dummies)
-distribute = initfini.c gmon-start.c start.c defs.awk abi-note.S abi-tag.h
+distribute = initfini.c gmon-start.c start.c defs.awk abi-note.S
 
 all: # Make this the default target; it will be defined in Rules.
 
@@ -85,6 +86,7 @@ endif
 
 ifeq (yes,$(elf))
 extra-objs += abi-note.o
+asm-CPPFLAGS += -I$(objpfx).
 endif
 
 include ../Rules
@@ -121,3 +123,15 @@ $(addprefix $(objpfx),$(filter-out $(start-installed-name),$(csu-dummies))):
        cp /dev/null $(@:.o=.c)
        $(COMPILE.c) $(@:.o=.c) $(OUTPUT_OPTION)
        rm -f $(@:.o=.c)
+
+# Generate abi-tag.h from the abi-tags table.
+#
+# Comment text and blank lines are stripped from the table; each remaining
+# line is read as a configuration regexp (`conf') followed by the tag text
+# (`tag').  For every regexp that `expr' matches against
+# $(config-machine)-$(config-vendor)-$(config-os), the tag text is rewritten
+# by sed into a `#define ABI_TAG ...' line with comma-separated values and
+# written to $@.new.  If no line matched, $@.new is never created and the
+# rule fails with a diagnostic; otherwise $@.new is moved into place.
+#
+# NOTE(review): each match redirects into $@.new afresh, so when several
+# lines match it is the LAST one that ends up in the header.
+#
+# The variable `/' is set to the two characters `$/' so that a `$/' written
+# inside the sed scripts below is handed to the shell as a literal `$/'
+# instead of being expanded away by make.
+/ := $$/# bite me.
+$(objpfx)abi-tag.h: $(..)abi-tags
+       rm -f $@.new
+       sed 's/#.*$//;/^[       ]*$$/d' $< | while read conf tag; do \
+         test `expr '$(config-machine)-$(config-vendor)-$(config-os)' \
+                    : "$$conf"` != 0 || continue; \
+         echo "$$tag" | sed > $@.new \
+              's/[^0-9xXa-fA-F]/ /g;s/ *$//;s/ /,/g;s/^ */#define ABI_TAG /';\
+       done
+       if test -r $@.new; then mv -f $@.new $@; \
+       else echo >&2 'This configuration not matched in $<'; exit 1; fi
diff --git a/db2/Makefile b/db2/Makefile
new file mode 100644 (file)
index 0000000..24d74cc
--- /dev/null
@@ -0,0 +1,90 @@
+# Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Library General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Library General Public License for more details.
+
+# You should have received a copy of the GNU Library General Public
+# License along with the GNU C Library; see the file COPYING.LIB.  If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+#
+#      Sub-makefile for libdb.
+#
+#      The code is lifted straight from the db 2.3.4 distribution
+#      with minimal changes.
+#
+
+subdir = db2
+
+subdir-dirs = btree common db db185 dbm hash lock log mp mutex os txn \
+       progs/db_archive progs/db_checkpoint  progs/db_deadlock \
+       progs/db_dump progs/db_dump185 progs/db_load progs/db_printlog \
+       progs/db_recover progs/db_stat clib
+
+vpath %.c $(subdir-dirs)
+
+extra-libs := libdb
+extra-libs-others := $(extra-libs)
+
+libdb-routines := bt_close bt_compare bt_conv bt_cursor bt_delete \
+       bt_open bt_page bt_put bt_rec bt_recno bt_rsearch bt_search \
+       bt_split bt_stat btree_auto db db_appinit db_apprec \
+       db_auto \
+       db_byteorder db_conv db_dispatch db_dup db_err db_log2 \
+       db_os_abs db_os_dir db_os_fid db_os_lseek db_os_mmap \
+       db_os_open db_os_rw db_os_sleep db_os_stat db_os_unlink \
+       db_overflow db_pr db_rec db_region db_ret db_salloc \
+       db_shash db_thread hash hash_auto hash_conv hash_debug \
+       hash_dup hash_func hash_page hash_rec hash_stat lock \
+       lock_conflict lock_deadlock lock_util log log_archive \
+       log_auto log_compare log_findckp log_get log_put log_rec \
+       log_register mp_bh mp_fget mp_fopen mp_fput mp_fset \
+       mp_open mp_pr mp_region mp_sync mutex txn txn_auto \
+       txn_rec dbm db185
+
+others         := makedb db_dump185 db_archive db_checkpoint db_deadlock \
+               db_dump db_load db_recover db_stat
+install-bin    := makedb db_dump185 db_archive db_checkpoint db_deadlock \
+               db_dump db_load db_recover db_stat
+
+include ../Rules
+
+CPPFLAGS += -I./include -include ./compat.h
+
+$(objpfx)db_checkpoint: $(objpfx)getlong.o
+$(objpfx)db_deadlock: $(objpfx)getlong.o
+$(objpfx)db_load: $(objpfx)getlong.o
+
+ifeq ($(build-shared),yes)
+$(objpfx)makedb: $(objpfx)libdb.so$(libdb.so-version)
+$(objpfx)db_dump185: $(objpfx)libdb.so$(libdb.so-version)
+$(objpfx)db_archive: $(objpfx)libdb.so$(libdb.so-version)
+$(objpfx)db_checkpoint: $(objpfx)libdb.so$(libdb.so-version)
+$(objpfx)db_deadlock: $(objpfx)libdb.so$(libdb.so-version)
+$(objpfx)db_dump: $(objpfx)libdb.so$(libdb.so-version)
+$(objpfx)db_load: $(objpfx)libdb.so$(libdb.so-version)
+$(objpfx)db_recover: $(objpfx)libdb.so$(libdb.so-version)
+$(objpfx)db_stat: $(objpfx)libdb.so$(libdb.so-version)
+else
+$(objpfx)makedb: $(objpfx)libdb.a
+$(objpfx)db_dump185: $(objpfx)libdb.a
+$(objpfx)db_archive: $(objpfx)libdb.a
+$(objpfx)db_checkpoint: $(objpfx)libdb.a
+$(objpfx)db_deadlock: $(objpfx)libdb.a
+$(objpfx)db_dump: $(objpfx)libdb.a
+$(objpfx)db_load: $(objpfx)libdb.a
+$(objpfx)db_recover: $(objpfx)libdb.a
+$(objpfx)db_stat: $(objpfx)libdb.a
+endif
+
+# Depend on libc.so so a DT_NEEDED is generated in the shared objects.
+$(objpfx)libdb.so: $(common-objpfx)libc.so
diff --git a/db2/btree/bt_close.c b/db2/btree/bt_close.c
new file mode 100644 (file)
index 0000000..4e80634
--- /dev/null
@@ -0,0 +1,184 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_close.c   10.22 (Sleepycat) 8/23/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+static void __bam_upstat __P((DB *dbp));
+
+/*
+ * __bam_close --
+ *     Close a btree: update the tree statistics, then release the memory
+ *     attached to the handle's BTREE structure and the structure itself.
+ *     Always returns 0.
+ *
+ * PUBLIC: int __bam_close __P((DB *));
+ */
+int
+__bam_close(dbp)
+       DB *dbp;
+{
+       BTREE *bt;
+
+       DEBUG_LWRITE(dbp, NULL, "bam_close", NULL, NULL, 0);
+
+       bt = dbp->internal;
+
+       /* The statistics must be written out before anything is freed. */
+       __bam_upstat(dbp);
+
+       /* Release the key and data return buffers if they were allocated. */
+       if (bt->bt_rkey.data != NULL) {
+               FREE(bt->bt_rkey.data, bt->bt_rkey.size);
+       }
+       if (bt->bt_rdata.data != NULL) {
+               FREE(bt->bt_rdata.data, bt->bt_rdata.ulen);
+       }
+
+       /* The stack is freed only when bt_sp no longer equals bt_stack. */
+       if (bt->bt_sp != bt->bt_stack) {
+               FREE(bt->bt_sp, (bt->bt_esp - bt->bt_sp) * sizeof(EPG));
+       }
+
+       /* Drop the BTREE itself and detach it from the handle. */
+       FREE(bt, sizeof(BTREE));
+       dbp->internal = NULL;
+
+       return (0);
+}
+
+/*
+ * __bam_sync --
+ *     Sync the btree to disk.
+ *
+ *     argdbp is the handle supplied by the caller and flags is checked by
+ *     __db_syncchk().  Returns 0 when there is nothing to flush, when the
+ *     flush succeeds, or when memp_fsync() reports DB_INCOMPLETE; otherwise
+ *     returns the nonzero value from __db_syncchk() or memp_fsync().
+ *
+ * PUBLIC: int __bam_sync __P((DB *, int));
+ */
+int
+__bam_sync(argdbp, flags)
+       DB *argdbp;
+       int flags;
+{
+       DB *dbp;
+       int ret;
+
+       DEBUG_LWRITE(argdbp, NULL, "bam_sync", NULL, NULL, flags);
+
+       /* Check for invalid flags. */
+       if ((ret = __db_syncchk(argdbp, flags)) != 0)
+               return (ret);
+
+       /* If it wasn't possible to modify the file, we're done. */
+       if (F_ISSET(argdbp, DB_AM_INMEM | DB_AM_RDONLY))
+               return (0);
+
+       /*
+        * Obtain the handle to work on in dbp; it is given back with
+        * PUTHANDLE below.
+        * NOTE(review): GETHANDLE is passed ret, so it presumably can fail
+        * and return on its own -- confirm against the macro definition.
+        */
+       GETHANDLE(argdbp, NULL, &dbp, ret);
+
+       /*
+        * Flush any dirty pages from the cache to the backing file.
+        * DB_INCOMPLETE is deliberately not reported to the caller as an
+        * error.
+        */
+       if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
+               ret = 0;
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_upstat --
+ *     Update tree statistics.
+ *
+ *     Merges the statistics kept in the handle's BTREE (t->lstat) into the
+ *     statistics on the metadata page via __bam_add_mstat() and marks the
+ *     page dirty.  Nothing is done for in-memory or read-only trees.  The
+ *     function returns void, so every failure (lock, page fetch, log write)
+ *     simply leaves the on-disk statistics untouched.
+ */
+static void
+__bam_upstat(dbp)
+       DB *dbp;
+{
+       BTREE *t;
+       BTMETA *meta;
+       DB_LOCK mlock;
+       db_pgno_t pgno;
+       int flags, ret;
+
+       /*
+        * We use a no-op log call to log the update of the statistics onto the
+        * metadata page.  The dbp->close() call isn't transaction protected to
+        * start with, and I'm not sure what undoing a statistics update means,
+        * anyway.
+        */
+       if (F_ISSET(dbp, DB_AM_INMEM | DB_AM_RDONLY))
+               return;
+
+       /*
+        * pgno has to be set before it is used to request the lock; flags
+        * stays 0 (page not dirtied) unless the update below completes.
+        */
+       flags = 0;
+       pgno = PGNO_METADATA;
+
+       /* Lock the page. */
+       if (__bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &mlock) != 0)
+               return;
+
+       /* Get the page. */
+       if (__bam_pget(dbp, (PAGE **)&meta, &pgno, 0) == 0) {
+               /*
+                * Log the change.  A nonzero return is a failure: give up
+                * without modifying the page.
+                */
+               if (DB_LOGGING(dbp) &&
+                   (ret = __db_noop_log(dbp->dbenv->lg_info, dbp->txn,
+                   &LSN(meta), 0)) != 0)
+                       goto err;
+
+               /* Update the statistics. */
+               t = dbp->internal;
+               __bam_add_mstat(&t->lstat, &meta->stat);
+
+               flags = DB_MPOOL_DIRTY;
+       } else
+               meta = NULL;            /* No page was obtained. */
+
+       /* Only hand back a page that was actually fetched. */
+err:   if (meta != NULL)
+               (void)memp_fput(dbp->mpf, (PAGE *)meta, flags);
+       (void)__BT_LPUT(dbp, mlock);
+}
diff --git a/db2/btree/bt_compare.c b/db2/btree/bt_compare.c
new file mode 100644 (file)
index 0000000..e802fd2
--- /dev/null
@@ -0,0 +1,205 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_compare.c 10.3 (Sleepycat) 7/19/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+/*
+ * __bam_cmp --
+ *     Compare a key to a given record.
+ *
+ *     dbp is the btree handle, whose BTREE supplies the comparison routine;
+ *     k1 is the key being compared; e names the record by page and index.
+ *     Returns < 0, 0 or > 0 as k1 is less than, equal to or greater than
+ *     the record.  Aborts the process if an overflow key cannot be read
+ *     for a user-supplied comparison routine.
+ *
+ * PUBLIC: int __bam_cmp __P((DB *, const DBT *, EPG *));
+ */
+int
+__bam_cmp(dbp, k1, e)
+       DB *dbp;
+       const DBT *k1;
+       EPG *e;
+{
+       BINTERNAL *bi;
+       BKEYDATA *bk;
+       BOVERFLOW *bo;
+       BTREE *t;
+       DBT k2;
+       PAGE *h;
+
+       t = dbp->internal;
+
+       /*
+        * Returns:
+        *      < 0 if k1 is < record
+        *      = 0 if k1 is = record
+        *      > 0 if k1 is > record
+        *
+        * The left-most key on internal pages, at any level of the tree, is
+        * guaranteed, by the following code, to be less than any user key.
+        * This saves us from having to update the leftmost key on an internal
+        * page when the user inserts a new key in the tree smaller than
+        * anything we've yet seen.
+        */
+       h = e->page;
+       if (e->indx == 0 &&
+           h->prev_pgno == PGNO_INVALID && TYPE(h) != P_LBTREE)
+               return (1);
+
+       /*
+        * Clear k2 once, up front, so that every path below -- including the
+        * overflow path, which hands k2 to __db_goff() -- starts from a fully
+        * initialized DBT rather than from stack garbage.
+        */
+       memset(&k2, 0, sizeof(k2));
+
+       /*
+        * Locate the record's key.  A non-overflow key is referenced in place
+        * through k2; an overflow key is remembered in bo and resolved below.
+        */
+       bo = NULL;
+       if (TYPE(h) == P_LBTREE) {
+               bk = GET_BKEYDATA(h, e->indx);
+               if (bk->type == B_OVERFLOW)
+                       bo = (BOVERFLOW *)bk;
+               else {
+                       k2.data = bk->data;
+                       k2.size = bk->len;
+               }
+       } else {
+               bi = GET_BINTERNAL(h, e->indx);
+               if (bi->type == B_OVERFLOW)
+                       bo = (BOVERFLOW *)(bi->data);
+               else {
+                       k2.data = bi->data;
+                       k2.size = bi->len;
+               }
+       }
+
+       /*
+        * XXX
+        * We ignore system errors; the only recoverable one is ENOMEM, and we
+        * don't want to require that comparison routines handle random errors.
+        * We don't want to return a valid comparison, either, so we stop.
+        */
+       if (bo != NULL) {
+               /*
+                * If using the default comparison routine, use __db_moff(),
+                * which compares the overflow key a page at a time.
+                */
+               if (t->bt_compare == __bam_defcmp)
+                       return (__db_moff(dbp, k1, bo->pgno));
+
+               /*
+                * Otherwise, we need a contiguous record so we can hand it
+                * to the user's routine.
+                */
+               if (__db_goff(dbp, &k2, bo->tlen,
+                   bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen) != 0)
+                       abort();
+       }
+       return ((*t->bt_compare)(k1, &k2));
+}
+
+/*
+ * __bam_defcmp --
+ *     Default comparison routine: byte-by-byte comparison of the two
+ *     items over their common length, with the length difference
+ *     deciding when one item is a prefix of the other.
+ *
+ * PUBLIC: int __bam_defcmp __P((const DBT *, const DBT *));
+ */
+int
+__bam_defcmp(a, b)
+       const DBT *a, *b;
+{
+       u_int8_t *lhs, *rhs;
+       size_t remaining;
+
+       /*
+        * Returns:
+        *      < 0 if a is < b
+        *      = 0 if a is = b
+        *      > 0 if a is > b
+        *
+        * XXX
+        * If a size_t doesn't fit into a long, or if the difference between
+        * any two characters doesn't fit into an int, this routine can lose.
+        * What we need is a signed integral type that's guaranteed to be at
+        * least as large as a size_t, and there is no such thing.
+        */
+       lhs = a->data;
+       rhs = b->data;
+
+       /* Only the bytes both items have can be compared pairwise. */
+       if (a->size > b->size)
+               remaining = b->size;
+       else
+               remaining = a->size;
+
+       while (remaining-- != 0) {
+               if (*lhs != *rhs)
+                       return ((long)*lhs - (long)*rhs);
+               ++lhs;
+               ++rhs;
+       }
+
+       /* Equal over the common length: the sizes decide. */
+       return ((long)a->size - (long)b->size);
+}
+
+/*
+ * __bam_defpfx --
+ *     Default prefix routine: returns one more than the offset of the
+ *     first byte at which the two items differ.  When no byte differs
+ *     over the common length, returns a->size + 1 if a is the shorter
+ *     item and a->size otherwise.
+ *
+ * PUBLIC: size_t __bam_defpfx __P((const DBT *, const DBT *));
+ */
+size_t
+__bam_defpfx(a, b)
+       const DBT *a, *b;
+{
+       size_t common, i;
+       u_int8_t *lhs, *rhs;
+
+       lhs = a->data;
+       rhs = b->data;
+
+       if (a->size > b->size)
+               common = b->size;
+       else
+               common = a->size;
+
+       /* The result is a byte count, hence the offset plus one. */
+       for (i = 0; i < common; ++i) {
+               if (lhs[i] != rhs[i])
+                       return (i + 1);
+       }
+
+       /*
+        * We know that a->size must be <= b->size, or they wouldn't be
+        * in this order.
+        */
+       if (a->size < b->size)
+               return (a->size + 1);
+       return (a->size);
+}
diff --git a/db2/btree/bt_conv.c b/db2/btree/bt_conv.c
new file mode 100644 (file)
index 0000000..537e2f9
--- /dev/null
@@ -0,0 +1,83 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_conv.c    10.3 (Sleepycat) 8/9/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "btree.h"
+
+/*
+ * __bam_pgin, __bam_pgout --
+ *     Convert host-specific page layout to/from the host-independent
+ *     format stored on disk.  The cookie's data is a DB_PGINFO whose
+ *     needswap member says whether any conversion is required.
+ *
+ * PUBLIC: int __bam_pgin __P((db_pgno_t, void *, DBT *));
+ * PUBLIC: int __bam_pgout __P((db_pgno_t, void *, DBT *));
+ */
+int
+__bam_pgin(pg, pp, cookie)
+       db_pgno_t pg;
+       void *pp;
+       DBT *cookie;
+{
+       DB_PGINFO *info;
+
+       info = (DB_PGINFO *)cookie->data;
+
+       if (info->needswap) {
+               /* The metadata page has its own layout and swap routine. */
+               if (pg == PGNO_METADATA)
+                       return (__bam_mswap(pp));
+               return (__db_pgin(pg, pp));
+       }
+
+       /* No swap requested: the page is used as it is. */
+       return (0);
+}
+
+/*
+ * Page-out counterpart of __bam_pgin: when the cookie's DB_PGINFO asks for
+ * a swap, the metadata page goes through __bam_mswap() and every other page
+ * through __db_pgout().
+ */
+int
+__bam_pgout(pg, pp, cookie)
+       db_pgno_t pg;
+       void *pp;
+       DBT *cookie;
+{
+       DB_PGINFO *info;
+
+       info = (DB_PGINFO *)cookie->data;
+
+       if (info->needswap) {
+               /* The metadata page has its own layout and swap routine. */
+               if (pg == PGNO_METADATA)
+                       return (__bam_mswap(pp));
+               return (__db_pgout(pg, pp));
+       }
+
+       /* No swap requested: the page is written as it is. */
+       return (0);
+}
+
+/*
+ * __bam_mswap --
+ *     Swap the bytes on the btree metadata page.
+ *
+ *     Starting at the beginning of the page, SWAP32 is applied ten times;
+ *     the comment on each line names the metadata field being converted.
+ *     Always returns 0.
+ *
+ *     NOTE(review): this relies on SWAP32 advancing p past the field it
+ *     has just swapped, and on the fields being laid out in exactly this
+ *     order -- confirm against the SWAP32 and BTMETA definitions.
+ *
+ * PUBLIC: int __bam_mswap __P((PAGE *));
+ */
+int
+__bam_mswap(pg)
+       PAGE *pg;
+{
+       u_int8_t *p;
+
+       /* Walk the page as raw bytes. */
+       p = (u_int8_t *)pg;
+       SWAP32(p);              /* lsn.file */
+       SWAP32(p);              /* lsn.offset */
+       SWAP32(p);              /* pgno */
+       SWAP32(p);              /* magic */
+       SWAP32(p);              /* version */
+       SWAP32(p);              /* pagesize */
+       SWAP32(p);              /* maxkey */
+       SWAP32(p);              /* minkey */
+       SWAP32(p);              /* free */
+       SWAP32(p);              /* flags */
+       return (0);
+}
diff --git a/db2/btree/bt_cursor.c b/db2/btree/bt_cursor.c
new file mode 100644 (file)
index 0000000..592ec9b
--- /dev/null
@@ -0,0 +1,1577 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_cursor.c  10.26 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+static int __bam_c_close __P((DBC *));
+static int __bam_c_del __P((DBC *, int));
+static int __bam_c_first __P((DB *, CURSOR *));
+static int __bam_c_get __P((DBC *, DBT *, DBT *, int));
+static int __bam_c_last __P((DB *, CURSOR *));
+static int __bam_c_next __P((DB *, CURSOR *, int));
+static int __bam_c_physdel __P((DB *, CURSOR *, PAGE *));
+static int __bam_c_prev __P((DB *, CURSOR *));
+static int __bam_c_put __P((DBC *, DBT *, DBT *, int));
+static int __bam_c_rget __P((DB *, CURSOR *, DBT *, DBT *, int));
+static int __bam_c_search __P((DB *, CURSOR *, const DBT *, u_int, int, int *));
+
+/*
+ * Discard the current page/lock held by a cursor: the page is handed to
+ * memp_fput, the lock to __BT_TLPUT, and the two cursor fields are reset
+ * to NULL and LOCK_INVALID.  The results of both calls are ignored.
+ *
+ * Both arguments are evaluated more than once, so they must be free of
+ * side effects.  The expansion is a brace block, not do/while (0); do
+ * not use it as the unbraced body of an if that has an else.
+ */
+#undef DISCARD
+#define        DISCARD(dbp, cp) {                                              \
+       (void)memp_fput((dbp)->mpf, (cp)->page, 0);                     \
+       (cp)->page = NULL;                                              \
+       (void)__BT_TLPUT((dbp), (cp)->lock);                            \
+       (cp)->lock = LOCK_INVALID;                                      \
+}
+
+/*
+ * __bam_cursor --
+ *     Interface to the cursor functions.
+ *
+ *     Allocates a zeroed DBC and a zeroed CURSOR, ties them together,
+ *     installs the btree c_close/c_del/c_get/c_put functions in the DBC
+ *     and links the DBC at the head of dbp->curs_queue.  The new DBC is
+ *     stored through dbcp.
+ *
+ *     Returns 0 on success.  Returns ENOMEM if either allocation fails;
+ *     nothing is stored through dbcp in that case.
+ *
+ * PUBLIC: int __bam_cursor __P((DB *, DB_TXN *, DBC **));
+ */
+int
+__bam_cursor(dbp, txn, dbcp)
+       DB *dbp;
+       DB_TXN *txn;
+       DBC **dbcp;
+{
+       CURSOR *curs;
+       DBC *handle;
+
+       DEBUG_LWRITE(dbp, txn, "bam_cursor", NULL, NULL, 0);
+
+       handle = (DBC *)calloc(1, sizeof(DBC));
+       if (handle == NULL)
+               return (ENOMEM);
+       curs = (CURSOR *)calloc(1, sizeof(CURSOR));
+       if (curs == NULL) {
+               free(handle);
+               return (ENOMEM);
+       }
+
+       /* Public handle: owner, transaction and the btree methods. */
+       handle->dbp = dbp;
+       handle->txn = txn;
+       handle->internal = curs;
+       handle->c_close = __bam_c_close;
+       handle->c_del = __bam_c_del;
+       handle->c_get = __bam_c_get;
+       handle->c_put = __bam_c_put;
+
+       /* Private state: no position and no lock yet. */
+       curs->dbc = handle;
+       curs->dpgno = PGNO_INVALID;
+       curs->pgno = PGNO_INVALID;
+       curs->lock = LOCK_INVALID;
+
+       /* Every cursor is queued on its DB structure. */
+       DB_THREAD_LOCK(dbp);
+       TAILQ_INSERT_HEAD(&dbp->curs_queue, handle, links);
+       DB_THREAD_UNLOCK(dbp);
+
+       *dbcp = handle;
+       return (0);
+}
+
+/*
+ * __bam_c_close --
+ *     Close a single cursor.
+ *
+ *     Performs the pending physical delete if the cursor is flagged
+ *     C_DELETED, releases the cursor's lock when the handle has no
+ *     transaction, unlinks the DBC from dbp->curs_queue and frees both
+ *     the CURSOR and the DBC.
+ *
+ *     Returns the result of __bam_c_physdel, or 0 if no delete was
+ *     pending.
+ */
+static int
+__bam_c_close(dbc)
+       DBC *dbc;
+{
+       CURSOR *curs;
+       DB *dbp;
+       int ret;
+
+       DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_close", NULL, NULL, 0);
+
+       GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+       curs = dbc->internal;
+
+       /* A record deleted through this cursor is physically removed now. */
+       ret = 0;
+       if (F_ISSET(curs, C_DELETED))
+               ret = __bam_c_physdel(dbp, curs, NULL);
+
+       /* Outside a transaction, hand back any lock still held. */
+       if (curs->lock != LOCK_INVALID && dbp->txn == NULL)
+               (void)__BT_TLPUT(dbp, curs->lock);
+
+       /* Unlink the cursor from the DB's queue. */
+       DB_THREAD_LOCK(dbp);
+       TAILQ_REMOVE(&dbp->curs_queue, dbc, links);
+       DB_THREAD_UNLOCK(dbp);
+
+       /* Release the private state first, then the public handle. */
+       FREE(curs, sizeof(CURSOR));
+       FREE(dbc, sizeof(DBC));
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_c_del --
+ *     Delete using a cursor.
+ *
+ *     The record is only marked here: the "deleted" bit of the on-page
+ *     item is set, __bam_ca_delete is called for the page/index, and the
+ *     page is put back dirty.  The physical removal is done later by
+ *     __bam_c_physdel.
+ *
+ *     Returns the __db_cdelchk result if the argument check fails and
+ *     DB_KEYEMPTY if the cursor is already flagged C_DELETED or
+ *     C_REPLACE; both happen before a handle is acquired.  Otherwise
+ *     returns the error from the failing lock, page or log call, or the
+ *     result of the final memp_fput.
+ */
+static int
+__bam_c_del(dbc, flags)
+       DBC *dbc;
+       int flags;
+{
+       CURSOR *cp;
+       DB *dbp;
+       DB_LOCK lock;
+       PAGE *h;
+       db_pgno_t pgno;
+       db_indx_t indx;
+       int ret;
+
+       DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_del", NULL, NULL, flags);
+
+       cp = dbc->internal;
+
+       /* Check for invalid flags. */
+       if ((ret = __db_cdelchk(dbc->dbp, flags,
+           F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0)
+               return (ret);
+
+       /* If already deleted, return failure. */
+       if (F_ISSET(cp, C_DELETED | C_REPLACE))
+               return (DB_KEYEMPTY);
+
+       GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+
+       /*
+        * We don't physically delete the record until the cursor moves,
+        * so we have to have a long-lived write lock on the page instead
+        * of a long-lived read lock.  Note, we have to have a read lock
+        * to even get here, so we simply discard it.
+        *
+        * The new write lock is obtained before the old lock is released,
+        * and the cursor's lock/mode fields are updated to match.
+        */
+       if (F_ISSET(dbp, DB_AM_LOCKING) && cp->mode != DB_LOCK_WRITE) {
+               if ((ret = __bam_lget(dbp,
+                   0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
+                       goto err;
+               (void)__BT_TLPUT(dbp, cp->lock);
+               cp->lock = lock;
+               cp->mode = DB_LOCK_WRITE;
+       }
+
+       /*
+        * Acquire the underlying page (which may be different from the above
+        * page because it may be a duplicate page), and set the on-page and
+        * in-cursor delete flags.  We don't need to lock it as we've already
+        * write-locked the page leading to it.
+        */
+       if (cp->dpgno == PGNO_INVALID) {
+               pgno = cp->pgno;
+               indx = cp->indx;
+       } else {
+               pgno = cp->dpgno;
+               indx = cp->dindx;
+       }
+
+       if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0)
+               goto err;
+
+       /*
+        * Log the change.  If logging fails the page is put back clean
+        * and nothing on it has been modified.
+        */
+       if (DB_LOGGING(dbp) &&
+           (ret = __bam_cdel_log(dbp->dbenv->lg_info, dbp->txn, &LSN(h),
+           0, dbp->log_fileid, PGNO(h), &LSN(h), indx)) != 0) {
+               (void)memp_fput(dbp->mpf, h, 0);
+               goto err;
+       }
+
+       /*
+        * Set the intent-to-delete flag on the page and in all cursors.
+        * On a leaf page the flag lives on the data item (indx + O_INDX);
+        * on a duplicate page it lives on the item at indx itself.
+        */
+       if (cp->dpgno == PGNO_INVALID)
+               GET_BKEYDATA(h, indx + O_INDX)->deleted = 1;
+       else
+               GET_BKEYDATA(h, indx)->deleted = 1;
+       (void)__bam_ca_delete(dbp, pgno, indx, NULL);
+
+       ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
+
+err:   PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_get --
+ *     Retrieve a key/data pair from the tree.
+ *
+ *     Builds a temporary DBC/CURSOR pair on the stack and runs
+ *     __bam_c_get on it, using DB_SET_RECNO when that flag is set in
+ *     flags and DB_SET otherwise.  Any lock left in the temporary cursor
+ *     is released before returning.
+ *
+ *     Returns the __db_getchk or __bam_c_get error, or 0 on success.
+ *
+ * PUBLIC: int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+ */
+int
+__bam_get(argdbp, txn, key, data, flags)
+       DB *argdbp;
+       DB_TXN *txn;
+       DBT *key, *data;
+       int flags;
+{
+       CURSOR scratch;
+       DBC handle;
+       int op, ret;
+
+       DEBUG_LREAD(argdbp, txn, "bam_get", key, NULL, flags);
+
+       /* Reject bad arguments before doing any work. */
+       ret = __db_getchk(argdbp, key, data, flags);
+       if (ret != 0)
+               return (ret);
+
+       /* A throwaway handle/cursor pair that is never queued on the DB. */
+       memset(&handle, 0, sizeof(handle));
+       handle.dbp = argdbp;
+       handle.txn = txn;
+       handle.internal = &scratch;
+
+       memset(&scratch, 0, sizeof(scratch));
+       scratch.dbc = &handle;
+       scratch.dpgno = PGNO_INVALID;
+       scratch.pgno = PGNO_INVALID;
+       scratch.lock = LOCK_INVALID;
+
+       /* Look the key up, by record number if the caller asked for that. */
+       op = DB_SET;
+       if (LF_ISSET(DB_SET_RECNO))
+               op = DB_SET_RECNO;
+       ret = __bam_c_get(&handle, key, data, op);
+       if (ret != 0)
+               return (ret);
+
+       /* The cursor was never real, so its lock is not kept. */
+       if (scratch.lock != LOCK_INVALID)
+               (void)__BT_TLPUT(argdbp, scratch.lock);
+
+       return (0);
+}
+
+/*
+ * __bam_c_get --
+ *     Get using a cursor (btree).
+ *
+ *     flags selects the operation: DB_CURRENT, DB_NEXT, DB_FIRST,
+ *     DB_PREV, DB_LAST, DB_SET, DB_SET_RANGE or DB_SET_RECNO.  A request
+ *     with DB_GET_RECNO set is handed to __bam_c_rget instead.  DB_NEXT
+ *     and DB_PREV on a cursor with no position (cp->pgno is
+ *     PGNO_INVALID) behave as DB_FIRST and DB_LAST.
+ *
+ *     The key is returned for every operation except DB_SET; the data
+ *     item is always returned.
+ *
+ *     Returns 0 on success, DB_KEYEMPTY for DB_CURRENT on a cursor
+ *     flagged C_DELETED or C_REPLACE, or the error from the failing
+ *     helper.  On every failure after the cursor has been snapshotted,
+ *     any page/lock picked up by this call is released and the cursor
+ *     is restored to its state on entry.
+ */
+static int
+__bam_c_get(dbc, key, data, flags)
+       DBC *dbc;
+       DBT *key, *data;
+       int flags;
+{
+       BTREE *t;
+       CURSOR *cp, copy;
+       DB *dbp;
+       PAGE *h;
+       int exact, ret;
+
+       DEBUG_LREAD(dbc->dbp, dbc->txn, "bam_c_get",
+           flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
+           NULL, flags);
+
+       cp = dbc->internal;
+
+       /* Check for invalid flags. */
+       if ((ret = __db_cgetchk(dbc->dbp,
+           key, data, flags, cp->pgno != PGNO_INVALID)) != 0)
+               return (ret);
+
+       GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+       t = dbp->internal;
+
+       /*
+        * Break out the code to return a cursor's record number.  It
+        * has nothing to do with the cursor get code except that it's
+        * been rammed into the interface.
+        */
+       if (LF_ISSET(DB_GET_RECNO)) {
+               ret = __bam_c_rget(dbp, cp, key, data, flags);
+               PUTHANDLE(dbp);
+               return (ret);
+       }
+
+       /*
+        * Initialize the cursor for a new retrieval.  The snapshot keeps
+        * the previous position and lock so that they can be released on
+        * success or put back on failure.
+        */
+       copy = *cp;
+       cp->page = NULL;
+       cp->lock = LOCK_INVALID;
+
+       switch (flags) {
+       case DB_CURRENT:
+               /*
+                * It's not possible to return a deleted record.  Leave
+                * through err so that the page/lock fields cleared above
+                * are restored from the snapshot; returning directly
+                * would make the cursor forget the lock it holds.
+                */
+               if (F_ISSET(cp, C_DELETED | C_REPLACE)) {
+                       ret = DB_KEYEMPTY;
+                       goto err;
+               }
+
+               /* Get the page with the current item on it. */
+               if ((ret = __bam_pget(dbp, &cp->page, &cp->pgno, 0)) != 0)
+                       goto err;
+               break;
+       case DB_NEXT:
+               if (cp->pgno != PGNO_INVALID) {
+                       if ((ret = __bam_c_next(dbp, cp, 1)) != 0)
+                               goto err;
+                       break;
+               }
+               /* FALLTHROUGH */
+       case DB_FIRST:
+               if ((ret = __bam_c_first(dbp, cp)) != 0)
+                       goto err;
+               break;
+       case DB_PREV:
+               if (cp->pgno != PGNO_INVALID) {
+                       if ((ret = __bam_c_prev(dbp, cp)) != 0)
+                               goto err;
+                       break;
+               }
+               /* FALLTHROUGH */
+       case DB_LAST:
+               if ((ret = __bam_c_last(dbp, cp)) != 0)
+                       goto err;
+               break;
+       case DB_SET_RECNO:
+               exact = 1;
+               if ((ret =
+                   __bam_c_search(dbp, cp, key, S_FIND, 1, &exact)) != 0)
+                       goto err;
+               break;
+       case DB_SET:
+               exact = 1;
+               if ((ret =
+                   __bam_c_search(dbp, cp, key, S_FIND, 0, &exact)) != 0)
+                       goto err;
+               break;
+       case DB_SET_RANGE:
+               exact = 0;
+               if ((ret =
+                   __bam_c_search(dbp, cp, key, S_FIND, 0, &exact)) != 0)
+                       goto err;
+               break;
+       }
+
+       /*
+        * Return the key if the user didn't give us one.  If we've moved to
+        * a duplicate page, we may no longer have a pointer to the main page,
+        * so we have to go get it.  We know that it's already read-locked,
+        * however, so we don't have to acquire a new lock.
+        */
+       if (flags != DB_SET) {
+               if (cp->dpgno != PGNO_INVALID) {
+                       if ((ret = __bam_pget(dbp, &h, &cp->pgno, 0)) != 0)
+                               goto err;
+               } else
+                       h = cp->page;
+               ret = __db_ret(dbp,
+                   h, cp->indx, key, &t->bt_rkey.data, &t->bt_rkey.ulen);
+               if (cp->dpgno != PGNO_INVALID)
+                       (void)memp_fput(dbp->mpf, h, 0);
+               if (ret)
+                       goto err;
+       }
+
+       /* Return the data. */
+       if ((ret = __db_ret(dbp, cp->page,
+           cp->dpgno == PGNO_INVALID ? cp->indx + O_INDX : cp->dindx,
+           data, &t->bt_rdata.data, &t->bt_rdata.ulen)) != 0)
+               goto err;
+
+       /*
+        * If the previous cursor record has been deleted, delete it.  The
+        * returned key isn't a deleted key, so clear the flag.
+        *
+        * The result is stored in ret so that a failed physical delete is
+        * reported to the caller instead of being returned as 0.
+        */
+       if (F_ISSET(&copy, C_DELETED) &&
+           (ret = __bam_c_physdel(dbp, &copy, cp->page)) != 0)
+               goto err;
+       F_CLR(cp, C_DELETED | C_REPLACE);
+
+       /* Release the previous lock, if any. */
+       if (copy.lock != LOCK_INVALID)
+               (void)__BT_TLPUT(dbp, copy.lock);
+
+       /* Release the pinned page. */
+       ret = memp_fput(dbp->mpf, cp->page, 0);
+
+       ++t->lstat.bt_get;
+
+       /* The error path is entered only through the err label. */
+       if (0) {
+err:           if (cp->page != NULL)
+                       (void)memp_fput(dbp->mpf, cp->page, 0);
+               if (cp->lock != LOCK_INVALID)
+                       (void)__BT_TLPUT(dbp, cp->lock);
+               *cp = copy;
+       }
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_c_rget --
+ *     Return the record number for a cursor.
+ *
+ *     Pins the cursor's page, takes a malloc'ed copy of the key at
+ *     cp->indx, searches the tree for that key to obtain its record
+ *     number, and copies the number out through data.  The page is put
+ *     back and the key copy freed on every path past the initial page
+ *     fetch.  The key and flags arguments are not used.
+ *
+ *     Returns the first error met, or the result of __db_retcopy.
+ */
+static int
+__bam_c_rget(dbp, cp, key, data, flags)
+       DB *dbp;
+       CURSOR *cp;
+       DBT *key, *data;
+       int flags;
+{
+       BTREE *tree;
+       DBT keycopy;
+       db_recno_t recno;
+       int exact, ret;
+
+       /* Pin the page holding the cursor's current item. */
+       ret = __bam_pget(dbp, &cp->page, &cp->pgno, 0);
+       if (ret != 0)
+               return (ret);
+
+       /* Take a private copy of the key. */
+       memset(&keycopy, 0, sizeof(DBT));
+       keycopy.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
+       ret = __db_ret(dbp, cp->page, cp->indx, &keycopy, NULL, NULL);
+
+       /* Look the key up again, this time asking for its record number. */
+       if (ret == 0) {
+               exact = 1;
+               ret = __bam_search(dbp,
+                   &keycopy, S_FIND, 1, &recno, &exact);
+       }
+
+       /* Hand the number back, then release the search stack. */
+       if (ret == 0) {
+               tree = dbp->internal;
+               ret = __db_retcopy(data, &recno, sizeof(recno),
+                   &tree->bt_rdata.data, &tree->bt_rdata.ulen,
+                   dbp->db_malloc);
+               __bam_stkrel(dbp);
+       }
+
+       (void)memp_fput(dbp->mpf, cp->page, 0);
+       free(keycopy.data);
+       return (ret);
+}
+
+/*
+ * __bam_c_put --
+ *     Put using a cursor.
+ *
+ *     DB_AFTER, DB_BEFORE and DB_CURRENT store relative to the cursor's
+ *     current position: cp->pgno is write-locked and the page the cursor
+ *     references (leaf or duplicate) is pinned.  DB_KEYFIRST and
+ *     DB_KEYLAST first position the cursor with __bam_c_search on the
+ *     supplied key.  When __bam_iitem reports DB_NEEDSPLIT, control
+ *     jumps to the "split" label inside the if (0) block, the page is
+ *     split, and the operation is retried from the positioning step.
+ *
+ *     Returns the __db_cputchk result if the argument check fails.  On
+ *     any later failure the page/lock still held by this call are
+ *     released and the cursor is restored from the copy taken on entry.
+ *     On success the result of the final memp_fput is returned.
+ *
+ *     NOTE(review): on the split path without needkey, __bam_stkrel is
+ *     called while cp->page and cp->lock still hold the values copied
+ *     from the search stack; if __bam_split then fails, the err code
+ *     puts cp->page and cp->lock again.  Presumably __bam_stkrel has
+ *     already released them -- confirm whether this is a double release.
+ */
+static int
+__bam_c_put(dbc, key, data, flags)
+       DBC *dbc;
+       DBT *key, *data;
+       int flags;
+{
+       BTREE *t;
+       CURSOR *cp, copy;
+       DB *dbp;
+       DBT dbt;
+       db_indx_t indx;
+       db_pgno_t pgno;
+       int exact, needkey, ret;
+       void *arg;
+
+       DEBUG_LWRITE(dbc->dbp, dbc->txn, "bam_c_put",
+           flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
+           data, flags);
+
+       cp = dbc->internal;
+
+       if ((ret = __db_cputchk(dbc->dbp, key, data, flags,
+           F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->pgno != PGNO_INVALID)) != 0)
+               return (ret);
+
+       GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+       t = dbp->internal;
+
+       /* Initialize the cursor for a new retrieval. */
+       copy = *cp;
+       cp->page = NULL;
+       cp->lock = LOCK_INVALID;
+
+       /*
+        * To split, we need a valid key for the page.  Since it's a cursor,
+        * we have to build one.
+        *
+        * This block is skipped on the way down and entered only by the
+        * "goto split" statements below, after needkey and indx have been
+        * set.  After a successful split, execution falls out of the block
+        * and repeats the positioning and insert code.
+        */
+       if (0) {
+split:         if (needkey) {
+                       memset(&dbt, 0, sizeof(DBT));
+                       ret = __db_ret(dbp, cp->page, indx,
+                           &dbt, &t->bt_rkey.data, &t->bt_rkey.ulen);
+
+                       DISCARD(dbp, cp);
+
+                       if (ret)
+                               goto err;
+                       arg = &dbt;
+               } else {
+                       (void)__bam_stkrel(dbp);
+                       arg = key;
+               }
+               if ((ret = __bam_split(dbp, arg)) != 0)
+                       goto err;
+       }
+
+       /* If there's no key supplied, use the cursor. */
+       if (flags == DB_KEYFIRST || flags == DB_KEYLAST)
+               needkey = 0;
+       else {
+               needkey = 1;
+               if (cp->dpgno == PGNO_INVALID) {
+                       pgno = cp->pgno;
+                       indx = cp->indx;
+               } else {
+                       pgno = cp->dpgno;
+                       indx = cp->dindx;
+               }
+               /*
+                * Acquire the current page.  The write lock is always taken
+                * on the leaf page (cp->pgno); the page pinned is pgno,
+                * which is the duplicate page when the cursor is on one.
+                */
+               if ((ret = __bam_lget(dbp,
+                   0, cp->pgno, DB_LOCK_WRITE, &cp->lock)) != 0)
+                       goto err;
+               if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
+                       goto err;
+       }
+
+       ret = 0;
+       switch (flags) {
+       case DB_AFTER:
+       case DB_BEFORE:
+       case DB_CURRENT:
+               if ((ret = __bam_iitem(dbp, &cp->page,
+                   &indx, key, data, flags, 0)) == DB_NEEDSPLIT)
+                       goto split;
+               break;
+       case DB_KEYFIRST:
+               exact = 0;
+               if ((ret =
+                   __bam_c_search(dbp, cp, key, S_KEYFIRST, 0, &exact)) != 0)
+                       goto err;
+
+               indx = cp->dpgno == PGNO_INVALID ? cp->indx : cp->dindx;
+               if ((ret = __bam_iitem(dbp, &cp->page, &indx, key,
+                   data, DB_BEFORE, exact ? 0 : BI_NEWKEY)) == DB_NEEDSPLIT)
+                       goto split;
+               if (ret)
+                       goto err;
+               break;
+       case DB_KEYLAST:
+               exact = 0;
+               if ((ret =
+                   __bam_c_search(dbp, cp, key, S_KEYLAST, 0, &exact)) != 0)
+                       goto err;
+
+               indx = cp->dpgno == PGNO_INVALID ? cp->indx : cp->dindx;
+               if ((ret = __bam_iitem(dbp, &cp->page, &indx, key,
+                   data, DB_AFTER, exact ? 0 : BI_NEWKEY)) == DB_NEEDSPLIT)
+                       goto split;
+               break;
+       }
+       if (ret)
+               goto err;
+
+       /*
+        * Update the cursor to point to the new entry.  The new entry was
+        * stored on the current page, because we split pages until it was
+        * possible.
+        */
+       if (cp->dpgno == PGNO_INVALID)
+               cp->indx = indx;
+       else
+               cp->dindx = indx;
+
+       /*
+        * If the previous cursor record has been deleted, delete it.  The
+        * returned key isn't a deleted key, so clear the flag.
+        */
+       if (F_ISSET(&copy, C_DELETED) &&
+           (ret = __bam_c_physdel(dbp, &copy, cp->page)) != 0)
+               goto err;
+       F_CLR(cp, C_DELETED | C_REPLACE);
+
+       /* Release the previous lock, if any. */
+       if (copy.lock != LOCK_INVALID)
+               (void)__BT_TLPUT(dbp, copy.lock);
+
+       /*
+        * Discard the pinned page.  The if (0) block that follows is the
+        * error path and is entered only through the err label.
+        */
+       ret = memp_fput(dbp->mpf, cp->page, 0);
+       if (0) {
+err:           if (cp->page != NULL)
+                       (void)memp_fput(dbp->mpf, cp->page, 0);
+               if (cp->lock != LOCK_INVALID)
+                       (void)__BT_TLPUT(dbp, cp->lock);
+               *cp = copy;
+       }
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_c_first --
+ *     Return the first record.
+ *
+ *     Starting at PGNO_ROOT, read-locks and pins each page and follows
+ *     entry 0 of every internal page until a leaf is reached.  The
+ *     cursor is then set to index 0 of that leaf and moved forward with
+ *     __bam_c_next if the page is empty or the record is deleted.
+ *
+ *     Returns 0, or the first non-zero result from __bam_lget,
+ *     __bam_pget, __bam_c_next or __bam_ovfl_chk.  Nothing is released
+ *     here on such a return; whatever is in cp->page and cp->lock is
+ *     left for the caller.
+ *
+ *     NOTE(review): the duplicate check below always inspects index
+ *     O_INDX, even after __bam_c_next may have changed cp->indx or moved
+ *     the cursor onto a duplicate page, whereas __bam_c_last passes
+ *     cp->indx + O_INDX.  Confirm that this is intended.
+ */
+static int
+__bam_c_first(dbp, cp)
+       DB *dbp;
+       CURSOR *cp;
+{
+       db_pgno_t pgno;
+       int ret;
+
+       /* Walk down the left-hand side of the tree. */
+       for (pgno = PGNO_ROOT;;) {
+               if ((ret =
+                   __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
+                       return (ret);
+               if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
+                       return (ret);
+
+               /* If we find a leaf page, we're done. */
+               if (ISLEAF(cp->page))
+                       break;
+
+               pgno = GET_BINTERNAL(cp->page, 0)->pgno;
+               DISCARD(dbp, cp);
+       }
+
+       cp->pgno = cp->page->pgno;
+       cp->indx = 0;
+       cp->dpgno = PGNO_INVALID;
+
+       /* If it's an empty page or a deleted record, go to the next one. */
+       if (NUM_ENT(cp->page) == 0 ||
+           GET_BKEYDATA(cp->page, cp->indx + O_INDX)->deleted)
+               if ((ret = __bam_c_next(dbp, cp, 0)) != 0)
+                       return (ret);
+
+       /* If it's a duplicate reference, go to the first entry. */
+       if ((ret = __bam_ovfl_chk(dbp, cp, O_INDX, 0)) != 0)
+               return (ret);
+
+       /* If it's a deleted record, go to the next one. */
+       if (cp->dpgno != PGNO_INVALID &&
+           GET_BKEYDATA(cp->page, cp->dindx)->deleted)
+               if ((ret = __bam_c_next(dbp, cp, 0)) != 0)
+                       return (ret);
+       return (0);
+}
+
+/*
+ * __bam_c_last --
+ *     Return the last record.
+ *
+ *     Starting at PGNO_ROOT, read-locks and pins each page and follows
+ *     the last entry (NUM_ENT - O_INDX) of every internal page until a
+ *     leaf is reached.  The cursor is then set to the last key on that
+ *     leaf (NUM_ENT - P_INDX, or 0 for an empty page) and moved back
+ *     with __bam_c_prev if the page is empty or the record is deleted.
+ *
+ *     Returns 0, or the first non-zero result from __bam_lget,
+ *     __bam_pget, __bam_c_prev or __bam_ovfl_chk.  Nothing is released
+ *     here on such a return; whatever is in cp->page and cp->lock is
+ *     left for the caller.
+ */
+static int
+__bam_c_last(dbp, cp)
+       DB *dbp;
+       CURSOR *cp;
+{
+       db_pgno_t pgno;
+       int ret;
+
+       /* Walk down the right-hand side of the tree. */
+       for (pgno = PGNO_ROOT;;) {
+               if ((ret =
+                   __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
+                       return (ret);
+               if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
+                       return (ret);
+
+               /* If we find a leaf page, we're done. */
+               if (ISLEAF(cp->page))
+                       break;
+
+               pgno =
+                   GET_BINTERNAL(cp->page, NUM_ENT(cp->page) - O_INDX)->pgno;
+               DISCARD(dbp, cp);
+       }
+
+       cp->pgno = cp->page->pgno;
+       cp->indx = NUM_ENT(cp->page) == 0 ? 0 : NUM_ENT(cp->page) - P_INDX;
+       cp->dpgno = PGNO_INVALID;
+
+       /* If it's an empty page or a deleted record, go to the previous one. */
+       if (NUM_ENT(cp->page) == 0 ||
+           GET_BKEYDATA(cp->page, cp->indx + O_INDX)->deleted)
+               if ((ret = __bam_c_prev(dbp, cp)) != 0)
+                       return (ret);
+
+       /* If it's a duplicate reference, go to the last entry. */
+       if ((ret = __bam_ovfl_chk(dbp, cp, cp->indx + O_INDX, 1)) != 0)
+               return (ret);
+
+       /* If it's a deleted record, go to the previous one. */
+       if (cp->dpgno != PGNO_INVALID &&
+           GET_BKEYDATA(cp->page, cp->dindx)->deleted)
+               if ((ret = __bam_c_prev(dbp, cp)) != 0)
+                       return (ret);
+       return (0);
+}
+
+/*
+ * __bam_c_next --
+ *     Move to the next record.
+ *
+ *     Works on a btree leaf page when cp->dpgno is PGNO_INVALID and on a
+ *     page of duplicates otherwise.  The step is P_INDX on a DB_BTREE
+ *     leaf and O_INDX in every other case.  If cp->page is NULL the page
+ *     is read-locked and pinned first.
+ *
+ *     initial_move, when non-zero, advances one step before the first
+ *     test; with 0 the current index is itself examined.
+ *
+ *     Returns 0 with cp->pgno/cp->indx (or cp->dpgno/cp->dindx) set to
+ *     the record found, DB_NOTFOUND when a leaf page has no next page,
+ *     or the error from a lock, page or overflow-check call.
+ */
+static int
+__bam_c_next(dbp, cp, initial_move)
+       DB *dbp;
+       CURSOR *cp;
+       int initial_move;
+{
+       db_indx_t adjust, indx;
+       db_pgno_t pgno;
+       int ret;
+
+       /*
+        * We're either moving through a page of duplicates or a btree leaf
+        * page.
+        */
+       if (cp->dpgno == PGNO_INVALID) {
+               adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX;
+               pgno = cp->pgno;
+               indx = cp->indx;
+       } else {
+               adjust = O_INDX;
+               pgno = cp->dpgno;
+               indx = cp->dindx;
+       }
+       if (cp->page == NULL) {
+               if ((ret =
+                   __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
+                       return (ret);
+               if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
+                       return (ret);
+       }
+
+       /*
+        * If at the end of the page, move to a subsequent page.
+        *
+        * !!!
+        * Check for >= NUM_ENT.  If we're here as the result of a search that
+        * landed us on NUM_ENT, we'll increment indx before we test.
+        *
+        * !!!
+        * This code handles empty pages and pages with only deleted entries.
+        */
+       if (initial_move)
+               indx += adjust;
+       for (;;) {
+               if (indx >= NUM_ENT(cp->page)) {
+                       pgno = cp->page->next_pgno;
+                       DISCARD(dbp, cp);
+
+                       /*
+                        * If we're in a btree leaf page, we've reached the end
+                        * of the tree.  If we've reached the end of a page of
+                        * duplicates, continue from the btree leaf page where
+                        * we found this page of duplicates.
+                        */
+                       if (pgno == PGNO_INVALID) {
+                               /* If in a btree leaf page, it's EOF. */
+                               if (cp->dpgno == PGNO_INVALID)
+                                       return (DB_NOTFOUND);
+
+                               /* Continue from the last btree leaf page. */
+                               cp->dpgno = PGNO_INVALID;
+
+                               adjust = P_INDX;
+                               pgno = cp->pgno;
+                               indx = cp->indx + P_INDX;
+                       } else
+                               indx = 0;
+
+                       if ((ret = __bam_lget(dbp,
+                           0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
+                               return (ret);
+                       if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
+                               return (ret);
+                       continue;
+               }
+
+               /*
+                * Ignore deleted records.  On a leaf page the flag is read
+                * from the data item (indx + O_INDX), on a duplicate page
+                * from the item at indx.
+                */
+               if (dbp->type == DB_BTREE &&
+                   ((cp->dpgno == PGNO_INVALID &&
+                   GET_BKEYDATA(cp->page, indx + O_INDX)->deleted) ||
+                   (cp->dpgno != PGNO_INVALID &&
+                   GET_BKEYDATA(cp->page, indx)->deleted))) {
+                       indx += adjust;
+                       continue;
+               }
+
+               /*
+                * If we're not in a duplicates page, check to see if we've
+                * found a page of duplicates, in which case we move to the
+                * first entry.
+                */
+               if (cp->dpgno == PGNO_INVALID) {
+                       cp->pgno = cp->page->pgno;
+                       cp->indx = indx;
+
+                       if ((ret =
+                           __bam_ovfl_chk(dbp, cp, indx + O_INDX, 0)) != 0)
+                               return (ret);
+                       if (cp->dpgno != PGNO_INVALID) {
+                               indx = cp->dindx;
+                               adjust = O_INDX;
+                               continue;
+                       }
+               } else {
+                       cp->dpgno = cp->page->pgno;
+                       cp->dindx = indx;
+               }
+               break;
+       }
+       return (0);
+}
+
+/*
+ * __bam_c_prev --
+ *     Move to the previous record.
+ *
+ *     Works on a btree leaf page when cp->dpgno is PGNO_INVALID and on a
+ *     page of duplicates otherwise.  The step is P_INDX on a DB_BTREE
+ *     leaf and O_INDX in every other case.  If cp->page is NULL the page
+ *     is read-locked and pinned first.  The index is always decremented
+ *     before a record is examined.
+ *
+ *     Returns 0 with cp->pgno/cp->indx (or cp->dpgno/cp->dindx) set to
+ *     the record found, DB_NOTFOUND when a leaf page has no previous
+ *     page, or the error from a lock, page or overflow-check call.
+ */
+static int
+__bam_c_prev(dbp, cp)
+       DB *dbp;
+       CURSOR *cp;
+{
+       db_indx_t indx, adjust;
+       db_pgno_t pgno;
+       int ret, set_indx;
+
+       /*
+        * We're either moving through a page of duplicates or a btree leaf
+        * page.
+        */
+       if (cp->dpgno == PGNO_INVALID) {
+               adjust = dbp->type == DB_BTREE ? P_INDX : O_INDX;
+               pgno = cp->pgno;
+               indx = cp->indx;
+       } else {
+               adjust = O_INDX;
+               pgno = cp->dpgno;
+               indx = cp->dindx;
+       }
+       if (cp->page == NULL) {
+               if ((ret =
+                   __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
+                       return (ret);
+               if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
+                       return (ret);
+       }
+
+       /*
+        * If at the beginning of the page, move to any previous one.
+        *
+        * !!!
+        * This code handles empty pages and pages with only deleted entries.
+        */
+       for (;;) {
+               if (indx == 0) {
+                       pgno = cp->page->prev_pgno;
+                       DISCARD(dbp, cp);
+
+                       /*
+                        * If we're in a btree leaf page, we've reached the
+                        * beginning of the tree.  If we've reached the first
+                        * of a page of duplicates, continue from the btree
+                        * leaf page where we found this page of duplicates.
+                        *
+                        * set_indx records whether indx must be reset to the
+                        * entry count of the page fetched below (a previous
+                        * page) or kept as the leaf index just restored.
+                        */
+                       if (pgno == PGNO_INVALID) {
+                               /* If in a btree leaf page, it's SOF. */
+                               if (cp->dpgno == PGNO_INVALID)
+                                       return (DB_NOTFOUND);
+
+                               /* Continue from the last btree leaf page. */
+                               cp->dpgno = PGNO_INVALID;
+
+                               adjust = P_INDX;
+                               pgno = cp->pgno;
+                               indx = cp->indx;
+                               set_indx = 0;
+                       } else
+                               set_indx = 1;
+
+                       if ((ret = __bam_lget(dbp,
+                           0, pgno, DB_LOCK_READ, &cp->lock)) != 0)
+                               return (ret);
+                       if ((ret = __bam_pget(dbp, &cp->page, &pgno, 0)) != 0)
+                               return (ret);
+
+                       if (set_indx)
+                               indx = NUM_ENT(cp->page);
+                       if (indx == 0)
+                               continue;
+               }
+
+               /*
+                * Ignore deleted records.  On a leaf page the flag is read
+                * from the data item (indx + O_INDX), on a duplicate page
+                * from the item at indx.
+                */
+               indx -= adjust;
+               if (dbp->type == DB_BTREE &&
+                   ((cp->dpgno == PGNO_INVALID &&
+                   GET_BKEYDATA(cp->page, indx + O_INDX)->deleted) ||
+                   (cp->dpgno != PGNO_INVALID &&
+                   GET_BKEYDATA(cp->page, indx)->deleted)))
+                       continue;
+
+               /*
+                * If we're not in a duplicates page, check to see if we've
+                * found a page of duplicates, in which case we move to the
+                * last entry.
+                */
+               if (cp->dpgno == PGNO_INVALID) {
+                       cp->pgno = cp->page->pgno;
+                       cp->indx = indx;
+
+                       if ((ret =
+                           __bam_ovfl_chk(dbp, cp, indx + O_INDX, 1)) != 0)
+                               return (ret);
+                       if (cp->dpgno != PGNO_INVALID) {
+                               indx = cp->dindx + O_INDX;
+                               adjust = O_INDX;
+                               continue;
+                       }
+               } else {
+                       cp->dpgno = cp->page->pgno;
+                       cp->dindx = indx;
+               }
+               break;
+       }
+       return (0);
+}
+
+/*
+ * __bam_c_search --
+ *     Move to a specified record.
+ *
+ *     With isrecno set, the key is converted by __ram_getno and looked
+ *     up with __bam_rsearch; otherwise __bam_search is used.  The page,
+ *     index and lock found at t->bt_csp are then copied into the cursor.
+ *
+ *     On entry *exactp says whether an exact match is required; the
+ *     search function is given exactp and its value afterwards decides
+ *     whether the duplicate check is made.  If an exact match was
+ *     required and *exactp comes back zero, DB_NOTFOUND is returned with
+ *     the page and lock from the search still recorded in the cursor.
+ *
+ *     Returns 0, DB_NOTFOUND, or the error from the failing helper.
+ */
+static int
+__bam_c_search(dbp, cp, key, flags, isrecno, exactp)
+       DB *dbp;
+       CURSOR *cp;
+       const DBT *key;
+       u_int flags;
+       int isrecno, *exactp;
+{
+       BTREE *t;
+       db_recno_t recno;
+       int needexact, ret;
+
+       t = dbp->internal;
+       needexact = *exactp;
+
+       /*
+        * Find any matching record; the search function pins the page.  Make
+        * sure it's a valid key (__bam_search may return an index just past
+        * the end of a page) and return it.
+        */
+       if (isrecno) {
+               if ((ret = __ram_getno(dbp, key, &recno, 0)) != 0)
+                       return (ret);
+               ret = __bam_rsearch(dbp, &recno, flags, 1, exactp);
+       } else
+               ret = __bam_search(dbp, key, flags, 1, NULL, exactp);
+       if (ret != 0)
+               return (ret);
+
+       cp->page = t->bt_csp->page;
+       cp->pgno = cp->page->pgno;
+       cp->indx = t->bt_csp->indx;
+       cp->lock = t->bt_csp->lock;
+       cp->dpgno = PGNO_INVALID;
+
+       /*
+        * If we have an exact match, make sure that we're not looking at a
+        * chain of duplicates -- if so, move to an entry in that chain.
+        */
+       if (*exactp) {
+               if ((ret = __bam_ovfl_chk(dbp,
+                   cp, cp->indx + O_INDX, LF_ISSET(S_DUPLAST))) != 0)
+                       return (ret);
+       } else
+               if (needexact)
+                       return (DB_NOTFOUND);
+
+       /* If past the end of a page, find the next entry. */
+       if (cp->indx == NUM_ENT(cp->page) &&
+           (ret = __bam_c_next(dbp, cp, 0)) != 0)
+               return (ret);
+
+       /*
+        * If it's a deleted record, go to the next or previous one.  Only
+        * a cursor positioned on a duplicate page is tested here; the move
+        * is backwards for S_KEYLAST and forwards for everything else.
+        */
+       if (cp->dpgno != PGNO_INVALID &&
+           GET_BKEYDATA(cp->page, cp->dindx)->deleted)
+               if (flags == S_KEYLAST) {
+                       if ((ret = __bam_c_prev(dbp, cp)) != 0)
+                               return (ret);
+               } else
+                       if ((ret = __bam_c_next(dbp, cp, 0)) != 0)
+                               return (ret);
+       return (0);
+}
+
+/*
+ * __bam_ovfl_chk --
+ *     If the item at indx on the cursor's page is a B_DUPLICATE entry,
+ *     swap the cursor's page for the duplicates page it names and record
+ *     the position in the cursor's dpgno/dindx fields.  With to_end set
+ *     the cursor lands on the last entry of the page returned by
+ *     __db_dend(), otherwise on entry 0 of the named page.
+ *
+ *     Returns 0 (also when the item is not a duplicate entry) or the
+ *     non-zero return of a failing helper; after a failed fetch cp->page
+ *     is left NULL.
+ *
+ * PUBLIC: int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int));
+ */
+int
+__bam_ovfl_chk(dbp, cp, indx, to_end)
+       DB *dbp;
+       CURSOR *cp;
+       u_int32_t indx;
+       int to_end;
+{
+       BOVERFLOW *ovfl;
+       db_pgno_t dup_pgno;
+       int ret;
+
+       ovfl = GET_BOVERFLOW(cp->page, indx);
+       if (bo_is_dup:
+           0)
+               ;
+       return (0);
+}
+
+#ifdef DEBUG
+/*
+ * __bam_cprint --
+ *     Display the current btree cursor list.
+ *
+ *     Writes one line per cursor on dbp->curs_queue to stderr: the
+ *     cursor's address, its page/index pair, its duplicate page/index
+ *     pair, and "(deleted)" when C_DELETED is set.  The list is walked
+ *     between DB_THREAD_LOCK and DB_THREAD_UNLOCK.  Always returns 0.
+ */
+int
+__bam_cprint(dbp)
+       DB *dbp;
+{
+       CURSOR *cp;
+       DBC *dbc;
+
+       DB_THREAD_LOCK(dbp);
+       for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+           dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+               cp = (CURSOR *)dbc->internal;
+               /*
+                * Print the address with %p: casting the pointer to u_int
+                * drops the upper bits wherever pointers are wider than
+                * an unsigned int.
+                */
+               fprintf(stderr,
+                   "%p: page: %lu index: %lu dpage %lu dindex: %lu",
+                   (void *)cp, (u_long)cp->pgno, (u_long)cp->indx,
+                   (u_long)cp->dpgno, (u_long)cp->dindx);
+               if (F_ISSET(cp, C_DELETED))
+                       fprintf(stderr, "(deleted)");
+               fprintf(stderr, "\n");
+       }
+       DB_THREAD_UNLOCK(dbp);
+       return (0);
+}
+#endif /* DEBUG */
+
+/*
+ * __bam_ca_delete --
+ *     Check if any of the cursors refer to the item we are about to delete.
+ *     We'll return the number of cursors that refer to the item in question.
+ *     If a cursor does refer to the item, then we set its deleted bit.
+ *
+ *     pgno/indx identify the item; a cursor refers to it when either its
+ *     pgno/indx pair or its dpgno/dindx pair matches.  curs is the cursor
+ *     initiating the delete (may be NULL); it is never counted and never
+ *     has C_DELETED set by this routine.
+ *
+ * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *));
+ */
+int
+__bam_ca_delete(dbp, pgno, indx, curs)
+       DB *dbp;
+       db_pgno_t pgno;
+       u_int32_t indx;
+       CURSOR *curs;
+{
+       DBC *dbc;
+       CURSOR *cp;
+       int count;
+
+       /*
+        * Adjust the cursors.  We don't have to review the cursors for any
+        * process other than the current one, because we have the page write
+        * locked at this point, and any other process had better be using a
+        * different locker ID, meaning that only cursors in our process can
+        * be on the page.
+        *
+        * It's possible for multiple cursors within the thread to have write
+        * locks on the same page, but, cursors within a thread must be single
+        * threaded, so all we're locking here is the cursor linked list.
+        *
+        * indx refers to the first of what might be a duplicate set.  The
+        * cursor passed in is the one initiating the delete, so we don't
+        * want to count it.
+        */
+       DB_THREAD_LOCK(dbp);
+       for (count = 0, dbc = TAILQ_FIRST(&dbp->curs_queue);
+           dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+               cp = (CURSOR *)dbc->internal;
+
+               /*
+                * Skip the initiating cursor for both the leaf-page and
+                * the duplicate-page comparison.  The exclusion used to
+                * be and'ed into the leaf-page test only, so an initiating
+                * cursor positioned on a duplicate page counted itself.
+                */
+               if (curs == cp)
+                       continue;
+
+               if ((cp->pgno == pgno && cp->indx == indx) ||
+                   (cp->dpgno == pgno && cp->dindx == indx)) {
+                       ++count;
+                       F_SET(cp, C_DELETED);
+               }
+       }
+       DB_THREAD_UNLOCK(dbp);
+       return (count);
+}
+
+/*
+ * __bam_ca_di --
+ *     Shift cursor indices after an index was deleted from or inserted
+ *     into page pgno.  Every cursor index on that page that is at or
+ *     beyond indx is moved by value; the leaf position (pgno/indx) and
+ *     the duplicate position (dpgno/dindx) are handled independently.
+ *     DB_RECNO databases are left untouched.
+ *
+ * PUBLIC: void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
+ */
+void
+__bam_ca_di(dbp, pgno, indx, value)
+       DB *dbp;
+       db_pgno_t pgno;
+       u_int32_t indx;
+       int value;
+{
+       CURSOR *cur;
+       DBC *dbc;
+
+       /* Only btree cursors are adjusted here; recno does its own. */
+       if (dbp->type != DB_RECNO) {
+               /* The cursor list is walked with the thread lock held. */
+               DB_THREAD_LOCK(dbp);
+               dbc = TAILQ_FIRST(&dbp->curs_queue);
+               while (dbc != NULL) {
+                       cur = (CURSOR *)dbc->internal;
+                       if (cur->pgno == pgno && cur->indx >= indx)
+                               cur->indx += value;
+                       if (cur->dpgno == pgno && cur->dindx >= indx)
+                               cur->dindx += value;
+                       dbc = TAILQ_NEXT(dbc, links);
+               }
+               DB_THREAD_UNLOCK(dbp);
+       }
+}
+
+/*
+ * __bam_ca_dup --
+ *     Re-home cursors whose data item was moved from a leaf page onto a
+ *     duplicates page.  A cursor at index fi of page fpgno that is not
+ *     yet on a duplicates page gets leaf index first and duplicate
+ *     position tpgno/ti.
+ *
+ * PUBLIC: void __bam_ca_dup __P((DB *,
+ * PUBLIC:    db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t));
+ */
+void
+__bam_ca_dup(dbp, fpgno, first, fi, tpgno, ti)
+       DB *dbp;
+       db_pgno_t fpgno, tpgno;
+       u_int32_t first, fi, ti;
+{
+       CURSOR *cur;
+       DBC *dbc;
+
+       /* The cursor list is walked with the thread lock held. */
+       DB_THREAD_LOCK(dbp);
+       for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+           dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+               cur = (CURSOR *)dbc->internal;
+
+               /*
+                * A cursor that already has a duplicate position was
+                * handled by an earlier call for the same leaf slot.
+                */
+               if (cur->dpgno != PGNO_INVALID)
+                       continue;
+
+               /* Not on the item being moved. */
+               if (cur->pgno != fpgno || cur->indx != fi)
+                       continue;
+
+               cur->indx = first;
+               cur->dpgno = tpgno;
+               cur->dindx = ti;
+       }
+       DB_THREAD_UNLOCK(dbp);
+}
+
+/*
+ * __bam_ca_move --
+ *     Repoint cursors from page fpgno to page tpgno.  Only the leaf page
+ *     number of each cursor is compared and rewritten; the duplicate
+ *     fields are not examined.  DB_RECNO databases are left untouched.
+ *     The t argument is not used.
+ *
+ * PUBLIC: void __bam_ca_move __P((DB *, BTREE *, db_pgno_t, db_pgno_t));
+ */
+void
+__bam_ca_move(dbp, t, fpgno, tpgno)
+       DB *dbp;
+       BTREE *t;
+       db_pgno_t fpgno, tpgno;
+{
+       CURSOR *cur;
+       DBC *dbc;
+
+       /* Only btree cursors are adjusted here; recno does its own. */
+       if (dbp->type != DB_RECNO) {
+               /* The cursor list is walked with the thread lock held. */
+               DB_THREAD_LOCK(dbp);
+               dbc = TAILQ_FIRST(&dbp->curs_queue);
+               while (dbc != NULL) {
+                       cur = (CURSOR *)dbc->internal;
+                       if (cur->pgno == fpgno)
+                               cur->pgno = tpgno;
+                       dbc = TAILQ_NEXT(dbc, links);
+               }
+               DB_THREAD_UNLOCK(dbp);
+       }
+}
+
+/*
+ * __bam_ca_replace --
+ *     Track cursors across the overwrite of the item at pgno/indx.
+ *
+ *     REPLACE_SETUP:
+ *             Set C_REPLACE_SETUP on every cursor whose leaf or
+ *             duplicate position matches pgno/indx.
+ *     REPLACE_SUCCESS:
+ *             For every cursor carrying C_REPLACE_SETUP, restore its
+ *             index on pgno to indx, set C_REPLACE and clear C_DELETED
+ *             and C_REPLACE_SETUP.
+ *     REPLACE_FAILED:
+ *             For every cursor carrying C_REPLACE_SETUP, restore its
+ *             index on pgno to indx and clear C_REPLACE_SETUP.
+ *
+ *     Any other pass value only takes and releases the thread lock.
+ *
+ * PUBLIC: void __bam_ca_replace
+ * PUBLIC:    __P((DB *, db_pgno_t, u_int32_t, ca_replace_arg));
+ */
+void
+__bam_ca_replace(dbp, pgno, indx, pass)
+       DB *dbp;
+       db_pgno_t pgno;
+       u_int32_t indx;
+       ca_replace_arg pass;
+{
+       CURSOR *cur;
+       DBC *dbc;
+
+       DB_THREAD_LOCK(dbp);
+       if (pass == REPLACE_SETUP) {
+               /* Mark the cursors sitting on the item. */
+               for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+                   dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+                       cur = (CURSOR *)dbc->internal;
+                       if ((cur->pgno == pgno && cur->indx == indx) ||
+                           (cur->dpgno == pgno && cur->dindx == indx))
+                               F_SET(cur, C_REPLACE_SETUP);
+               }
+       } else if (pass == REPLACE_SUCCESS || pass == REPLACE_FAILED) {
+               for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+                   dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+                       cur = (CURSOR *)dbc->internal;
+                       if (!F_ISSET(cur, C_REPLACE_SETUP))
+                               continue;
+
+                       /*
+                        * The index is reset in both outcomes: other
+                        * cursor update routines may have moved it while
+                        * the item was deleted and re-inserted.
+                        */
+                       if (cur->dpgno == pgno)
+                               cur->dindx = indx;
+                       if (cur->pgno == pgno)
+                               cur->indx = indx;
+
+                       if (pass == REPLACE_SUCCESS) {
+                               F_SET(cur, C_REPLACE);
+                               F_CLR(cur, C_DELETED | C_REPLACE_SETUP);
+                       } else {
+                               F_CLR(cur, C_REPLACE_SETUP);
+                       }
+               }
+       }
+       DB_THREAD_UNLOCK(dbp);
+}
+
+/*
+ * __bam_ca_split --
+ *     Keep cursors on the same record when page ppgno is split into
+ *     lpgno and rpgno at split_indx.  A cursor position at or beyond
+ *     split_indx moves to rpgno with its index reduced by split_indx; a
+ *     position below split_indx moves to lpgno only when cleft is set.
+ *     The leaf and duplicate positions of each cursor are treated alike.
+ *     DB_RECNO databases are left untouched.
+ *
+ * PUBLIC: void __bam_ca_split __P((DB *,
+ * PUBLIC:    db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
+ */
+void
+__bam_ca_split(dbp, ppgno, lpgno, rpgno, split_indx, cleft)
+       DB *dbp;
+       db_pgno_t ppgno, lpgno, rpgno;
+       u_int32_t split_indx;
+       int cleft;
+{
+       CURSOR *cur;
+       DBC *dbc;
+
+       /* Only btree cursors are adjusted here; recno does its own. */
+       if (dbp->type == DB_RECNO)
+               return;
+
+       /* The cursor list is walked with the thread lock held. */
+       DB_THREAD_LOCK(dbp);
+       for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+           dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+               cur = (CURSOR *)dbc->internal;
+
+               /* Leaf position. */
+               if (cur->pgno == ppgno) {
+                       if (cur->indx >= split_indx) {
+                               cur->pgno = rpgno;
+                               cur->indx -= split_indx;
+                       } else if (cleft) {
+                               cur->pgno = lpgno;
+                       }
+               }
+
+               /* Duplicate position. */
+               if (cur->dpgno == ppgno) {
+                       if (cur->dindx >= split_indx) {
+                               cur->dpgno = rpgno;
+                               cur->dindx -= split_indx;
+                       } else if (cleft) {
+                               cur->dpgno = lpgno;
+                       }
+               }
+       }
+       DB_THREAD_UNLOCK(dbp);
+}
+
+/*
+ * __bam_c_physdel --
+ *     Actually do the cursor deletion.
+ *
+ *     Removes the item the cursor is positioned on: the duplicate item
+ *     at dpgno/dindx when the cursor is on a duplicates page, otherwise
+ *     the leaf item at pgno/indx.  Nothing is removed (and 0 is returned)
+ *     when __bam_ca_delete() reports that another cursor refers to the
+ *     item.  h is a page the caller already holds, or NULL; when it is
+ *     not the page holding the item, the page is locked and fetched
+ *     here and released again before returning.
+ *
+ *     Returns 0 on success or the non-zero return of a failing helper.
+ */
+static int
+__bam_c_physdel(dbp, cp, h)
+       DB *dbp;
+       CURSOR *cp;
+       PAGE *h;
+{
+       BOVERFLOW bo;
+       BTREE *t;
+       DBT dbt;
+       DB_LOCK lock;
+       db_indx_t indx;
+       db_pgno_t pgno, next_pgno, prev_pgno;
+       int local, ret;
+
+       t = dbp->internal;
+       ret = 0;
+
+       /* Figure out what we're deleting. */
+       if (cp->dpgno == PGNO_INVALID) {
+               pgno = cp->pgno;
+               indx = cp->indx;
+       } else {
+               pgno = cp->dpgno;
+               indx = cp->dindx;
+       }
+
+       /*
+        * If the item is referenced by another cursor, leave it up to that
+        * cursor to do the delete.
+        */
+       if (__bam_ca_delete(dbp, pgno, indx, cp) != 0)
+               return (0);
+
+       /*
+        * If we don't already have the page locked, get it and delete the
+        * items.
+        */
+       if ((h == NULL || h->pgno != pgno)) {
+               if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &lock)) != 0)
+                       return (ret);
+               if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
+                       /* Don't leave the lock behind if the fetch fails. */
+                       (void)__BT_TLPUT(dbp, lock);
+                       return (ret);
+               }
+               local = 1;
+       } else
+               local = 0;
+
+       /*
+        * If we're deleting a duplicate entry, call the common code to do
+        * the work.
+        */
+       if (TYPE(h) == P_DUPLICATE) {
+               pgno = PGNO(h);
+               prev_pgno = PREV_PGNO(h);
+               next_pgno = NEXT_PGNO(h);
+               if ((ret = __db_drem(dbp, &h, indx, __bam_free)) != 0)
+                       goto err;
+
+               /*
+                * There are 4 cases:
+                *
+                * 1. We removed an item on a page, but there are more items
+                *    on the page.
+                * 2. We removed the last item on a page, removing the last
+                *    duplicate.
+                * 3. We removed the last item on a page, but there is a
+                *    following page of duplicates.
+                * 4. We removed the last item on a page, but there is a
+                *    previous page of duplicates.
+                *
+                * In case 1, h != NULL, h->pgno == pgno
+                * In case 2, h == NULL,
+                *    prev_pgno == PGNO_INVALID, next_pgno == PGNO_INVALID
+                * In case 3, h != NULL, next_pgno != PGNO_INVALID
+                * In case 4, h == NULL, prev_pgno != PGNO_INVALID
+                *
+                * In case 1, there's nothing else to do.
+                * In case 2, remove the entry from the parent page.
+                * In case 3 or 4, if the deleted page was the first in a chain
+                *    of duplicate pages, update the parent page's entry.
+                *
+                * Test:
+                *      If there were previous pages of duplicates or we didn't
+                *      empty the current page of duplicates, we don't need to
+                *      touch the parent page.
+                *
+                * The test uses the prev_pgno saved before the removal: h
+                * may be NULL here (cases 2 and 4), so it must not be
+                * dereferenced to find the previous page.
+                */
+               if (prev_pgno != PGNO_INVALID ||
+                   (h != NULL && pgno == h->pgno))
+                       goto done;
+
+               /*
+                * Release any page we're holding and the lock on the deleted
+                * page.
+                */
+               if (local) {
+                       if (h != NULL)
+                               (void)memp_fput(dbp->mpf, h, 0);
+                       (void)__BT_TLPUT(dbp, lock);
+                       local = 0;
+               }
+
+               /* Acquire the parent page. */
+               if ((ret =
+                   __bam_lget(dbp, 0, cp->pgno, DB_LOCK_WRITE, &lock)) != 0)
+                       goto err;
+               if ((ret = __bam_pget(dbp, &h, &cp->pgno, 0)) != 0) {
+                       (void)__BT_TLPUT(dbp, lock);
+                       goto err;
+               }
+               local = 1;
+
+               /*
+                * If we deleted the last duplicate, we can fall out and do a
+                * normal btree delete in the context of the parent page.  If
+                * not, we have to update the parent's page.
+                */
+               indx = cp->indx;
+               if (next_pgno != PGNO_INVALID) {
+                       /*
+                        * Copy, delete, update and re-insert the parent page's
+                        * entry.
+                        */
+                       bo = *GET_BOVERFLOW(h, indx);
+                       (void)__db_ditem(dbp, h, indx, BOVERFLOW_SIZE);
+                       bo.pgno = next_pgno;
+                       memset(&dbt, 0, sizeof(dbt));
+                       dbt.data = &bo;
+                       dbt.size = BOVERFLOW_SIZE;
+                       (void)__db_pitem(dbp,
+                           h, indx, BOVERFLOW_SIZE, &dbt, NULL);
+
+                       /* Discard the parent page. */
+                       (void)memp_fput(dbp->mpf, h, 0);
+                       (void)__BT_TLPUT(dbp, lock);
+                       local = 0;
+
+                       goto done;
+               }
+       }
+
+       /*
+        * Otherwise, do a normal btree delete.  __bam_ditem() is called
+        * twice with the same index, as __bam_delete() does for a key/data
+        * pair -- presumably the second call removes the entry that moved
+        * into the vacated slot.
+        */
+       if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+               goto err;
+       if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+               goto err;
+
+       /*
+        * If the page is empty, delete it.  To delete a leaf page we need a
+        * copy of a key from the page.  We use the first one that was there,
+        * since it's the last key that the page held.  We malloc the page
+        * information instead of using the return key/data memory because
+        * we've already set them -- the reason that we've already set them
+        * is because we're (potentially) about to do a reverse split, which
+        * would make our saved page information useless.
+        *
+        * XXX
+        * The following operations to delete a page might deadlock.  I think
+        * that's OK.  The problem is if we're deleting an item because we're
+        * closing cursors because we've already deadlocked and want to call
+        * txn_abort().  If we fail due to deadlock, we'll leave a locked
+        * empty page in the tree, which won't be empty long because we're
+        * going to undo the delete.
+        */
+       if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) {
+               memset(&dbt, 0, sizeof(DBT));
+               dbt.flags = DB_DBT_MALLOC | DB_DBT_INTERNAL;
+               if ((ret = __db_ret(dbp, h, 0, &dbt, NULL, NULL)) != 0)
+                       goto err;
+
+               if (local) {
+                       (void)memp_fput(dbp->mpf, h, 0);
+                       (void)__BT_TLPUT(dbp, lock);
+                       local = 0;
+               }
+
+               ret = __bam_dpage(dbp, &dbt);
+               free(dbt.data);
+       }
+
+err:
+done:  if (local) {
+               /* h is NULL when the duplicate removal discarded the page. */
+               if (h != NULL)
+                       (void)memp_fput(dbp->mpf, h, 0);
+               (void)__BT_TLPUT(dbp, lock);
+       }
+
+       if (ret == 0)
+               ++t->lstat.bt_deleted;
+       return (ret);
+}
diff --git a/db2/btree/bt_delete.c b/db2/btree/bt_delete.c
new file mode 100644 (file)
index 0000000..e7ec4df
--- /dev/null
@@ -0,0 +1,607 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_delete.c  10.18 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+static int __bam_dpages __P((DB *, BTREE *));
+
+/*
+ * __bam_delete --
+ *     Delete the items referenced by a key.
+ *
+ *     The flags are validated with __db_delchk() and the key is located
+ *     with __bam_search() using S_DELETE.  Each key/data pair that shares
+ *     the key's index entry is then either removed from the page with two
+ *     __bam_ditem() calls or, when __bam_ca_delete() reports a cursor
+ *     referring to it, only has its data item marked deleted.  If the
+ *     page ends up empty and is not the root page, it is removed with
+ *     __bam_dpage().
+ *
+ *     Returns 0 on success or the non-zero return of a failing helper.
+ *
+ * PUBLIC: int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
+ */
+int
+__bam_delete(argdbp, txn, key, flags)
+       DB *argdbp;
+       DB_TXN *txn;
+       DBT *key;
+       int flags;
+{
+       BTREE *t;
+       DB *dbp;
+       PAGE *h;
+       db_indx_t cnt, i, indx;
+       int dpage, exact, ret, stack;
+
+       DEBUG_LWRITE(argdbp, txn, "bam_delete", key, NULL, flags);
+
+       stack = 0;
+
+       /* Check for invalid flags. */
+       if ((ret =
+           __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
+               return (ret);
+
+       GETHANDLE(argdbp, txn, &dbp, ret);
+       t = dbp->internal;
+
+       /*
+        * Search the tree for the key; delete only deletes exact matches.
+        *
+        * NOTE(review): exact is filled in by the search but never tested
+        * in this function -- presumably __bam_search() itself fails for
+        * an inexact match under S_DELETE; confirm.
+        */
+       if ((ret = __bam_search(dbp, key, S_DELETE, 1, NULL, &exact)) != 0)
+               goto err;
+       stack = 1;
+       h = t->bt_csp->page;
+       indx = t->bt_csp->indx;
+
+       /*
+        * Delete the key/data pair, including any duplicates.
+        *
+        * The first loop counts the consecutive pairs, starting at indx,
+        * whose key slot in h->inp[] holds the same value as h->inp[indx].
+        * The second loop handles one pair per iteration and bumps the
+        * bt_deleted statistic each time: a pair that a cursor refers to
+        * is left on the page with its data item flagged deleted and indx
+        * is advanced past it, any other pair is removed by deleting at
+        * indx twice.
+        */
+       for (cnt = 1, i = indx;; ++cnt)
+               if ((i += P_INDX) >= NUM_ENT(h) || h->inp[i] != h->inp[indx])
+                       break;
+       for (; cnt > 0; --cnt, ++t->lstat.bt_deleted)
+               if (__bam_ca_delete(dbp, h->pgno, indx, NULL) != 0) {
+                       GET_BKEYDATA(h, indx + O_INDX)->deleted = 1;
+                       indx += P_INDX;
+               } else if ((ret = __bam_ditem(dbp, h, indx)) != 0 ||
+                   (ret = __bam_ditem(dbp, h, indx)) != 0)
+                       goto err;
+
+       /*
+        * If we're using record numbers, update internal page record counts.
+        *
+        * NOTE(review): the adjustment is always -1, whatever number of
+        * pairs the loops above handled -- confirm this is intended.
+        */
+       if (F_ISSET(dbp, DB_BT_RECNUM) && (ret = __bam_adjust(dbp, t, -1)) != 0)
+               goto err;
+
+       /*
+        * If the page is now empty, delete it.  The decision is taken
+        * before the stack is released because it reads the page h.
+        */
+       dpage = NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT;
+
+       __bam_stkrel(dbp);
+       stack = 0;
+
+       ret = dpage ? __bam_dpage(dbp, key) : 0;
+
+err:   if (stack)
+               __bam_stkrel(dbp);
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __ram_delete --
+ *     Delete the items referenced by a key.
+ *
+ *     The key is converted to a record number with __ram_getno() and
+ *     located with __bam_rsearch() using S_DELETE.  Without
+ *     DB_RE_RENUMBER the record is replaced by an empty item flagged
+ *     deleted; with it the record is removed, the record counts and the
+ *     cursors are adjusted, and a page left empty (other than the root)
+ *     is removed with __bam_dpages().
+ *
+ *     Returns 0 on success, DB_NOTFOUND when the search is not exact,
+ *     DB_KEYEMPTY when the record is already flagged deleted, or the
+ *     non-zero return of a failing helper.
+ *
+ * PUBLIC: int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
+ */
+int
+__ram_delete(argdbp, txn, key, flags)
+       DB *argdbp;
+       DB_TXN *txn;
+       DBT *key;
+       int flags;
+{
+       BKEYDATA bk;
+       BTREE *t;
+       DB *dbp;
+       DBT hdr, data;
+       PAGE *h;
+       db_indx_t indx;
+       db_recno_t recno;
+       int exact, ret, stack;
+
+       stack = 0;
+
+       /* Check for invalid flags. */
+       if ((ret =
+           __db_delchk(argdbp, flags, F_ISSET(argdbp, DB_AM_RDONLY))) != 0)
+               return (ret);
+
+       GETHANDLE(argdbp, txn, &dbp, ret);
+       t = dbp->internal;
+
+       /* Check the user's record number and fill in as necessary. */
+       if ((ret = __ram_getno(argdbp, key, &recno, 0)) != 0)
+               goto err;
+
+       /* Search the tree for the key; delete only deletes exact matches. */
+       if ((ret = __bam_rsearch(dbp, &recno, S_DELETE, 1, &exact)) != 0)
+               goto err;
+       if (!exact) {
+               ret = DB_NOTFOUND;
+               goto err;
+       }
+
+       h = t->bt_csp->page;
+       indx = t->bt_csp->indx;
+       stack = 1;
+
+       /* If the record has already been deleted, we couldn't have found it. */
+       if (GET_BKEYDATA(h, indx)->deleted) {
+               ret = DB_KEYEMPTY;
+               goto done;
+       }
+
+       /*
+        * If we're not renumbering records, replace the record with a marker
+        * and return.
+        */
+       if (!F_ISSET(dbp, DB_RE_RENUMBER)) {
+               if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+                       goto err;
+
+               bk.deleted = 1;
+               bk.type = B_KEYDATA;
+               bk.len = 0;
+               memset(&hdr, 0, sizeof(hdr));
+               hdr.data = &bk;
+               hdr.size = SSZA(BKEYDATA, data);
+               memset(&data, 0, sizeof(data));
+               data.data = (char *) "";
+               data.size = 0;
+               if ((ret = __db_pitem(dbp,
+                   h, indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0)
+                       goto err;
+
+               ++t->lstat.bt_deleted;
+               goto done;
+       }
+
+       /* Delete the item. */
+       if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+               goto err;
+
+       ++t->lstat.bt_deleted;
+       if (t->bt_recno != NULL)
+               F_SET(t->bt_recno, RECNO_MODIFIED);
+
+       /*
+        * Adjust the counts.  The result is kept rather than dropped so a
+        * failure reaches the caller, as it does in __bam_delete().
+        */
+       ret = __bam_adjust(dbp, t, -1);
+
+       /*
+        * Adjust the cursors.  This is done even if the count adjustment
+        * failed: the record is already gone from the page.
+        */
+       __ram_ca(dbp, recno, CA_DELETE);
+
+       if (ret != 0)
+               goto err;
+
+       /*
+        * If the page is now empty, delete it -- we have the whole tree
+        * locked, so there are no preparations to make.  Else, release
+        * the pages.
+        */
+       if (NUM_ENT(h) == 0 && h->pgno != PGNO_ROOT) {
+               stack = 0;
+               ret = __bam_dpages(dbp, t);
+       }
+
+done:
+err:   if (stack)
+               __bam_stkrel(dbp);
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_ditem --
+ *     Delete one or more entries from a page.
+ *
+ *     The number of bytes to remove is chosen from the page type and the
+ *     item type.  Items of type B_DUPLICATE or B_OVERFLOW first have the
+ *     off-page chain they name released with __db_ddup() or __db_doff().
+ *     The item is then removed with __db_ditem() and the page is marked
+ *     dirty.
+ *
+ *     On a P_LBTREE page, an even index whose inp[] entry equals that of
+ *     the preceding or following pair is only dropped from the index
+ *     array, via __bam_adjindx(); the item itself is left alone.
+ *
+ *     Returns 0 on success, the result of __db_pgfmt() for an unhandled
+ *     page or item type, or the non-zero return of a failing helper.
+ *
+ *     NOTE(review): for a P_IBTREE item of type B_KEYDATA the size comes
+ *     from BKEYDATA_SIZE although the item was read as a BINTERNAL (the
+ *     off-page case uses BINTERNAL_SIZE) -- confirm.  The off-page code
+ *     is also entered by goto from the P_IBTREE case and re-reads the
+ *     item with GET_BOVERFLOW -- confirm that matches the internal item
+ *     layout.
+ *
+ * PUBLIC: int __bam_ditem __P((DB *, PAGE *, u_int32_t));
+ */
+int
+__bam_ditem(dbp, h, indx)
+       DB *dbp;
+       PAGE *h;
+       u_int32_t indx;
+{
+       BINTERNAL *bi;
+       BKEYDATA *bk;
+       BOVERFLOW *bo;
+       u_int32_t nbytes;
+       int ret;
+
+       switch (TYPE(h)) {
+       case P_IBTREE:
+               bi = GET_BINTERNAL(h, indx);
+               switch (bi->type) {
+               case B_DUPLICATE:
+               case B_OVERFLOW:
+                       nbytes = BINTERNAL_SIZE(bi->len);
+                       goto offpage;
+               case B_KEYDATA:
+                       nbytes = BKEYDATA_SIZE(bi->len);
+                       break;
+               default:
+                       return (__db_pgfmt(dbp, h->pgno));
+               }
+               break;
+       case P_IRECNO:
+               nbytes = RINTERNAL_SIZE;
+               break;
+       case P_LBTREE:
+               /*
+                * If it's a duplicate key, discard the index and don't touch
+                * the actual page item.  This works because no data item can
+                * have an index that matches any other index so even if the
+                * data item is in an index "slot", it won't match any other
+                * index.
+                *
+                * Only even values of indx are tested this way.
+                */
+               if (!(indx % 2)) {
+                       if (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
+                               return (__bam_adjindx(dbp,
+                                   h, indx, indx - P_INDX, 0));
+                       if (indx < (u_int32_t)(NUM_ENT(h) - P_INDX) &&
+                           h->inp[indx] == h->inp[indx + P_INDX])
+                               return (__bam_adjindx(dbp,
+                                   h, indx, indx + O_INDX, 0));
+               }
+               /* FALLTHROUGH */
+       case P_LRECNO:
+               bk = GET_BKEYDATA(h, indx);
+               switch (bk->type) {
+               case B_DUPLICATE:
+               case B_OVERFLOW:
+                       nbytes = BOVERFLOW_SIZE;
+
+offpage:               /* Delete duplicate/offpage chains. */
+                       bo = GET_BOVERFLOW(h, indx);
+                       if (bo->type == B_DUPLICATE) {
+                               if ((ret =
+                                   __db_ddup(dbp, bo->pgno, __bam_free)) != 0)
+                                       return (ret);
+                       } else
+                               if ((ret =
+                                   __db_doff(dbp, bo->pgno, __bam_free)) != 0)
+                                       return (ret);
+                       break;
+               case B_KEYDATA:
+                       nbytes = BKEYDATA_SIZE(bk->len);
+                       break;
+               default:
+                       return (__db_pgfmt(dbp, h->pgno));
+               }
+               break;
+       default:
+               return (__db_pgfmt(dbp, h->pgno));
+       }
+
+       /* Delete the item. */
+       if ((ret = __db_ditem(dbp, h, indx, nbytes)) != 0)
+               return (ret);
+
+       /* Mark the page dirty. */
+       return (memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY));
+}
+
+/*
+ * __bam_adjindx --
+ *     Adjust an index on the page.
+ *
+ *     With is_insert set, a copy of h->inp[indx_copy] is inserted at
+ *     position indx of the page's index array; otherwise the entry at
+ *     indx is removed from the array.  Page items themselves are not
+ *     touched.  The change is logged first when DB_LOGGING(dbp) is true,
+ *     and afterwards the page is marked dirty and the cursors on the
+ *     page are shifted with __bam_ca_di().
+ *
+ *     Returns 0 on success, or the non-zero return of the logging call
+ *     (nothing has been changed yet) or of memp_fset() (the index array
+ *     and the cursors have already been updated).
+ *
+ * PUBLIC: int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int));
+ */
+int
+__bam_adjindx(dbp, h, indx, indx_copy, is_insert)
+       DB *dbp;
+       PAGE *h;
+       u_int32_t indx, indx_copy;
+       int is_insert;
+{
+       db_indx_t copy;
+       int ret;
+
+       /* Log the change. */
+       if (DB_LOGGING(dbp) &&
+           (ret = __bam_adj_log(dbp->dbenv->lg_info, dbp->txn, &LSN(h),
+           0, dbp->log_fileid, PGNO(h), &LSN(h), indx, indx_copy,
+           (u_int32_t)is_insert)) != 0)
+               return (ret);
+
+       if (is_insert) {
+               /* Read the value first: the shift below may move it. */
+               copy = h->inp[indx_copy];
+               if (indx != NUM_ENT(h))
+                       memmove(&h->inp[indx + O_INDX], &h->inp[indx],
+                           sizeof(db_indx_t) * (NUM_ENT(h) - indx));
+               h->inp[indx] = copy;
+               ++NUM_ENT(h);
+       } else {
+               --NUM_ENT(h);
+               if (indx != NUM_ENT(h))
+                       memmove(&h->inp[indx], &h->inp[indx + O_INDX],
+                           sizeof(db_indx_t) * (NUM_ENT(h) - indx));
+       }
+
+       /* Mark the page dirty. */
+       ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
+
+       /*
+        * Adjust the cursors.  This happens even when marking the page
+        * dirty failed, because the in-memory index array has already
+        * been changed.
+        */
+       __bam_ca_di(dbp, h->pgno, indx, is_insert ? 1 : -1);
+
+       /* Report a memp_fset() failure instead of discarding it. */
+       return (ret);
+}
+
+/*
+ * __bam_dpage --
+ *     Delete a page from the tree.
+ *
+ *     key is used to search for the leaf page to delete.  The search is
+ *     repeated one level higher each time, until the parent found is the
+ *     root or would not become empty by losing one entry.  The chain of
+ *     pages below that parent is then locked, re-checked and handed to
+ *     __bam_dpages.
+ *
+ *     Returns the result of __bam_dpages, or the error from a failed
+ *     search, lock or page get.  If a page in the chain turns out not to
+ *     be empty any more, the delete is abandoned and 0 is returned.
+ *
+ * PUBLIC: int __bam_dpage __P((DB *, const DBT *));
+ */
+int
+__bam_dpage(dbp, key)
+       DB *dbp;
+       const DBT *key;
+{
+       BTREE *t;
+       DB_LOCK lock;
+       PAGE *h;
+       db_pgno_t pgno;
+       int exact, level, ret;
+
+       ret = 0;
+       t = dbp->internal;
+
+       /*
+        * The locking protocol is that we acquire locks by walking down the
+        * tree, to avoid the obvious deadlocks.
+        *
+        * Call __bam_search to reacquire the empty leaf page, but this time
+        * get both the leaf page and its parent, locked.  Walk back up the
+        * tree, until we have the top pair of pages that we want to delete.
+        * Once we have the top page that we want to delete locked, lock the
+        * underlying pages and check to make sure they're still empty.  If
+        * they are, delete them.
+        */
+       for (level = LEAFLEVEL;; ++level) {
+               /* Acquire a page and its parent, locked. */
+               if ((ret =
+                   __bam_search(dbp, key, S_WRPAIR, level, NULL, &exact)) != 0)
+                       return (ret);
+
+               /*
+                * If we reach the root or the page isn't going to be empty
+                * when we delete one record, quit.
+                */
+               h = t->bt_csp[-1].page;
+               if (h->pgno == PGNO_ROOT || NUM_ENT(h) != 1)
+                       break;
+
+               /* Release the two locked pages. */
+               (void)memp_fput(dbp->mpf, t->bt_csp[-1].page, 0);
+               (void)__BT_TLPUT(dbp, t->bt_csp[-1].lock);
+               (void)memp_fput(dbp->mpf, t->bt_csp[0].page, 0);
+               (void)__BT_TLPUT(dbp, t->bt_csp[0].lock);
+       }
+
+       /*
+        * Leave the stack pointer one after the last entry, we may be about
+        * to push more items on the stack.
+        */
+       ++t->bt_csp;
+
+       /*
+        * t->bt_csp[-2].page is the top page, which we're not going to delete,
+        * and t->bt_csp[-1].page is the first page we are going to delete.
+        *
+        * Walk down the chain, acquiring the rest of the pages until we've
+        * retrieved the leaf page.  If we find any pages that aren't going
+        * to be emptied by the delete, someone else added something while we
+        * were walking the tree, and we discontinue the delete.
+        */
+       for (h = t->bt_csp[-1].page;;) {
+               if (ISLEAF(h)) {
+                       if (NUM_ENT(h) != 0)
+                               goto release;
+                       break;
+               } else
+                       if (NUM_ENT(h) != 1)
+                               goto release;
+
+               /*
+                * Get the next page, write lock it and push it onto the stack.
+                * We know it's index 0, because it can only have one element.
+                */
+               pgno = TYPE(h) == P_IBTREE ?
+                   GET_BINTERNAL(h, 0)->pgno : GET_RINTERNAL(h, 0)->pgno;
+
+               if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &lock)) != 0)
+                       goto release;
+               if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
+                       /*
+                        * The lock just acquired exists only in the local
+                        * variable, it has not been pushed on the stack, so
+                        * it has to be discarded here.
+                        */
+                       (void)__BT_TLPUT(dbp, lock);
+                       goto release;
+               }
+               /*
+                * NOTE(review): if BT_STK_PUSH reports an error, h and lock
+                * were presumably not stored on the stack -- confirm whether
+                * they need to be released on that path as well.
+                */
+               BT_STK_PUSH(t, h, 0, lock, ret);
+               if (ret != 0)
+                       goto release;
+       }
+
+       BT_STK_POP(t);
+       return (__bam_dpages(dbp, t));
+
+release:
+       /* Discard any locked pages and return. */
+       BT_STK_POP(t);
+       __bam_stkrel(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_dpages --
+ *     Delete a set of locked pages.
+ *
+ *     The pages are the entries t->bt_sp through t->bt_csp of the page
+ *     stack.  The first entry is the parent that keeps existing: the item
+ *     at its recorded index is deleted.  Every later entry is a page that
+ *     is freed.  If the parent is the root and is left with one entry, the
+ *     remaining child is copied onto the root page and freed too.
+ *
+ *     Returns 0 on success.  On an early failure the pages and locks still
+ *     on the stack are released and the error is returned.
+ */
+static int
+__bam_dpages(dbp, t)
+       DB *dbp;
+       BTREE *t;
+{
+       DBT a, b;
+       DB_LOCK lock;
+       EPG *epg;
+       PAGE *h;
+       db_pgno_t pgno;
+       db_recno_t rcnt;
+       int ret;
+
+       rcnt = 0;                               /* XXX: Shut the compiler up. */
+       epg = t->bt_sp;
+
+       /*
+        * !!!
+        * There is an interesting deadlock situation here.  We have to relink
+        * the leaf page chain around the leaf page being deleted.  Consider
+        * a cursor walking through the leaf pages, that has the previous page
+        * read-locked and is waiting on a lock for the page we're deleting.
+        * It will deadlock here.  This is a problem, because if our process is
+        * selected to resolve the deadlock, we'll leave an empty leaf page
+        * that we can never again access by walking down the tree.  So, before
+        * we unlink the subtree, we relink the leaf page chain.
+        */
+       if ((ret = __db_relink(dbp, t->bt_csp->page, NULL, 1)) != 0)
+               goto release;
+
+       /*
+        * We have the entire stack of deletable pages locked.  Start from the
+        * top of the tree and move to the bottom, as it's better to release
+        * the inner pages as soon as possible.
+        */
+       if ((ret = __bam_ditem(dbp, epg->page, epg->indx)) != 0)
+               goto release;
+
+       /*
+        * If we deleted the next-to-last item from the root page, the tree
+        * has collapsed a level.  Try and write lock the remaining root + 1
+        * page and copy it onto the root page.  If we can't get the lock,
+        * that's okay, the tree just stays a level deeper than we'd like.
+        */
+       h = epg->page;
+       if (h->pgno == PGNO_ROOT && NUM_ENT(h) == 1) {
+               pgno = TYPE(epg->page) == P_IBTREE ?
+                   GET_BINTERNAL(epg->page, 0)->pgno :
+                   GET_RINTERNAL(epg->page, 0)->pgno;
+               if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &lock)) != 0)
+                       goto release;
+               if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
+                       /*
+                        * The lock is held only in the local variable, the
+                        * release code below walks the stack and would not
+                        * see it.
+                        */
+                       (void)__BT_TLPUT(dbp, lock);
+                       goto release;
+               }
+
+               /* Log the change. */
+               if (DB_LOGGING(dbp)) {
+                       memset(&a, 0, sizeof(a));
+                       a.data = h;
+                       a.size = dbp->pgsize;
+                       memset(&b, 0, sizeof(b));
+                       b.data = P_ENTRY(epg->page, 0);
+                       b.size = BINTERNAL_SIZE(((BINTERNAL *)b.data)->len);
+                       __bam_rsplit_log(dbp->dbenv->lg_info, dbp->txn,
+                          &h->lsn, 0, dbp->log_fileid, h->pgno, &a, &b,
+                          &epg->page->lsn);
+               }
+
+               /*
+                * Make the switch.
+                *
+                * One fixup -- if the tree has record numbers and we're not
+                * converting to a leaf page, we have to preserve the total
+                * record count.
+                */
+               if (TYPE(h) == P_IRECNO ||
+                   (TYPE(h) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM)))
+                       rcnt = RE_NREC(epg->page);
+               memcpy(epg->page, h, dbp->pgsize);
+               epg->page->pgno = PGNO_ROOT;
+               if (TYPE(h) == P_IRECNO ||
+                   (TYPE(h) == P_IBTREE && F_ISSET(dbp, DB_BT_RECNUM)))
+                       RE_NREC_SET(epg->page, rcnt);
+
+               /*
+                * Adjust the cursors.  This is done before the page is freed,
+                * because __bam_free discards our reference to h and the page
+                * must not be read after that.
+                */
+               __bam_ca_move(dbp, t, h->pgno, PGNO_ROOT);
+
+               /* Free the last page in that level of the btree. */
+               ++t->lstat.bt_freed;
+               (void)__bam_free(dbp, h);
+
+               (void)__BT_TLPUT(dbp, lock);
+       }
+
+       /* Release the top page in the subtree. */
+       (void)memp_fput(dbp->mpf, epg->page, 0);
+       (void)__BT_TLPUT(dbp, epg->lock);
+
+       /*
+        * Free the rest of the pages.
+        *
+        * XXX
+        * Don't bother checking for errors.  We've unlinked the subtree from
+        * the tree, and there's no possibility of recovery.
+        */
+       for (; ++epg <= t->bt_csp; ++t->lstat.bt_freed) {
+               if (NUM_ENT(epg->page) != 0)
+                       (void)__bam_ditem(dbp, epg->page, epg->indx);
+
+               (void)__bam_free(dbp, epg->page);
+               (void)__BT_TLPUT(dbp, epg->lock);
+       }
+       return (0);
+
+release:
+       /* Discard any remaining pages and return. */
+       for (; epg <= t->bt_csp; ++epg) {
+               (void)memp_fput(dbp->mpf, epg->page, 0);
+               (void)__BT_TLPUT(dbp, epg->lock);
+       }
+       return (ret);
+}
diff --git a/db2/btree/bt_open.c b/db2/btree/bt_open.c
new file mode 100644 (file)
index 0000000..354888c
--- /dev/null
@@ -0,0 +1,355 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_open.c    10.20 (Sleepycat) 8/19/97";
+#endif /* not lint */
+
+/*
+ * Implementation of btree access method for 4.4BSD.
+ *
+ * The design here was originally based on that of the btree access method
+ * used in the Postgres database system at UC Berkeley.  This implementation
+ * is wholly independent of the Postgres code.
+ */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+#include "common_ext.h"
+
+static int __bam_keyalloc __P((BTREE *));
+static int __bam_setmeta __P((DB *, BTREE *));
+
+/*
+ * __bam_open --
+ *     Open a btree.
+ *
+ *     Allocates the BTREE structure for dbp, validates and records the
+ *     caller's settings from dbinfo (defaults are used when dbinfo is NULL),
+ *     fills in the access method entry points of dbp and then checks or
+ *     creates the tree through __bam_setmeta.
+ *
+ *     Returns 0 on success, ENOMEM if the BTREE cannot be allocated, EINVAL
+ *     for an unacceptable bt_minkey/bt_maxkey, or the error returned by
+ *     __bam_keyalloc or __bam_setmeta.  On failure the BTREE is freed and
+ *     dbp->internal does not point to it.
+ *
+ * PUBLIC: int __bam_open __P((DB *, DBTYPE, DB_INFO *));
+ */
+int
+__bam_open(dbp, type, dbinfo)
+       DB *dbp;
+       DBTYPE type;
+       DB_INFO *dbinfo;
+{
+       BTREE *t;
+       int ret;
+
+       /* Allocate the btree internal structure. */
+       if ((t = (BTREE *)calloc(1, sizeof(BTREE))) == NULL)
+               return (ENOMEM);
+
+       t->bt_sp = t->bt_csp = t->bt_stack;
+       t->bt_esp = t->bt_stack + sizeof(t->bt_stack) / sizeof(t->bt_stack[0]);
+
+       if ((type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) &&
+           (ret = __bam_keyalloc(t)) != 0)
+               goto err;
+
+       /*
+        * Intention is to make sure all of the user's selections are okay
+        * here and then use them without checking.
+        */
+       if (dbinfo != NULL) {
+               /* Minimum number of keys per page. */
+               if (dbinfo->bt_minkey == 0)
+                       t->bt_minkey = DEFMINKEYPAGE;
+               else {
+                       if (dbinfo->bt_minkey < 2)
+                               goto einval;
+                       t->bt_minkey = dbinfo->bt_minkey;
+               }
+
+               /* Maximum number of keys per page. */
+               if (dbinfo->bt_maxkey == 0)
+                       t->bt_maxkey = 0;
+               else {
+                       if (dbinfo->bt_maxkey < 1)
+                               goto einval;
+                       t->bt_maxkey = dbinfo->bt_maxkey;
+               }
+
+               /*
+                * If no comparison, use default comparison.  If no comparison
+                * and no prefix, use default prefix.  (We can't default the
+                * prefix if the user supplies a comparison routine; shortening
+                * the keys may break their comparison algorithm.)
+                */
+               t->bt_compare = dbinfo->bt_compare == NULL ?
+                   __bam_defcmp : dbinfo->bt_compare;
+               t->bt_prefix = dbinfo->bt_prefix == NULL ?
+                   (dbinfo->bt_compare == NULL ?
+                   __bam_defpfx : NULL) : dbinfo->bt_prefix;
+       } else {
+               /* bt_maxkey stays 0, the structure came from calloc. */
+               t->bt_minkey = DEFMINKEYPAGE;
+               t->bt_compare = __bam_defcmp;
+               t->bt_prefix = __bam_defpfx;
+       }
+
+       /* Initialize the remaining fields of the DB. */
+       dbp->type = type;
+       dbp->internal = t;
+       dbp->cursor = __bam_cursor;
+       dbp->del = __bam_delete;
+       dbp->get = __bam_get;
+       dbp->put = __bam_put;
+       dbp->stat = __bam_stat;
+       dbp->sync = __bam_sync;
+
+       /*
+        * The btree data structure requires that at least two key/data pairs
+        * can fit on a page, but other than that there's no fixed requirement.
+        * Translate the minimum number of items into the bytes a key/data pair
+        * can use before being placed on an overflow page.  We calculate for
+        * the worst possible alignment by assuming every item requires the
+        * maximum alignment for padding.
+        *
+        * Recno uses the btree bt_ovflsize value -- it's close enough.
+        */
+       t->bt_ovflsize = (dbp->pgsize - P_OVERHEAD) / (t->bt_minkey * P_INDX)
+           - (BKEYDATA_PSIZE(0) + ALIGN(1, 4));
+
+       /* Create a root page if new tree. */
+       if ((ret = __bam_setmeta(dbp, t)) != 0)
+               goto err;
+
+       return (0);
+
+einval:        ret = EINVAL;
+
+err:   /* If we allocated room for key/data return, discard it. */
+       if (t->bt_rkey.data != NULL)
+               free(t->bt_rkey.data);
+
+       /*
+        * The DB handle may already have been pointed at the structure we
+        * are about to free; don't leave a dangling pointer behind.
+        */
+       if (dbp->internal == t)
+               dbp->internal = NULL;
+
+       FREE(t, sizeof(BTREE));
+       return (ret);
+}
+
+/*
+ * __bam_bdup --
+ *     Create a BTREE handle for a threaded DB handle.
+ *
+ *     A new BTREE with its own page stack is allocated for the handle
+ *     new, and the settings of orig's BTREE are copied into it.  Returns
+ *     0 on success, ENOMEM if the allocation fails, or the error returned
+ *     by __bam_keyalloc.
+ *
+ * PUBLIC: int __bam_bdup __P((DB *, DB *));
+ */
+int
+__bam_bdup(orig, new)
+       DB *orig, *new;
+{
+       BTREE *src, *dst;
+       int ret;
+
+       src = orig->internal;
+
+       dst = (BTREE *)calloc(1, sizeof(*dst));
+       if (dst == NULL)
+               return (ENOMEM);
+
+       /*
+        * !!!
+        * The cursor queue is not touched: cursors are attached to the
+        * first DB only.
+        */
+
+       /* Point the stack pointers at the stack array inside the BTREE. */
+       dst->bt_sp = dst->bt_csp = dst->bt_stack;
+       dst->bt_esp =
+           dst->bt_stack + sizeof(dst->bt_stack) / sizeof(dst->bt_stack[0]);
+
+       /* Trees that return record numbers need room for the key. */
+       if (orig->type == DB_RECNO || F_ISSET(orig, DB_BT_RECNUM)) {
+               ret = __bam_keyalloc(dst);
+               if (ret != 0) {
+                       FREE(dst, sizeof(*dst));
+                       return (ret);
+               }
+       }
+
+       /* Copy the settings of the original tree. */
+       dst->bt_minkey = src->bt_minkey;
+       dst->bt_maxkey = src->bt_maxkey;
+       dst->bt_ovflsize = src->bt_ovflsize;
+       dst->bt_compare = src->bt_compare;
+       dst->bt_prefix = src->bt_prefix;
+
+       /*
+        * !!!
+        * The RECNO structure is not copied, both handles refer to the same
+        * one.  If it breaks, the application was misusing it to start with.
+        */
+       dst->bt_recno = src->bt_recno;
+
+       new->internal = dst;
+
+       return (0);
+}
+
+/*
+ * __bam_keyalloc --
+ *     Allocate return memory for recno keys.
+ *
+ *     Stores the buffer in t->bt_rkey.data and its size in
+ *     t->bt_rkey.ulen.  Returns 0, or ENOMEM if the allocation fails.
+ */
+static int
+__bam_keyalloc(t)
+       BTREE *t;
+{
+       void *buf;
+
+       /*
+        * Every recno key has the same size, so one buffer allocated up
+        * front spares us a space check on each return.
+        */
+       buf = (void *)malloc(sizeof(db_recno_t));
+       t->bt_rkey.data = buf;
+       if (buf == NULL)
+               return (ENOMEM);
+
+       t->bt_rkey.ulen = sizeof(db_recno_t);
+       return (0);
+}
+
+/*
+ * __bam_setmeta --
+ *     Check (and optionally create) a tree.
+ *
+ *     The metadata page is fetched, being created if necessary.  If its
+ *     magic number is already set the tree exists: the min/max key values
+ *     stored on the page replace the ones in t and nothing is written.
+ *     Otherwise the metadata page and a root page of the leaf type are
+ *     initialized and flushed.
+ *
+ *     Returns 0 on success, or the error from the failing lock, page or
+ *     sync call (DB_INCOMPLETE from the sync is reported as EINVAL).  The
+ *     pages and locks acquired here are released on every return path.
+ */
+static int
+__bam_setmeta(dbp, t)
+       DB *dbp;
+       BTREE *t;
+{
+       BTMETA *meta;
+       PAGE *root;
+       DB_LOCK mlock, rlock;
+       db_pgno_t pgno;
+       int ret, t_ret;
+
+       /* Get, and optionally create the metadata page. */
+       pgno = PGNO_METADATA;
+       if ((ret =
+           __bam_lget(dbp, 0, PGNO_METADATA, DB_LOCK_WRITE, &mlock)) != 0)
+               return (ret);
+       if ((ret =
+           __bam_pget(dbp, (PAGE **)&meta, &pgno, DB_MPOOL_CREATE)) != 0) {
+               (void)__BT_LPUT(dbp, mlock);
+               return (ret);
+       }
+
+       /*
+        * If the magic number is correct, we're not creating the tree.
+        * Correct any fields that may not be right.  Note, all of the
+        * local flags were set by db_open(3).
+        */
+       if (meta->magic != 0) {
+               t->bt_maxkey = meta->maxkey;
+               t->bt_minkey = meta->minkey;
+
+               (void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
+               (void)__BT_LPUT(dbp, mlock);
+               return (0);
+       }
+
+       /*
+        * Get the root page before anything is written to the metadata
+        * page, so that a failure here leaves the metadata page unmodified
+        * and it can simply be put back.
+        */
+       pgno = PGNO_ROOT;
+       if ((ret =
+           __bam_lget(dbp, 0, PGNO_ROOT, DB_LOCK_WRITE, &rlock)) != 0) {
+               (void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
+               (void)__BT_LPUT(dbp, mlock);
+               return (ret);
+       }
+       if ((ret = __bam_pget(dbp, &root, &pgno, DB_MPOOL_CREATE)) != 0) {
+               (void)__BT_LPUT(dbp, rlock);
+               (void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
+               (void)__BT_LPUT(dbp, mlock);
+               return (ret);
+       }
+
+       /* Initialize the tree structure metadata information. */
+       ZERO_LSN(meta->lsn);
+       meta->pgno = PGNO_METADATA;
+       meta->magic = DB_BTREEMAGIC;
+       meta->version = DB_BTREEVERSION;
+       meta->pagesize = dbp->pgsize;
+       meta->maxkey = t->bt_maxkey;
+       meta->minkey = t->bt_minkey;
+       meta->free = PGNO_INVALID;
+       meta->flags = 0;
+       if (dbp->type == DB_RECNO)
+               F_SET(meta, BTM_RECNO);
+       if (F_ISSET(dbp, DB_AM_DUP))
+               F_SET(meta, BTM_DUP);
+       if (F_ISSET(dbp, DB_RE_FIXEDLEN))
+               F_SET(meta, BTM_FIXEDLEN);
+       if (F_ISSET(dbp, DB_BT_RECNUM))
+               F_SET(meta, BTM_RECNUM);
+       if (F_ISSET(dbp, DB_RE_RENUMBER))
+               F_SET(meta, BTM_RENUMBER);
+       meta->re_len = 0;
+       meta->re_pad = 0;
+       memcpy(meta->uid, dbp->lock.fileid, DB_FILE_ID_LEN);
+
+       /* Initialize the root page. */
+       P_INIT(root, dbp->pgsize, PGNO_ROOT, PGNO_INVALID,
+           PGNO_INVALID, 1, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE);
+       ZERO_LSN(root->lsn);
+
+       /*
+        * Release the metadata and root pages.  Both are put back even if
+        * the first put fails; the first error is the one reported.
+        */
+       ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
+       if ((t_ret =
+           memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0 && ret == 0)
+               ret = t_ret;
+
+       /*
+        * Flush the metadata and root pages to disk -- since the user can't
+        * transaction protect open, the pages have to exist during recovery.
+        *
+        * XXX
+        * It's not useful to return not-yet-flushed here -- convert it to
+        * an error.
+        */
+       if (ret == 0 && (ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
+               ret = EINVAL;
+
+       /* Release the locks. */
+       (void)__BT_LPUT(dbp, mlock);
+       (void)__BT_LPUT(dbp, rlock);
+
+       return (ret);
+}
diff --git a/db2/btree/bt_page.c b/db2/btree/bt_page.c
new file mode 100644 (file)
index 0000000..7ee74ff
--- /dev/null
@@ -0,0 +1,312 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_page.c    10.5 (Sleepycat) 8/18/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+/*
+ * __bam_new --
+ *     Get a new page, preferably from the freelist.
+ *
+ *     The metadata page is write locked while the page is taken from the
+ *     head of the free list or, if the list is empty, obtained with
+ *     DB_MPOOL_NEW.  The page is initialized with the given type and
+ *     stored through pagepp.  Returns 0, or the error from the failing
+ *     lock, page get or log call.
+ *
+ * PUBLIC: int __bam_new __P((DB *, u_int32_t, PAGE **));
+ */
+int
+__bam_new(dbp, type, pagepp)
+       DB *dbp;
+       u_int32_t type;
+       PAGE **pagepp;
+{
+       BTMETA *meta;
+       DB_LOCK meta_lock;
+       PAGE *page;
+       db_pgno_t pgno;
+       int ret;
+
+       /* Nothing is held yet; the cleanup code tests these values. */
+       page = NULL;
+       meta = NULL;
+       meta_lock = LOCK_INVALID;
+
+       /* Write lock and fetch the metadata page. */
+       pgno = PGNO_METADATA;
+       ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &meta_lock);
+       if (ret != 0)
+               goto fail;
+       ret = __bam_pget(dbp, (PAGE **)&meta, &pgno, 0);
+       if (ret != 0)
+               goto fail;
+
+       if (meta->free != PGNO_INVALID) {
+               /* Unlink the page at the head of the free list. */
+               pgno = meta->free;
+               ret = __bam_pget(dbp, &page, &pgno, 0);
+               if (ret != 0)
+                       goto fail;
+               meta->free = page->next_pgno;
+       } else {
+               /* The free list is empty, get a page with DB_MPOOL_NEW. */
+               ret = __bam_pget(dbp, &page, &pgno, DB_MPOOL_NEW);
+               if (ret != 0)
+                       goto fail;
+               ZERO_LSN(page->lsn);
+               page->pgno = pgno;
+       }
+
+       /* Log the change. */
+       if (DB_LOGGING(dbp)) {
+               ret = __bam_pg_alloc_log(dbp->dbenv->lg_info, dbp->txn,
+                   &meta->lsn, 0, dbp->log_fileid, &meta->lsn, &page->lsn,
+                   page->pgno, (u_int32_t)type, meta->free);
+               if (ret != 0)
+                       goto fail;
+               LSN(page) = LSN(meta);
+       }
+
+       /* We're done with the metadata page. */
+       (void)memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
+       (void)__BT_TLPUT(dbp, meta_lock);
+
+       P_INIT(page, dbp->pgsize,
+           page->pgno, PGNO_INVALID, PGNO_INVALID, 0, type);
+       *pagepp = page;
+       return (0);
+
+fail:  /* Give back whatever was acquired before the failure. */
+       if (page != NULL)
+               (void)memp_fput(dbp->mpf, page, 0);
+       if (meta != NULL)
+               (void)memp_fput(dbp->mpf, meta, 0);
+       if (meta_lock != LOCK_INVALID)
+               (void)__BT_TLPUT(dbp, meta_lock);
+       return (ret);
+}
+
+/*
+ * __bam_free --
+ *     Add a page to the head of the freelist.
+ *
+ * PUBLIC: int __bam_free __P((DB *, PAGE *));
+ */
+int
+__bam_free(dbp, h)
+       DB *dbp;
+       PAGE *h;
+{
+       BTMETA *meta;
+       DBT ldbt;
+       DB_LOCK mlock;
+       db_pgno_t pgno;
+       int is_dirty, ret, t_ret;
+
+       /*
+        * Retrieve the metadata page and insert the page at the head of
+        * the free list.  If either the lock get or page get routines
+        * fail, then we need to put the page with which we were called
+        * back because our caller assumes we take care of it.
+        */
+       is_dirty = 0;
+       pgno = PGNO_METADATA;
+       if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_WRITE, &mlock)) != 0)
+               goto err;
+       if ((ret = __bam_pget(dbp, (PAGE **)&meta, &pgno, 0)) != 0) {
+               (void)__BT_TLPUT(dbp, mlock);
+               goto err;
+       }
+
+       /* Log the change. */
+       if (DB_LOGGING(dbp)) {
+               memset(&ldbt, 0, sizeof(ldbt));
+               ldbt.data = h;
+               ldbt.size = P_OVERHEAD;
+               if ((ret = __bam_pg_free_log(dbp->dbenv->lg_info,
+                   dbp->txn, &meta->lsn, 0, dbp->log_fileid, h->pgno,
+                   &meta->lsn, &ldbt, meta->free)) != 0) {
+                       (void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
+                       (void)__BT_TLPUT(dbp, mlock);
+                       return (ret);
+               }
+               LSN(h) = LSN(meta);
+       }
+
+       /*
+        * The page should have nothing interesting on it, re-initialize it,
+        * leaving only the page number and the LSN.
+        */
+#ifdef DEBUG
+       { db_pgno_t __pgno; DB_LSN __lsn;
+               __pgno = h->pgno;
+               __lsn = h->lsn;
+               memset(h, 0xff, dbp->pgsize);
+               h->pgno = __pgno;
+               h->lsn = __lsn;
+       }
+#endif
+       P_INIT(h, dbp->pgsize, h->pgno, PGNO_INVALID, meta->free, 0, P_INVALID);
+
+       /* Link the page on the metadata free list. */
+       meta->free = h->pgno;
+
+       /* Discard the metadata page. */
+       ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY);
+       if ((t_ret = __BT_TLPUT(dbp, mlock)) != 0)
+               ret = t_ret;
+
+       /* Discard the caller's page reference. */
+       is_dirty = DB_MPOOL_DIRTY;
+err:   if ((t_ret = memp_fput(dbp->mpf, h, is_dirty)) != 0 && ret == 0)
+               ret = t_ret;
+
+       /*
+        * XXX
+        * We have to unlock the caller's page in the caller!
+        */
+       return (ret);
+}
+
+#ifdef DEBUG
+/*
+ * __bam_lt --
+ *     Print out the list of currently held locks.
+ *
+ *     A DB_LOCK_DUMP request is issued for the handle's locker.  Nothing
+ *     is done unless the handle has DB_AM_LOCKING set.  Always returns 0.
+ */
+int
+__bam_lt(dbp)
+       DB *dbp;
+{
+       DB_LOCKREQ dump;
+
+       /* There is nothing to show when locking is not in use. */
+       if (!F_ISSET(dbp, DB_AM_LOCKING))
+               return (0);
+
+       dump.op = DB_LOCK_DUMP;
+       lock_vec(dbp->dbenv->lk_info, dbp->locker, 0, &dump, 1, NULL);
+       return (0);
+}
+#endif
+
+/*
+ * __bam_lget --
+ *     The standard lock get call.
+ *
+ *     Locks page pgno in the given mode and stores the lock in *lockp.
+ *     If do_couple is non-zero, *lockp must hold a lock on entry: it is
+ *     released in the same request that acquires the new lock.  Returns 0
+ *     without doing anything when the handle does not have DB_AM_LOCKING
+ *     set.  A negative result from the lock call is returned as EAGAIN,
+ *     any other result is returned unchanged.
+ *
+ * PUBLIC: int __bam_lget __P((DB *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
+ */
+int
+__bam_lget(dbp, do_couple, pgno, mode, lockp)
+       DB *dbp;
+       int do_couple;
+       db_pgno_t pgno;
+       db_lockmode_t mode;
+       DB_LOCK *lockp;
+{
+       DB_LOCKREQ reqs[2];
+       u_int32_t id;
+       int ret;
+
+       if (!F_ISSET(dbp, DB_AM_LOCKING))
+               return (0);
+
+       /* Lock on behalf of the transaction, if there is one. */
+       if (dbp->txn == NULL)
+               id = dbp->locker;
+       else
+               id = dbp->txn->txnid;
+       dbp->lock.pgno = pgno;
+
+       /*
+        * If the object is not currently locked, acquire the lock and
+        * return, otherwise, lock couple.  If we fail and it's not a system
+        * error, convert to EAGAIN.
+        */
+       if (!do_couple) {
+               ret = lock_get(dbp->dbenv->lk_info,
+                   id, 0, &dbp->lock_dbt, mode, lockp);
+               return (ret < 0 ? EAGAIN : ret);
+       }
+
+       /* One request gets the new lock, the other puts the old one. */
+       reqs[0].op = DB_LOCK_GET;
+       reqs[0].obj = &dbp->lock_dbt;
+       reqs[0].mode = mode;
+       reqs[1].op = DB_LOCK_PUT;
+       reqs[1].lock = *lockp;
+
+       ret = lock_vec(dbp->dbenv->lk_info, id, 0, reqs, 2, NULL);
+       if (ret == 0) {
+               *lockp = reqs[0].lock;
+               return (0);
+       }
+
+       /* If we fail, discard the lock we held. */
+       __bam_lput(dbp, *lockp);
+
+       return (ret < 0 ? EAGAIN : ret);
+}
+
+/*
+ * __bam_lput --
+ *     The standard lock put call.
+ *
+ *     Function form of the __BT_LPUT macro; returns the macro's result.
+ *
+ * PUBLIC: int __bam_lput __P((DB *, DB_LOCK));
+ */
+int
+__bam_lput(dbp, lock)
+       DB *dbp;
+       DB_LOCK lock;
+{
+       int ret;
+
+       ret = __BT_LPUT(dbp, lock);
+       return (ret);
+}
+
+/*
+ * __bam_pget --
+ *     The standard page get call.
+ *
+ *     Fetches the page *pgnop through memp_fget, passing mflags along, and
+ *     stores it in *hp.  Returns 0 on success; on failure the result of
+ *     __db_pgerr for that page number is returned.
+ *
+ * PUBLIC: int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
+ */
+int
+__bam_pget(dbp, hp, pgnop, mflags)
+       DB *dbp;
+       PAGE **hp;
+       db_pgno_t *pgnop;
+       int mflags;
+{
+       if (memp_fget(dbp->mpf, pgnop, mflags, hp) != 0)
+               return (__db_pgerr(dbp, *pgnop));
+       return (0);
+}
diff --git a/db2/btree/bt_put.c b/db2/btree/bt_put.c
new file mode 100644 (file)
index 0000000..632c3d1
--- /dev/null
@@ -0,0 +1,919 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_put.c     10.23 (Sleepycat) 8/22/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+static int __bam_fixed __P((BTREE *, DBT *));
+static int __bam_lookup __P((DB *, DBT *, int *));
+static int __bam_ndup __P((DB *, PAGE *, u_int32_t));
+static int __bam_partial __P((DB *, DBT *, PAGE *, u_int32_t));
+
+/*
+ * __bam_put --
+ *     Add a new key/data pair or replace an existing pair (btree).
+ *
+ * PUBLIC: int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+ */
+int
+__bam_put(argdbp, txn, key, data, flags)
+       DB *argdbp;
+       DB_TXN *txn;
+       DBT *key, *data;
+       int flags;
+{
+       BTREE *t;
+       CURSOR c;
+       DB *dbp;
+       PAGE *h;
+       db_indx_t indx;
+       int exact, iflags, newkey, replace, ret, stack;
+
+       DEBUG_LWRITE(argdbp, txn, "bam_put", key, data, flags);
+
+       /* Check flags. */
+       if ((ret = __db_putchk(argdbp, key, data, flags,
+           F_ISSET(argdbp, DB_AM_RDONLY), F_ISSET(argdbp, DB_AM_DUP))) != 0)
+               return (ret);
+
+       GETHANDLE(argdbp, txn, &dbp, ret);
+       t = dbp->internal;
+
+retry: /*
+        * Find the location at which to insert.  The call to __bam_lookup()
+        * leaves the returned page pinned.
+        */
+       if ((ret = __bam_lookup(dbp, key, &exact)) != 0) {
+               PUTHANDLE(dbp);
+               return (ret);
+       }
+       h = t->bt_csp->page;
+       indx = t->bt_csp->indx;
+       stack = 1;
+
+       /*
+        * If an identical key is already in the tree and DB_NOOVERWRITE is
+        * set, DB_KEYEXIST is returned, unless the item located is marked for
+        * deletion, in which case we permit the overwrite and flag the cursor.
+        * If an identical key is already in the tree and DB_NOOVERWRITE is not
+        * set, the data item is either added as a duplicate (when duplicates
+        * are permitted) or replaced.
+        */
+       replace = 0;
+       if (exact && flags == DB_NOOVERWRITE) {
+               if (!GET_BKEYDATA(h, indx + O_INDX)->deleted) {
+                       ret = DB_KEYEXIST;
+                       goto err;
+               }
+               replace = 1;
+               __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SETUP);
+       }
+
+       /*
+        * If we're inserting into the first or last page of the tree,
+        * remember where we did it so we can do fast lookup next time.
+        *
+        * XXX
+        * Does reverse order still work (did it ever!?!?)
+        */
+       t->bt_lpgno =
+           h->next_pgno == PGNO_INVALID || h->prev_pgno == PGNO_INVALID ?
+           h->pgno : PGNO_INVALID;
+
+       /*
+        * Select the arguments for __bam_iitem() and do the insert.  If the
+        * key is an exact match, we're either adding a new duplicate at the
+        * end of the duplicate set, or we're replacing the data item with a
+        * new data item.  If the key isn't an exact match, we're inserting
+        * a new key/data pair, before the search location.
+        */
+       newkey = dbp->type == DB_BTREE && !exact;
+       if (exact) {
+               if (F_ISSET(dbp, DB_AM_DUP)) {
+                       /*
+                        * Make sure that we're not looking at a page of
+                        * duplicates -- if so, move to the last entry on
+                        * that page.
+                        */
+                       c.page = h;
+                       c.pgno = h->pgno;
+                       c.indx = indx;
+                       c.dpgno = PGNO_INVALID;
+                       c.dindx = 0;
+                       if ((ret =
+                           __bam_ovfl_chk(dbp, &c, indx + O_INDX, 1)) != 0)
+                               goto err;
+                       if (c.dpgno != PGNO_INVALID) {
+                               /*
+                                * XXX
+                                * The __bam_ovfl_chk() routine memp_fput() the
+                                * current page and acquired a new one, but did
+                                * not do anything about the lock we're holding.
+                                */
+                               t->bt_csp->page = h = c.page;
+                               indx = c.dindx;
+                       }
+                       iflags = DB_AFTER;
+               } else
+                       iflags = DB_CURRENT;
+       } else
+               iflags = DB_BEFORE;
+
+       /*
+        * The pages we're using may be modified by __bam_iitem(), so make
+        * sure we reset the stack.
+        */
+       ret = __bam_iitem(dbp,
+           &h, &indx, key, data, iflags, newkey ? BI_NEWKEY : 0);
+       t->bt_csp->page = h;
+       t->bt_csp->indx = indx;
+
+       switch (ret) {
+       case 0:
+               /*
+                * Done.  Clean up the cursor, and, if we're doing record
+                * numbers, adjust the internal page counts.
+                */
+               if (replace)
+                       __bam_ca_replace(dbp, h->pgno, indx, REPLACE_SUCCESS);
+
+               if (!replace && F_ISSET(dbp, DB_BT_RECNUM))
+                       ret = __bam_adjust(dbp, t, 1);
+               break;
+       case DB_NEEDSPLIT:
+               /*
+                * We have to split the page.  Back out the cursor setup,
+                * discard the stack of pages, and do the split.
+                */
+               if (replace) {
+                       replace = 0;
+                       __bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
+               }
+
+               (void)__bam_stkrel(dbp);
+               stack = 0;
+
+               if ((ret = __bam_split(dbp, key)) != 0)
+                       break;
+
+               goto retry;
+               /* NOTREACHED */
+       default:
+               if (replace)
+                       __bam_ca_replace(dbp, h->pgno, indx, REPLACE_FAILED);
+               break;
+       }
+
+err:   if (stack)
+               (void)__bam_stkrel(dbp);
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_lookup --
+ *     Find the insert location for the key: cached last page, else full search.
+ */
+static int
+__bam_lookup(dbp, key, exactp)
+       DB *dbp;
+       DBT *key;
+       int *exactp;
+{
+       BTREE *t;
+       DB_LOCK lock;
+       EPG e;
+       PAGE *h;
+       db_indx_t indx;
+       int cmp, ret;
+
+       t = dbp->internal;
+       h = NULL;
+
+       /*
+        * Record numbers can't be fast-tracked, we have to lock the entire
+        * tree.
+        */
+       if (F_ISSET(dbp, DB_BT_RECNUM))
+               goto slow;
+
+       /* Check to see if we've been seeing sorted input. */
+       if (t->bt_lpgno == PGNO_INVALID)
+               goto slow;
+
+       /*
+        * Retrieve the page on which we did the last insert.  It's okay if
+        * it doesn't exist, or if it's not the page type we expect, it just
+        * means that the world changed.
+        */
+       if (__bam_lget(dbp, 0, t->bt_lpgno, DB_LOCK_WRITE, &lock))
+               goto miss;
+       if (__bam_pget(dbp, &h, &t->bt_lpgno, 0)) {
+               (void)__BT_LPUT(dbp, lock);
+               goto miss;
+       }
+       if (TYPE(h) != P_LBTREE)
+               goto miss;
+       if (NUM_ENT(h) == 0)
+               goto miss;
+
+       /*
+        * We have to be at the end or beginning of the tree to know that
+        * we're inserting in a sort order.  If that's the case and we're
+        * in the right order in comparison to the first/last key/data pair,
+        * we have the right position.
+        */
+       if (h->next_pgno == PGNO_INVALID) {
+               e.page = h;
+               e.indx = NUM_ENT(h) - P_INDX;
+               if ((cmp = __bam_cmp(dbp, key, &e)) >= 0) {
+                       if (cmp > 0)
+                               e.indx += P_INDX;
+                       goto fast;
+               }
+       }
+       if (h->prev_pgno == PGNO_INVALID) {
+               e.page = h;
+               e.indx = 0;
+               if ((cmp = __bam_cmp(dbp, key, &e)) <= 0) {
+                       /*
+                        * We're doing a put, so we want to insert as the last
+                        * of any set of duplicates.
+                        */
+                       if (cmp == 0) {
+                               for (indx = 0;
+                                   indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
+                                   h->inp[indx] == h->inp[indx + P_INDX];
+                                   indx += P_INDX);
+                               e.indx = indx;
+                       }
+                       goto fast;
+               }
+       }
+       goto miss;
+
+       /* Set the exact match flag in case we've already inserted this key. */
+fast:  *exactp = cmp == 0;
+
+       /* Enter the entry in the stack (NOTE: h/lock look leaked on error). */
+       BT_STK_CLR(t);
+       BT_STK_ENTER(t, e.page, e.indx, lock, ret);
+       if (ret != 0)
+               return (ret);
+
+       ++t->lstat.bt_cache_hit;
+       return (0);
+
+miss:  ++t->lstat.bt_cache_miss;
+       if (h != NULL) {
+               (void)memp_fput(dbp->mpf, h, 0);
+               (void)__BT_LPUT(dbp, lock);
+       }
+
+slow:  return (__bam_search(dbp, key, S_INSERT, 1, NULL, exactp));
+}
+
+/*
+ * OVPUT --
+ *     Copy an overflow item onto a page (needs dbp, ret; returns on failure).
+ */
+#undef OVPUT
+#define        OVPUT(h, indx, bo) do {                                         \
+       DBT __hdr;                                                      \
+       memset(&__hdr, 0, sizeof(__hdr));                               \
+       __hdr.data = &bo;                                               \
+       __hdr.size = BOVERFLOW_SIZE;                                    \
+       if ((ret = __db_pitem(dbp,                                      \
+           h, indx, BOVERFLOW_SIZE, &__hdr, NULL)) != 0)               \
+               return (ret);                                           \
+} while (0)
+
+/*
+ * __bam_iitem --
+ *     Insert an item into the tree (DB_NEEDSPLIT if the page lacks room).
+ *
+ * PUBLIC: int __bam_iitem __P((DB *,
+ * PUBLIC:    PAGE **, db_indx_t *, DBT *, DBT *, int, int));
+ */
+int
+__bam_iitem(dbp, hp, indxp, key, data, op, flags)
+       DB *dbp;
+       PAGE **hp;
+       db_indx_t *indxp;
+       DBT *key, *data;
+       int op, flags;
+{
+       BTREE *t;
+       BKEYDATA *bk;
+       BOVERFLOW kbo, dbo;
+       DBT tdbt;
+       PAGE *h;
+       db_indx_t indx;
+       u_int32_t have_bytes, need_bytes, needed;
+       int bigkey, bigdata, dcopy, dupadjust, ret;
+
+       t = dbp->internal;
+       h = *hp;
+       indx = *indxp;
+
+       dupadjust = 0;
+       bk = NULL;                      /* XXX: Shut the compiler up. */
+
+       /*
+        * If it's a page of duplicates, call the common code to do the work.
+        *
+        * !!!
+        * Here's where the hp and indxp are important.  The duplicate code
+        * may decide to rework/rearrange the pages and indices we're using,
+        * so the caller must understand that the stack has to change.
+        */
+       if (TYPE(h) == P_DUPLICATE) {
+               /* Adjust the index for the new item if it's a DB_AFTER op. */
+               if (op == DB_AFTER)
+                       ++*indxp;
+
+               /* Remove the current item if it's a DB_CURRENT op. */
+               if (op == DB_CURRENT && (ret = __db_ditem(dbp, *hp, *indxp,
+                   BKEYDATA_SIZE(GET_BKEYDATA(*hp, *indxp)->len))) != 0)
+                       return (ret);
+
+               /* Put the new/replacement item onto the page. */
+               return (__db_dput(dbp, data, hp, indxp, __bam_new));
+       }
+
+       /*
+        * XXX
+        * Handle partial puts.
+        *
+        * This is truly awful from a performance standpoint.  We don't optimize
+        * for partial puts at all, we delete the record and add it back in,
+        * regardless of size or if we're simply overwriting current data.
+        * The hash access method does this a lot better than we do, and we're
+        * eventually going to have to fix it.
+        */
+       if (F_ISSET(data, DB_DBT_PARTIAL)) {
+               tdbt = *data;
+               if ((ret = __bam_partial(dbp, &tdbt, h, indx)) != 0)
+                       return (ret);
+               data = &tdbt;
+       }
+
+       /* If it's a short fixed-length record, fix it up. */
+       if (F_ISSET(dbp, DB_RE_FIXEDLEN) && data->size != t->bt_recno->re_len) {
+               tdbt = *data;
+               if ((ret = __bam_fixed(t, &tdbt)) != 0)
+                       return (ret);
+               data = &tdbt;
+       }
+
+       /*
+        * If the key or data item won't fit on a page, store it in the
+        * overflow pages.
+        *
+        * !!!
+        * From this point on, we have to recover the allocated overflow
+        * pages on error (NOTE(review): OVPUT's return skips the err label).
+        */
+       bigkey = bigdata = 0;
+       if (LF_ISSET(BI_NEWKEY) && key->size > t->bt_ovflsize) {
+               kbo.deleted = 0;
+               kbo.type = B_OVERFLOW;
+               kbo.tlen = key->size;
+               if ((ret = __db_poff(dbp, key, &kbo.pgno, __bam_new)) != 0)
+                       goto err;
+               bigkey = 1;
+       }
+       if (data->size > t->bt_ovflsize) {
+               dbo.deleted = 0;
+               dbo.type = B_OVERFLOW;
+               dbo.tlen = data->size;
+               if ((ret = __db_poff(dbp, data, &dbo.pgno, __bam_new)) != 0)
+                       goto err;
+               bigdata = 1;
+       }
+
+       dcopy = 0;
+       needed = 0;
+       if (LF_ISSET(BI_NEWKEY)) {
+               /* If BI_NEWKEY is set we're adding a new key and data pair. */
+               if (bigkey)
+                       needed += BOVERFLOW_PSIZE;
+               else
+                       needed += BKEYDATA_PSIZE(key->size);
+               if (bigdata)
+                       needed += BOVERFLOW_PSIZE;
+               else
+                       needed += BKEYDATA_PSIZE(data->size);
+       } else {
+               /*
+                * We're either overwriting the data item of a key/data pair
+                * or we're adding the data item only, i.e. a new duplicate.
+                */
+               if (op == DB_CURRENT) {
+                       bk = GET_BKEYDATA(h,
+                           indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+                       if (bk->type == B_OVERFLOW)
+                               have_bytes = BOVERFLOW_PSIZE;
+                       else
+                               have_bytes = BKEYDATA_PSIZE(bk->len);
+                       need_bytes = 0;
+               } else {
+                       have_bytes = 0;
+                       need_bytes = sizeof(db_indx_t);
+               }
+               if (bigdata)
+                       need_bytes += BOVERFLOW_PSIZE;
+               else
+                       need_bytes += BKEYDATA_PSIZE(data->size);
+
+               /*
+                * If we're overwriting a data item, we copy it if it's not a
+                * special record type and it's the same size (including any
+                * alignment) and do a delete/insert otherwise.
+                */
+               if (op == DB_CURRENT && !bigdata &&
+                   bk->type == B_KEYDATA && have_bytes == need_bytes)
+                       dcopy = 1;
+               if (have_bytes < need_bytes)
+                       needed += need_bytes - have_bytes;
+       }
+
+       /*
+        * If there's not enough room, or the user has put a ceiling on the
+        * number of keys permitted in the page, split the page.
+        *
+        * XXX
+        * The t->bt_maxkey test here may be insufficient -- do we have to
+        * check in the btree split code, so we don't undo it there!?!?
+        */
+       if (P_FREESPACE(h) < needed ||
+           (t->bt_maxkey != 0 && NUM_ENT(h) > t->bt_maxkey)) {
+               ret = DB_NEEDSPLIT;
+               goto err;
+       }
+
+       /*
+        * The code breaks it up into six cases:
+        *
+        * 1. Append a new key/data pair.
+        * 2. Insert a new key/data pair.
+        * 3. Copy the data item.
+        * 4. Delete/insert the data item.
+        * 5. Append a new data item.
+        * 6. Insert a new data item.
+        */
+       if (LF_ISSET(BI_NEWKEY)) {
+               switch (op) {
+               case DB_AFTER:          /* 1. Append a new key/data pair. */
+                       indx += 2;
+                       *indxp += 2;
+                       break;
+               case DB_BEFORE:         /* 2. Insert a new key/data pair. */
+                       break;
+               default:
+                       abort();
+               }
+
+               /* Add the key. */
+               if (bigkey)
+                       OVPUT(h, indx, kbo);
+               else {
+                       DBT __data;
+                       memset(&__data, 0, sizeof(__data));
+                       __data.data = key->data;
+                       __data.size = key->size;
+                       if ((ret = __db_pitem(dbp, h, indx,
+                           BKEYDATA_SIZE(key->size), NULL, &__data)) != 0)
+                               goto err;
+               }
+               ++indx;
+       } else {
+               switch (op) {
+               case DB_CURRENT:        /* 3. Copy the data item. */
+                       /*
+                        * If we're not logging and it's possible, overwrite
+                        * the current item.
+                        *
+                        * XXX
+                        * We should add a separate logging message so that
+                        * we can do this anytime it's possible, including
+                        * for partial record puts.
+                        */
+                       if (dcopy && !DB_LOGGING(dbp)) {
+                               bk->len = data->size;
+                               memcpy(bk->data, data->data, data->size);
+                               goto done;
+                       }
+                                       /* 4. Delete/insert the data item. */
+                       if (TYPE(h) == P_LBTREE)
+                               ++indx;
+                       if ((ret = __bam_ditem(dbp, h, indx)) != 0)
+                               goto err;
+                       break;
+               case DB_AFTER:          /* 5. Append a new data item. */
+                       if (TYPE(h) == P_LBTREE) {
+                               /*
+                                * Adjust the cursor and copy in the key for
+                                * the duplicate.
+                                */
+                               if ((ret = __bam_adjindx(dbp,
+                                   h, indx + P_INDX, indx, 1)) != 0)
+                                       goto err;
+
+                               indx += 3;
+                               dupadjust = 1;
+
+                               *indxp += 2;
+                       } else {
+                               ++indx;
+                               __bam_ca_di(dbp, h->pgno, indx, 1);
+
+                               *indxp += 1;
+                       }
+                       break;
+               case DB_BEFORE:         /* 6. Insert a new data item. */
+                       if (TYPE(h) == P_LBTREE) {
+                               /*
+                                * Adjust the cursor and copy in the key for
+                                * the duplicate.
+                                */
+                               if ((ret =
+                                   __bam_adjindx(dbp, h, indx, indx, 1)) != 0)
+                                       goto err;
+
+                               ++indx;
+                               dupadjust = 1;
+                       } else
+                               __bam_ca_di(dbp, h->pgno, indx, 1);
+                       break;
+               default:
+                       abort();
+               }
+       }
+
+       /* Add the data. */
+       if (bigdata)
+               OVPUT(h, indx, dbo);
+       else {
+               BKEYDATA __bk;
+               DBT __hdr, __data;
+               memset(&__data, 0, sizeof(__data));
+               __data.data = data->data;
+               __data.size = data->size;
+
+               if (LF_ISSET(BI_DELETED)) {
+                       __bk.len = __data.size;
+                       __bk.deleted = 1;
+                       __bk.type = B_KEYDATA;
+                       __hdr.data = &__bk;
+                       __hdr.size = SSZA(BKEYDATA, data);
+                       ret = __db_pitem(dbp, h, indx,
+                           BKEYDATA_SIZE(__data.size), &__hdr, &__data);
+               } else
+                       ret = __db_pitem(dbp, h, indx,
+                           BKEYDATA_SIZE(data->size), NULL, &__data);
+               if (ret != 0)
+                       goto err;
+       }
+
+done:  ++t->lstat.bt_added;
+
+       ret = memp_fset(dbp->mpf, h, DB_MPOOL_DIRTY);
+
+       /*
+        * If the page is at least 50% full, and we added a duplicate, see if
+        * that set of duplicates takes up at least 25% of the space.  If it
+        * does, move it off onto its own page.
+        */
+       if (dupadjust && P_FREESPACE(h) <= dbp->pgsize / 2) {
+               --indx;
+               if ((ret = __bam_ndup(dbp, h, indx)) != 0)
+                       goto err;
+       }
+
+       if (t->bt_recno != NULL)
+               F_SET(t->bt_recno, RECNO_MODIFIED);
+
+       if (0) {
+err:           if (bigkey)
+                       (void)__db_doff(dbp, kbo.pgno, __bam_free);
+               if (bigdata)
+                       (void)__db_doff(dbp, dbo.pgno, __bam_free);
+       }
+       return (ret);
+}
+
+/*
+ * __bam_ndup --
+ *     Check to see if the duplicate set at indx should have its own page.
+ *     If it should, create it.
+ */
+static int
+__bam_ndup(dbp, h, indx)
+       DB *dbp;
+       PAGE *h;
+       u_int32_t indx;
+{
+       BKEYDATA *bk;
+       BOVERFLOW bo;
+       DBT hdr;
+       PAGE *cp;
+       db_indx_t cnt, cpindx, first, sz;
+       int ret;
+
+       while (indx > 0 && h->inp[indx] == h->inp[indx - P_INDX])
+               indx -= P_INDX;
+       for (cnt = 0, sz = 0, first = indx;; ++cnt, indx += P_INDX) {
+               if (indx >= NUM_ENT(h) || h->inp[first] != h->inp[indx])
+                       break;
+               bk = GET_BKEYDATA(h, indx);
+               sz += bk->type == B_KEYDATA ?
+                   BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
+               bk = GET_BKEYDATA(h, indx + O_INDX);
+               sz += bk->type == B_KEYDATA ?
+                   BKEYDATA_PSIZE(bk->len) : BOVERFLOW_PSIZE;
+       }
+
+       /*
+        * If this set of duplicates is using at least 25% of the page, move
+        * them off.  The choice of 25% is a WAG, but it has to be small enough
+        * that we can always split regardless of the presence of duplicates.
+        */
+       if (sz < dbp->pgsize / 4)
+               return (0);
+
+       /* Get a new page. */
+       if ((ret = __bam_new(dbp, P_DUPLICATE, &cp)) != 0)
+               return (ret);
+
+       /*
+        * Move this set of duplicates off the page.  First points to the first
+        * key of the first duplicate key/data pair, cnt is the number of pairs
+        * we're dealing with.
+        */
+       memset(&hdr, 0, sizeof(hdr));
+       for (indx = first + O_INDX, cpindx = 0;; ++cpindx) {
+               /* Copy the entry to the new page. */
+               bk = GET_BKEYDATA(h, indx);
+               hdr.data = bk;
+               hdr.size = bk->type == B_KEYDATA ?
+                   BKEYDATA_SIZE(bk->len) : BOVERFLOW_SIZE;
+               if ((ret =
+                   __db_pitem(dbp, cp, cpindx, hdr.size, &hdr, NULL)) != 0)
+                       goto err;
+
+               /*
+                * Move cursors referencing the old entry to the new entry.
+                * Done after the page put because __db_pitem() adjusts
+                * cursors on the new page, and before the delete because
+                * __db_ditem adjusts cursors on the old page.
+                */
+               __bam_ca_dup(dbp,
+                   PGNO(h), first, indx - O_INDX, PGNO(cp), cpindx);
+
+               /* Delete the data item. */
+               if ((ret = __db_ditem(dbp, h, indx, hdr.size)) != 0)
+                       goto err;
+
+               /* Delete all but the first reference to the key. */
+               if (--cnt == 0)
+                       break;
+               if ((ret = __bam_adjindx(dbp, h, indx, first, 0)) != 0)
+                       goto err;
+       }
+
+       /* Point a new item at the dup page (NOTE: OVPUT failure skips err). */
+       bo.deleted = 0;
+       bo.type = B_DUPLICATE;
+       bo.pgno = cp->pgno;
+       bo.tlen = 0;
+
+       OVPUT(h, indx, bo);
+
+       return (memp_fput(dbp->mpf, cp, DB_MPOOL_DIRTY));
+
+err:   (void)__bam_free(dbp, cp);
+       return (ret);
+}
+
+/*
+ * __bam_fixed --
+ *     Build the real record for a fixed length put.
+ */
+static int
+__bam_fixed(t, dbt)
+       BTREE *t;
+       DBT *dbt;
+{
+       RECNO *rp;
+
+       rp = t->bt_recno;
+
+       /*
+        * If using fixed-length records, and the record is long, return
+        * EINVAL.  If it's short, pad it with re_pad in the record data return
+        * memory (short-term).  NOTE(review): failed realloc() leaks old data.
+        */
+       if (dbt->size > rp->re_len)
+               return (EINVAL);
+       if (t->bt_rdata.ulen < rp->re_len) {
+               t->bt_rdata.data = t->bt_rdata.data == NULL ?
+                   (void *)malloc(rp->re_len) :
+                   (void *)realloc(t->bt_rdata.data, rp->re_len);
+               if (t->bt_rdata.data == NULL) {
+                       t->bt_rdata.ulen = 0;
+                       return (ENOMEM);
+               }
+               t->bt_rdata.ulen = rp->re_len;
+       }
+       memcpy(t->bt_rdata.data, dbt->data, dbt->size);
+       memset((u_int8_t *)t->bt_rdata.data + dbt->size,
+           rp->re_pad, rp->re_len - dbt->size);
+
+       /* Set the DBT to reference our new record. */
+       t->bt_rdata.size = rp->re_len;
+       t->bt_rdata.dlen = 0;
+       t->bt_rdata.doff = 0;
+       t->bt_rdata.flags = 0;
+       *dbt = t->bt_rdata;
+       return (0);
+}
+
+/*
+ * __bam_partial --
+ *     Build the real record for a partial put.
+ */
+static int
+__bam_partial(dbp, dbt, h, indx)
+       DB *dbp;
+       DBT *dbt;
+       PAGE *h;
+       u_int32_t indx;
+{
+       BTREE *t;
+       BKEYDATA *bk, tbk;
+       BOVERFLOW *bo;
+       DBT copy;
+       u_int32_t len, nbytes, tlen;
+       int ret;
+       u_int8_t *p;
+
+       bo = NULL;                      /* XXX: Shut the compiler up. */
+       t = dbp->internal;
+
+       /*
+        * Figure out how much total space we'll need.  Worst case is where
+        * the record is 0 bytes long, in which case doff causes the record
+        * to extend, and the put data is appended to it.
+        */
+       if (indx < NUM_ENT(h)) {
+               bk = GET_BKEYDATA(h, indx + (TYPE(h) == P_LBTREE ? O_INDX : 0));
+               if (bk->type == B_OVERFLOW) {
+                       bo = (BOVERFLOW *)bk;
+                       nbytes = bo->tlen;
+               } else
+                       nbytes = bk->len;
+       } else {
+               bk = &tbk;
+               bk->type = B_KEYDATA;
+               nbytes = bk->len = 0;
+       }
+       nbytes += dbt->doff + dbt->size + dbt->dlen;
+
+       /* Allocate space.  NOTE(review): failed realloc() leaks old data. */
+       if (t->bt_rdata.ulen < nbytes) {
+               t->bt_rdata.data = t->bt_rdata.data == NULL ?
+                   (void *)malloc(nbytes) :
+                   (void *)realloc(t->bt_rdata.data, nbytes);
+               if (t->bt_rdata.data == NULL) {
+                       t->bt_rdata.ulen = 0;
+                       return (ENOMEM);
+               }
+               t->bt_rdata.ulen = nbytes;
+       }
+
+       /* We use nul bytes for extending the record, get it over with. */
+       memset(t->bt_rdata.data, 0, nbytes);
+
+       tlen = 0;
+       if (bk->type == B_OVERFLOW) {
+               /* Take up to doff bytes from the record. */
+               memset(&copy, 0, sizeof(copy));
+               if ((ret = __db_goff(dbp, &copy, bo->tlen,
+                   bo->pgno, &t->bt_rdata.data, &t->bt_rdata.ulen)) != 0)
+                       return (ret);
+               tlen += dbt->doff;
+
+               /*
+                * If the original record was larger than the offset, shift
+                * the remaining data down (dlen > size) or up (dlen < size)
+                * with memmove(), as the regions may overlap.  NOTE(review):
+                * the dlen < size memmove() copies toward doff + dlen; verify.
+                */
+               p = t->bt_rdata.data;
+               if (bo->tlen > dbt->doff)
+                       if (dbt->dlen > dbt->size) {
+                               tlen += len = bo->tlen -
+                                   dbt->doff - (dbt->dlen - dbt->size);
+                               memmove(p + dbt->doff + dbt->size,
+                                   p + dbt->doff + dbt->dlen, len);
+                       } else if (dbt->dlen < dbt->size) {
+                               tlen += len = bo->tlen -
+                                   dbt->doff - (dbt->size - dbt->dlen);
+                               memmove(p + dbt->doff + dbt->dlen,
+                                   p + dbt->doff + dbt->size, len);
+                       } else
+                               tlen += bo->tlen - dbt->doff;
+
+               /* Copy in the user's data. */
+               memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff,
+                   dbt->data, dbt->size);
+               tlen += dbt->size;
+       } else {
+               /* Take up to doff bytes from the record. */
+               memcpy(t->bt_rdata.data,
+                   bk->data, dbt->doff > bk->len ? bk->len : dbt->doff);
+               tlen += dbt->doff;
+
+               /* Copy in the user's data. */
+               memcpy((u_int8_t *)t->bt_rdata.data +
+                   dbt->doff, dbt->data, dbt->size);
+               tlen += dbt->size;
+
+               /* Copy in any remaining data. */
+               len = dbt->doff + dbt->dlen;
+               if (bk->len > len) {
+                       memcpy((u_int8_t *)t->bt_rdata.data + dbt->doff +
+                           dbt->size, bk->data + len, bk->len - len);
+                       tlen += bk->len - len;
+               }
+       }
+
+       /* Set the DBT to reference our new record. */
+       t->bt_rdata.size = tlen;
+       t->bt_rdata.dlen = 0;
+       t->bt_rdata.doff = 0;
+       t->bt_rdata.flags = 0;
+       *dbt = t->bt_rdata;
+       return (0);
+}
diff --git a/db2/btree/bt_rec.c b/db2/btree/bt_rec.c
new file mode 100644 (file)
index 0000000..d4bc7f6
--- /dev/null
@@ -0,0 +1,767 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_rec.c     10.11 (Sleepycat) 8/22/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "shqueue.h"
+#include "hash.h"
+#include "btree.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "common_ext.h"
+
+/*
+ * __bam_pg_alloc_recover --
+ *     Recovery function for pg_alloc.
+ *
+ *     Replays (redo != 0) or rolls back (redo == 0) a logged page
+ *     allocation against two pages: the allocated page and the metadata
+ *     page.  Once both pages have been handed back to the pool without
+ *     error, *lsnp is replaced with the log record's prev_lsn.
+ *
+ * PUBLIC: int __bam_pg_alloc_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_pg_alloc_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __bam_pg_alloc_args *argp;
+       BTMETA *meta;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       DB *file_dbp, *mdbp;
+       db_pgno_t pgno;
+       int cmp_n, cmp_p, created, modified, ret;
+
+       /*
+        * argp, file_dbp, mdbp and mpf are never assigned in this function
+        * body; presumably REC_INTRO fills them in (macro not visible here).
+        */
+       REC_PRINT(__bam_pg_alloc_print);
+       REC_INTRO(__bam_pg_alloc_read);
+
+       /*
+        * Two pages are involved.
+        *
+        * The allocated page: on redo it is fetched (and created if it does
+        * not exist yet), re-initialized and stamped with this record's LSN;
+        * on undo its LSN is reset and it is linked back in front of the
+        * free list.
+        *
+        * The metadata page: on redo its LSN and free pointer are advanced;
+        * on undo they are restored so the free list starts at the page
+        * again.
+        *
+        * Get the metadata page first, then the allocated page.
+        */
+       pgno = PGNO_METADATA;
+       ret = memp_fget(mpf, &pgno, 0, &meta);
+       if (ret != 0) {
+               (void)__db_pgerr(file_dbp, pgno);
+               goto out;
+       }
+       ret = memp_fget(mpf, &argp->pgno, DB_MPOOL_CREATE, &pagep);
+       if (ret != 0) {
+               (void)__db_pgerr(file_dbp, argp->pgno);
+               /* Do not leave the metadata page pinned. */
+               (void)memp_fput(mpf, meta, 0);
+               goto out;
+       }
+
+       /*
+        * Allocated page.  A zero LSN is taken to mean the page was only
+        * just created by the fetch above.
+        */
+       created = IS_ZERO_LSN(LSN(pagep));
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->page_lsn);
+       modified = 0;
+       if (redo) {
+               if (created || cmp_p == 0) {
+                       /* Page is in its pre-allocation state: redo. */
+                       P_INIT(pagep, file_dbp->pgsize, argp->pgno,
+                           PGNO_INVALID, PGNO_INVALID, 0, argp->ptype);
+
+                       pagep->lsn = *lsnp;
+                       modified = 1;
+               }
+       } else if (created || cmp_n == 0) {
+               /* Page carries this record's update: undo. */
+               P_INIT(pagep, file_dbp->pgsize, argp->pgno,
+                   PGNO_INVALID, meta->free, 0, P_INVALID);
+
+               pagep->lsn = argp->page_lsn;
+               modified = 1;
+       }
+       ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
+       if (ret != 0) {
+               (void)__db_panic(file_dbp);
+               (void)memp_fput(mpf, meta, 0);
+               goto out;
+       }
+
+       /* Metadata page. */
+       cmp_n = log_compare(lsnp, &LSN(meta));
+       cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
+       modified = 0;
+       if (redo) {
+               if (cmp_p == 0) {
+                       /* Advance the free list past the allocated page. */
+                       meta->lsn = *lsnp;
+                       meta->free = argp->next;
+                       modified = 1;
+               }
+       } else if (cmp_n == 0) {
+               /* Put the page back at the head of the free list. */
+               meta->lsn = argp->meta_lsn;
+               meta->free = argp->pgno;
+               modified = 1;
+       }
+       ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0);
+       if (ret != 0) {
+               (void)__db_panic(file_dbp);
+               goto out;
+       }
+
+       /* Success: hand the caller the previous LSN in the chain. */
+       ret = 0;
+       *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __bam_pg_free_recover --
+ *     Recovery function for pg_free.
+ *
+ *     Replays (redo != 0) or rolls back (redo == 0) a logged page free
+ *     against the freed page and then the metadata page.  Once both pages
+ *     have been handed back to the pool without error, *lsnp is replaced
+ *     with the log record's prev_lsn.
+ *
+ * PUBLIC: int __bam_pg_free_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_pg_free_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __bam_pg_free_args *argp;
+       BTMETA *meta;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       db_pgno_t pgno;
+       int cmp_n, cmp_p, modified, ret;
+
+       /*
+        * argp, file_dbp, mdbp and mpf are never assigned in this function
+        * body; presumably REC_INTRO fills them in (macro not visible here).
+        */
+       REC_PRINT(__bam_pg_free_print);
+       REC_INTRO(__bam_pg_free_read);
+
+       /*
+        * The freed page.  Redo throws the contents away by re-initializing
+        * the page as an invalid page chained to argp->next and stamps it
+        * with this record's LSN.  Undo copies the logged header back over
+        * the start of the page.
+        */
+       ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+       if (ret != 0) {
+               (void)__db_pgerr(file_dbp, argp->pgno);
+               goto out;
+       }
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &LSN(argp->header.data));
+       modified = 0;
+       if (redo) {
+               if (cmp_p == 0) {
+                       /* Page still holds the logged header: redo. */
+                       P_INIT(pagep, file_dbp->pgsize, pagep->pgno,
+                           PGNO_INVALID, argp->next, 0, P_INVALID);
+                       pagep->lsn = *lsnp;
+
+                       modified = 1;
+               }
+       } else if (cmp_n == 0) {
+               /* Page carries this record's update: undo. */
+               memcpy(pagep, argp->header.data, argp->header.size);
+
+               modified = 1;
+       }
+       ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
+       if (ret != 0) {
+               (void)__db_panic(file_dbp);
+               goto out;
+       }
+
+       /*
+        * The metadata page.  Both directions rewrite its LSN and its free
+        * pointer: redo points the free list at the freed page, undo points
+        * it back at argp->next.
+        */
+       pgno = PGNO_METADATA;
+       ret = memp_fget(mpf, &pgno, 0, &meta);
+       if (ret != 0) {
+               (void)__db_pgerr(file_dbp, pgno);
+               goto out;
+       }
+
+       cmp_n = log_compare(lsnp, &LSN(meta));
+       cmp_p = log_compare(&LSN(meta), &argp->meta_lsn);
+       modified = 0;
+       if (redo) {
+               if (cmp_p == 0) {
+                       meta->free = argp->pgno;
+
+                       meta->lsn = *lsnp;
+                       modified = 1;
+               }
+       } else if (cmp_n == 0) {
+               meta->free = argp->next;
+
+               meta->lsn = argp->meta_lsn;
+               modified = 1;
+       }
+       ret = memp_fput(mpf, meta, modified ? DB_MPOOL_DIRTY : 0);
+       if (ret != 0) {
+               (void)__db_panic(file_dbp);
+               goto out;
+       }
+
+       /* Success: hand the caller the previous LSN in the chain. */
+       ret = 0;
+       *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __bam_split_recover --
+ *     Recovery function for split.
+ *
+ *     Replays (redo != 0) or rolls back (redo == 0) a logged page split.
+ *     The log record carries an image of the page before the split
+ *     (argp->pg), the page numbers of the left and right children, the
+ *     split index, and the number/LSN of the page that followed the split
+ *     page.  On success *lsnp is replaced with the record's prev_lsn.
+ *
+ * PUBLIC: int __bam_split_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_split_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __bam_split_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *_lp, *lp, *np, *pp, *_rp, *rp, *sp;
+       db_pgno_t pgno;
+       int l_update, p_update, r_update, ret, rootsplit, t_ret;
+
+       REC_PRINT(__bam_split_print);
+
+       /*
+        * Clear every pointer the cleanup code at "out" inspects before
+        * REC_INTRO runs, so cleanup is safe no matter how early we leave.
+        */
+       mpf = NULL;
+       _lp = lp = np = pp = _rp = rp = NULL;
+
+       REC_INTRO(__bam_split_read);
+
+       /*
+        * There are two kinds of splits that we have to recover from.  The
+        * first is a root-page split, where the root page is split from a
+        * leaf page into an internal page and two new leaf pages are created.
+        * The second is where a page is split into two pages, and a new key
+        * is inserted into the parent page.
+        */
+       sp = argp->pg.data;
+       pgno = PGNO(sp);
+       rootsplit = pgno == PGNO_ROOT;
+
+       /*
+        * The children may legitimately not exist yet, so a failed fetch is
+        * remembered as a NULL pointer rather than treated as an error.
+        */
+       if (memp_fget(mpf, &argp->left, 0, &lp) != 0)
+               lp = NULL;
+       if (memp_fget(mpf, &argp->right, 0, &rp) != 0)
+               rp = NULL;
+
+       if (redo) {
+               l_update = r_update = p_update = 0;
+               /*
+                * Decide if we need to resplit the page.
+                *
+                * If this is a root split, then the root has to exist, it's
+                * the page we're splitting and it gets modified.  If this is
+                * not a root split, then the left page has to exist, for the
+                * same reason.
+                */
+               if (rootsplit) {
+                       if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
+                               (void)__db_pgerr(file_dbp, pgno);
+                               pp = NULL;
+                               goto out;
+                       }
+                       p_update =
+                           log_compare(&LSN(pp), &LSN(argp->pg.data)) == 0;
+               } else
+                       if (lp == NULL) {
+                               /*
+                                * NOTE(review): ret is not assigned on this
+                                * path, so whatever value REC_INTRO left in
+                                * it is what gets reported -- confirm that
+                                * is intended.
+                                */
+                               (void)__db_pgerr(file_dbp, argp->left);
+                               goto out;
+                       }
+               if (lp == NULL || log_compare(&LSN(lp), &argp->llsn) == 0)
+                       l_update = 1;
+               if (rp == NULL || log_compare(&LSN(rp), &argp->rlsn) == 0)
+                       r_update = 1;
+               if (!p_update && !l_update && !r_update)
+                       goto done;
+
+               /*
+                * Allocate and initialize new left/right child pages.  These
+                * are scratch copies; they are copied over the real pages
+                * below and freed at "out".
+                */
+               if ((_lp = (PAGE *)malloc(file_dbp->pgsize)) == NULL)
+                       goto nomem;
+               if ((_rp = (PAGE *)malloc(file_dbp->pgsize)) == NULL) {
+                       /*
+                        * Report the failure through ret as well as errno;
+                        * otherwise the caller would see the stale ret.
+                        */
+nomem:                 ret = errno = ENOMEM;
+                       __db_err(file_dbp->dbenv, "%s", strerror(errno));
+                       goto out;
+               }
+               if (rootsplit) {
+                       P_INIT(_lp, file_dbp->pgsize, argp->left,
+                           PGNO_INVALID,
+                           ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
+                           LEVEL(sp), TYPE(sp));
+                       P_INIT(_rp, file_dbp->pgsize, argp->right,
+                           ISINTERNAL(sp) ?  PGNO_INVALID : argp->left,
+                           PGNO_INVALID, LEVEL(sp), TYPE(sp));
+               } else {
+                       P_INIT(_lp, file_dbp->pgsize, PGNO(sp),
+                           ISINTERNAL(sp) ? PGNO_INVALID : PREV_PGNO(sp),
+                           ISINTERNAL(sp) ? PGNO_INVALID : argp->right,
+                           LEVEL(sp), TYPE(sp));
+                       P_INIT(_rp, file_dbp->pgsize, argp->right,
+                           ISINTERNAL(sp) ? PGNO_INVALID : sp->pgno,
+                           ISINTERNAL(sp) ? PGNO_INVALID : NEXT_PGNO(sp),
+                           LEVEL(sp), TYPE(sp));
+               }
+
+               /* Split the page. */
+               if ((ret = __bam_copy(file_dbp, sp, _lp, 0, argp->indx)) != 0 ||
+                   (ret = __bam_copy(file_dbp, sp, _rp, argp->indx,
+                   NUM_ENT(sp))) != 0)
+                       goto out;
+
+               /* If the left child is wrong, update it. */
+               if (lp == NULL && (ret =
+                   memp_fget(mpf, &argp->left, DB_MPOOL_CREATE, &lp)) != 0) {
+                       (void)__db_pgerr(file_dbp, argp->left);
+                       lp = NULL;
+                       goto out;
+               }
+               if (l_update) {
+                       memcpy(lp, _lp, file_dbp->pgsize);
+                       lp->lsn = *lsnp;
+                       if ((ret = memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
+                               goto fatal;
+                       lp = NULL;
+               }
+
+               /* If the right child is wrong, update it. */
+               if (rp == NULL && (ret = memp_fget(mpf,
+                   &argp->right, DB_MPOOL_CREATE, &rp)) != 0) {
+                       (void)__db_pgerr(file_dbp, argp->right);
+                       rp = NULL;
+                       goto out;
+               }
+               if (r_update) {
+                       memcpy(rp, _rp, file_dbp->pgsize);
+                       rp->lsn = *lsnp;
+                       if ((ret = memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
+                               goto fatal;
+                       rp = NULL;
+               }
+
+               /*
+                * If the parent page is wrong, update it.  This is of interest
+                * only if it was a root split, since root splits create parent
+                * pages.  All other splits modify a parent page, but those are
+                * separately logged and recovered.
+                */
+               if (rootsplit && p_update) {
+                       if (file_dbp->type == DB_BTREE)
+                               P_INIT(pp, file_dbp->pgsize,
+                                   PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
+                                   _lp->level + 1, P_IBTREE);
+                       else
+                               P_INIT(pp, file_dbp->pgsize,
+                                   PGNO_ROOT, PGNO_INVALID, PGNO_INVALID,
+                                   _lp->level + 1, P_IRECNO);
+                       RE_NREC_SET(pp,
+                           file_dbp->type == DB_RECNO ||
+                           F_ISSET(file_dbp, DB_BT_RECNUM) ?
+                           __bam_total(_lp) + __bam_total(_rp) : 0);
+                       pp->lsn = *lsnp;
+                       if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
+                               goto fatal;
+                       pp = NULL;
+               }
+
+               /*
+                * Finally, redo the next-page link if necessary.  This is of
+                * interest only if it wasn't a root split -- inserting a new
+                * page in the tree requires that any following page have its
+                * previous-page pointer updated to our new page.  The next
+                * page had better exist.
+                */
+               if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
+                       if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
+                               (void)__db_pgerr(file_dbp, argp->npgno);
+                               np = NULL;
+                               goto out;
+                       }
+                       if (log_compare(&LSN(np), &argp->nlsn) == 0) {
+                               PREV_PGNO(np) = argp->right;
+                               np->lsn = *lsnp;
+                               if ((ret = memp_fput(mpf,
+                                   np, DB_MPOOL_DIRTY)) != 0)
+                                       goto fatal;
+                               np = NULL;
+                       }
+               }
+       } else {
+               /*
+                * If the split page is wrong, replace its contents with the
+                * logged page contents.  The split page had better exist.
+                */
+               if ((ret = memp_fget(mpf, &pgno, 0, &pp)) != 0) {
+                       (void)__db_pgerr(file_dbp, pgno);
+                       pp = NULL;
+                       goto out;
+               }
+               if (log_compare(lsnp, &LSN(pp)) == 0) {
+                       memcpy(pp, argp->pg.data, argp->pg.size);
+                       if ((ret = memp_fput(mpf, pp, DB_MPOOL_DIRTY)) != 0)
+                               goto fatal;
+                       pp = NULL;
+               }
+
+               /*
+                * If it's a root split and the left child ever existed, put
+                * it on the free list.  (If it's not a root split, we just
+                * updated the left page -- it's the same as the split page.)
+                * If the right child ever existed, root split or not, put it
+                * on the free list.
+                */
+               if ((rootsplit && lp != NULL) || rp != NULL) {
+                       if (rootsplit && lp != NULL &&
+                           log_compare(lsnp, &LSN(lp)) == 0) {
+                               lp->lsn = argp->llsn;
+                               if ((ret =
+                                   memp_fput(mpf, lp, DB_MPOOL_DIRTY)) != 0)
+                                       goto fatal;
+                               lp = NULL;
+                       }
+                       if (rp != NULL &&
+                           log_compare(lsnp, &LSN(rp)) == 0) {
+                               rp->lsn = argp->rlsn;
+                               if ((ret =
+                                   memp_fput(mpf, rp, DB_MPOOL_DIRTY)) != 0)
+                                       goto fatal;
+                               rp = NULL;
+                       }
+               }
+
+               /*
+                * Finally, undo the next-page link if necessary.  This is of
+                * interest only if it wasn't a root split -- inserting a new
+                * page in the tree requires that any following page have its
+                * previous-page pointer updated to our new page.  The next
+                * page had better exist.
+                */
+               if (!rootsplit && !IS_ZERO_LSN(argp->nlsn)) {
+                       if ((ret = memp_fget(mpf, &argp->npgno, 0, &np)) != 0) {
+                               (void)__db_pgerr(file_dbp, argp->npgno);
+                               np = NULL;
+                               goto out;
+                       }
+                       if (log_compare(lsnp, &LSN(np)) == 0) {
+                               PREV_PGNO(np) = argp->left;
+                               np->lsn = argp->nlsn;
+                               /*
+                                * Capture the error in ret: it is 0 here
+                                * after the successful fetch above, and the
+                                * "fatal" path does not assign it.
+                                */
+                               if ((ret = memp_fput(mpf,
+                                   np, DB_MPOOL_DIRTY)) != 0)
+                                       goto fatal;
+                               np = NULL;
+                       }
+               }
+       }
+
+done:  ret = 0;
+       *lsnp = argp->prev_lsn;
+
+       /* The panic call is reachable only through the "fatal" label. */
+       if (0) {
+fatal:         (void)__db_panic(file_dbp);
+       }
+out:   /* Free any pages that weren't dirtied. */
+       if (pp != NULL && (t_ret = memp_fput(mpf, pp, 0)) != 0 && ret == 0)
+               ret = t_ret;
+       if (lp != NULL && (t_ret = memp_fput(mpf, lp, 0)) != 0 && ret == 0)
+               ret = t_ret;
+       if (np != NULL && (t_ret = memp_fput(mpf, np, 0)) != 0 && ret == 0)
+               ret = t_ret;
+       if (rp != NULL && (t_ret = memp_fput(mpf, rp, 0)) != 0 && ret == 0)
+               ret = t_ret;
+
+       /* Free any allocated space. */
+       if (_lp != NULL)
+               free(_lp);
+       if (_rp != NULL)
+               free(_rp);
+
+       REC_CLOSE;
+}
+
+/*
+ * __bam_rsplit_recover --
+ *     Recovery function for a reverse split.
+ *
+ *     Replays (redo != 0) or rolls back (redo == 0) the copy of a child
+ *     page (argp->pgno, image in argp->pgdbt) over the root page.  Two
+ *     pages are fixed in turn: the root, then the page that was copied.
+ *     On success *lsnp is replaced with the record's prev_lsn.
+ *
+ * PUBLIC: int __bam_rsplit_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_rsplit_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __bam_rsplit_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       db_pgno_t pgno;
+       int cmp_n, cmp_p, modified, ret;
+
+       REC_PRINT(__bam_rsplit_print);
+       REC_INTRO(__bam_rsplit_read);
+
+       /* Fix the root page. */
+       pgno = PGNO_ROOT;
+       if ((ret = memp_fget(mpf, &pgno, 0, &pagep)) != 0) {
+               (void)__db_pgerr(file_dbp, pgno);
+               pagep = NULL;
+               goto out;
+       }
+       modified = 0;
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->rootlsn);
+       if (cmp_p == 0 && redo) {
+               /*
+                * Need to redo update described: overwrite the root with
+                * the logged child image, then restore the root's own page
+                * number, which the copy replaced.
+                */
+               memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
+               pagep->pgno = PGNO_ROOT;
+               pagep->lsn = *lsnp;
+               modified = 1;
+       } else if (cmp_n == 0 && !redo) {
+               /*
+                * Need to undo update described: rebuild the root one level
+                * above its current level and re-insert the logged root
+                * entry as its first item.
+                */
+               P_INIT(pagep, file_dbp->pgsize, PGNO_ROOT,
+                   PGNO_INVALID, PGNO_INVALID, pagep->level + 1, TYPE(pagep));
+               if ((ret = __db_pitem(file_dbp, pagep, 0,
+                   argp->rootent.size, &argp->rootent, NULL)) != 0) {
+                       /* Don't leave the root page pinned on failure. */
+                       (void)memp_fput(mpf, pagep, 0);
+                       goto out;
+               }
+               pagep->lsn = argp->rootlsn;
+               modified = 1;
+       }
+       if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
+               (void)__db_panic(file_dbp);
+               goto out;
+       }
+
+       /* Fix the page copied over the root page. */
+       if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+               (void)__db_pgerr(file_dbp, argp->pgno);
+               pagep = NULL;
+               goto out;
+       }
+       modified = 0;
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &LSN(argp->pgdbt.data));
+       if (cmp_p == 0 && redo) {
+               /* Need to redo update described: only the LSN changes. */
+               pagep->lsn = *lsnp;
+               modified = 1;
+       } else if (cmp_n == 0 && !redo) {
+               /* Need to undo update described: restore the logged image. */
+               memcpy(pagep, argp->pgdbt.data, argp->pgdbt.size);
+               modified = 1;
+       }
+       if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
+               (void)__db_panic(file_dbp);
+               goto out;
+       }
+
+       ret = 0;
+       *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __bam_adj_recover --
+ *     Recovery function for adj.
+ *
+ *     Replays (redo != 0) or rolls back (redo == 0) a logged index
+ *     adjustment on page argp->pgno by calling __bam_adjindx; the undo
+ *     direction passes the logical negation of argp->is_insert.  When the
+ *     page is handed back to the pool without error, *lsnp is replaced
+ *     with the record's prev_lsn.
+ *
+ * PUBLIC: int __bam_adj_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_adj_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __bam_adj_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int cmp_n, cmp_p, modified, ret;
+
+       REC_PRINT(__bam_adj_print);
+       REC_INTRO(__bam_adj_read);
+
+       ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+       if (ret != 0) {
+               (void)__db_pgerr(file_dbp, argp->pgno);
+               pagep = NULL;
+               goto out;
+       }
+
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->lsn);
+       modified = 0;
+       if (redo) {
+               if (cmp_p == 0) {
+                       /* Page is in its pre-update state: redo. */
+                       ret = __bam_adjindx(file_dbp, pagep,
+                           argp->indx, argp->indx_copy, argp->is_insert);
+                       if (ret != 0)
+                               goto err;
+
+                       LSN(pagep) = *lsnp;
+                       modified = 1;
+               }
+       } else if (cmp_n == 0) {
+               /* Page carries this record's update: undo. */
+               ret = __bam_adjindx(file_dbp, pagep,
+                   argp->indx, argp->indx_copy, !argp->is_insert);
+               if (ret != 0)
+                       goto err;
+
+               LSN(pagep) = argp->lsn;
+               modified = 1;
+       }
+
+       /* The caller's LSN only moves if the page was returned cleanly. */
+       ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
+       if (ret == 0)
+               *lsnp = argp->prev_lsn;
+       goto out;
+
+       /* Adjustment failed: release the page, keep the error in ret. */
+err:   (void)memp_fput(mpf, pagep, 0);
+out:   REC_CLOSE;
+}
+
+/*
+ * __bam_cadjust_recover --
+ *     Recovery function for the adjust of a count change in an internal
+ *     page.
+ *
+ *     Redo (redo != 0) adds argp->adjust to the record count of the
+ *     internal entry at argp->indx; undo (redo == 0) subtracts it again.
+ *     When argp->total is set and the page is the root, the page-level
+ *     count is adjusted in the same direction through RE_NREC_ADJ.  When
+ *     the page is handed back to the pool without error, *lsnp is
+ *     replaced with the record's prev_lsn.
+ *
+ * PUBLIC: int __bam_cadjust_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_cadjust_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __bam_cadjust_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int cmp_n, cmp_p, modified, ret;
+
+       REC_PRINT(__bam_cadjust_print);
+       REC_INTRO(__bam_cadjust_read);
+
+       if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+               errno = __db_pgerr(file_dbp, argp->pgno);
+               pagep = NULL;
+               goto out;
+       }
+
+       modified = 0;
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->lsn);
+       if (cmp_p == 0 && redo) {
+               /* Need to redo update described. */
+               if (file_dbp->type == DB_BTREE &&
+                   F_ISSET(file_dbp, DB_BT_RECNUM)) {
+                       GET_BINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
+                       if (argp->total && PGNO(pagep) == PGNO_ROOT)
+                               RE_NREC_ADJ(pagep, argp->adjust);
+               }
+               if (file_dbp->type == DB_RECNO) {
+                       GET_RINTERNAL(pagep, argp->indx)->nrecs += argp->adjust;
+                       if (argp->total && PGNO(pagep) == PGNO_ROOT)
+                               RE_NREC_ADJ(pagep, argp->adjust);
+               }
+
+               LSN(pagep) = *lsnp;
+               modified = 1;
+       } else if (cmp_n == 0 && !redo) {
+               /*
+                * Need to undo update described.  Every adjustment made by
+                * redo is reversed, so the root total must be adjusted by
+                * the negated amount in both access methods.
+                */
+               if (file_dbp->type == DB_BTREE &&
+                   F_ISSET(file_dbp, DB_BT_RECNUM)) {
+                       GET_BINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
+                       if (argp->total && PGNO(pagep) == PGNO_ROOT)
+                               RE_NREC_ADJ(pagep, -(argp->adjust));
+               }
+               if (file_dbp->type == DB_RECNO) {
+                       GET_RINTERNAL(pagep, argp->indx)->nrecs -= argp->adjust;
+                       if (argp->total && PGNO(pagep) == PGNO_ROOT)
+                               RE_NREC_ADJ(pagep, -(argp->adjust));
+               }
+               LSN(pagep) = argp->lsn;
+               modified = 1;
+       }
+       if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) == 0)
+               *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __bam_cdel_recover --
+ *     Recovery function for the intent-to-delete of a cursor record.
+ *
+ *     Redo (redo != 0) sets, and undo (redo == 0) clears, the deleted
+ *     flag of the item at argp->indx + O_INDX on page argp->pgno.  When
+ *     the page is handed back to the pool without error, *lsnp is
+ *     replaced with the record's prev_lsn.
+ *
+ * PUBLIC: int __bam_cdel_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__bam_cdel_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __bam_cdel_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int cmp_n, cmp_p, modified, ret;
+
+       REC_PRINT(__bam_cdel_print);
+       REC_INTRO(__bam_cdel_read);
+
+       ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+       if (ret != 0) {
+               (void)__db_pgerr(file_dbp, argp->pgno);
+               pagep = NULL;
+               goto out;
+       }
+
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->lsn);
+       modified = 0;
+       if (redo) {
+               if (cmp_p == 0) {
+                       /* Page is in its pre-update state: mark deleted. */
+                       GET_BKEYDATA(pagep, argp->indx + O_INDX)->deleted = 1;
+
+                       LSN(pagep) = *lsnp;
+                       modified = 1;
+               }
+       } else if (cmp_n == 0) {
+               /* Page carries this record's update: clear the mark. */
+               GET_BKEYDATA(pagep, argp->indx + O_INDX)->deleted = 0;
+
+               LSN(pagep) = argp->lsn;
+               modified = 1;
+       }
+
+       /* The caller's LSN only moves if the page was returned cleanly. */
+       ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
+       if (ret == 0)
+               *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
diff --git a/db2/btree/bt_recno.c b/db2/btree/bt_recno.c
new file mode 100644 (file)
index 0000000..cd8872a
--- /dev/null
@@ -0,0 +1,1195 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_recno.c   10.12 (Sleepycat) 8/25/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+static int __ram_add __P((DB *, db_recno_t *, DBT *, int, int));
+static int __ram_c_close __P((DBC *));
+static int __ram_c_del __P((DBC *, int));
+static int __ram_c_get __P((DBC *, DBT *, DBT *, int));
+static int __ram_c_put __P((DBC *, DBT *, DBT *, int));
+static int __ram_fmap __P((DB *, db_recno_t));
+static int __ram_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __ram_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __ram_source __P((DB *, RECNO *, const char *));
+static int __ram_sync __P((DB *, int));
+static int __ram_update __P((DB *, db_recno_t, int));
+static int __ram_vmap __P((DB *, db_recno_t));
+static int __ram_writeback __P((DB *));
+
+/*
+ * If we're renumbering records, then we have to detect in the cursor that a
+ * record was deleted, and adjust the cursor as necessary.  If not renumbering
+ * records, then we can detect this by looking at the actual record, so we
+ * ignore the cursor delete flag.
+ *
+ * CD_SET and CD_CLR are statements: they are wrapped in do { } while (0) so
+ * that "CD_SET(dbp, cp);" is a single statement and remains safe inside an
+ * unbraced if/else.  CD_ISSET is an expression that is non-zero only when
+ * the database renumbers records and the cursor's CR_DELETED flag is set.
+ */
+#define        CD_SET(dbp, cp) do {                                            \
+       if (F_ISSET(dbp, DB_RE_RENUMBER))                               \
+               F_SET(cp, CR_DELETED);                                  \
+} while (0)
+#define        CD_CLR(dbp, cp) do {                                            \
+       if (F_ISSET(dbp, DB_RE_RENUMBER))                               \
+               F_CLR(cp, CR_DELETED);                                  \
+} while (0)
+#define        CD_ISSET(dbp, cp)                                               \
+       (F_ISSET(dbp, DB_RE_RENUMBER) && F_ISSET(cp, CR_DELETED))
+
+/*
+ * __ram_open --
+ *     Recno open function.
+ *
+ *     Allocates the private RECNO structure, loads the optional backing
+ *     source file, copies the delimiter/pad/length settings from dbinfo,
+ *     opens the underlying btree and installs the recno access methods.
+ *     Returns 0 on success.  On failure everything acquired here is
+ *     released and a non-zero error is returned (ENOMEM if the RECNO
+ *     structure cannot be allocated, EINVAL for a zero fixed record length,
+ *     otherwise whatever the failing helper returned).
+ *
+ * PUBLIC: int __ram_open __P((DB *, DBTYPE, DB_INFO *));
+ */
+int
+__ram_open(dbp, type, dbinfo)
+       DB *dbp;
+       DBTYPE type;
+       DB_INFO *dbinfo;
+{
+       BTREE *t;
+       RECNO *rp;
+       int ret;
+
+       ret = 0;
+
+       /* Allocate and initialize the private RECNO structure. */
+       if ((rp = (RECNO *)calloc(1, sizeof(*rp))) == NULL)
+               return (ENOMEM);
+
+       /*
+        * calloc() leaves re_fd as 0, which is a real descriptor.  Mark it
+        * unopened right away so that no error path below can close
+        * descriptor 0 by mistake.
+        */
+       rp->re_fd = -1;
+
+       if (dbinfo != NULL) {
+               /*
+                * If the user specified a source tree, open it and map it in.
+                *
+                * !!!
+                * We don't complain if the user specified transactions or
+                * threads.  It's possible to make it work, but you'd better
+                * know what you're doing!
+                */
+               if (dbinfo->re_source == NULL) {
+                       rp->re_fd = -1;
+                       F_SET(rp, RECNO_EOF);
+               } else {
+                       if ((ret =
+                           __ram_source(dbp, rp, dbinfo->re_source)) != 0) {
+                               /*
+                                * __ram_source() frees the file name on its
+                                * own error path; forget the pointer so it
+                                * is not freed a second time below.
+                                */
+                               rp->re_source = NULL;
+                               goto err;
+                       }
+               }
+
+               /* Copy delimiter, length and padding values. */
+               rp->re_delim =
+                   F_ISSET(dbp, DB_RE_DELIMITER) ? dbinfo->re_delim : '\n';
+               rp->re_pad = F_ISSET(dbp, DB_RE_PAD) ? dbinfo->re_pad : ' ';
+
+               if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+                       if ((rp->re_len = dbinfo->re_len) == 0) {
+                               __db_err(dbp->dbenv,
+                                   "record length must be greater than 0");
+                               ret = EINVAL;
+                               goto err;
+                       }
+               } else
+                       rp->re_len = 0;
+       } else {
+               rp->re_delim = '\n';
+               rp->re_pad = ' ';
+               rp->re_fd = -1;
+               F_SET(rp, RECNO_EOF);
+       }
+
+       /* Open the underlying btree. */
+       if ((ret = __bam_open(dbp, DB_RECNO, dbinfo)) != 0)
+               goto err;
+
+       /* Set the routines necessary to make it look like a recno tree. */
+       dbp->cursor = __ram_cursor;
+       dbp->del = __ram_delete;
+       dbp->get = __ram_get;
+       dbp->put = __ram_put;
+       dbp->sync = __ram_sync;
+
+       /* Link in the private recno structure. */
+       ((BTREE *)dbp->internal)->bt_recno = rp;
+
+       /* If we're snapshotting an underlying source file, do it now. */
+       if (dbinfo != NULL && F_ISSET(dbinfo, DB_SNAPSHOT))
+               if ((ret = __ram_snapshot(dbp)) != 0 && ret != DB_NOTFOUND)
+                       goto err;
+
+       return (0);
+
+err:   /* If we mmap'd a source file, discard it. */
+       if (rp->re_smap != NULL)
+               (void)__db_munmap(rp->re_smap, rp->re_msize);
+
+       /* If we opened a source file, discard it. */
+       if (rp->re_fd != -1)
+               (void)__db_close(rp->re_fd);
+       if (rp->re_source != NULL)
+               FREES(rp->re_source);
+
+       /*
+        * If we allocated room for key/data return, discard it.  We can get
+        * here before __bam_open() has run, so don't assume dbp->internal
+        * has been set up.
+        */
+       t = dbp->internal;
+       if (t != NULL && t->bt_rkey.data != NULL) {
+               free(t->bt_rkey.data);
+               t->bt_rkey.data = NULL;
+               t->bt_rkey.ulen = 0;
+       }
+
+       FREE(rp, sizeof(*rp));
+
+       return (ret);
+}
+
+/*
+ * __ram_cursor --
+ *     Recno db->cursor function.
+ *
+ *     Builds a DBC plus its private RCURSOR, wires in the recno cursor
+ *     methods, queues the cursor on the DB handle and hands it back through
+ *     dbcp.  Returns 0, or ENOMEM if either allocation fails.
+ *
+ * PUBLIC: int __ram_cursor __P((DB *, DB_TXN *, DBC **));
+ */
+int
+__ram_cursor(dbp, txn, dbcp)
+       DB *dbp;
+       DB_TXN *txn;
+       DBC **dbcp;
+{
+       DBC *cursor;
+       RCURSOR *rc;
+
+       DEBUG_LWRITE(dbp, txn, "ram_cursor", NULL, NULL, 0);
+
+       /* The generic cursor comes first, then its recno-private half. */
+       cursor = (DBC *)calloc(1, sizeof(DBC));
+       if (cursor == NULL)
+               return (ENOMEM);
+       rc = (RCURSOR *)calloc(1, sizeof(RCURSOR));
+       if (rc == NULL) {
+               free(cursor);
+               return (ENOMEM);
+       }
+
+       /* A new cursor does not reference any record yet. */
+       rc->recno = RECNO_OOB;
+       rc->dbc = cursor;
+
+       /* Fill in the public cursor: owner, transaction, private data. */
+       cursor->internal = rc;
+       cursor->dbp = dbp;
+       cursor->txn = txn;
+
+       /* Method table. */
+       cursor->c_get = __ram_c_get;
+       cursor->c_put = __ram_c_put;
+       cursor->c_del = __ram_c_del;
+       cursor->c_close = __ram_c_close;
+
+       /* Every cursor is kept on the queue hanging off the DB handle. */
+       DB_THREAD_LOCK(dbp);
+       TAILQ_INSERT_HEAD(&dbp->curs_queue, cursor, links);
+       DB_THREAD_UNLOCK(dbp);
+
+       *dbcp = cursor;
+       return (0);
+}
+
+/*
+ * __ram_get --
+ *     Recno db->get function.
+ *
+ *     Looks up the record whose number is stored in key and copies its data
+ *     into data.  Returns 0 on success, DB_NOTFOUND if the search did not
+ *     land exactly on the record, DB_KEYEMPTY if the record is marked
+ *     deleted, or the error returned by a helper.  The handle obtained with
+ *     GETHANDLE and any search stack are released on every path that
+ *     follows their acquisition.
+ */
+static int
+__ram_get(argdbp, txn, key, data, flags)
+       DB *argdbp;
+       DB_TXN *txn;
+       DBT *key, *data;
+       int flags;
+{
+       BTREE *t;
+       DB *dbp;
+       PAGE *h;
+       db_indx_t indx;
+       db_recno_t recno;
+       int exact, ret, stack;
+
+       stack = 0;
+
+       DEBUG_LWRITE(argdbp, txn, "ram_get", key, NULL, flags);
+
+       /* Check for invalid flags. */
+       if ((ret = __db_getchk(argdbp, key, data, flags)) != 0)
+               return (ret);
+
+       GETHANDLE(argdbp, txn, &dbp, ret);
+       t = dbp->internal;
+
+       /* Check the user's record number and fill in as necessary. */
+       if ((ret = __ram_getno(dbp, key, &recno, 0)) != 0)
+               goto done;
+
+       /* Search the tree for the record. */
+       if ((ret = __bam_rsearch(dbp, &recno, S_FIND, 1, &exact)) != 0)
+               goto done;
+
+       /*
+        * The search succeeded, so there is a stack to give back (this is
+        * the same assumption __ram_add() makes).  Leave through "done" even
+        * for an inexact match so that both the stack and the handle are
+        * released instead of being leaked by an early return.
+        */
+       stack = 1;
+       if (!exact) {
+               ret = DB_NOTFOUND;
+               goto done;
+       }
+
+       h = t->bt_csp->page;
+       indx = t->bt_csp->indx;
+
+       /* If the record has already been deleted, we couldn't have found it. */
+       if (GET_BKEYDATA(h, indx)->deleted) {
+               ret = DB_KEYEMPTY;
+               goto done;
+       }
+
+       /* Return the data item. */
+       ret = __db_ret(dbp,
+           h, indx, data, &t->bt_rdata.data, &t->bt_rdata.ulen);
+       ++t->lstat.bt_get;
+
+done:  /* Discard the stack. */
+       if (stack)
+               __bam_stkrel(dbp);
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __ram_put --
+ *     Recno db->put function.
+ *
+ *     Stores data under the record number held in key; with DB_APPEND the
+ *     record number that was used is copied back into key.  Returns 0 or
+ *     the first error reported by a helper.
+ */
+static int
+__ram_put(argdbp, txn, key, data, flags)
+       DB *argdbp;
+       DB_TXN *txn;
+       DBT *key, *data;
+       int flags;
+{
+       BTREE *t;
+       DB *dbp;
+       db_recno_t recno;
+       int ret;
+
+       DEBUG_LWRITE(argdbp, txn, "ram_put", key, data, flags);
+
+       /* Reject illegal flag/argument combinations before doing work. */
+       ret = __db_putchk(argdbp,
+           key, data, flags, F_ISSET(argdbp, DB_AM_RDONLY), 0);
+       if (ret != 0)
+               return (ret);
+
+       GETHANDLE(argdbp, txn, &dbp, ret);
+
+       /*
+        * An append needs the whole backing source file in the tree first.
+        * Any other put validates the caller's record number, creating
+        * intermediate records where that is allowed.
+        */
+       if (LF_ISSET(DB_APPEND))
+               ret = __ram_snapshot(dbp);
+       else
+               ret = __ram_getno(dbp, key, &recno, 1);
+
+       /*
+        * Add the record and, for an append, hand the record number that
+        * was used back to the caller.
+        */
+       if (ret == 0 &&
+           (ret = __ram_add(dbp, &recno, data, flags, 0)) == 0 &&
+           LF_ISSET(DB_APPEND)) {
+               t = dbp->internal;
+               ret = __db_retcopy(key, &recno, sizeof(recno),
+                   &t->bt_rkey.data, &t->bt_rkey.ulen, dbp->db_malloc);
+       }
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __ram_sync --
+ *     Recno db->sync function.
+ *
+ *     Syncs the underlying btree and, if that succeeds, rewrites the
+ *     backing source file through __ram_writeback().
+ */
+static int
+__ram_sync(argdbp, flags)
+       DB *argdbp;
+       int flags;
+{
+       DB *handle;
+       int ret;
+
+       DEBUG_LWRITE(argdbp, NULL, "ram_sync", NULL, NULL, flags);
+
+       /* The btree goes first; nothing else happens if that fails. */
+       ret = __bam_sync(argdbp, flags);
+       if (ret != 0)
+               return (ret);
+
+       /* Then bring the backing source file up to date. */
+       GETHANDLE(argdbp, NULL, &handle, ret);
+       ret = __ram_writeback(handle);
+       PUTHANDLE(handle);
+
+       return (ret);
+}
+
+/*
+ * __ram_close --
+ *     Recno db->close function.
+ *
+ *     Releases everything tied to the private RECNO structure (mapping,
+ *     descriptor, file name, the structure itself) and then closes the
+ *     underlying btree, whose result is returned.
+ *
+ * PUBLIC: int __ram_close __P((DB *));
+ */
+int
+__ram_close(argdbp)
+       DB *argdbp;
+{
+       BTREE *t;
+       RECNO *rp;
+
+       DEBUG_LWRITE(argdbp, NULL, "ram_close", NULL, NULL, 0);
+
+       t = (BTREE *)argdbp->internal;
+       rp = t->bt_recno;
+
+       /* Drop the mapping of the backing source file, if there is one. */
+       if (rp->re_smap != NULL)
+               (void)__db_munmap(rp->re_smap, rp->re_msize);
+
+       /* Likewise the descriptor it was opened on. */
+       if (rp->re_fd != -1)
+               (void)__db_close(rp->re_fd);
+
+       /* And the saved name of the backing source file. */
+       if (rp->re_source != NULL)
+               FREES(rp->re_source);
+
+       /* Release the structure and unhook it from the btree. */
+       FREE(rp, sizeof(RECNO));
+       t->bt_recno = NULL;
+
+       /* The btree close supplies the return value. */
+       return (__bam_close(argdbp));
+}
+
+/*
+ * __ram_c_close --
+ *     Recno cursor->close function.
+ *
+ *     Unlinks the cursor from its DB handle's cursor queue and frees both
+ *     the private RCURSOR and the DBC.  Always returns 0.
+ */
+static int
+__ram_c_close(dbc)
+       DBC *dbc;
+{
+       DB *owner;
+
+       owner = dbc->dbp;
+
+       DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_close", NULL, NULL, 0);
+
+       /* Take the cursor off the handle's queue. */
+       DB_THREAD_LOCK(owner);
+       TAILQ_REMOVE(&owner->curs_queue, dbc, links);
+       DB_THREAD_UNLOCK(owner);
+
+       /* Private part first, then the cursor itself. */
+       FREE(dbc->internal, sizeof(RCURSOR));
+       FREE(dbc, sizeof(DBC));
+
+       return (0);
+}
+
+/*
+ * __ram_c_del --
+ *     Recno cursor->c_del function.
+ *
+ *     Deletes the record the cursor references by issuing an ordinary
+ *     __ram_delete() on its record number.  Returns DB_KEYEMPTY if the
+ *     cursor's record was already deleted, otherwise the result of the
+ *     flag check or of the delete.
+ */
+static int
+__ram_c_del(dbc, flags)
+       DBC *dbc;
+       int flags;
+{
+       DB *dbp;
+       DBT key;
+       RCURSOR *cp;
+       int ret;
+
+       DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_del", NULL, NULL, flags);
+
+       dbp = dbc->dbp;
+       cp = dbc->internal;
+
+       /* Reject invalid flags, read-only trees and unpositioned cursors. */
+       ret = __db_cdelchk(dbp, flags,
+           F_ISSET(dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB);
+       if (ret != 0)
+               return (ret);
+
+       /* A record can only be deleted through the cursor once. */
+       if (CD_ISSET(dbp, cp))
+               return (DB_KEYEMPTY);
+
+       /* Describe the cursor's record as a key and delete it normally. */
+       memset(&key, 0, sizeof(key));
+       key.size = sizeof(db_recno_t);
+       key.data = &cp->recno;
+       ret = __ram_delete(dbp, dbc->txn, &key, 0);
+
+       /* Remember the deletion in the cursor (renumbering trees only). */
+       if (ret == 0)
+               CD_SET(dbp, cp);
+
+       return (ret);
+}
+
+/*
+ * __ram_c_get --
+ *     Recno cursor->c_get function.
+ *
+ *     Moves the cursor as requested by flags (DB_CURRENT, DB_FIRST, DB_LAST,
+ *     DB_NEXT, DB_PREV, DB_SET, DB_SET_RANGE) and fetches the record through
+ *     __ram_get().  While stepping with DB_NEXT/DB_PREV, records reported
+ *     as DB_KEYEMPTY are skipped.  Returns 0 on success; on any failure
+ *     after the handle has been acquired, the cursor is put back into the
+ *     state it had on entry and the handle is released.
+ */
+static int
+__ram_c_get(dbc, key, data, flags)
+       DBC *dbc;
+       DBT *key, *data;
+       int flags;
+{
+       BTREE *t;
+       DB *dbp;
+       RCURSOR *cp, copy;
+       int ret;
+
+       DEBUG_LREAD(dbc->dbp, dbc->txn, "ram_c_get",
+           flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
+           NULL, flags);
+
+       cp = dbc->internal;
+       dbp = dbc->dbp;
+
+       /* Check for invalid flags. */
+       if ((ret = __db_cgetchk(dbc->dbp,
+           key, data, flags, cp->recno != RECNO_OOB)) != 0)
+               return (ret);
+
+       GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+       t = dbp->internal;
+
+       /* Initialize the cursor for a new retrieval. */
+       copy = *cp;
+
+       /*
+        * From here on every exit goes through "err", which restores the
+        * cursor on failure and gives the handle back.  Returning directly
+        * would leak the handle.
+        */
+retry: /* Update the record number. */
+       switch (flags) {
+       case DB_CURRENT:
+               if (CD_ISSET(dbp, cp)) {
+                       ret = DB_KEYEMPTY;
+                       goto err;
+               }
+               break;
+       case DB_NEXT:
+               if (CD_ISSET(dbp, cp))
+                       break;
+               if (cp->recno != RECNO_OOB) {
+                       ++cp->recno;
+                       break;
+               }
+               /* FALLTHROUGH */
+       case DB_FIRST:
+               flags = DB_NEXT;
+               cp->recno = 1;
+               break;
+       case DB_PREV:
+               if (cp->recno != RECNO_OOB) {
+                       if (cp->recno == 1) {
+                               ret = DB_NOTFOUND;
+                               goto err;
+                       }
+                       --cp->recno;
+                       break;
+               }
+               /* FALLTHROUGH */
+       case DB_LAST:
+               flags = DB_PREV;
+               if (((ret = __ram_snapshot(dbp)) != 0) && ret != DB_NOTFOUND)
+                       goto err;
+               if ((ret = __bam_nrecs(dbp, &cp->recno)) != 0)
+                       goto err;
+               if (cp->recno == 0) {
+                       ret = DB_NOTFOUND;
+                       goto err;
+               }
+               break;
+       case DB_SET:
+       case DB_SET_RANGE:
+               if ((ret = __ram_getno(dbp, key, &cp->recno, 0)) != 0)
+                       goto err;
+               break;
+       }
+
+       /*
+        * Return the key if the user didn't give us one, and then pass it
+        * into __ram_get().
+        */
+       if (flags != DB_SET && flags != DB_SET_RANGE &&
+           (ret = __db_retcopy(key, &cp->recno, sizeof(cp->recno),
+           &t->bt_rkey.data, &t->bt_rkey.ulen, dbp->db_malloc)) != 0)
+               goto err;
+
+       /*
+        * The cursor was reset, so the delete adjustment is no
+        * longer necessary.
+        */
+       CD_CLR(dbp, cp);
+
+       /*
+        * Retrieve the record.
+        *
+        * Skip any keys that don't really exist.
+        */
+       if ((ret = __ram_get(dbp, dbc->txn, key, data, 0)) != 0)
+               if (ret == DB_KEYEMPTY &&
+                   (flags == DB_NEXT || flags == DB_PREV))
+                       goto retry;
+
+err:   if (ret != 0)
+               *cp = copy;
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __ram_c_put --
+ *     Recno cursor->c_put function.
+ *
+ *     Stores data at (DB_CURRENT), after (DB_AFTER) or before (DB_BEFORE)
+ *     the record the cursor references, splitting pages and retrying as
+ *     needed.  For inserts the record counts and the other cursors are
+ *     adjusted and this cursor is moved onto the new record.  Returns 0 on
+ *     success, DB_NOTFOUND if the cursor's record is not in the tree, or
+ *     the first error reported by a helper; on failure the cursor is put
+ *     back into the state it had on entry.
+ */
+static int
+__ram_c_put(dbc, key, data, flags)
+       DBC *dbc;
+       DBT *key, *data;
+       int flags;
+{
+       BTREE *t;
+       RCURSOR *cp, copy;
+       DB *dbp;
+       int exact, ret, t_ret;
+       void *arg;
+
+       DEBUG_LWRITE(dbc->dbp, dbc->txn, "ram_c_put", NULL, data, flags);
+
+       cp = dbc->internal;
+
+       if ((ret = __db_cputchk(dbc->dbp, key, data, flags,
+           F_ISSET(dbc->dbp, DB_AM_RDONLY), cp->recno != RECNO_OOB)) != 0)
+               return (ret);
+
+       GETHANDLE(dbc->dbp, dbc->txn, &dbp, ret);
+       t = dbp->internal;
+
+       /* Initialize the cursor for a new retrieval. */
+       copy = *cp;
+
+       /*
+        * To split, we need a valid key for the page.  Since it's a cursor,
+        * we have to build one.
+        *
+        * The split code discards all short-term locks and stack pages.
+        */
+       if (0) {
+split:         arg = &cp->recno;
+               if ((ret = __bam_split(dbp, arg)) != 0)
+                       goto err;
+       }
+
+       if ((ret = __bam_rsearch(dbp, &cp->recno, S_INSERT, 1, &exact)) != 0)
+               goto err;
+       if (!exact) {
+               /* The search succeeded, so give its stack back first. */
+               (void)__bam_stkrel(dbp);
+               ret = DB_NOTFOUND;
+               goto err;
+       }
+
+       /*
+        * Insert the item, then release the stack whatever the outcome.
+        * The insert's own result is kept separately so that a failure
+        * other than DB_NEEDSPLIT is not overwritten by a successful stack
+        * release and mistaken for success.
+        */
+       ret = __bam_iitem(dbp, &t->bt_csp->page,
+           &t->bt_csp->indx, key, data, flags, 0);
+       t_ret = __bam_stkrel(dbp);
+       if (ret == DB_NEEDSPLIT) {
+               if ((ret = t_ret) != 0)
+                       goto err;
+               goto split;
+       }
+       if (ret == 0)
+               ret = t_ret;
+       if (ret != 0)
+               goto err;
+
+       if (flags != DB_CURRENT) {
+               /* Adjust the counts. */
+               if ((ret = __bam_adjust(dbp, t, 1)) != 0)
+                       goto err;
+
+               switch (flags) {
+               case DB_AFTER:
+                       /* Adjust the cursors. */
+                       __ram_ca(dbp, cp->recno, CA_IAFTER);
+
+                       /* Set this cursor to reference the new record. */
+                       cp->recno = copy.recno + 1;
+                       break;
+               case DB_BEFORE:
+                       /* Adjust the cursors. */
+                       __ram_ca(dbp, cp->recno, CA_IBEFORE);
+
+                       /* Set this cursor to reference the new record. */
+                       cp->recno = copy.recno;
+                       break;
+               }
+
+       }
+
+       /*
+        * The cursor was reset, so the delete adjustment is no
+        * longer necessary.
+        */
+       CD_CLR(dbp, cp);
+
+err:   if (ret != 0)
+               *cp = copy;
+
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __ram_ca --
+ *     Adjust cursors.
+ *
+ *     Walks every cursor queued on dbp and shifts its record number to
+ *     account for a change made at record number recno:
+ *
+ *     CA_DELETE   record recno was removed, so cursors on later records
+ *                 move down by one;
+ *     CA_IAFTER   a record was inserted after recno, so cursors on later
+ *                 records move up by one;
+ *     CA_IBEFORE  a record was inserted before recno, so cursors on recno
+ *                 or later records move up by one.
+ *
+ *     Cursors on records in front of the change keep their number: only
+ *     records behind the change are renumbered.
+ *
+ * PUBLIC: void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
+ */
+void
+__ram_ca(dbp, recno, op)
+       DB *dbp;
+       db_recno_t recno;
+       ca_recno_arg op;
+{
+       DBC *dbc;
+       RCURSOR *cp;
+
+       /*
+        * Adjust the cursors.  See the comment in __bam_ca_delete().
+        */
+       DB_THREAD_LOCK(dbp);
+       for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+           dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+               cp = (RCURSOR *)dbc->internal;
+               switch (op) {
+               case CA_DELETE:
+                       if (cp->recno > recno)
+                               --cp->recno;
+                       break;
+               case CA_IAFTER:
+                       if (cp->recno > recno)
+                               ++cp->recno;
+                       break;
+               case CA_IBEFORE:
+                       if (cp->recno >= recno)
+                               ++cp->recno;
+                       break;
+               }
+       }
+       DB_THREAD_UNLOCK(dbp);
+}
+
+#ifdef DEBUG
+/*
+ * __ram_cprint --
+ *     Display the current recno cursor list.
+ *
+ *     Writes one line per queued cursor to stderr, giving the address of
+ *     its private RCURSOR and its record number.  Always returns 0.
+ */
+int
+__ram_cprint(dbp)
+       DB *dbp;
+{
+       DBC *dbc;
+       RCURSOR *cp;
+
+       DB_THREAD_LOCK(dbp);
+       for (dbc = TAILQ_FIRST(&dbp->curs_queue);
+           dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
+               cp = (RCURSOR *)dbc->internal;
+               /*
+                * Print the address with %p: squeezing a pointer through
+                * u_int loses the upper bits where pointers are wider
+                * than int.
+                */
+               fprintf(stderr,
+                   "%p: recno: %lu\n", (void *)cp, (u_long)cp->recno);
+       }
+       DB_THREAD_UNLOCK(dbp);
+       return (0);
+}
+#endif /* DEBUG */
+
+/*
+ * __ram_getno --
+ *     Check the user's record number, and make sure we've seen it.
+ *
+ *     Reads a db_recno_t from key->data, rejects 0 with EINVAL, stores the
+ *     number through rep when rep is not NULL and, for DB_RECNO trees only,
+ *     calls __ram_update() so the tree contains the record (creating
+ *     records if can_create is set).
+ *
+ * PUBLIC: int __ram_getno __P((DB *, const DBT *, db_recno_t *, int));
+ */
+int
+__ram_getno(dbp, key, rep, can_create)
+       DB *dbp;
+       const DBT *key;
+       db_recno_t *rep;
+       int can_create;
+{
+       db_recno_t recno;
+
+       /* Record numbers start at 1; 0 is never legal. */
+       recno = *(db_recno_t *)key->data;
+       if (recno == 0) {
+               __db_err(dbp->dbenv, "illegal record number of 0");
+               return (EINVAL);
+       }
+
+       /* Pass the number back if the caller asked for it. */
+       if (rep != NULL)
+               *rep = recno;
+
+       /*
+        * Btree can neither create records nor read them in, so there is
+        * nothing more to do for it.  Recno can do both: see if we can
+        * find the record.
+        */
+       if (dbp->type != DB_RECNO)
+               return (0);
+       return (__ram_update(dbp, recno, can_create));
+}
+
+/*
+ * __ram_snapshot --
+ *     Read in any remaining records from the backing input file.
+ *
+ *     Implemented as a request to __ram_update() for record number
+ *     DB_MAX_RECORDS with record creation disabled; its result is returned
+ *     unchanged.
+ *
+ * PUBLIC: int __ram_snapshot __P((DB *));
+ */
+int
+__ram_snapshot(dbp)
+       DB *dbp;
+{
+       int ret;
+
+       ret = __ram_update(dbp, DB_MAX_RECORDS, 0);
+       return (ret);
+}
+
+/*
+ * __ram_update --
+ *     Ensure the tree has records up to and including the specified one.
+ *
+ *     First tries to read missing records from the backing source file
+ *     (through rp->re_irec) unless RECNO_EOF is already set.  Then, if
+ *     can_create is set and the tree is still short, adds records flagged
+ *     BI_DELETED for every number between the current record count and
+ *     recno (exclusive of recno itself).  Returns 0 on success, ENOMEM if
+ *     the fixed-length buffer cannot be grown, or the error returned by a
+ *     helper.
+ */
+static int
+__ram_update(dbp, recno, can_create)
+       DB *dbp;
+       db_recno_t recno;
+       int can_create;
+{
+       BTREE *t;
+       RECNO *rp;
+       db_recno_t nrecs;
+       int ret;
+       void *newbuf;
+
+       t = dbp->internal;
+       rp = t->bt_recno;
+
+       /*
+        * If we can't create records and we've read the entire backing input
+        * file, we're done.
+        */
+       if (!can_create && F_ISSET(rp, RECNO_EOF))
+               return (0);
+
+       /*
+        * If we haven't seen this record yet, try to get it from the original
+        * file.
+        */
+       if ((ret = __bam_nrecs(dbp, &nrecs)) != 0)
+               return (ret);
+       if (!F_ISSET(rp, RECNO_EOF) && recno > nrecs) {
+               if ((ret = rp->re_irec(dbp, recno)) != 0)
+                       return (ret);
+               if ((ret = __bam_nrecs(dbp, &nrecs)) != 0)
+                       return (ret);
+       }
+
+       /*
+        * If we can create records, create empty ones up to the requested
+        * record.
+        */
+       if (!can_create || recno <= nrecs + 1)
+               return (0);
+
+       t->bt_rdata.dlen = 0;
+       t->bt_rdata.doff = 0;
+       t->bt_rdata.flags = 0;
+       if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+               if (t->bt_rdata.ulen < rp->re_len) {
+                       /*
+                        * Grow through a temporary: if realloc fails the old
+                        * buffer is still allocated, so keep the pointer and
+                        * its recorded size rather than losing (and leaking)
+                        * it.
+                        */
+                       newbuf = t->bt_rdata.data == NULL ?
+                           (void *)malloc(rp->re_len) :
+                           (void *)realloc(t->bt_rdata.data, rp->re_len);
+                       if (newbuf == NULL)
+                               return (ENOMEM);
+                       t->bt_rdata.data = newbuf;
+                       t->bt_rdata.ulen = rp->re_len;
+               }
+               /* A fixed-length placeholder is a record full of pad bytes. */
+               t->bt_rdata.size = rp->re_len;
+               memset(t->bt_rdata.data, rp->re_pad, rp->re_len);
+       } else
+               t->bt_rdata.size = 0;
+
+       /* Add the placeholders, one record number at a time. */
+       while (recno > ++nrecs)
+               if ((ret = __ram_add(dbp,
+                   &nrecs, &t->bt_rdata, 0, BI_DELETED)) != 0)
+                       return (ret);
+       return (0);
+}
+
+/*
+ * __ram_source --
+ *     Load information about the backing file.
+ *
+ *     Resolves fname to a path (saved in rp->re_source), opens it (read-only
+ *     if the tree is read-only) and, unless the file is empty, maps it and
+ *     selects the fixed- or variable-length record reader.  An empty file
+ *     just sets RECNO_EOF.  Returns 0 on success.  On failure nothing is
+ *     left behind in rp: the descriptor is closed and reset to -1 and the
+ *     saved name is freed and reset to NULL, so the caller's cleanup cannot
+ *     release either a second time.
+ */
+static int
+__ram_source(dbp, rp, fname)
+       DB *dbp;
+       RECNO *rp;
+       const char *fname;
+{
+       off_t size;
+       int oflags, ret;
+
+       /* Nothing is open yet; -1 is what the cleanup code tests for. */
+       rp->re_fd = -1;
+
+       if ((ret = __db_appname(dbp->dbenv,
+           DB_APP_DATA, NULL, fname, NULL, &rp->re_source)) != 0)
+               return (ret);
+
+       oflags = F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0;
+       if ((ret =
+           __db_fdopen(rp->re_source, oflags, oflags, 0, &rp->re_fd)) != 0) {
+               __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
+               /* Don't trust whatever a failed open left in re_fd. */
+               rp->re_fd = -1;
+               goto err;
+       }
+
+       /*
+        * XXX
+        * We'd like to test to see if the file is too big to mmap.  Since we
+        * don't know what size or type off_t's or size_t's are, or the largest
+        * unsigned integral type is, or what random insanity the local C
+        * compiler will perpetrate, doing the comparison in a portable way is
+        * flatly impossible.  Hope that mmap fails if the file is too large.
+        */
+       if ((ret =
+           __db_stat(dbp->dbenv, rp->re_source, rp->re_fd, &size, NULL)) != 0)
+               goto err;
+       if (size == 0) {
+               /* Nothing to map or read: the source is already exhausted. */
+               F_SET(rp, RECNO_EOF);
+               return (0);
+       }
+
+       if ((ret =
+           __db_mmap(rp->re_fd, (size_t)size, 1, 1, &rp->re_smap)) != 0) {
+               rp->re_smap = NULL;
+               goto err;
+       }
+
+       /* Remember the extent of the map and start reading at its head. */
+       rp->re_msize = size;
+       rp->re_cmap = rp->re_smap;
+       rp->re_emap = (u_int8_t *)rp->re_smap + rp->re_msize;
+       rp->re_irec = F_ISSET(dbp, DB_RE_FIXEDLEN) ?  __ram_fmap : __ram_vmap;
+       return (0);
+
+err:   /* Undo the open and forget the name so nobody releases them again. */
+       if (rp->re_fd != -1) {
+               (void)__db_close(rp->re_fd);
+               rp->re_fd = -1;
+       }
+       FREES(rp->re_source);
+       rp->re_source = NULL;
+       return (ret);
+}
+
+/*
+ * __ram_writeback --
+ *     Rewrite the backing file.
+ *
+ *     Does nothing unless RECNO_MODIFIED is set and a backing source file
+ *     exists.  Otherwise reads any unread records into the tree, drops the
+ *     mapping and descriptor of the source file, reopens it truncated and
+ *     writes every record back out in record-number order: records followed
+ *     by the delimiter for variable-length trees, pad-filled records in
+ *     place of empty ones for fixed-length trees.  Returns 0 on success
+ *     (clearing RECNO_MODIFIED), EIO on a short write, ENOMEM if the pad
+ *     buffer cannot be allocated, or the first error reported by a helper.
+ */
+static int
+__ram_writeback(dbp)
+       DB *dbp;
+{
+       RECNO *rp;
+       DBT key, data;
+       db_recno_t keyno;
+       ssize_t nw;
+       int fd, ret, t_ret;
+       u_int8_t delim, *pad;
+
+       rp = ((BTREE *)dbp->internal)->bt_recno;
+       pad = NULL;
+
+       /* If the file wasn't modified, we're done. */
+       if (!F_ISSET(rp, RECNO_MODIFIED))
+               return (0);
+
+       /* If there's no backing source file, we're done. */
+       if (rp->re_source == NULL) {
+               F_CLR(rp, RECNO_MODIFIED);
+               return (0);
+       }
+
+       /*
+        * Read any remaining records into the tree.
+        *
+        * XXX
+        * This is why we can't support transactions when applications specify
+        * backing (re_source) files.  At this point we have to read in the
+        * rest of the records from the file so that we can write all of the
+        * records back out again, which could modify a page for which we'd
+        * have to log changes and which we don't have locked.  This could be
+        * partially fixed by taking a snapshot of the entire file during the
+        * db_open(), or, since db_open() isn't transaction protected, as part
+        * of the first DB operation.  But, if a checkpoint occurs then, the
+        * part of the log holding the copy of the file could be discarded, and
+        * that would make it impossible to recover in the face of disaster.
+        * This could all probably be fixed, but it would require transaction
+        * protecting the backing source file, i.e. mpool would have to know
+        * about it, and we don't want to go there.
+        */
+       if ((ret = __ram_snapshot(dbp)) != 0 && ret != DB_NOTFOUND)
+               return (ret);
+
+       /*
+        * !!!
+        * Close any underlying mmap region.  This is required for Windows NT
+        * (4.0, Service Pack 2) -- if the file is still mapped, the following
+        * open will fail.
+        */
+       if (rp->re_smap != NULL) {
+               (void)__db_munmap(rp->re_smap, rp->re_msize);
+               rp->re_smap = NULL;
+       }
+
+       /* Get rid of any backing file descriptor, on general principles. */
+       if (rp->re_fd != -1) {
+               (void)__db_close(rp->re_fd);
+               rp->re_fd = -1;
+       }
+
+       /* Open the file, truncating it. */
+       if ((ret = __db_fdopen(rp->re_source,
+           DB_SEQUENTIAL | DB_TRUNCATE,
+           DB_SEQUENTIAL | DB_TRUNCATE, 0, &fd)) != 0) {
+               __db_err(dbp->dbenv, "%s: %s", rp->re_source, strerror(ret));
+               return (ret);
+       }
+
+       /*
+        * We step through the records, writing each one out.  Use the record
+        * number and the dbp->get() function, instead of a cursor, so we find
+        * and write out "deleted" or non-existent records.
+        */
+       memset(&key, 0, sizeof(key));
+       memset(&data, 0, sizeof(data));
+       key.size = sizeof(db_recno_t);
+       key.data = &keyno;
+
+       /*
+        * We'll need the delimiter if we're doing variable-length records,
+        * and the pad character if we're doing fixed-length records.
+        */
+       delim = rp->re_delim;
+       if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+               if ((pad = malloc(rp->re_len)) == NULL) {
+                       ret = ENOMEM;
+                       goto err;
+               }
+               memset(pad, rp->re_pad, rp->re_len);
+       }
+       for (keyno = 1;; ++keyno) {
+               switch (ret = dbp->get(dbp, NULL, &key, &data, 0)) {
+               case 0:
+                       if ((ret =
+                           __db_write(fd, data.data, data.size, &nw)) != 0)
+                               goto err;
+                       if (nw != (ssize_t)data.size) {
+                               ret = EIO;
+                               goto err;
+                       }
+                       break;
+               case DB_KEYEMPTY:
+                       if (F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+                               if ((ret =
+                                   __db_write(fd, pad, rp->re_len, &nw)) != 0)
+                                       goto err;
+                               if (nw != (ssize_t) rp->re_len) {
+                                       ret = EIO;
+                                       goto err;
+                               }
+                       }
+                       break;
+               case DB_NOTFOUND:
+                       /* Ran off the end of the tree: everything is out. */
+                       ret = 0;
+                       goto done;
+               default:
+                       /*
+                        * Any other result is a real failure.  Stop here
+                        * rather than writing a delimiter for a record we
+                        * never read and asking for the next one.
+                        */
+                       goto err;
+               }
+               if (!F_ISSET(dbp, DB_RE_FIXEDLEN)) {
+                       if ((ret = __db_write(fd, &delim, 1, &nw)) != 0)
+                               goto err;
+                       if (nw != 1) {
+                               ret = EIO;
+                               goto err;
+                       }
+               }
+       }
+
+err:
+done:  /* Close the file descriptor; keep the first error we saw. */
+       if ((t_ret = __db_close(fd)) != 0 && ret == 0)
+               ret = t_ret;
+
+       /* Discard the pad buffer used for fixed-length records. */
+       if (pad != NULL)
+               free(pad);
+
+       if (ret == 0)
+               F_CLR(rp, RECNO_MODIFIED);
+       return (ret);
+}
+
+/*
+ * __ram_fmap --
+ *     Get fixed length records from a file.
+ *
+ *     Copies rp->re_len bytes at a time from the mapped source file (from
+ *     rp->re_cmap up to rp->re_emap) into the tree's record buffer, padding
+ *     a short final record with rp->re_pad, and adds the records to the
+ *     tree while the running record count is <= top.  Returns 0 on success,
+ *     DB_NOTFOUND (after setting RECNO_EOF) when the end of the map is
+ *     reached, ENOMEM if the record buffer cannot be grown, or the error
+ *     returned by a helper.
+ */
+static int
+__ram_fmap(dbp, top)
+       DB *dbp;
+       db_recno_t top;
+{
+       BTREE *t;
+       DBT data;
+       RECNO *rp;
+       db_recno_t recno;
+       u_int32_t len;
+       u_int8_t *sp, *ep, *p;
+       int ret;
+       void *newbuf;
+
+       if ((ret = __bam_nrecs(dbp, &recno)) != 0)
+               return (ret);
+
+       t = dbp->internal;
+       rp = t->bt_recno;
+       if (t->bt_rdata.ulen < rp->re_len) {
+               /*
+                * Grow through a temporary: if realloc fails the old buffer
+                * is still allocated, so keep the pointer and its recorded
+                * size rather than losing (and leaking) it.
+                */
+               newbuf = t->bt_rdata.data == NULL ?
+                   (void *)malloc(rp->re_len) :
+                   (void *)realloc(t->bt_rdata.data, rp->re_len);
+               if (newbuf == NULL)
+                       return (ENOMEM);
+               t->bt_rdata.data = newbuf;
+               t->bt_rdata.ulen = rp->re_len;
+       }
+
+       memset(&data, 0, sizeof(data));
+       data.data = t->bt_rdata.data;
+       data.size = rp->re_len;
+
+       sp = (u_int8_t *)rp->re_cmap;
+       ep = (u_int8_t *)rp->re_emap;
+       while (recno <= top) {
+               if (sp >= ep) {
+                       F_SET(rp, RECNO_EOF);
+                       return (DB_NOTFOUND);
+               }
+               /* Copy one record's worth of bytes, or whatever is left. */
+               len = rp->re_len;
+               for (p = t->bt_rdata.data;
+                   sp < ep && len > 0; *p++ = *sp++, --len);
+
+               /*
+                * Another process may have read some portion of the input
+                * file already, in which case we just want to discard the
+                * new record.
+                *
+                * XXX
+                * We should just do a seek, since the records are fixed
+                * length.
+                */
+               if (rp->re_last >= recno) {
+                       /* Pad out a final record that came up short. */
+                       if (len != 0)
+                               memset(p, rp->re_pad, len);
+
+                       ++recno;
+                       if ((ret = __ram_add(dbp, &recno, &data, 0, 0)) != 0)
+                               return (ret);
+               }
+               ++rp->re_last;
+       }
+       rp->re_cmap = sp;
+       return (0);
+}
+
+/*
+ * __ram_vmap --
+ *     Get variable length records from a file.
+ *
+ *     Scans the mapped source file from rp->re_cmap towards rp->re_emap,
+ *     treating each run of bytes up to (not including) the next
+ *     rp->re_delim byte as one record, and adds records to the tree while
+ *     the running record count is <= top.  The record data handed to
+ *     __ram_add() points directly into the map.  Returns 0 on success,
+ *     DB_NOTFOUND (after setting RECNO_EOF) when the end of the map is
+ *     reached, or the error returned by a helper.  rp->re_cmap is only
+ *     advanced on the successful return.
+ */
+static int
+__ram_vmap(dbp, top)
+       DB *dbp;
+       db_recno_t top;
+{
+       BTREE *t;
+       DBT data;
+       RECNO *rp;
+       db_recno_t recno;
+       u_int8_t *sp, *ep;
+       int delim, ret;
+
+       t = dbp->internal;
+       rp = t->bt_recno;
+
+       /* Start counting from the number of records already in the tree. */
+       if ((ret = __bam_nrecs(dbp, &recno)) != 0)
+               return (ret);
+
+       memset(&data, 0, sizeof(data));
+
+       delim = rp->re_delim;
+
+       /* sp is the read position in the map, ep the end of the map. */
+       sp = (u_int8_t *)rp->re_cmap;
+       ep = (u_int8_t *)rp->re_emap;
+       while (recno <= top) {
+               if (sp >= ep) {
+                       F_SET(rp, RECNO_EOF);
+                       return (DB_NOTFOUND);
+               }
+               /* The record starts at sp; advance to delimiter or end. */
+               for (data.data = sp; sp < ep && *sp != delim; ++sp);
+
+               /*
+                * Another process may have read some portion of the input
+                * file already, in which case we just want to discard the
+                * new record.
+                */
+               if (rp->re_last >= recno) {
+                       data.size = sp - (u_int8_t *)data.data;
+                       ++recno;
+                       if ((ret = __ram_add(dbp, &recno, &data, 0, 0)) != 0)
+                               return (ret);
+               }
+               ++rp->re_last;
+               /* Step over the delimiter itself. */
+               ++sp;
+       }
+       rp->re_cmap = sp;
+       return (0);
+}
+
+/*
+ * __ram_add --
+ *     Add records into the tree.
+ *
+ *     Searches for *recnop (appending when DB_APPEND is set in flags) and
+ *     either overwrites the existing record or inserts a new one in front
+ *     of the search position, splitting and retrying when the page is full.
+ *     bi_flags is passed through to __bam_iitem().  Returns 0 on success,
+ *     DB_KEYEXIST if the record exists and DB_NOOVERWRITE was given, or the
+ *     error reported by a helper, including a failure to adjust the record
+ *     counts after an insert.
+ */
+static int
+__ram_add(dbp, recnop, data, flags, bi_flags)
+       DB *dbp;
+       db_recno_t *recnop;
+       DBT *data;
+       int flags, bi_flags;
+{
+       BTREE *t;
+       PAGE *h;
+       db_indx_t indx;
+       int exact, ret, stack;
+
+       t = dbp->internal;
+
+retry: /* Find the slot for insertion. */
+       if ((ret = __bam_rsearch(dbp, recnop,
+           S_INSERT | (LF_ISSET(DB_APPEND) ? S_APPEND : 0), 1, &exact)) != 0)
+               return (ret);
+       h = t->bt_csp->page;
+       indx = t->bt_csp->indx;
+       stack = 1;
+
+       /*
+        * The recno access method doesn't currently support duplicates, so
+        * if an identical key is already in the tree we're either overwriting
+        * it or an error is returned.
+        */
+       if (exact && LF_ISSET(DB_NOOVERWRITE)) {
+               ret = DB_KEYEXIST;
+               goto err;
+       }
+
+       /*
+        * Select the arguments for __bam_iitem() and do the insert.  If the
+        * key is an exact match, we're replacing the data item with a new
+        * data item.  If the key isn't an exact match, we're inserting a new
+        * key/data pair, before the search location.
+        */
+       if ((ret = __bam_iitem(dbp, &h, &indx, NULL,
+           data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) == DB_NEEDSPLIT) {
+               (void)__bam_stkrel(dbp);
+               stack = 0;
+               if ((ret = __bam_split(dbp, recnop)) != 0)
+                       goto err;
+               goto retry;
+       }
+
+       /*
+        * A new record changes the record counts.  Report a failure to
+        * adjust them instead of dropping it, as __ram_c_put() does.
+        */
+       if (!exact && ret == 0)
+               ret = __bam_adjust(dbp, t, 1);
+
+err:   if (stack)
+               __bam_stkrel(dbp);
+       return (ret);
+}
diff --git a/db2/btree/bt_rsearch.c b/db2/btree/bt_rsearch.c
new file mode 100644 (file)
index 0000000..ee26221
--- /dev/null
@@ -0,0 +1,347 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_rsearch.c 10.8 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+/*
+ * __bam_rsearch --
+ *     Search a btree for a record number.
+ *
+ *     dbp     database handle
+ *     recnop  record number to find (1-based); when S_APPEND is set in
+ *             flags it is overwritten with the root's record count + 1
+ *     flags   search type (S_FIND, S_INSERT, S_DELETE, ...), optionally
+ *             OR'd with S_APPEND; see btree.h
+ *     stop    when S_PARENT is set, the page level at which to stop
+ *     exactp  set to 1 if the record number is within the tree, else 0
+ *
+ *     On reaching the wanted page the page, index and lock are entered at
+ *     t->bt_csp and the result of that operation is returned.  Returns
+ *     DB_NOTFOUND when the record does not exist and the operation cannot
+ *     create it, or an error from the lock/page layers.
+ *
+ * PUBLIC: int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *));
+ */
+int
+__bam_rsearch(dbp, recnop, flags, stop, exactp)
+       DB *dbp;
+       db_recno_t *recnop;
+       u_int flags;
+       int stop, *exactp;
+{
+       BINTERNAL *bi;
+       BTREE *t;
+       DB_LOCK lock;
+       PAGE *h;
+       RINTERNAL *ri;
+       db_indx_t indx, top;
+       db_pgno_t pg;
+       db_recno_t recno, total;
+       int isappend, ret, stack;
+
+       t = dbp->internal;
+
+       /*
+        * We test for groups of flags, S_APPEND is the only one that can be
+        * OR'd into the set.  Clear it now so that the tests for equality
+        * will work.
+        */
+       if ((isappend = LF_ISSET(S_APPEND)) != 0)
+               LF_CLR(S_APPEND);
+
+       /*
+        * There are several ways we search a btree.  The flags argument
+        * specifies if we're acquiring read or write locks and if we are
+        * locking pairs of pages.  See btree.h for more details.
+        *
+        * If write-locking pages, we need to know whether or not to acquire a
+        * write lock on a page before getting it.  This depends on how deep it
+        * is in tree, which we don't know until we acquire the root page.  So,
+        * if we need to lock the root page we may have to upgrade it later,
+        * because we won't get the correct lock initially.
+        *
+        * Retrieve the root page.
+        */
+       pg = PGNO_ROOT;
+       if ((ret = __bam_lget(dbp, 0, PGNO_ROOT,
+           flags == S_INSERT || flags == S_DELETE ?
+           DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
+               return (ret);
+       if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
+               (void)__BT_LPUT(dbp, lock);
+               return (ret);
+       }
+       total = RE_NREC(h);
+
+       /*
+        * If appending to the tree, set the record number now -- we have the
+        * root page locked.
+        *
+        * Delete only deletes exact matches, read only returns exact matches.
+        * Note, this is different from __bam_search(), which returns non-exact
+        * matches for read.
+        *
+        * The record may not exist.  We can only return the correct location
+        * for the record immediately after the last record in the tree, so do
+        * a fast check now.
+        */
+       if (isappend) {
+               *exactp = 0;
+               *recnop = recno = total + 1;
+       } else {
+               recno = *recnop;
+               if (recno <= total)
+                       *exactp = 1;
+               else {
+                       *exactp = 0;
+                       if (flags == S_DELETE ||
+                           flags == S_FIND || recno > total + 1) {
+                               (void)memp_fput(dbp->mpf, h, 0);
+                               (void)__BT_LPUT(dbp, lock);
+                               return (DB_NOTFOUND);
+                       }
+               }
+       }
+
+       /* Decide if we're building a stack based on the operation. */
+       BT_STK_CLR(t);
+       stack = flags == S_DELETE || flags == S_INSERT;
+
+       /*
+        * Decide if we need to save this page; if we do, write lock it, and
+        * start to build a stack.
+        */
+       if (LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) {
+               (void)memp_fput(dbp->mpf, h, 0);
+               if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0)
+                       return (ret);
+               if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
+                       (void)__BT_LPUT(dbp, lock);
+                       return (ret);
+               }
+               stack = 1;
+       }
+
+       /* Records in the tree are 0-based, and record numbers are 1-based. */
+       --recno;
+
+       /*
+        * Walk down the tree.  From here on, total is the number of records
+        * that precede the subtree being descended into.
+        */
+       for (total = 0;;) {
+               switch (TYPE(h)) {
+               case P_LBTREE:
+                       /* Btree leaf entries are indexed in steps of P_INDX. */
+                       BT_STK_ENTER(t, h, (recno - total) * P_INDX, lock, ret);
+                       return (ret);
+               case P_IBTREE:
+                       /*
+                        * Stop at the first child whose records reach past
+                        * recno, or at the last child.
+                        */
+                       for (indx = 0, top = NUM_ENT(h);;) {
+                               bi = GET_BINTERNAL(h, indx);
+                               if (++indx == top || total + bi->nrecs > recno)
+                                       break;
+                               total += bi->nrecs;
+                       }
+                       pg = bi->pgno;
+                       break;
+               case P_LRECNO:
+                       BT_STK_ENTER(t, h, recno - total, lock, ret);
+                       return (ret);
+               case P_IRECNO:
+                       /* Same selection as P_IBTREE, on recno entries. */
+                       for (indx = 0, top = NUM_ENT(h);;) {
+                               ri = GET_RINTERNAL(h, indx);
+                               if (++indx == top || total + ri->nrecs > recno)
+                                       break;
+                               total += ri->nrecs;
+                       }
+                       pg = ri->pgno;
+                       break;
+               default:
+                       return (__db_pgfmt(dbp, h->pgno));
+               }
+               /* The loops above leave indx one past the chosen entry. */
+               --indx;
+
+               if (stack) {
+                       /* Return if this is the lowest page wanted. */
+                       if (LF_ISSET(S_PARENT) && stop == h->level) {
+                               BT_STK_ENTER(t, h, indx, lock, ret);
+                               return (ret);
+                       }
+                       BT_STK_PUSH(t, h, indx, lock, ret);
+                       if (ret)
+                               goto err;
+
+                       if ((ret = __bam_lget(dbp, 0, pg,
+                           LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ,
+                           &lock)) != 0)
+                               goto err;
+               } else {
+                       (void)memp_fput(dbp->mpf, h, 0);
+
+                       /*
+                        * Decide if we want to return a pointer to the next
+                        * page in the stack.  If we do, write lock it and
+                        * never unlock it.
+                        */
+                       if (LF_ISSET(S_PARENT) &&
+                           (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1))
+                               stack = 1;
+
+                       if ((ret = __bam_lget(dbp, 1, pg,
+                           LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ,
+                           &lock)) != 0)
+                               goto err;
+               }
+
+               if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0)
+                       goto err;
+       }
+       /* NOTREACHED */
+
+       /*
+        * Only unwind if at least one page was pushed; with an empty stack
+        * there is nothing valid for __bam_stkrel() to release.  This is the
+        * same guard the error path of __bam_search() uses.
+        * NOTE(review): assumes BT_STK_PUSH leaves bt_csp one past the last
+        * saved entry -- confirm against btree.h.
+        */
+err:   if (t->bt_csp > t->bt_sp) {
+               BT_STK_POP(t);
+               __bam_stkrel(dbp);
+       }
+       return (ret);
+}
+
+/*
+ * __bam_adjust --
+ *     Adjust the tree after adding or deleting a record.
+ *
+ *     Walks every entry on the search stack (t->bt_sp through t->bt_csp)
+ *     and adds "adjust" to the record count of the saved index on each
+ *     P_IBTREE/P_IRECNO page.  Returns 0, or the first error from logging
+ *     or from marking a page dirty.
+ *
+ * PUBLIC: int __bam_adjust __P((DB *, BTREE *, int));
+ */
+int
+__bam_adjust(dbp, t, adjust)
+       DB *dbp;
+       BTREE *t;
+       int adjust;
+{
+       EPG *sp;
+       PAGE *pagep;
+       int ret;
+
+       for (sp = t->bt_sp; sp <= t->bt_csp; ++sp) {
+               pagep = sp->page;
+
+               /* Only P_IBTREE and P_IRECNO pages are adjusted. */
+               if (TYPE(pagep) != P_IBTREE && TYPE(pagep) != P_IRECNO)
+                       continue;
+
+               /* Log the change before the page itself is modified. */
+               if (DB_LOGGING(dbp)) {
+                       ret = __bam_cadjust_log(dbp->dbenv->lg_info,
+                           dbp->txn, &LSN(pagep), 0, dbp->log_fileid,
+                           PGNO(pagep), &LSN(pagep), (u_int32_t)sp->indx,
+                           (int32_t)adjust, 1);
+                       if (ret != 0)
+                               return (ret);
+               }
+
+               /* Update the count stored in the entry we descended through. */
+               if (TYPE(pagep) == P_IBTREE)
+                       GET_BINTERNAL(pagep, sp->indx)->nrecs += adjust;
+               else
+                       GET_RINTERNAL(pagep, sp->indx)->nrecs += adjust;
+
+               /* The root page additionally gets RE_NREC_ADJ applied. */
+               if (PGNO(pagep) == PGNO_ROOT)
+                       RE_NREC_ADJ(pagep, adjust);
+
+               ret = memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+               if (ret != 0)
+                       return (ret);
+       }
+       return (0);
+}
+
+/*
+ * __bam_nrecs --
+ *     Return the number of records in the tree.
+ *
+ *     Read-locks and fetches the root page and stores its RE_NREC() value
+ *     in *rep.  Returns 0 on success or the error from acquiring the lock
+ *     or the page; *rep is not written on failure.
+ *
+ * PUBLIC: int __bam_nrecs __P((DB *, db_recno_t *));
+ */
+int
+__bam_nrecs(dbp, rep)
+       DB *dbp;
+       db_recno_t *rep;
+{
+       DB_LOCK lock;
+       PAGE *h;
+       db_pgno_t pgno;
+       int ret;
+
+       pgno = PGNO_ROOT;
+       if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0)
+               return (ret);
+       if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
+               /*
+                * Don't leave the root locked when the page can't be read;
+                * the search routines release the lock the same way.
+                */
+               (void)__BT_LPUT(dbp, lock);
+               return (ret);
+       }
+
+       *rep = RE_NREC(h);
+
+       (void)memp_fput(dbp->mpf, h, 0);
+       (void)__BT_TLPUT(dbp, lock);
+
+       return (0);
+}
+
+/*
+ * __bam_total --
+ *     Return the number of records below a page.
+ *
+ *     For P_LBTREE the result is half the entry count, for P_LRECNO it is
+ *     the entry count, and for P_IBTREE/P_IRECNO it is the sum of the
+ *     nrecs fields of all entries.  Any other page type calls abort().
+ *
+ * PUBLIC: db_recno_t __bam_total __P((PAGE *));
+ */
+db_recno_t
+__bam_total(h)
+       PAGE *h;
+{
+       db_recno_t sum;
+       db_indx_t i, nent;
+
+       switch (TYPE(h)) {
+       case P_LBTREE:
+               sum = NUM_ENT(h) / 2;
+               break;
+       case P_LRECNO:
+               sum = NUM_ENT(h);
+               break;
+       case P_IBTREE:
+               sum = 0;
+               nent = NUM_ENT(h);
+               for (i = 0; i < nent; ++i)
+                       sum += GET_BINTERNAL(h, i)->nrecs;
+               break;
+       case P_IRECNO:
+               sum = 0;
+               nent = NUM_ENT(h);
+               for (i = 0; i < nent; ++i)
+                       sum += GET_RINTERNAL(h, i)->nrecs;
+               break;
+       default:
+               abort();
+       }
+       return (sum);
+}
diff --git a/db2/btree/bt_search.c b/db2/btree/bt_search.c
new file mode 100644 (file)
index 0000000..d5f20d4
--- /dev/null
@@ -0,0 +1,335 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_search.c  10.6 (Sleepycat) 8/22/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+/*
+ * __bam_search --
+ *     Search a btree for a key.
+ *
+ *     dbp     database handle
+ *     key     key to look up
+ *     flags   search type and modifiers (S_INSERT, S_DELETE, S_PARENT,
+ *             S_WRITE, S_EXACT, S_DUPLAST, S_DELNO); see btree.h
+ *     stop    when S_PARENT is set, the page level at which to stop
+ *     recnop  if non-NULL, receives the 1-based record number of the key
+ *             on an exact leaf match
+ *     exactp  set to 1 on an exact leaf match, 0 when the leaf search
+ *             finds no match
+ *
+ *     On reaching the wanted page the page, index and lock are entered at
+ *     t->bt_csp and the result of that operation is returned.  Returns
+ *     DB_NOTFOUND when S_EXACT is set and the key is missing, or when
+ *     S_DELNO is set and only deleted items match; otherwise an error from
+ *     the lock/page layers.
+ *
+ * PUBLIC: int __bam_search __P((DB *,
+ * PUBLIC:     const DBT *, u_int, int, db_recno_t *, int *));
+ */
+int
+__bam_search(dbp, key, flags, stop, recnop, exactp)
+       DB *dbp;
+       const DBT *key;
+       u_int flags;
+       int stop, *exactp;
+       db_recno_t *recnop;
+{
+       BTREE *t;
+       DB_LOCK lock;
+       EPG cur;
+       PAGE *h;
+       db_indx_t base, i, indx, lim;
+       db_pgno_t pg;
+       db_recno_t recno;
+       int cmp, jump, ret, stack;
+
+       t = dbp->internal;
+       recno = 0;
+
+       BT_STK_CLR(t);
+
+       /*
+        * There are several ways we search a btree.  The flags argument
+        * specifies if we're acquiring read or write locks, if we position
+        * to the first or last item in a set of duplicates, if we return
+        * deleted items, and if we are locking pairs of pages.  See btree.h
+        * for more details.  In addition, if we're doing record numbers, we
+        * have to lock the entire tree regardless.
+        *
+        * If write-locking pages, we need to know whether or not to acquire a
+        * write lock on a page before getting it.  This depends on how deep it
+        * is in tree, which we don't know until we acquire the root page.  So,
+        * if we need to lock the root page we may have to upgrade it later,
+        * because we won't get the correct lock initially.
+        *
+        * Retrieve the root page.
+        */
+       pg = PGNO_ROOT;
+       stack = F_ISSET(dbp, DB_BT_RECNUM) &&
+           (flags == S_INSERT || flags == S_DELETE);
+       if ((ret = __bam_lget(dbp,
+           0, pg, stack ? DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
+               return (ret);
+       if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
+               (void)__BT_LPUT(dbp, lock);
+               return (ret);
+       }
+
+       /* Decide if we need to save this page; if we do, write lock it. */
+       if (!stack &&
+           ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
+           (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
+               (void)memp_fput(dbp->mpf, h, 0);
+               if ((ret = __bam_lget(dbp, 1, pg, DB_LOCK_WRITE, &lock)) != 0)
+                       return (ret);
+               if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0) {
+                       (void)__BT_LPUT(dbp, lock);
+                       return (ret);
+               }
+
+               stack = 1;
+       }
+
+       for (;;) {
+               /*
+                * Do a binary search on the current page.  If we're searching
+                * a leaf page, we have to manipulate the indices in groups of
+                * two.  If we're searching an internal page, they're an index
+                * per page item.  If we find an exact match on a leaf page,
+                * we're done.
+                */
+               cur.page = h;
+               jump = TYPE(h) == P_LBTREE ? P_INDX : O_INDX;
+               for (base = 0,
+                   lim = NUM_ENT(h) / (db_indx_t)jump; lim != 0; lim >>= 1) {
+                       cur.indx = indx = base + ((lim >> 1) * jump);
+                       if ((cmp = __bam_cmp(dbp, key, &cur)) == 0) {
+                               if (TYPE(h) == P_LBTREE)
+                                       goto match;
+                               goto next;
+                       }
+                       if (cmp > 0) {
+                               base = indx + jump;
+                               --lim;
+                       }
+               }
+
+               /*
+                * No match found.  Base is the smallest index greater than
+                * key and may be zero or a last + O_INDX index.
+                *
+                * If it's a leaf page, return base as the "found" value.
+                * Delete only deletes exact matches.
+                */
+               if (TYPE(h) == P_LBTREE) {
+                       *exactp = 0;
+
+                       if (LF_ISSET(S_EXACT))
+                               goto notfound;
+
+                       BT_STK_ENTER(t, h, base, lock, ret);
+                       return (ret);
+               }
+
+               /*
+                * If it's not a leaf page, record the internal page (which is
+                * a parent page for the key).  Decrement the base by 1 if it's
+                * non-zero so that if a split later occurs, the inserted page
+                * will be to the right of the saved page.
+                */
+               indx = base > 0 ? base - O_INDX : base;
+
+               /*
+                * If we're trying to calculate the record number, sum up
+                * all the record numbers on this page up to the indx point.
+                * An exact match on an internal page arrives here as well, so
+                * that the records to the left of the matching entry are
+                * included in the count.
+                */
+next:          if (recnop != NULL)
+                       for (i = 0; i < indx; ++i)
+                               recno += GET_BINTERNAL(h, i)->nrecs;
+
+               pg = GET_BINTERNAL(h, indx)->pgno;
+               if (stack) {
+                       /* Return if this is the lowest page wanted. */
+                       if (LF_ISSET(S_PARENT) && stop == h->level) {
+                               BT_STK_ENTER(t, h, indx, lock, ret);
+                               return (ret);
+                       }
+                       BT_STK_PUSH(t, h, indx, lock, ret);
+                       if (ret != 0)
+                               goto err;
+
+                       if ((ret =
+                           __bam_lget(dbp, 0, pg, DB_LOCK_WRITE, &lock)) != 0)
+                               goto err;
+               } else {
+                       (void)memp_fput(dbp->mpf, h, 0);
+
+                       /*
+                        * Decide if we want to return a pointer to the next
+                        * page in the stack.  If we do, write lock it and
+                        * never unlock it.
+                        */
+                       if ((LF_ISSET(S_PARENT) &&
+                           (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
+                           (h->level - 1) == LEAFLEVEL)
+                               stack = 1;
+
+                       if ((ret =
+                           __bam_lget(dbp, 1, pg, stack && LF_ISSET(S_WRITE) ?
+                           DB_LOCK_WRITE : DB_LOCK_READ, &lock)) != 0)
+                               goto err;
+               }
+               if ((ret = __bam_pget(dbp, &h, &pg, 0)) != 0)
+                       goto err;
+       }
+
+       /* NOTREACHED */
+match: *exactp = 1;
+
+       /*
+        * If we're trying to calculate the record number, add in the
+        * offset on this page and correct for the fact that records
+        * in the tree are 0-based.
+        */
+       if (recnop != NULL)
+               *recnop = recno + (indx / P_INDX) + 1;
+
+       /*
+        * If we got here, we know that we have a btree leaf page.
+        *
+        * If there are duplicates, go to the first/last one.  Adjacent
+        * entries are treated as duplicates when their inp[] values are
+        * equal.
+        */
+       if (LF_ISSET(S_DUPLAST))
+               while (indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
+                   h->inp[indx] == h->inp[indx + P_INDX])
+                       indx += P_INDX;
+       else
+               while (indx > 0 &&
+                   h->inp[indx] == h->inp[indx - P_INDX])
+                       indx -= P_INDX;
+
+       /*
+        * Now check if we are allowed to return deleted items; if not,
+        * find the last/first non-deleted item in the set of duplicates.
+        */
+       if (LF_ISSET(S_DELNO)) {
+               if (LF_ISSET(S_DUPLAST))
+                       while (GET_BKEYDATA(h, indx + O_INDX)->deleted &&
+                           indx > 0 &&
+                           h->inp[indx] == h->inp[indx - P_INDX])
+                               indx -= P_INDX;
+               else
+                       while (GET_BKEYDATA(h, indx + O_INDX)->deleted &&
+                           indx < (db_indx_t)(NUM_ENT(h) - P_INDX) &&
+                           h->inp[indx] == h->inp[indx + P_INDX])
+                               indx += P_INDX;
+
+               if (GET_BKEYDATA(h, indx + O_INDX)->deleted)
+                       goto notfound;
+       }
+
+       BT_STK_ENTER(t, h, indx, lock, ret);
+       return (ret);
+
+notfound:
+       /* Release the current page and lock; they are not on the stack. */
+       (void)memp_fput(dbp->mpf, h, 0);
+       (void)__BT_LPUT(dbp, lock);
+       ret = DB_NOTFOUND;
+
+       /* Unwind saved parent pages, if any were pushed. */
+err:   if (t->bt_csp > t->bt_sp) {
+               BT_STK_POP(t);
+               __bam_stkrel(dbp);
+       }
+       return (ret);
+}
+
+/*
+ * __bam_stkrel --
+ *     Release all pages currently held in the stack.
+ *
+ *     Every entry from t->bt_sp through t->bt_csp inclusive has its page
+ *     returned with memp_fput() and its lock released with __BT_TLPUT().
+ *     Failures from either call are ignored; the function always returns 0.
+ *
+ * PUBLIC: int __bam_stkrel __P((DB *));
+ */
+int
+__bam_stkrel(dbp)
+       DB *dbp;
+{
+       BTREE *tree;
+       EPG *cur;
+
+       tree = dbp->internal;
+       cur = tree->bt_sp;
+       while (cur <= tree->bt_csp) {
+               (void)memp_fput(dbp->mpf, cur->page, 0);
+               (void)__BT_TLPUT(dbp, cur->lock);
+               ++cur;
+       }
+       return (0);
+}
+
+/*
+ * __bam_stkgrow --
+ *     Grow the stack.
+ *
+ *     Allocates a zeroed array with twice the current capacity, copies the
+ *     existing entries into it, and frees the old array unless it is the
+ *     built-in t->bt_stack.  On return bt_csp points at the first slot past
+ *     the copied entries.  Returns 0, or ENOMEM if the allocation fails (in
+ *     which case the stack is left unchanged).
+ *
+ * PUBLIC: int __bam_stkgrow __P((BTREE *));
+ */
+int
+__bam_stkgrow(t)
+       BTREE *t;
+{
+       EPG *newstk;
+       size_t nelem, nbytes;
+
+       /* Current capacity, in entries and in bytes. */
+       nelem = t->bt_esp - t->bt_sp;
+       nbytes = nelem * sizeof(EPG);
+
+       newstk = (EPG *)calloc(nelem * 2, sizeof(EPG));
+       if (newstk == NULL)
+               return (ENOMEM);
+
+       memcpy(newstk, t->bt_sp, nbytes);
+       if (t->bt_sp != t->bt_stack)
+               FREE(t->bt_sp, nbytes);
+
+       t->bt_sp = newstk;
+       t->bt_csp = newstk + nelem;
+       t->bt_esp = newstk + nelem * 2;
+       return (0);
+}
diff --git a/db2/btree/bt_split.c b/db2/btree/bt_split.c
new file mode 100644 (file)
index 0000000..89cfcb5
--- /dev/null
@@ -0,0 +1,952 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_split.c   10.12 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+static int __bam_page __P((DB *, EPG *, EPG *));
+static int __bam_pinsert __P((DB *, EPG *, PAGE *, PAGE *));
+static int __bam_psplit __P((DB *, EPG *, PAGE *, PAGE *, int));
+static int __bam_root __P((DB *, EPG *));
+
+/*
+ * __bam_split --
+ *     Split a page.
+ *
+ *     "arg" is handed to __bam_search unchanged when dbp->type is
+ *     DB_BTREE; otherwise it is cast to db_recno_t * and handed to
+ *     __bam_rsearch.  Returns 0 once a split at LEAFLEVEL has succeeded,
+ *     the search routine's error if a search fails, or any value other
+ *     than 0 and DB_NEEDSPLIT produced by __bam_root/__bam_page.
+ *
+ * PUBLIC: int __bam_split __P((DB *, void *));
+ */
+int
+__bam_split(dbp, arg)
+       DB *dbp;
+       void *arg;
+{
+       BTREE *t;
+       enum { UP, DOWN } dir;
+       int exact, level, ret;
+
+       t = dbp->internal;
+
+       /*
+        * The locking protocol we use to avoid deadlock is to acquire locks
+        * by walking down the tree, but we do it as lazily as possible,
+        * locking the root only as a last resort.  We expect all stack pages
+        * to have been discarded before we're called; we discard all
+        * short-term locks.
+        *
+        * When __bam_split is first called, we know that a leaf page was too
+        * full for an insert.  We don't know what leaf page it was, but we
+        * have the key/recno that caused the problem.  We call XX_search to
+        * reacquire the leaf page, but this time get both the leaf page and
+        * its parent, locked.  We then split the leaf page and see if the new
+        * internal key will fit into the parent page.  If it will, we're done.
+        *
+        * If it won't, we discard our current locks and repeat the process,
+        * only this time acquiring the parent page and its parent, locked.
+        * This process repeats until we succeed in the split, splitting the
+        * root page as the final resort.  The entire process then repeats,
+        * as necessary, until we split a leaf page.
+        *
+        * XXX
+        * A traditional method of speeding this up is to maintain a stack of
+        * the pages traversed in the original search.  You can detect if the
+        * stack is correct by storing the page's LSN when it was searched and
+        * comparing that LSN with the current one when it's locked during the
+        * split.  This would be an easy change for this code, but I have no
+        * numbers that indicate it's worthwhile.
+        */
+       for (dir = UP, level = LEAFLEVEL;; dir == UP ? ++level : --level) {
+               /*
+                * Acquire a page and its parent, locked.  The search is
+                * repeated from scratch at the current level on every pass.
+                */
+               if ((ret = (dbp->type == DB_BTREE ?
+                   __bam_search(dbp, arg, S_WRPAIR, level, NULL, &exact) :
+                   __bam_rsearch(dbp,
+                       (db_recno_t *)arg, S_WRPAIR, level, &exact))) != 0)
+                       return (ret);
+
+               /*
+                * Split the page.  The root has its own routine; any other
+                * page is split together with the stack entry just below it,
+                * which __bam_page treats as the parent.
+                */
+               ret = t->bt_csp[0].page->pgno == PGNO_ROOT ?
+                   __bam_root(dbp, &t->bt_csp[0]) :
+                   __bam_page(dbp, &t->bt_csp[-1], &t->bt_csp[0]);
+
+               switch (ret) {
+               case 0:
+                       /* Once we've split the leaf page, we're done. */
+                       if (level == LEAFLEVEL)
+                               return (0);
+
+                       /*
+                        * Switch directions: a split above the leaf worked,
+                        * so the loop now decrements the level.
+                        */
+                       if (dir == UP)
+                               dir = DOWN;
+                       break;
+               case DB_NEEDSPLIT:
+                       /*
+                        * It's possible to fail to split repeatedly, as other
+                        * threads may be modifying the tree, or the page usage
+                        * is sufficiently bad that we don't get enough space
+                        * the first time.  Either way, the loop goes back to
+                        * incrementing the level.
+                        */
+                       if (dir == DOWN)
+                               dir = UP;
+                       break;
+               default:
+                       return (ret);
+               }
+       }
+       /* NOTREACHED */
+}
+
+/*
+ * __bam_root --
+ *     Split the root page of a btree.
+ *
+ *     Two new pages are obtained with __bam_new and the root's entries are
+ *     divided between them by __bam_psplit; the root page itself is then
+ *     rebuilt by __ram_root (DB_RECNO) or __bam_broot (otherwise) so that
+ *     it refers to the two new pages.
+ *
+ *     On every return path the root page (cp->page) is put back with
+ *     memp_fput and cp->lock is passed to __BT_TLPUT, so the caller must
+ *     not release them again.  Returns 0 on success, ENOSPC if the root is
+ *     already at MAXBTREELEVEL, or the error of the failing callee.
+ */
+static int
+__bam_root(dbp, cp)
+       DB *dbp;
+       EPG *cp;
+{
+       BTREE *t;
+       PAGE *lp, *rp;
+       int ret;
+
+       t = dbp->internal;
+       lp = rp = NULL;
+
+       /*
+        * Yeah, right.  Leave through the common error path so the root
+        * page and its lock are given back, as on every other failure.
+        */
+       if (cp->page->level >= MAXBTREELEVEL) {
+               ret = ENOSPC;
+               goto err;
+       }
+
+       /* Create new left and right pages for the split. */
+       if ((ret = __bam_new(dbp, TYPE(cp->page), &lp)) != 0 ||
+           (ret = __bam_new(dbp, TYPE(cp->page), &rp)) != 0)
+               goto err;
+
+       /* Only non-internal pages are linked to each other as siblings. */
+       P_INIT(lp, dbp->pgsize, lp->pgno,
+           PGNO_INVALID, ISINTERNAL(cp->page) ? PGNO_INVALID : rp->pgno,
+           cp->page->level, TYPE(cp->page));
+       P_INIT(rp, dbp->pgsize, rp->pgno,
+           ISINTERNAL(cp->page) ?  PGNO_INVALID : lp->pgno, PGNO_INVALID,
+           cp->page->level, TYPE(cp->page));
+
+       /* Split the page. */
+       if ((ret = __bam_psplit(dbp, cp, lp, rp, 1)) != 0)
+               goto err;
+
+       /* Log the change; the record carries the whole pre-split root. */
+       if (DB_LOGGING(dbp)) {
+               DBT __a;
+               DB_LSN __lsn;
+               memset(&__a, 0, sizeof(__a));
+               __a.data = cp->page;
+               __a.size = dbp->pgsize;
+               ZERO_LSN(__lsn);
+               if ((ret = __bam_split_log(dbp->dbenv->lg_info, dbp->txn,
+                   &LSN(cp->page), 0, dbp->log_fileid, PGNO(lp), &LSN(lp),
+                   PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp), 0, &__lsn,
+                   &__a)) != 0)
+                       goto err;
+               LSN(lp) = LSN(rp) = LSN(cp->page);
+       }
+
+       /* Clean up the new root page. */
+       if ((ret = (dbp->type == DB_RECNO ?
+           __ram_root(dbp, cp->page, lp, rp) :
+           __bam_broot(dbp, cp->page, lp, rp))) != 0)
+               goto err;
+
+       /* Success -- write the real pages back to the store. */
+       (void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
+       (void)__BT_TLPUT(dbp, cp->lock);
+       (void)memp_fput(dbp->mpf, lp, DB_MPOOL_DIRTY);
+       (void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY);
+
+       ++t->lstat.bt_split;
+       ++t->lstat.bt_rootsplit;
+       return (0);
+
+       /* Failure: give back whichever new pages exist, then the root. */
+err:   if (lp != NULL)
+               (void)__bam_free(dbp, lp);
+       if (rp != NULL)
+               (void)__bam_free(dbp, rp);
+       (void)memp_fput(dbp->mpf, cp->page, 0);
+       (void)__BT_TLPUT(dbp, cp->lock);
+       return (ret);
+}
+
+/*
+ * __bam_page --
+ *     Split the non-root page of a btree.
+ */
+static int
+__bam_page(dbp, pp, cp)
+       DB *dbp;
+       EPG *pp, *cp;
+{
+       BTREE *t;
+       DB_LOCK tplock;
+       PAGE *lp, *rp, *tp;
+       int ret;
+
+       t = dbp->internal;
+       lp = rp = tp = NULL;
+       ret = -1;
+
+       /* Create new right page for the split. */
+       if ((ret = __bam_new(dbp, TYPE(cp->page), &rp)) != 0)
+               return (ret);
+       P_INIT(rp, dbp->pgsize, rp->pgno,
+           ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->pgno,
+           ISINTERNAL(cp->page) ? PGNO_INVALID : cp->page->next_pgno,
+           cp->page->level, TYPE(cp->page));
+
+       /* Create new left page for the split. */
+       if ((lp = (PAGE *)malloc(dbp->pgsize)) == NULL) {
+               ret = ENOMEM;
+               goto err;
+       }
+#ifdef DEBUG
+       memset(lp, 0xff, dbp->pgsize);
+#endif
+       P_INIT(lp, dbp->pgsize, cp->page->pgno,
+           ISINTERNAL(cp->page) ?  PGNO_INVALID : cp->page->prev_pgno,
+           ISINTERNAL(cp->page) ?  PGNO_INVALID : rp->pgno,
+           cp->page->level, TYPE(cp->page));
+       ZERO_LSN(lp->lsn);
+
+       /*
+        * Split right.
+        *
+        * Only the indices are sorted on the page, i.e., the key/data pairs
+        * aren't, so it's simpler to copy the data from the split page onto
+        * two new pages instead of copying half the data to the right page
+        * and compacting the left page in place.  Since the left page can't
+        * change, we swap the original and the allocated left page after the
+        * split.
+        */
+       if ((ret = __bam_psplit(dbp, cp, lp, rp, 0)) != 0)
+               goto err;
+
+       /*
+        * Fix up the previous pointer of any leaf page following the split
+        * page.
+        *
+        * !!!
+        * There are interesting deadlock situations here as we write-lock a
+        * page that's not in our direct ancestry.  Consider a cursor walking
+        * through the leaf pages, that has the previous page read-locked and
+        * is waiting on a lock for the page we just split.  It will deadlock
+        * here.  If this is a problem, we can fail in the split; it's not a
+        * problem as the split will succeed after the cursor passes through
+        * the page we're splitting.
+        */
+       if (TYPE(cp->page) == P_LBTREE && rp->next_pgno != PGNO_INVALID) {
+               if ((ret = __bam_lget(dbp,
+                   0, rp->next_pgno, DB_LOCK_WRITE, &tplock)) != 0)
+                       goto err;
+               if ((ret = __bam_pget(dbp, &tp, &rp->next_pgno, 0)) != 0)
+                       goto err;
+       }
+
+       /* Insert the new pages into the parent page. */
+       if ((ret = __bam_pinsert(dbp, pp, lp, rp)) != 0)
+               goto err;
+
+       /* Log the change. */
+       if (DB_LOGGING(dbp)) {
+               DBT __a;
+               DB_LSN __lsn;
+               memset(&__a, 0, sizeof(__a));
+               __a.data = cp->page;
+               __a.size = dbp->pgsize;
+               if (tp == NULL)
+                       ZERO_LSN(__lsn);
+               if ((ret = __bam_split_log(dbp->dbenv->lg_info, dbp->txn,
+                   &cp->page->lsn, 0, dbp->log_fileid, PGNO(cp->page),
+                   &LSN(cp->page), PGNO(rp), &LSN(rp), (u_int32_t)NUM_ENT(lp),
+                   tp == NULL ? 0 : PGNO(tp),
+                   tp == NULL ? &__lsn : &LSN(tp), &__a)) != 0)
+                       goto err;
+
+               LSN(lp) = LSN(rp) = LSN(cp->page);
+               if (tp != NULL)
+                       LSN(tp) = LSN(cp->page);
+       }
+
+       /* Copy the allocated page into place. */
+       memcpy(cp->page, lp, LOFFSET(lp));
+       memcpy((u_int8_t *)cp->page + HOFFSET(lp),
+           (u_int8_t *)lp + HOFFSET(lp), dbp->pgsize - HOFFSET(lp));
+       FREE(lp, dbp->pgsize);
+       lp = NULL;
+
+       /* Finish the next-page link. */
+       if (tp != NULL)
+               tp->prev_pgno = rp->pgno;
+
+       /* Success -- write the real pages back to the store. */
+       (void)memp_fput(dbp->mpf, pp->page, DB_MPOOL_DIRTY);
+       (void)__BT_TLPUT(dbp, pp->lock);
+       (void)memp_fput(dbp->mpf, cp->page, DB_MPOOL_DIRTY);
+       (void)__BT_TLPUT(dbp, cp->lock);
+       (void)memp_fput(dbp->mpf, rp, DB_MPOOL_DIRTY);
+       if (tp != NULL) {
+               (void)memp_fput(dbp->mpf, tp, DB_MPOOL_DIRTY);
+               (void)__BT_TLPUT(dbp, tplock);
+       }
+       return (0);
+
+err:   if (lp != NULL)
+               FREE(lp, dbp->pgsize);
+       if (rp != NULL)
+               (void)__bam_free(dbp, rp);
+       if (tp != NULL) {
+               (void)memp_fput(dbp->mpf, tp, 0);
+               (void)__BT_TLPUT(dbp, tplock);
+       }
+       (void)memp_fput(dbp->mpf, pp->page, 0);
+       (void)__BT_TLPUT(dbp, pp->lock);
+       (void)memp_fput(dbp->mpf, cp->page, 0);
+       (void)__BT_TLPUT(dbp, cp->lock);
+       return (ret);
+}
+
+/*
+ * __bam_broot --
+ *     Fix up the btree root page after it has been split.
+ *
+ *     rootp is re-initialized as a P_IBTREE page one level above lp and
+ *     given two entries: index 0 refers to lp with an empty key, index 1
+ *     refers to rp and carries a copy of rp's first key.  When that key is
+ *     a B_OVERFLOW item, __db_ioff is called on its page number.
+ *
+ *     Returns 0, the error of a failing __db_pitem or __db_ioff call, or
+ *     the result of __db_pgfmt if rp has an unexpected page or item type.
+ *
+ * PUBLIC: int __bam_broot __P((DB *, PAGE *, PAGE *, PAGE *));
+ */
+int
+__bam_broot(dbp, rootp, lp, rp)
+       DB *dbp;
+       PAGE *rootp, *lp, *rp;
+{
+       BINTERNAL bi, *child_bi;
+       BKEYDATA *child_bk;
+       DBT hdr, data;
+       int ret;
+
+       /*
+        * If the root page was a leaf page, change it into an internal page.
+        * We copy the key we split on (but not the key's data, in the case of
+        * a leaf page) to the new root page.
+        */
+       P_INIT(rootp, dbp->pgsize,
+           PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IBTREE);
+
+       /*
+        * The header is copied onto the page in full, but nrecs is assigned
+        * below only when DB_BT_RECNUM is set.  Clear the structure first so
+        * no indeterminate stack bytes reach the page.
+        */
+       memset(&bi, 0, sizeof(bi));
+
+       /*
+        * The btree comparison code guarantees that the left-most key on any
+        * level of the tree is never used, so it doesn't need to be filled in.
+        */
+       bi.len = 0;
+       bi.deleted = 0;
+       bi.type = B_KEYDATA;
+       bi.pgno = lp->pgno;
+       if (F_ISSET(dbp, DB_BT_RECNUM)) {
+               bi.nrecs = __bam_total(lp);
+               RE_NREC_SET(rootp, bi.nrecs);
+       }
+       memset(&hdr, 0, sizeof(hdr));
+       hdr.data = &bi;
+       hdr.size = SSZA(BINTERNAL, data);
+       memset(&data, 0, sizeof(data));
+       data.data = (char *) "";
+       data.size = 0;
+       if ((ret =
+           __db_pitem(dbp, rootp, 0, BINTERNAL_SIZE(0), &hdr, &data)) != 0)
+               return (ret);
+
+       switch (TYPE(rp)) {
+       case P_IBTREE:
+               /* Copy the first key of the child page onto the root page. */
+               child_bi = GET_BINTERNAL(rp, 0);
+
+               bi.len = child_bi->len;
+               bi.deleted = 0;
+               bi.type = child_bi->type;
+               bi.pgno = rp->pgno;
+               if (F_ISSET(dbp, DB_BT_RECNUM)) {
+                       bi.nrecs = __bam_total(rp);
+                       RE_NREC_ADJ(rootp, bi.nrecs);
+               }
+               hdr.data = &bi;
+               hdr.size = SSZA(BINTERNAL, data);
+               data.data = child_bi->data;
+               data.size = child_bi->len;
+               if ((ret = __db_pitem(dbp, rootp, 1,
+                   BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0)
+                       return (ret);
+
+               /* Increment the overflow ref count. */
+               if (child_bi->type == B_OVERFLOW && (ret =
+                   __db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
+                       return (ret);
+               break;
+       case P_LBTREE:
+               /* Copy the first key of the child page onto the root page. */
+               child_bk = GET_BKEYDATA(rp, 0);
+               switch (child_bk->type) {
+               case B_KEYDATA:
+                       bi.len = child_bk->len;
+                       bi.deleted = 0;
+                       bi.type = child_bk->type;
+                       bi.pgno = rp->pgno;
+                       if (F_ISSET(dbp, DB_BT_RECNUM)) {
+                               bi.nrecs = __bam_total(rp);
+                               RE_NREC_ADJ(rootp, bi.nrecs);
+                       }
+                       hdr.data = &bi;
+                       hdr.size = SSZA(BINTERNAL, data);
+                       data.data = child_bk->data;
+                       data.size = child_bk->len;
+                       if ((ret = __db_pitem(dbp, rootp, 1,
+                           BINTERNAL_SIZE(child_bk->len), &hdr, &data)) != 0)
+                               return (ret);
+                       break;
+               case B_DUPLICATE:
+               case B_OVERFLOW:
+                       /* The item itself is copied as the entry's key. */
+                       bi.len = BOVERFLOW_SIZE;
+                       bi.deleted = 0;
+                       bi.type = child_bk->type;
+                       bi.pgno = rp->pgno;
+                       if (F_ISSET(dbp, DB_BT_RECNUM)) {
+                               bi.nrecs = __bam_total(rp);
+                               RE_NREC_ADJ(rootp, bi.nrecs);
+                       }
+                       hdr.data = &bi;
+                       hdr.size = SSZA(BINTERNAL, data);
+                       data.data = child_bk;
+                       data.size = BOVERFLOW_SIZE;
+                       if ((ret = __db_pitem(dbp, rootp, 1,
+                           BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0)
+                               return (ret);
+
+                       /* Increment the overflow ref count. */
+                       if (child_bk->type == B_OVERFLOW && (ret =
+                           __db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0)
+                               return (ret);
+                       break;
+               default:
+                       return (__db_pgfmt(dbp, rp->pgno));
+               }
+               break;
+       default:
+               return (__db_pgfmt(dbp, rp->pgno));
+       }
+       return (0);
+}
+
+/*
+ * __ram_root --
+ *     Rebuild the recno root page after a split: rootp is re-initialized
+ *     as a P_IRECNO page one level above lp and receives two RINTERNAL
+ *     entries, index 0 for lp and index 1 for rp, each carrying the
+ *     child's page number and its __bam_total count.  Returns 0, or the
+ *     error of the failing __db_pitem call.
+ *
+ * PUBLIC: int __ram_root __P((DB *, PAGE *, PAGE *, PAGE *));
+ */
+int
+__ram_root(dbp, rootp, lp, rp)
+       DB *dbp;
+       PAGE *rootp, *lp, *rp;
+{
+       DBT item;
+       RINTERNAL rec;
+       int ret;
+
+       /* Start from an empty internal recno page. */
+       P_INIT(rootp, dbp->pgsize,
+           PGNO_ROOT, PGNO_INVALID, PGNO_INVALID, lp->level + 1, P_IRECNO);
+
+       /* Both inserts share one DBT wrapped around "rec". */
+       memset(&item, 0, sizeof(item));
+       item.size = RINTERNAL_SIZE;
+       item.data = &rec;
+
+       /* Index 0: the left child; RE_NREC_SET is applied with its count. */
+       rec.pgno = lp->pgno;
+       rec.nrecs = __bam_total(lp);
+       ret = __db_pitem(dbp, rootp, 0, RINTERNAL_SIZE, &item, NULL);
+       if (ret != 0)
+               return (ret);
+       RE_NREC_SET(rootp, rec.nrecs);
+
+       /* Index 1: the right child; RE_NREC_ADJ is applied with its count. */
+       rec.pgno = rp->pgno;
+       rec.nrecs = __bam_total(rp);
+       ret = __db_pitem(dbp, rootp, 1, RINTERNAL_SIZE, &item, NULL);
+       if (ret != 0)
+               return (ret);
+       RE_NREC_ADJ(rootp, rec.nrecs);
+
+       return (0);
+}
+
+/*
+ * __bam_pinsert --
+ *     Insert a new key into a parent page, completing the split.
+ *
+ *     The entry is built from the first item on rchild and placed at index
+ *     parent->indx + O_INDX of parent->page.  Returns DB_NEEDSPLIT, before
+ *     anything has been put on the parent page, when P_FREESPACE on the
+ *     parent is smaller than the space computed for the entry; returns the
+ *     result of __db_pgfmt for an unexpected page or item type; otherwise
+ *     0 or the error of the failing __db_pitem, __db_ioff or
+ *     __bam_cadjust_log call.
+ */
+static int
+__bam_pinsert(dbp, parent, lchild, rchild)
+       DB *dbp;
+       EPG *parent;
+       PAGE *lchild, *rchild;
+{
+       BINTERNAL bi, *child_bi;
+       BKEYDATA *child_bk, *tmp_bk;
+       BTREE *t;
+       DBT a, b, hdr, data;
+       PAGE *ppage;
+       RINTERNAL ri;
+       db_indx_t off;
+       db_recno_t nrecs;
+       u_int32_t n, nbytes, nksize;
+       int ret;
+
+       t = dbp->internal;
+       ppage = parent->page;
+
+       /* If handling record numbers, count records split to the right page. */
+       nrecs = dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM) ?
+           __bam_total(rchild) : 0;
+
+       /*
+        * Now we insert the new page's first key into the parent page, which
+        * completes the split.  The parent points to a PAGE and a page index
+        * offset, where the new key goes ONE AFTER the index, because we split
+        * to the right.
+        *
+        * XXX
+        * Some btree algorithms replace the key for the old page as well as
+        * the new page.  We don't, as there's no reason to believe that the
+        * first key on the old page is any better than the key we have, and,
+        * in the case of a key being placed at index 0 causing the split, the
+        * key is unavailable.
+        */
+       off = parent->indx + O_INDX;
+
+       /*
+        * Calculate the space needed on the parent page.
+        *
+        * Prefix trees: space hack used when inserting into BINTERNAL pages.
+        * Retain only what's needed to distinguish between the new entry and
+        * the LAST entry on the page to its left.  If the keys compare equal,
+        * retain the entire key.  We ignore overflow keys, and the entire key
+        * must be retained for the next-to-leftmost key on the leftmost page
+        * of each level, or the search will fail.  Applicable ONLY to internal
+        * pages that have leaf pages as children.  Further reduction of the
+        * key between pairs of internal pages loses too much information.
+        *
+        * Note that the noprefix label below sits inside the else arm of the
+        * prefix-size test, so the explicit gotos and a prefix that saves no
+        * space both end up keeping the whole key.
+        */
+       switch (TYPE(rchild)) {
+       case P_IBTREE:
+               child_bi = GET_BINTERNAL(rchild, 0);
+               nbytes = BINTERNAL_PSIZE(child_bi->len);
+
+               if (P_FREESPACE(ppage) < nbytes)
+                       return (DB_NEEDSPLIT);
+
+               /* Add a new record for the right page. */
+               bi.len = child_bi->len;
+               bi.deleted = 0;
+               bi.type = child_bi->type;
+               bi.pgno = rchild->pgno;
+               bi.nrecs = nrecs;
+               memset(&hdr, 0, sizeof(hdr));
+               hdr.data = &bi;
+               hdr.size = SSZA(BINTERNAL, data);
+               memset(&data, 0, sizeof(data));
+               data.data = child_bi->data;
+               data.size = child_bi->len;
+               if ((ret = __db_pitem(dbp, ppage, off,
+                   BINTERNAL_SIZE(child_bi->len), &hdr, &data)) != 0)
+                       return (ret);
+
+               /* Increment the overflow ref count. */
+               if (child_bi->type == B_OVERFLOW && (ret =
+                   __db_ioff(dbp, ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
+                       return (ret);
+               break;
+       case P_LBTREE:
+               child_bk = GET_BKEYDATA(rchild, 0);
+               switch (child_bk->type) {
+               case B_KEYDATA:
+                       nbytes = BINTERNAL_PSIZE(child_bk->len);
+                       nksize = child_bk->len;
+                       if (t->bt_prefix == NULL)
+                               goto noprefix;
+                       if (ppage->prev_pgno == PGNO_INVALID && off <= 1)
+                               goto noprefix;
+                       tmp_bk = GET_BKEYDATA(lchild, NUM_ENT(lchild) - P_INDX);
+                       if (tmp_bk->type != B_KEYDATA)
+                               goto noprefix;
+                       memset(&a, 0, sizeof(a));
+                       a.size = tmp_bk->len;
+                       a.data = tmp_bk->data;
+                       memset(&b, 0, sizeof(b));
+                       b.size = child_bk->len;
+                       b.data = child_bk->data;
+                       nksize = t->bt_prefix(&a, &b);
+                       if ((n = BINTERNAL_PSIZE(nksize)) < nbytes) {
+                               t->lstat.bt_pfxsaved += nbytes - n;
+                               nbytes = n;
+                       } else
+noprefix:                      nksize = child_bk->len;
+
+                       if (P_FREESPACE(ppage) < nbytes)
+                               return (DB_NEEDSPLIT);
+
+                       bi.len = nksize;
+                       bi.deleted = 0;
+                       bi.type = child_bk->type;
+                       bi.pgno = rchild->pgno;
+                       bi.nrecs = nrecs;
+                       memset(&hdr, 0, sizeof(hdr));
+                       hdr.data = &bi;
+                       hdr.size = SSZA(BINTERNAL, data);
+                       memset(&data, 0, sizeof(data));
+                       data.data = child_bk->data;
+                       data.size = nksize;     /* Possibly a shortened key. */
+                       if ((ret = __db_pitem(dbp, ppage, off,
+                           BINTERNAL_SIZE(nksize), &hdr, &data)) != 0)
+                               return (ret);
+                       break;
+               case B_DUPLICATE:
+               case B_OVERFLOW:
+                       nbytes = BINTERNAL_PSIZE(BOVERFLOW_SIZE);
+
+                       if (P_FREESPACE(ppage) < nbytes)
+                               return (DB_NEEDSPLIT);
+
+                       bi.len = BOVERFLOW_SIZE;
+                       bi.deleted = 0;
+                       bi.type = child_bk->type;
+                       bi.pgno = rchild->pgno;
+                       bi.nrecs = nrecs;
+                       memset(&hdr, 0, sizeof(hdr));
+                       hdr.data = &bi;
+                       hdr.size = SSZA(BINTERNAL, data);
+                       memset(&data, 0, sizeof(data));
+                       data.data = child_bk;   /* The item itself is the key. */
+                       data.size = BOVERFLOW_SIZE;
+                       if ((ret = __db_pitem(dbp, ppage, off,
+                           BINTERNAL_SIZE(BOVERFLOW_SIZE), &hdr, &data)) != 0)
+                               return (ret);
+
+                       /* Increment the overflow ref count. */
+                       if (child_bk->type == B_OVERFLOW && (ret =
+                           __db_ioff(dbp, ((BOVERFLOW *)child_bk)->pgno)) != 0)
+                               return (ret);
+                       break;
+               default:
+                       return (__db_pgfmt(dbp, rchild->pgno));
+               }
+               break;
+       case P_IRECNO:
+       case P_LRECNO:
+               nbytes = RINTERNAL_PSIZE;
+
+               if (P_FREESPACE(ppage) < nbytes)
+                       return (DB_NEEDSPLIT);
+
+               /* Add a new record for the right page. */
+               memset(&hdr, 0, sizeof(hdr));
+               hdr.data = &ri;
+               hdr.size = RINTERNAL_SIZE;
+               ri.pgno = rchild->pgno;
+               ri.nrecs = nrecs;
+               if ((ret = __db_pitem(dbp,
+                   ppage, off, RINTERNAL_SIZE, &hdr, NULL)) != 0)
+                       return (ret);
+               break;
+       default:
+               return (__db_pgfmt(dbp, rchild->pgno));
+       }
+
+       /*
+        * Adjust the parent page's left page record count: the records now
+        * counted on the right page's entry are subtracted from the entry at
+        * parent->indx.
+        */
+       if (dbp->type == DB_RECNO || F_ISSET(dbp, DB_BT_RECNUM)) {
+               /* Log the change. */
+               if (DB_LOGGING(dbp) &&
+                   (ret = __bam_cadjust_log(dbp->dbenv->lg_info,
+                   dbp->txn, &LSN(ppage), 0, dbp->log_fileid,
+                   PGNO(ppage), &LSN(ppage), (u_int32_t)parent->indx,
+                   -(int32_t)nrecs, (int32_t)0)) != 0)
+                       return (ret);
+
+               /* Update the left page count. */
+               if (dbp->type == DB_RECNO)
+                       GET_RINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
+               else
+                       GET_BINTERNAL(ppage, parent->indx)->nrecs -= nrecs;
+       }
+
+       return (0);
+}
+
+/*
+ * __bam_psplit --
+ *     Do the real work of splitting the page.
+ */
+static int
+__bam_psplit(dbp, cp, lp, rp, cleft)
+       DB *dbp;
+       EPG *cp;
+       PAGE *lp, *rp;
+       int cleft;
+{
+       BTREE *t;
+       PAGE *pp;
+       db_indx_t half, nbytes, off, splitp, top;
+       int adjust, cnt, isbigkey, ret;
+
+       t = dbp->internal;
+       pp = cp->page;
+       adjust = TYPE(pp) == P_LBTREE ? P_INDX : O_INDX;
+
+       /*
+        * If we're splitting the first (last) page on a level because we're
+        * inserting (appending) a key to it, it's likely that the data is
+        * sorted.  Moving a single item to the new page is less work and can
+        * push the fill factor higher than normal.  If we're wrong it's not
+        * a big deal, we'll just do the split the right way next time.
+        */
+       off = 0;
+       if (NEXT_PGNO(pp) == PGNO_INVALID &&
+           ((ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page) - 1) ||
+           (!ISINTERNAL(pp) && cp->indx == NUM_ENT(cp->page))))
+               off = NUM_ENT(cp->page) - adjust;
+       else if (PREV_PGNO(pp) == PGNO_INVALID && cp->indx == 0)
+               off = adjust;
+
+       ++t->lstat.bt_split;
+       if (off != 0) {
+               ++t->lstat.bt_fastsplit;
+               goto sort;
+       }
+
+       /*
+        * Split the data to the left and right pages.  Try not to split on
+        * an overflow key.  (Overflow keys on internal pages will slow down
+        * searches.)  Refuse to split in the middle of a set of duplicates.
+        *
+        * First, find the optimum place to split.
+        *
+        * It's possible to try and split past the last record on the page if
+        * there's a very large record at the end of the page.  Make sure this
+        * doesn't happen by bounding the check at the next-to-last entry on
+        * the page.
+        *
+        * Note, we try and split half the data present on the page.  This is
+        * because another process may have already split the page and left
+        * it half empty.  We don't try and skip the split -- we don't know
+        * how much space we're going to need on the page, and we may need up
+        * to half the page for a big item, so there's no easy test to decide
+        * if we need to split or not.  Besides, if two threads are inserting
+        * data into the same place in the database, we're probably going to
+        * need more space soon anyway.
+        */
+       top = NUM_ENT(pp) - adjust;
+       half = (dbp->pgsize - HOFFSET(pp)) / 2;
+       for (nbytes = 0, off = 0; off < top && nbytes < half; ++off)
+               switch (TYPE(pp)) {
+               case P_IBTREE:
+                       if (GET_BINTERNAL(pp, off)->type == B_KEYDATA)
+                               nbytes +=
+                                  BINTERNAL_SIZE(GET_BINTERNAL(pp, off)->len);
+                       else
+                               nbytes += BINTERNAL_SIZE(BOVERFLOW_SIZE);
+                       break;
+               case P_LBTREE:
+                       if (GET_BKEYDATA(pp, off)->type == B_KEYDATA)
+                               nbytes +=
+                                   BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
+                       else
+                               nbytes += BOVERFLOW_SIZE;
+
+                       ++off;
+                       if (GET_BKEYDATA(pp, off)->type == B_KEYDATA)
+                               nbytes +=
+                                   BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
+                       else
+                               nbytes += BOVERFLOW_SIZE;
+                       break;
+               case P_IRECNO:
+                       nbytes += RINTERNAL_SIZE;
+                       break;
+               case P_LRECNO:
+                       nbytes += BKEYDATA_SIZE(GET_BKEYDATA(pp, off)->len);
+                       break;
+               default:
+                       return (__db_pgfmt(dbp, pp->pgno));
+               }
+sort:  splitp = off;
+
+       /*
+        * Splitp is either at or just past the optimum split point.  If
+        * it's a big key, try and find something close by that's not.
+        */
+       if (TYPE(pp) == P_IBTREE)
+               isbigkey = GET_BINTERNAL(pp, off)->type != B_KEYDATA;
+       else if (TYPE(pp) == P_LBTREE)
+               isbigkey = GET_BKEYDATA(pp, off)->type != B_KEYDATA;
+       else
+               isbigkey = 0;
+       if (isbigkey)
+               for (cnt = 1; cnt <= 3; ++cnt) {
+                       off = splitp + cnt * adjust;
+                       if (off < (db_indx_t)NUM_ENT(pp) &&
+                           ((TYPE(pp) == P_IBTREE &&
+                           GET_BINTERNAL(pp, off)->type == B_KEYDATA) ||
+                           GET_BKEYDATA(pp, off)->type == B_KEYDATA)) {
+                               splitp = off;
+                               break;
+                       }
+                       if (splitp <= (db_indx_t)(cnt * adjust))
+                               continue;
+                       off = splitp - cnt * adjust;
+                       if (TYPE(pp) == P_IBTREE ?
+                           GET_BINTERNAL(pp, off)->type == B_KEYDATA :
+                           GET_BKEYDATA(pp, off)->type == B_KEYDATA) {
+                               splitp = off;
+                               break;
+                       }
+               }
+
+       /*
+        * We can't split in the middle of a set of duplicates.  We know that
+        * no duplicate set can take up more than about 25% of the page,
+        * because that's the point where we push it off onto a duplicate
+        * page set.  So, this loop can't be unbounded.
+        */
+       if (F_ISSET(dbp, DB_AM_DUP) && TYPE(pp) == P_LBTREE &&
+           pp->inp[splitp] == pp->inp[splitp - adjust])
+               for (cnt = 1;; ++cnt) {
+                       off = splitp + cnt * adjust;
+                       if (off < NUM_ENT(pp) &&
+                           pp->inp[splitp] != pp->inp[off]) {
+                               splitp = off;
+                               break;
+                       }
+                       if (splitp <= (db_indx_t)(cnt * adjust))
+                               continue;
+                       off = splitp - cnt * adjust;
+                       if (pp->inp[splitp] != pp->inp[off]) {
+                               splitp = off + adjust;
+                               break;
+                       }
+               }
+
+
+       /* We're going to split at splitp. */
+       if ((ret = __bam_copy(dbp, pp, lp, 0, splitp)) != 0)
+               return (ret);
+       if ((ret = __bam_copy(dbp, pp, rp, splitp, NUM_ENT(pp))) != 0)
+               return (ret);
+
+       /* Adjust the cursors. */
+       __bam_ca_split(dbp, pp->pgno, lp->pgno, rp->pgno, splitp, cleft);
+       return (0);
+}
+
+/*
+ * __bam_copy --
+ *     Copy a set of records from one page to another.
+ *
+ *     Entries nxt through stop - 1 of page pp are copied to page cp, where
+ *     they are stored at indices 0, 1, ... and NUM_ENT(cp) is incremented
+ *     once per entry.  Each item's bytes are placed immediately below
+ *     HOFFSET(cp), which is lowered by the item's size.  On a P_LBTREE
+ *     page, a key whose index slot equals that of the preceding key (an
+ *     on-page duplicate) is not copied again; the new slot reuses the
+ *     offset already stored for the preceding key on cp.
+ *
+ *     Returns 0 on success, or the value of __db_pgfmt() if pp has an
+ *     unexpected page type.
+ *
+ *     NOTE(review): destination indices start at 0, so cp is presumably
+ *     empty on entry -- confirm against the callers.
+ *
+ * PUBLIC: int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
+ */
+int
+__bam_copy(dbp, pp, cp, nxt, stop)
+       DB *dbp;
+       PAGE *pp, *cp;
+       u_int32_t nxt, stop;
+{
+       db_indx_t nbytes, off;
+
+       /*
+        * Walk the requested source entries.  Nxt indexes the source page,
+        * off indexes the destination page.
+        */
+       for (off = 0; nxt < stop; ++nxt, ++NUM_ENT(cp), ++off) {
+               /* Work out how many bytes the item occupies on the page. */
+               switch (TYPE(pp)) {
+               case P_IBTREE:
+                       if (GET_BINTERNAL(pp, nxt)->type == B_KEYDATA)
+                               nbytes =
+                                   BINTERNAL_SIZE(GET_BINTERNAL(pp, nxt)->len);
+                       else
+                               nbytes = BINTERNAL_SIZE(BOVERFLOW_SIZE);
+                       break;
+               case P_LBTREE:
+                       /*
+                        * If we're on a key and it's a duplicate, just copy
+                        * the offset.  The continue still runs the loop's
+                        * increment expression, so the entry is counted.
+                        */
+                       if (off != 0 && (nxt % P_INDX) == 0 &&
+                           pp->inp[nxt] == pp->inp[nxt - P_INDX]) {
+                               cp->inp[off] = cp->inp[off - P_INDX];
+                               continue;
+                       }
+                       /* FALLTHROUGH */
+               case P_LRECNO:
+                       if (GET_BKEYDATA(pp, nxt)->type == B_KEYDATA)
+                               nbytes =
+                                   BKEYDATA_SIZE(GET_BKEYDATA(pp, nxt)->len);
+                       else
+                               nbytes = BOVERFLOW_SIZE;
+                       break;
+               case P_IRECNO:
+                       nbytes = RINTERNAL_SIZE;
+                       break;
+               default:
+                       return (__db_pgfmt(dbp, pp->pgno));
+               }
+
+               /* Reserve the space on cp, record its offset, copy the item. */
+               cp->inp[off] = HOFFSET(cp) -= nbytes;
+               memcpy(P_ENTRY(cp, off), P_ENTRY(pp, nxt), nbytes);
+       }
+       return (0);
+}
diff --git a/db2/btree/bt_stat.c b/db2/btree/bt_stat.c
new file mode 100644 (file)
index 0000000..ba71ea6
--- /dev/null
@@ -0,0 +1,257 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)bt_stat.c    10.11 (Sleepycat) 8/19/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+
+static void __bam_add_rstat __P((DB_BTREE_LSTAT *, DB_BTREE_STAT *));
+
+/*
+ * __bam_stat --
+ *     Gather/print the btree statistics
+ *
+ *     argdbp is the handle to report on.  If spp is NULL nothing is
+ *     gathered and 0 is returned.  Otherwise a zeroed DB_BTREE_STAT is
+ *     allocated with db_malloc (or malloc(3) when db_malloc is NULL),
+ *     filled in, and stored through spp.  When DB_RECORDCOUNT is set in
+ *     flags only bt_nrecs is filled in, from the root page.
+ *
+ *     Returns 0 on success.  On failure returns the error from the flag
+ *     check, ENOMEM, or the error from the failing lock/page call, and
+ *     *spp is not written.
+ *
+ * PUBLIC: int __bam_stat __P((DB *, void *, void *(*)(size_t), int));
+ */
+int
+__bam_stat(argdbp, spp, db_malloc, flags)
+       DB *argdbp;
+       void *spp;
+       void *(*db_malloc) __P((size_t));
+       int flags;
+{
+       BTMETA *meta;
+       BTREE *t;
+       DB *dbp;
+       DB_BTREE_STAT *sp;
+       DB_LOCK lock;
+       PAGE *h, *lastp;
+       db_pgno_t lastpgno, pgno;
+       int ret;
+
+       DEBUG_LWRITE(argdbp, NULL, "bam_stat", NULL, NULL, flags);
+
+       /* Check for invalid flags. */
+       if ((ret = __db_statchk(argdbp, flags)) != 0)
+               return (ret);
+
+       if (spp == NULL)
+               return (0);
+
+       GETHANDLE(argdbp, NULL, &dbp, ret);
+       t = dbp->internal;
+
+       /* Allocate and clear the structure. */
+       if ((sp = db_malloc == NULL ?
+           (DB_BTREE_STAT *)malloc(sizeof(*sp)) :
+           (DB_BTREE_STAT *)db_malloc(sizeof(*sp))) == NULL) {
+               ret = ENOMEM;
+               goto err;
+       }
+       memset(sp, 0, sizeof(*sp));
+
+       /* If the app just wants the record count, make it fast. */
+       if (LF_ISSET(DB_RECORDCOUNT)) {
+               pgno = PGNO_ROOT;
+               if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0)
+                       goto err;
+               if ((ret = __bam_pget(dbp, (PAGE **)&h, &pgno, 0)) != 0) {
+                       /* Don't leave the root page locked on failure. */
+                       (void)__BT_LPUT(dbp, lock);
+                       goto err;
+               }
+
+               sp->bt_nrecs = RE_NREC(h);
+
+               (void)memp_fput(dbp->mpf, h, 0);
+               (void)__BT_LPUT(dbp, lock);
+               goto done;
+       }
+
+       /* Get the meta-data page. */
+       pgno = PGNO_METADATA;
+       if ((ret = __bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock)) != 0)
+               goto err;
+       if ((ret = __bam_pget(dbp, (PAGE **)&meta, &pgno, 0)) != 0) {
+               /* Don't leave the meta-data page locked on failure. */
+               (void)__BT_TLPUT(dbp, lock);
+               goto err;
+       }
+
+       /* Translate the metadata flags. */
+       if (F_ISSET(meta, BTM_DUP))
+               sp->bt_flags |= DB_DUP;
+       if (F_ISSET(meta, BTM_FIXEDLEN))
+               sp->bt_flags |= DB_FIXEDLEN;
+       if (F_ISSET(meta, BTM_RECNUM))
+               sp->bt_flags |= DB_RECNUM;
+       if (F_ISSET(meta, BTM_RENUMBER))
+               sp->bt_flags |= DB_RENUMBER;
+
+       /*
+        * Get the maxkey, minkey, re_len and re_pad fields from the
+        * metadata.
+        */
+       sp->bt_minkey = meta->minkey;
+       sp->bt_maxkey = meta->maxkey;
+       sp->bt_re_len = meta->re_len;
+       sp->bt_re_pad = meta->re_pad;
+
+       /* Get the page size from the DB. */
+       sp->bt_pagesize = dbp->pgsize;
+
+       /* Initialize counters with the meta-data page information. */
+       __bam_add_rstat(&meta->stat, sp);
+
+       /*
+        * Add in the local information from this handle.
+        *
+        * !!!
+        * This is a bit odd, but it gets us closer to the truth.
+        */
+       __bam_add_rstat(&t->lstat, sp);
+
+       /* Walk the free list, counting pages. */
+       for (sp->bt_free = 0, pgno = meta->free; pgno != PGNO_INVALID;) {
+               ++sp->bt_free;
+
+               if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
+                       (void)memp_fput(dbp->mpf, meta, 0);
+                       (void)__BT_TLPUT(dbp, lock);
+                       goto err;
+               }
+               pgno = h->next_pgno;
+               (void)memp_fput(dbp->mpf, h, 0);
+       }
+
+       /* Discard the meta-data page. */
+       (void)memp_fput(dbp->mpf, meta, 0);
+       (void)__BT_TLPUT(dbp, lock);
+
+       /* Get the root page. */
+       pgno = PGNO_ROOT;
+       if ((ret = __bam_lget(dbp, 0, PGNO_ROOT, DB_LOCK_READ, &lock)) != 0)
+               goto err;
+       if ((ret = __bam_pget(dbp, &h, &pgno, 0)) != 0) {
+               (void)__BT_LPUT(dbp, lock);
+               goto err;
+       }
+
+       /* Get the levels from the root page. */
+       sp->bt_levels = h->level;
+
+       /*
+        * Determine the last page of the database.  The page is fetched only
+        * to learn its number, so it goes into its own pointer and is put
+        * straight back: h must keep referring to the root page, which is
+        * still pinned and locked and is the first page the walk examines.
+        */
+       if ((ret =
+           memp_fget(dbp->mpf, &lastpgno, DB_MPOOL_LAST, &lastp)) != 0) {
+               (void)memp_fput(dbp->mpf, h, 0);
+               (void)__BT_LPUT(dbp, lock);
+               goto err;
+       }
+       (void)memp_fput(dbp->mpf, lastp, 0);
+
+       /*
+        * Walk the pages from the root page through the last page, counting
+        * things.  On entry to each iteration h is pinned and lock is held
+        * for page pgno; both are released before moving to the next page.
+        */
+       for (;;) {
+               switch (TYPE(h)) {
+               case P_INVALID:
+                       break;
+               case P_IBTREE:
+               case P_IRECNO:
+                       ++sp->bt_int_pg;
+                       sp->bt_int_pgfree += HOFFSET(h) - LOFFSET(h);
+                       break;
+               case P_LBTREE:
+                       ++sp->bt_leaf_pg;
+                       sp->bt_leaf_pgfree += HOFFSET(h) - LOFFSET(h);
+                       sp->bt_nrecs += NUM_ENT(h) / P_INDX;
+                       break;
+               case P_LRECNO:
+                       ++sp->bt_leaf_pg;
+                       sp->bt_leaf_pgfree += HOFFSET(h) - LOFFSET(h);
+                       sp->bt_nrecs += NUM_ENT(h);
+                       break;
+               case P_DUPLICATE:
+                       ++sp->bt_dup_pg;
+                       /* XXX MARGO: sp->bt_dup_pgfree; */
+                       break;
+               case P_OVERFLOW:
+                       ++sp->bt_over_pg;
+                       /* XXX MARGO: sp->bt_over_pgfree; */
+                       break;
+               default:
+                       (void)memp_fput(dbp->mpf, h, 0);
+                       (void)__BT_LPUT(dbp, lock);
+                       /* Leave through err so the handle is released. */
+                       ret = __db_pgfmt(dbp, pgno);
+                       goto err;
+               }
+
+               (void)memp_fput(dbp->mpf, h, 0);
+               (void)__BT_LPUT(dbp, lock);
+
+               if (++pgno > lastpgno)
+                       break;
+               /*
+                * NOTE(review): a lock or page failure here ends the walk
+                * and the partial counts are still returned as success.
+                * That looks like deliberate best-effort behaviour and is
+                * preserved -- confirm.
+                */
+               if (__bam_lget(dbp, 0, pgno, DB_LOCK_READ, &lock))
+                       break;
+               if (memp_fget(dbp->mpf, &pgno, 0, &h) != 0) {
+                       (void)__BT_LPUT(dbp, lock);
+                       break;
+               }
+       }
+
+done:  *(DB_BTREE_STAT **)spp = sp;
+       sp = NULL;                      /* The caller owns it now. */
+       ret = 0;
+
+err:   /*
+        * On failure the statistics structure was never handed back.  Free
+        * it if it came from malloc(3); memory obtained from the caller's
+        * db_malloc cannot be released here because the matching
+        * deallocation routine is not known.
+        */
+       if (sp != NULL && db_malloc == NULL)
+               free(sp);
+       PUTHANDLE(dbp);
+       return (ret);
+}
+
+/*
+ * __bam_add_mstat --
+ *     Add the local statistics to the meta-data page statistics.
+ *
+ * PUBLIC: void __bam_add_mstat __P((DB_BTREE_LSTAT *, DB_BTREE_LSTAT *));
+ */
+void
+__bam_add_mstat(from, to)
+       DB_BTREE_LSTAT *from;
+       DB_BTREE_LSTAT *to;
+{
+       to->bt_freed += from->bt_freed;
+       to->bt_pfxsaved += from->bt_pfxsaved;
+       to->bt_split += from->bt_split;
+       to->bt_rootsplit += from->bt_rootsplit;
+       to->bt_fastsplit += from->bt_fastsplit;
+       to->bt_added += from->bt_added;
+       to->bt_deleted += from->bt_deleted;
+       to->bt_get += from->bt_get;
+       to->bt_cache_hit += from->bt_cache_hit;
+       to->bt_cache_miss += from->bt_cache_miss;
+}
+
+/*
+ * __bam_add_rstat --
+ *     Add the local statistics to the returned statistics.
+ */
+static void
+__bam_add_rstat(from, to)
+       DB_BTREE_LSTAT *from;
+       DB_BTREE_STAT *to;
+{
+       to->bt_freed += from->bt_freed;
+       to->bt_pfxsaved += from->bt_pfxsaved;
+       to->bt_split += from->bt_split;
+       to->bt_rootsplit += from->bt_rootsplit;
+       to->bt_fastsplit += from->bt_fastsplit;
+       to->bt_added += from->bt_added;
+       to->bt_deleted += from->bt_deleted;
+       to->bt_get += from->bt_get;
+       to->bt_cache_hit += from->bt_cache_hit;
+       to->bt_cache_miss += from->bt_cache_miss;
+}
diff --git a/db2/btree/btree.src b/db2/btree/btree.src
new file mode 100644 (file)
index 0000000..50cc0dd
--- /dev/null
@@ -0,0 +1,137 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)btree.src    10.3 (Sleepycat) 8/17/97";
+#endif /* not lint */
+
+PREFIX bam
+
+/*
+ * BTREE-pg_alloc: used to record allocating a new page.
+ *
+ * fileid:     NOTE(review): not described originally; presumably
+ *             identifies the database file the record applies to --
+ *             confirm.
+ * meta_lsn:   the meta-data page's original lsn.
+ * page_lsn:   the allocated page's original lsn.
+ * pgno:       the page allocated.
+ * ptype:      NOTE(review): not described originally; presumably the
+ *             type of the allocated page -- confirm against the callers.
+ * next:       the next page on the free list.
+ */
+BEGIN pg_alloc
+ARG    fileid          u_int32_t       lu
+POINTER        meta_lsn        DB_LSN *        lu
+POINTER        page_lsn        DB_LSN *        lu
+ARG    pgno            db_pgno_t       lu
+ARG    ptype           u_int32_t       lu
+ARG    next            db_pgno_t       lu
+END
+
+/*
+ * BTREE-pg_free: used to record freeing a page.
+ *
+ * fileid:     NOTE(review): not described originally; presumably
+ *             identifies the database file the record applies to --
+ *             confirm.
+ * pgno:       the page being freed.
+ * meta_lsn:   the meta-data page's original lsn.
+ * header:     the header from the freed page.
+ * next:       the previous next pointer on the metadata page.
+ */
+BEGIN pg_free
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+POINTER        meta_lsn        DB_LSN *        lu
+DBT    header          DBT             s
+ARG    next            db_pgno_t       lu
+END
+
+/*
+ * BTREE-split: used to log a page split.
+ *
+ * fileid:     NOTE(review): not described originally; presumably
+ *             identifies the database file the record applies to --
+ *             confirm.
+ * left:       the page number for the low-order contents.
+ * llsn:       the left page's original LSN.
+ * right:      the page number for the high-order contents.
+ * rlsn:       the right page's original LSN.
+ * indx:       the number of entries that went to the left page.
+ * npgno:      the next page number.
+ * nlsn:       the next page's original LSN (or 0 if no next page).
+ * pg:         the split page's contents before the split.
+ */
+BEGIN split
+ARG    fileid          u_int32_t       lu
+ARG    left            db_pgno_t       lu
+POINTER        llsn            DB_LSN *        lu
+ARG    right           db_pgno_t       lu
+POINTER        rlsn            DB_LSN *        lu
+ARG    indx            u_int32_t       lu
+ARG    npgno           db_pgno_t       lu
+POINTER        nlsn            DB_LSN *        lu
+DBT    pg              DBT             s
+END
+
+/*
+ * BTREE-rsplit: used to log a reverse-split.
+ *
+ * fileid:     NOTE(review): not described originally; presumably
+ *             identifies the database file the record applies to --
+ *             confirm.
+ * pgno:       the page number of the page copied over the root.
+ * pgdbt:      the page being copied on the root page.
+ * rootent:    last entry on the root page.
+ * rootlsn:    the root page's original lsn.
+ */
+BEGIN rsplit
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+DBT    pgdbt           DBT             s
+DBT    rootent         DBT             s
+POINTER rootlsn                DB_LSN *        lu
+END
+
+/*
+ * BTREE-adj: used to log the adjustment of an index.
+ *
+ * fileid:     NOTE(review): not described originally; presumably
+ *             identifies the database file the record applies to --
+ *             confirm.
+ * pgno:       the page modified.
+ * lsn:                the page's original lsn.
+ * indx:       the index adjusted.
+ * indx_copy:  the index to copy if inserting.
+ * is_insert:  0 if a delete, 1 if an insert.
+ */
+BEGIN adj
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+POINTER        lsn             DB_LSN *        lu
+ARG    indx            u_int32_t       lu
+ARG    indx_copy       u_int32_t       lu
+ARG    is_insert       u_int32_t       lu
+END
+
+/*
+ * BTREE-cadjust: used to adjust the count change in an internal page.
+ *
+ * fileid:     NOTE(review): not described originally; presumably
+ *             identifies the database file the record applies to --
+ *             confirm.
+ * pgno:       the page modified.
+ * lsn:                the page's original lsn.
+ * indx:       the index to be adjusted.
+ * adjust:     the signed adjustment.
+ * total:      if the total tree entries count should be adjusted.
+ */
+BEGIN cadjust
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+POINTER        lsn             DB_LSN *        lu
+ARG    indx            u_int32_t       lu
+ARG    adjust          int32_t         ld
+ARG    total           int32_t         ld
+END
+
+/*
+ * BTREE-cdel: used to log the intent-to-delete of a cursor record.
+ *
+ * fileid:     NOTE(review): not described originally; presumably
+ *             identifies the database file the record applies to --
+ *             confirm.
+ * pgno:       the page modified.
+ * lsn:                the page's original lsn.
+ * indx:       the index to be deleted.
+ */
+BEGIN cdel
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+POINTER        lsn             DB_LSN *        lu
+ARG    indx            u_int32_t       lu
+END
diff --git a/db2/btree/btree_auto.c b/db2/btree/btree_auto.c
new file mode 100644 (file)
index 0000000..e6b7225
--- /dev/null
@@ -0,0 +1,1279 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#include "config.h"
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "btree.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * PUBLIC: int __bam_pg_alloc_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, DB_LSN *, DB_LSN *, db_pgno_t,
+ * PUBLIC:     u_int32_t, db_pgno_t));
+ */
+int __bam_pg_alloc_log(logp, txnid, ret_lsnp, flags,
+       fileid, meta_lsn, page_lsn, pgno, ptype, next)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       DB_LSN * meta_lsn;
+       DB_LSN * page_lsn;
+       db_pgno_t pgno;
+       u_int32_t ptype;
+       db_pgno_t next;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_bam_pg_alloc;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(*meta_lsn)
+           + sizeof(*page_lsn)
+           + sizeof(pgno)
+           + sizeof(ptype)
+           + sizeof(next);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       if (meta_lsn != NULL)
+               memcpy(bp, meta_lsn, sizeof(*meta_lsn));
+       else
+               memset(bp, 0, sizeof(*meta_lsn));
+       bp += sizeof(*meta_lsn);
+       if (page_lsn != NULL)
+               memcpy(bp, page_lsn, sizeof(*page_lsn));
+       else
+               memset(bp, 0, sizeof(*page_lsn));
+       bp += sizeof(*page_lsn);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       memcpy(bp, &ptype, sizeof(ptype));
+       bp += sizeof(ptype);
+       memcpy(bp, &next, sizeof(next));
+       bp += sizeof(next);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __bam_pg_alloc_print --
+ *     Decode the bam_pg_alloc log record in dbtp->data and print its
+ *     fields on standard output.  Lsnp is the record's own LSN, printed
+ *     as the leading [file][offset] pair.  The notused arguments are
+ *     ignored.
+ *
+ *     Returns 0 on success or the error from __bam_pg_alloc_read().
+ *
+ * PUBLIC: int __bam_pg_alloc_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_pg_alloc_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __bam_pg_alloc_args *argp;
+       int ret;
+
+       /* Assign to the unused parameters so they count as referenced. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if ((ret = __bam_pg_alloc_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]bam_pg_alloc: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tmeta_lsn: [%lu][%lu]\n",
+           (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
+       printf("\tpage_lsn: [%lu][%lu]\n",
+           (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tptype: %lu\n", (u_long)argp->ptype);
+       printf("\tnext: %lu\n", (u_long)argp->next);
+       printf("\n");
+       /* The decoded record was allocated by the read routine. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __bam_pg_alloc_read --
+ *     Unpack the bam_pg_alloc log record at recbuf into a newly allocated
+ *     __bam_pg_alloc_args, returned through argpp.  The structure and the
+ *     DB_TXN its txnid member points at share a single allocation, so one
+ *     free() of the returned pointer releases both.
+ *
+ *     Returns 0 on success, ENOMEM if the allocation fails.
+ *
+ * PUBLIC: int __bam_pg_alloc_read __P((void *, __bam_pg_alloc_args **));
+ */
+int
+__bam_pg_alloc_read(recbuf, argpp)
+       void *recbuf;
+       __bam_pg_alloc_args **argpp;
+{
+       __bam_pg_alloc_args *rec;
+       u_int8_t *src;
+
+       /* The DB_TXN lives directly behind the argument structure. */
+       rec = (__bam_pg_alloc_args *)
+           malloc(sizeof(__bam_pg_alloc_args) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Record header: type, transaction id, previous LSN. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Record body, in the order the fields were logged. */
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->meta_lsn, src, sizeof(rec->meta_lsn));
+       src += sizeof(rec->meta_lsn);
+       memcpy(&rec->page_lsn, src, sizeof(rec->page_lsn));
+       src += sizeof(rec->page_lsn);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->ptype, src, sizeof(rec->ptype));
+       src += sizeof(rec->ptype);
+       memcpy(&rec->next, src, sizeof(rec->next));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __bam_pg_free_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, DBT *,
+ * PUBLIC:     db_pgno_t));
+ */
+int __bam_pg_free_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, meta_lsn, header, next)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DB_LSN * meta_lsn;
+       DBT *header;
+       db_pgno_t next;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_bam_pg_free;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(*meta_lsn)
+           + sizeof(u_int32_t) + (header == NULL ? 0 : header->size)
+           + sizeof(next);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (meta_lsn != NULL)
+               memcpy(bp, meta_lsn, sizeof(*meta_lsn));
+       else
+               memset(bp, 0, sizeof(*meta_lsn));
+       bp += sizeof(*meta_lsn);
+       if (header == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &header->size, sizeof(header->size));
+               bp += sizeof(header->size);
+               memcpy(bp, header->data, header->size);
+               bp += header->size;
+       }
+       memcpy(bp, &next, sizeof(next));
+       bp += sizeof(next);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __bam_pg_free_print --
+ *     Decode the bam_pg_free log record in dbtp->data and print its
+ *     fields on standard output.  Lsnp is the record's own LSN, printed
+ *     as the leading [file][offset] pair.  The notused arguments are
+ *     ignored.
+ *
+ *     The header bytes are printed literally when printable (or a
+ *     newline) and in hex otherwise.  Each byte is read as an unsigned
+ *     value: isprint() is only defined for values representable as an
+ *     unsigned char (or EOF), and an unsigned read keeps bytes of 0x80
+ *     and above from being printed sign-extended.
+ *
+ *     Returns 0 on success or the error from __bam_pg_free_read().
+ *
+ * PUBLIC: int __bam_pg_free_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_pg_free_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __bam_pg_free_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       /* Assign to the unused parameters so they count as referenced. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if ((ret = __bam_pg_free_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]bam_pg_free: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tmeta_lsn: [%lu][%lu]\n",
+           (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset);
+       printf("\theader: ");
+       for (i = 0; i < argp->header.size; i++) {
+               /* Read the byte unsigned; see the function comment. */
+               c = ((u_int8_t *)argp->header.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tnext: %lu\n", (u_long)argp->next);
+       printf("\n");
+       /* The decoded record was allocated by the read routine. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __bam_pg_free_read --
+ *     Unpack the bam_pg_free log record at recbuf into a newly allocated
+ *     __bam_pg_free_args, returned through argpp.  The structure and the
+ *     DB_TXN its txnid member points at share a single allocation, so one
+ *     free() of the returned pointer releases both.
+ *
+ *     The header DBT is not copied: its data member points into recbuf,
+ *     so recbuf must stay valid for as long as the header is used.  The
+ *     remaining DBT members are zeroed.
+ *
+ *     Returns 0 on success, ENOMEM if the allocation fails.
+ *
+ * PUBLIC: int __bam_pg_free_read __P((void *, __bam_pg_free_args **));
+ */
+int
+__bam_pg_free_read(recbuf, argpp)
+       void *recbuf;
+       __bam_pg_free_args **argpp;
+{
+       __bam_pg_free_args *argp;
+       u_int8_t *bp;
+
+       argp = (__bam_pg_free_args *)malloc(sizeof(__bam_pg_free_args) +
+           sizeof(DB_TXN));
+       if (argp == NULL)
+               return (ENOMEM);
+       /* The DB_TXN lives directly behind the argument structure. */
+       argp->txnid = (DB_TXN *)&argp[1];
+       bp = recbuf;
+       memcpy(&argp->type, bp, sizeof(argp->type));
+       bp += sizeof(argp->type);
+       memcpy(&argp->txnid->txnid,  bp, sizeof(argp->txnid->txnid));
+       bp += sizeof(argp->txnid->txnid);
+       memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(&argp->fileid, bp, sizeof(argp->fileid));
+       bp += sizeof(argp->fileid);
+       memcpy(&argp->pgno, bp, sizeof(argp->pgno));
+       bp += sizeof(argp->pgno);
+       memcpy(&argp->meta_lsn, bp,  sizeof(argp->meta_lsn));
+       bp += sizeof(argp->meta_lsn);
+       /*
+        * Clear the whole DBT first so the members that are not stored in
+        * the log record do not hold indeterminate values.
+        */
+       memset(&argp->header, 0, sizeof(argp->header));
+       memcpy(&argp->header.size, bp, sizeof(u_int32_t));
+       bp += sizeof(u_int32_t);
+       argp->header.data = bp;
+       bp += argp->header.size;
+       memcpy(&argp->next, bp, sizeof(argp->next));
+       bp += sizeof(argp->next);
+       *argpp = argp;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __bam_split_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
+ * PUBLIC:     DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *,
+ * PUBLIC:     DBT *));
+ */
+int __bam_split_log(logp, txnid, ret_lsnp, flags,
+       fileid, left, llsn, right, rlsn, indx,
+       npgno, nlsn, pg)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t left;
+       DB_LSN * llsn;
+       db_pgno_t right;
+       DB_LSN * rlsn;
+       u_int32_t indx;
+       db_pgno_t npgno;
+       DB_LSN * nlsn;
+       DBT *pg;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_bam_split;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(left)
+           + sizeof(*llsn)
+           + sizeof(right)
+           + sizeof(*rlsn)
+           + sizeof(indx)
+           + sizeof(npgno)
+           + sizeof(*nlsn)
+           + sizeof(u_int32_t) + (pg == NULL ? 0 : pg->size);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &left, sizeof(left));
+       bp += sizeof(left);
+       if (llsn != NULL)
+               memcpy(bp, llsn, sizeof(*llsn));
+       else
+               memset(bp, 0, sizeof(*llsn));
+       bp += sizeof(*llsn);
+       memcpy(bp, &right, sizeof(right));
+       bp += sizeof(right);
+       if (rlsn != NULL)
+               memcpy(bp, rlsn, sizeof(*rlsn));
+       else
+               memset(bp, 0, sizeof(*rlsn));
+       bp += sizeof(*rlsn);
+       memcpy(bp, &indx, sizeof(indx));
+       bp += sizeof(indx);
+       memcpy(bp, &npgno, sizeof(npgno));
+       bp += sizeof(npgno);
+       if (nlsn != NULL)
+               memcpy(bp, nlsn, sizeof(*nlsn));
+       else
+               memset(bp, 0, sizeof(*nlsn));
+       bp += sizeof(*nlsn);
+       if (pg == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &pg->size, sizeof(pg->size));
+               bp += sizeof(pg->size);
+               memcpy(bp, pg->data, pg->size);
+               bp += pg->size;
+       }
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __bam_split_print --
+ *     Decode the bam_split log record held in dbtp and print each field to
+ *     stdout, prefixed by the LSN passed in lsnp.  Returns 0, or the error
+ *     from __bam_split_read when the record cannot be decoded.
+ *
+ * PUBLIC: int __bam_split_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_split_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __bam_split_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       /* The notused arguments are only ever assigned to, never read. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __bam_split_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]bam_split: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tleft: %lu\n", (u_long)argp->left);
+       printf("\tllsn: [%lu][%lu]\n",
+           (u_long)argp->llsn.file, (u_long)argp->llsn.offset);
+       printf("\tright: %lu\n", (u_long)argp->right);
+       printf("\trlsn: [%lu][%lu]\n",
+           (u_long)argp->rlsn.file, (u_long)argp->rlsn.offset);
+       printf("\tindx: %lu\n", (u_long)argp->indx);
+       printf("\tnpgno: %lu\n", (u_long)argp->npgno);
+       printf("\tnlsn: [%lu][%lu]\n",
+           (u_long)argp->nlsn.file, (u_long)argp->nlsn.offset);
+       printf("\tpg: ");
+       for (i = 0; i < argp->pg.size; i++) {
+               /*
+                * Fetch the byte as unsigned: isprint() is only defined for
+                * EOF and unsigned char values, and a plain (possibly signed)
+                * char would also make the hex dump print a sign-extended
+                * int for bytes 0x80-0xff.
+                */
+               c = ((u_int8_t *)argp->pg.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\n");
+       /* argp is the single allocation made by __bam_split_read. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __bam_split_read --
+ *     Unpack the flat bam_split log record at recbuf into a newly malloc'd
+ *     __bam_split_args and return it through argpp.  Returns 0 on success
+ *     or ENOMEM if the allocation fails (argpp is then left untouched).
+ *     The pg DBT points into recbuf; the page bytes are not copied.
+ *
+ * PUBLIC: int __bam_split_read __P((void *, __bam_split_args **));
+ */
+int
+__bam_split_read(recbuf, argpp)
+       void *recbuf;
+       __bam_split_args **argpp;
+{
+       __bam_split_args *rec;
+       u_int8_t *src;
+
+       /* One allocation: the argument struct followed by its DB_TXN. */
+       rec = (__bam_split_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Common header: record type, transaction id, previous LSN. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Record-specific fields, in the order they were logged. */
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->left, src, sizeof(rec->left));
+       src += sizeof(rec->left);
+       memcpy(&rec->llsn, src, sizeof(rec->llsn));
+       src += sizeof(rec->llsn);
+       memcpy(&rec->right, src, sizeof(rec->right));
+       src += sizeof(rec->right);
+       memcpy(&rec->rlsn, src, sizeof(rec->rlsn));
+       src += sizeof(rec->rlsn);
+       memcpy(&rec->indx, src, sizeof(rec->indx));
+       src += sizeof(rec->indx);
+       memcpy(&rec->npgno, src, sizeof(rec->npgno));
+       src += sizeof(rec->npgno);
+       memcpy(&rec->nlsn, src, sizeof(rec->nlsn));
+       src += sizeof(rec->nlsn);
+
+       /* Length-prefixed page image; data aliases the record buffer. */
+       memcpy(&rec->pg.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->pg.data = src;
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * __bam_rsplit_log --
+ *     Marshal a bam_rsplit record (record type, transaction id, previous
+ *     LSN, fileid, pgno, pgdbt, rootent, rootlsn) into one malloc'd buffer
+ *     and pass it to log_put.  A NULL txnid is logged as transaction 0 with
+ *     a zero previous LSN; a NULL pgdbt or rootent is logged as a zero
+ *     length; a NULL rootlsn is logged as zero bytes.  Returns ENOMEM if
+ *     the buffer cannot be allocated, otherwise the log_put return value.
+ *     The transaction's last_lsn is advanced only when log_put succeeds.
+ *
+ * PUBLIC: int __bam_rsplit_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DBT *, DBT *,
+ * PUBLIC:     DB_LSN *));
+ */
+int __bam_rsplit_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, pgdbt, rootent, rootlsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DBT *pgdbt;
+       DBT *rootent;
+       DB_LSN * rootlsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_bam_rsplit;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+
+       /* Total size: fixed header, scalars, two length-prefixed DBTs. */
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(u_int32_t) + (pgdbt == NULL ? 0 : pgdbt->size)
+           + sizeof(u_int32_t) + (rootent == NULL ? 0 : rootent->size)
+           + sizeof(*rootlsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (pgdbt == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &pgdbt->size, sizeof(pgdbt->size));
+               bp += sizeof(pgdbt->size);
+               memcpy(bp, pgdbt->data, pgdbt->size);
+               bp += pgdbt->size;
+       }
+       if (rootent == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &rootent->size, sizeof(rootent->size));
+               bp += sizeof(rootent->size);
+               memcpy(bp, rootent->data, rootent->size);
+               bp += rootent->size;
+       }
+       if (rootlsn != NULL)
+               memcpy(bp, rootlsn, sizeof(*rootlsn));
+       else
+               memset(bp, 0, sizeof(*rootlsn));
+       bp += sizeof(*rootlsn);
+#ifdef DEBUG
+       /* The bytes written must match the size computed above. */
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       /*
+        * Chain the transaction to the new record only if it was written;
+        * after a failed log_put *ret_lsnp may not name a valid record.
+        */
+       if (ret == 0 && txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __bam_rsplit_print --
+ *     Decode the bam_rsplit log record held in dbtp and print each field to
+ *     stdout, prefixed by the LSN passed in lsnp.  Returns 0, or the error
+ *     from __bam_rsplit_read when the record cannot be decoded.
+ *
+ * PUBLIC: int __bam_rsplit_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_rsplit_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __bam_rsplit_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       /* The notused arguments are only ever assigned to, never read. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __bam_rsplit_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]bam_rsplit: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       /*
+        * Both byte dumps read each byte as unsigned: isprint() is only
+        * defined for EOF and unsigned char values, and a plain (possibly
+        * signed) char would print bytes 0x80-0xff as sign-extended ints.
+        */
+       printf("\tpgdbt: ");
+       for (i = 0; i < argp->pgdbt.size; i++) {
+               c = ((u_int8_t *)argp->pgdbt.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\trootent: ");
+       for (i = 0; i < argp->rootent.size; i++) {
+               c = ((u_int8_t *)argp->rootent.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\trootlsn: [%lu][%lu]\n",
+           (u_long)argp->rootlsn.file, (u_long)argp->rootlsn.offset);
+       printf("\n");
+       /* argp is the single allocation made by __bam_rsplit_read. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __bam_rsplit_read --
+ *     Unpack the flat bam_rsplit log record at recbuf into a newly malloc'd
+ *     __bam_rsplit_args and return it through argpp.  Returns 0 on success
+ *     or ENOMEM if the allocation fails (argpp is then left untouched).
+ *     The pgdbt and rootent DBTs point into recbuf; nothing is copied.
+ *
+ * PUBLIC: int __bam_rsplit_read __P((void *, __bam_rsplit_args **));
+ */
+int
+__bam_rsplit_read(recbuf, argpp)
+       void *recbuf;
+       __bam_rsplit_args **argpp;
+{
+       __bam_rsplit_args *rec;
+       u_int8_t *src;
+
+       /* One allocation: the argument struct followed by its DB_TXN. */
+       rec = (__bam_rsplit_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Common header: record type, transaction id, previous LSN. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+
+       /* Two length-prefixed byte strings; data aliases the record. */
+       memcpy(&rec->pgdbt.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->pgdbt.data = src;
+       src += rec->pgdbt.size;
+       memcpy(&rec->rootent.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->rootent.data = src;
+       src += rec->rootent.size;
+
+       memcpy(&rec->rootlsn, src, sizeof(rec->rootlsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * __bam_adj_log --
+ *     Marshal a bam_adj record (record type, transaction id, previous LSN,
+ *     fileid, pgno, lsn, indx, indx_copy, is_insert) into one malloc'd
+ *     buffer and pass it to log_put.  A NULL txnid is logged as transaction
+ *     0 with a zero previous LSN; a NULL lsn is logged as zero bytes.
+ *     Returns ENOMEM if the buffer cannot be allocated, otherwise the
+ *     log_put return value.  The transaction's last_lsn is advanced only
+ *     when log_put succeeds.
+ *
+ * PUBLIC: int __bam_adj_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t));
+ */
+int __bam_adj_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, lsn, indx, indx_copy, is_insert)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DB_LSN * lsn;
+       u_int32_t indx;
+       u_int32_t indx_copy;
+       u_int32_t is_insert;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_bam_adj;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+
+       /* Every field is fixed-size, so the record length is constant. */
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(*lsn)
+           + sizeof(indx)
+           + sizeof(indx_copy)
+           + sizeof(is_insert);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (lsn != NULL)
+               memcpy(bp, lsn, sizeof(*lsn));
+       else
+               memset(bp, 0, sizeof(*lsn));
+       bp += sizeof(*lsn);
+       memcpy(bp, &indx, sizeof(indx));
+       bp += sizeof(indx);
+       memcpy(bp, &indx_copy, sizeof(indx_copy));
+       bp += sizeof(indx_copy);
+       memcpy(bp, &is_insert, sizeof(is_insert));
+       bp += sizeof(is_insert);
+#ifdef DEBUG
+       /* The bytes written must match the size computed above. */
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       /*
+        * Chain the transaction to the new record only if it was written;
+        * after a failed log_put *ret_lsnp may not name a valid record.
+        */
+       if (ret == 0 && txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __bam_adj_print --
+ *     Decode the bam_adj log record held in dbtp and print each field to
+ *     stdout, prefixed by the LSN passed in lsnp.  Returns 0, or the error
+ *     from __bam_adj_read when the record cannot be decoded.
+ *
+ * PUBLIC: int __bam_adj_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_adj_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __bam_adj_args *argp;
+       int ret;
+
+       /* The notused arguments are only ever assigned to, never read. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __bam_adj_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]bam_adj: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tlsn: [%lu][%lu]\n",
+           (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
+       printf("\tindx: %lu\n", (u_long)argp->indx);
+       printf("\tindx_copy: %lu\n", (u_long)argp->indx_copy);
+       printf("\tis_insert: %lu\n", (u_long)argp->is_insert);
+       printf("\n");
+       /* argp is the single allocation made by __bam_adj_read. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __bam_adj_read --
+ *     Unpack the flat bam_adj log record at recbuf into a newly malloc'd
+ *     __bam_adj_args and return it through argpp.  Returns 0 on success or
+ *     ENOMEM if the allocation fails (argpp is then left untouched).
+ *
+ * PUBLIC: int __bam_adj_read __P((void *, __bam_adj_args **));
+ */
+int
+__bam_adj_read(recbuf, argpp)
+       void *recbuf;
+       __bam_adj_args **argpp;
+{
+       __bam_adj_args *rec;
+       u_int8_t *src;
+
+       /* One allocation: the argument struct followed by its DB_TXN. */
+       rec = (__bam_adj_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Common header: record type, transaction id, previous LSN. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Record-specific fields, in the order they were logged. */
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->lsn, src, sizeof(rec->lsn));
+       src += sizeof(rec->lsn);
+       memcpy(&rec->indx, src, sizeof(rec->indx));
+       src += sizeof(rec->indx);
+       memcpy(&rec->indx_copy, src, sizeof(rec->indx_copy));
+       src += sizeof(rec->indx_copy);
+       memcpy(&rec->is_insert, src, sizeof(rec->is_insert));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * __bam_cadjust_log --
+ *     Marshal a bam_cadjust record (record type, transaction id, previous
+ *     LSN, fileid, pgno, lsn, indx, adjust, total) into one malloc'd buffer
+ *     and pass it to log_put.  A NULL txnid is logged as transaction 0 with
+ *     a zero previous LSN; a NULL lsn is logged as zero bytes.  Returns
+ *     ENOMEM if the buffer cannot be allocated, otherwise the log_put
+ *     return value.  The transaction's last_lsn is advanced only when
+ *     log_put succeeds.
+ *
+ * PUBLIC: int __bam_cadjust_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
+ * PUBLIC:     int32_t, int32_t));
+ */
+int __bam_cadjust_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, lsn, indx, adjust, total)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DB_LSN * lsn;
+       u_int32_t indx;
+       int32_t adjust;
+       int32_t total;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_bam_cadjust;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+
+       /* Every field is fixed-size, so the record length is constant. */
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(*lsn)
+           + sizeof(indx)
+           + sizeof(adjust)
+           + sizeof(total);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (lsn != NULL)
+               memcpy(bp, lsn, sizeof(*lsn));
+       else
+               memset(bp, 0, sizeof(*lsn));
+       bp += sizeof(*lsn);
+       memcpy(bp, &indx, sizeof(indx));
+       bp += sizeof(indx);
+       memcpy(bp, &adjust, sizeof(adjust));
+       bp += sizeof(adjust);
+       memcpy(bp, &total, sizeof(total));
+       bp += sizeof(total);
+#ifdef DEBUG
+       /* The bytes written must match the size computed above. */
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       /*
+        * Chain the transaction to the new record only if it was written;
+        * after a failed log_put *ret_lsnp may not name a valid record.
+        */
+       if (ret == 0 && txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __bam_cadjust_print --
+ *     Decode the bam_cadjust log record held in dbtp and print each field
+ *     to stdout, prefixed by the LSN passed in lsnp.  Returns 0, or the
+ *     error from __bam_cadjust_read when the record cannot be decoded.
+ *
+ * PUBLIC: int __bam_cadjust_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_cadjust_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __bam_cadjust_args *argp;
+       int ret;
+
+       /* The notused arguments are only ever assigned to, never read. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __bam_cadjust_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]bam_cadjust: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tlsn: [%lu][%lu]\n",
+           (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
+       printf("\tindx: %lu\n", (u_long)argp->indx);
+       /* adjust and total are signed, hence %ld rather than %lu. */
+       printf("\tadjust: %ld\n", (long)argp->adjust);
+       printf("\ttotal: %ld\n", (long)argp->total);
+       printf("\n");
+       /* argp is the single allocation made by __bam_cadjust_read. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __bam_cadjust_read --
+ *     Unpack the flat bam_cadjust log record at recbuf into a newly
+ *     malloc'd __bam_cadjust_args and return it through argpp.  Returns 0
+ *     on success or ENOMEM if the allocation fails (argpp is then left
+ *     untouched).
+ *
+ * PUBLIC: int __bam_cadjust_read __P((void *, __bam_cadjust_args **));
+ */
+int
+__bam_cadjust_read(recbuf, argpp)
+       void *recbuf;
+       __bam_cadjust_args **argpp;
+{
+       __bam_cadjust_args *rec;
+       u_int8_t *src;
+
+       /* One allocation: the argument struct followed by its DB_TXN. */
+       rec = (__bam_cadjust_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Common header: record type, transaction id, previous LSN. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Record-specific fields, in the order they were logged. */
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->lsn, src, sizeof(rec->lsn));
+       src += sizeof(rec->lsn);
+       memcpy(&rec->indx, src, sizeof(rec->indx));
+       src += sizeof(rec->indx);
+       memcpy(&rec->adjust, src, sizeof(rec->adjust));
+       src += sizeof(rec->adjust);
+       memcpy(&rec->total, src, sizeof(rec->total));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * __bam_cdel_log --
+ *     Marshal a bam_cdel record (record type, transaction id, previous LSN,
+ *     fileid, pgno, lsn, indx) into one malloc'd buffer and pass it to
+ *     log_put.  A NULL txnid is logged as transaction 0 with a zero
+ *     previous LSN; a NULL lsn is logged as zero bytes.  Returns ENOMEM if
+ *     the buffer cannot be allocated, otherwise the log_put return value.
+ *     The transaction's last_lsn is advanced only when log_put succeeds.
+ *
+ * PUBLIC: int __bam_cdel_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, u_int32_t));
+ */
+int __bam_cdel_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, lsn, indx)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DB_LSN * lsn;
+       u_int32_t indx;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_bam_cdel;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+
+       /* Every field is fixed-size, so the record length is constant. */
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(*lsn)
+           + sizeof(indx);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (lsn != NULL)
+               memcpy(bp, lsn, sizeof(*lsn));
+       else
+               memset(bp, 0, sizeof(*lsn));
+       bp += sizeof(*lsn);
+       memcpy(bp, &indx, sizeof(indx));
+       bp += sizeof(indx);
+#ifdef DEBUG
+       /* The bytes written must match the size computed above. */
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       /*
+        * Chain the transaction to the new record only if it was written;
+        * after a failed log_put *ret_lsnp may not name a valid record.
+        */
+       if (ret == 0 && txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __bam_cdel_print --
+ *     Decode the bam_cdel log record held in dbtp and print each field to
+ *     stdout, prefixed by the LSN passed in lsnp.  Returns 0, or the error
+ *     from __bam_cdel_read when the record cannot be decoded.
+ *
+ * PUBLIC: int __bam_cdel_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__bam_cdel_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __bam_cdel_args *argp;
+       int ret;
+
+       /* The notused arguments are only ever assigned to, never read. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __bam_cdel_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]bam_cdel: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tlsn: [%lu][%lu]\n",
+           (u_long)argp->lsn.file, (u_long)argp->lsn.offset);
+       printf("\tindx: %lu\n", (u_long)argp->indx);
+       printf("\n");
+       /* argp is the single allocation made by __bam_cdel_read. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __bam_cdel_read --
+ *     Unpack the flat bam_cdel log record at recbuf into a newly malloc'd
+ *     __bam_cdel_args and return it through argpp.  Returns 0 on success or
+ *     ENOMEM if the allocation fails (argpp is then left untouched).
+ *
+ * PUBLIC: int __bam_cdel_read __P((void *, __bam_cdel_args **));
+ */
+int
+__bam_cdel_read(recbuf, argpp)
+       void *recbuf;
+       __bam_cdel_args **argpp;
+{
+       __bam_cdel_args *rec;
+       u_int8_t *src;
+
+       /* One allocation: the argument struct followed by its DB_TXN. */
+       rec = (__bam_cdel_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Common header: record type, transaction id, previous LSN. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Record-specific fields, in the order they were logged. */
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->lsn, src, sizeof(rec->lsn));
+       src += sizeof(rec->lsn);
+       memcpy(&rec->indx, src, sizeof(rec->indx));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * __bam_init_print --
+ *     Register the print routine for each of the seven btree log record
+ *     types with __db_add_recovery.  Registration stops at the first
+ *     failure and that error is returned; otherwise returns 0.
+ *
+ * PUBLIC: int __bam_init_print __P((DB_ENV *));
+ */
+int
+__bam_init_print(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       /* Each step runs only while every earlier one has returned 0. */
+       ret = __db_add_recovery(dbenv,
+           __bam_pg_alloc_print, DB_bam_pg_alloc);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_pg_free_print, DB_bam_pg_free);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_split_print, DB_bam_split);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_rsplit_print, DB_bam_rsplit);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_adj_print, DB_bam_adj);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_cadjust_print, DB_bam_cadjust);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_cdel_print, DB_bam_cdel);
+       return (ret);
+}
+
+/*
+ * __bam_init_recover --
+ *     Register the recovery routine for each of the seven btree log record
+ *     types with __db_add_recovery.  Registration stops at the first
+ *     failure and that error is returned; otherwise returns 0.
+ *
+ * PUBLIC: int __bam_init_recover __P((DB_ENV *));
+ */
+int
+__bam_init_recover(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       /* Each step runs only while every earlier one has returned 0. */
+       ret = __db_add_recovery(dbenv,
+           __bam_pg_alloc_recover, DB_bam_pg_alloc);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_pg_free_recover, DB_bam_pg_free);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_split_recover, DB_bam_split);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_rsplit_recover, DB_bam_rsplit);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_adj_recover, DB_bam_adj);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_cadjust_recover, DB_bam_cadjust);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __bam_cdel_recover, DB_bam_cdel);
+       return (ret);
+}
+
diff --git a/db2/clib/getlong.c b/db2/clib/getlong.c
new file mode 100644 (file)
index 0000000..d79c684
--- /dev/null
@@ -0,0 +1,48 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)getlong.c    10.2 (Sleepycat) 5/1/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#endif
+
+#include "db.h"
+#include "clib_ext.h"
+
+/*
+ * get_long --
+ *     Parse p as a base-10 long and store it through storep, provided it
+ *     lies within [min, max].  Any failure (overflow, empty or malformed
+ *     input, out-of-range value) is reported with err/errx using exit
+ *     status 1; storep is written only on success.
+ *
+ * PUBLIC: void get_long __P((char *, long, long, long *));
+ */
+void
+get_long(p, min, max, storep)
+       char *p;
+       long min, max, *storep;
+{
+       char *tail;
+       long parsed;
+
+       /* Clear errno first so ERANGE below can only come from strtol. */
+       errno = 0;
+       parsed = strtol(p, &tail, 10);
+
+       /* Overflow or underflow: strtol clamps the value and sets ERANGE. */
+       if (errno == ERANGE && (parsed == LONG_MAX || parsed == LONG_MIN))
+               err(1, "%s", p);
+
+       /* Reject an empty string and anything left over after the digits. */
+       if (*p == '\0' || *tail != '\0')
+               errx(1, "%s: Invalid numeric argument", p);
+
+       if (parsed < min)
+               errx(1, "%s: Less than minimum value (%ld)", p, min);
+       if (parsed > max)
+               errx(1, "%s: Greater than maximum value (%ld)", p, max);
+
+       *storep = parsed;
+}
diff --git a/db2/common/db_appinit.c b/db2/common/db_appinit.c
new file mode 100644 (file)
index 0000000..01891c6
--- /dev/null
@@ -0,0 +1,787 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_appinit.c 10.27 (Sleepycat) 8/23/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/param.h>
+#include <sys/stat.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "btree.h"
+#include "hash.h"
+#include "log.h"
+#include "txn.h"
+#include "clib_ext.h"
+#include "common_ext.h"
+
+static int __db_home __P((DB_ENV *, const char *, int));
+static int __db_parse __P((DB_ENV *, char *));
+static int __db_tmp_dir __P((DB_ENV *, int));
+static int __db_tmp_open __P((DB_ENV *, char *, int *));
+
+/*
+ * db_version --
+ *     Return version information.  Each non-NULL argument receives the
+ *     matching component (major, minor, patch) of the library version;
+ *     the return value is DB_VERSION_STRING.
+ */
+const char *
+db_version(majverp, minverp, patchp)
+       int *majverp, *minverp, *patchp;
+{
+       /* Callers may pass NULL for any component they do not care about. */
+       if (majverp)
+               *majverp = DB_VERSION_MAJOR;
+       if (minverp)
+               *minverp = DB_VERSION_MINOR;
+       if (patchp)
+               *patchp = DB_VERSION_PATCH;
+
+       return (DB_VERSION_STRING);
+}
+
+/*
+ * db_appinit --
+ *     Initialize the application environment.
+ *
+ *     Validates flags, establishes the database home, parses the
+ *     db_config array and then the DB_CONFIG file found in the home
+ *     directory, chooses a temporary directory, opens the subsystems
+ *     selected by the DB_INIT_* flags and, when DB_RECOVER or
+ *     DB_RECOVER_FATAL is set, runs recovery.
+ *
+ *     Returns 0 on success and an error value otherwise.  On failure,
+ *     every subsystem opened by this call is closed again and
+ *     db_appexit is called on dbenv.
+ */
+int
+db_appinit(db_home, db_config, dbenv, flags)
+       const char *db_home;
+       char * const *db_config;
+       DB_ENV *dbenv;
+       int flags;
+{
+       FILE *fp;
+       int i_lock, i_log, i_mpool, i_txn, ret;
+       char *lp, **p, buf[MAXPATHLEN * 2];
+
+       /* Validate arguments. */
+       if (dbenv == NULL)
+               return (EINVAL);
+#ifdef HAVE_SPINLOCKS
+#define        OKFLAGS                                                         \
+   (DB_CREATE | DB_NOMMAP | DB_THREAD | DB_INIT_LOCK | DB_INIT_LOG |   \
+    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_RECOVER |      \
+    DB_RECOVER_FATAL | DB_TXN_NOSYNC | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+#else
+#define        OKFLAGS                                                         \
+   (DB_CREATE | DB_NOMMAP | DB_INIT_LOCK | DB_INIT_LOG |               \
+    DB_INIT_MPOOL | DB_INIT_TXN | DB_MPOOL_PRIVATE | DB_RECOVER |      \
+    DB_RECOVER_FATAL | DB_TXN_NOSYNC | DB_USE_ENVIRON | DB_USE_ENVIRON_ROOT)
+#endif
+       if ((ret = __db_fchk(dbenv, "db_appinit", flags, OKFLAGS)) != 0)
+               return (ret);
+
+       /* Recovery needs creation, transactions and logging all at once. */
+#define        RECOVERY_FLAGS (DB_CREATE | DB_INIT_TXN | DB_INIT_LOG)
+       if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) &&
+           LF_ISSET(RECOVERY_FLAGS) != RECOVERY_FLAGS)
+               return (__db_ferr(dbenv, "db_appinit", 1));
+
+       fp = NULL;
+       i_lock = i_log = i_mpool = i_txn = 0;
+
+       /* Set the database home. */
+       if ((ret = __db_home(dbenv, db_home, flags)) != 0)
+               goto err;
+
+       /* Parse the config array. */
+       for (p = (char **)db_config; p != NULL && *p != NULL; ++p)
+               if ((ret = __db_parse(dbenv, *p)) != 0)
+                       goto err;
+
+       /* Parse the config file. */
+       if (dbenv->db_home != NULL) {
+               (void)snprintf(buf,
+                   sizeof(buf), "%s/DB_CONFIG", dbenv->db_home);
+               if ((fp = fopen(buf, "r")) != NULL) {
+                       /* buf is reused as the line buffer from here on. */
+                       while (fgets(buf, sizeof(buf), fp) != NULL) {
+                               if ((lp = strchr(buf, '\n')) != NULL)
+                                       *lp = '\0';
+                               if ((ret = __db_parse(dbenv, buf)) != 0)
+                                       goto err;
+                       }
+                       (void)fclose(fp);
+                       /*
+                        * Forget the stream: the error path below closes
+                        * fp whenever it is non-NULL, and closing it a
+                        * second time is undefined behavior.
+                        */
+                       fp = NULL;
+               }
+       }
+
+       /* Set up the tmp directory path. */
+       if (dbenv->db_tmp_dir == NULL &&
+           (ret = __db_tmp_dir(dbenv, flags)) != 0)
+               goto err;
+
+       /* Indicate that the path names have been set. */
+       F_SET(dbenv, DB_APP_INIT);
+
+       /*
+        * If we are doing recovery, remove all the regions.
+        */
+       if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL)) {
+               /* Remove all the old shared memory regions.  */
+               if ((ret = log_unlink(NULL, 1 /* force */, dbenv)) != 0)
+                       goto err;
+               if ((ret = memp_unlink(NULL, 1 /* force */, dbenv)) != 0)
+                       goto err;
+               if ((ret = lock_unlink(NULL, 1 /* force */, dbenv)) != 0)
+                       goto err;
+               if ((ret = txn_unlink(NULL, 1 /* force */, dbenv)) != 0)
+                       goto err;
+       }
+
+       /* Transactions imply logging. */
+       if (LF_ISSET(DB_INIT_TXN))
+               LF_SET(DB_INIT_LOG);
+
+       /* Default permissions are 0660. */
+#undef DB_DEFPERM
+#define        DB_DEFPERM      (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP)
+
+       /*
+        * Initialize the subsystems.  The i_* flags record which opens
+        * succeeded so the error path only closes what this call opened.
+        */
+       if (LF_ISSET(DB_INIT_LOCK)) {
+               if ((ret = lock_open(NULL,
+                   LF_ISSET(DB_CREATE | DB_THREAD),
+                   DB_DEFPERM, dbenv, &dbenv->lk_info)) != 0)
+                       goto err;
+               i_lock = 1;
+       }
+       if (LF_ISSET(DB_INIT_LOG)) {
+               if ((ret = log_open(NULL,
+                   LF_ISSET(DB_CREATE | DB_THREAD),
+                   DB_DEFPERM, dbenv, &dbenv->lg_info)) != 0)
+                       goto err;
+               i_log = 1;
+       }
+       if (LF_ISSET(DB_INIT_MPOOL)) {
+               if ((ret = memp_open(NULL,
+           LF_ISSET(DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD),
+                   DB_DEFPERM, dbenv, &dbenv->mp_info)) != 0)
+                       goto err;
+               i_mpool = 1;
+       }
+       if (LF_ISSET(DB_INIT_TXN)) {
+               if ((ret = txn_open(NULL,
+                   LF_ISSET(DB_CREATE | DB_THREAD | DB_TXN_NOSYNC),
+                   DB_DEFPERM, dbenv, &dbenv->tx_info)) != 0)
+                       goto err;
+               i_txn = 1;
+       }
+
+       /* Initialize recovery. */
+       if (LF_ISSET(DB_INIT_TXN)) {
+               if ((ret = __bam_init_recover(dbenv)) != 0)
+                       goto err;
+               if ((ret = __db_init_recover(dbenv)) != 0)
+                       goto err;
+               if ((ret = __ham_init_recover(dbenv)) != 0)
+                       goto err;
+               if ((ret = __log_init_recover(dbenv)) != 0)
+                       goto err;
+               if ((ret = __txn_init_recover(dbenv)) != 0)
+                       goto err;
+       }
+
+       /* Now run recovery if necessary. */
+       if (LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL) && (ret =
+           __db_apprec(dbenv, LF_ISSET(DB_RECOVER | DB_RECOVER_FATAL))) != 0)
+               goto err;
+
+       return (ret);
+
+err:   if (fp != NULL)
+               (void)fclose(fp);
+
+       /*
+        * Close whatever this call opened, and clear each handle as we
+        * go: db_appexit closes every handle that is still non-NULL, so
+        * leaving them set would close each subsystem twice.
+        */
+       if (i_lock) {
+               (void)lock_close(dbenv->lk_info);
+               dbenv->lk_info = NULL;
+       }
+       if (i_log) {
+               (void)log_close(dbenv->lg_info);
+               dbenv->lg_info = NULL;
+       }
+       if (i_mpool) {
+               (void)memp_close(dbenv->mp_info);
+               dbenv->mp_info = NULL;
+       }
+       if (i_txn) {
+               (void)txn_close(dbenv->tx_info);
+               dbenv->tx_info = NULL;
+       }
+
+       (void)db_appexit(dbenv);
+       return (ret);
+}
+
+/*
+ * db_appexit --
+ *     Close down the default application environment.
+ *
+ *     Closes each subsystem whose handle in dbenv is non-NULL and frees
+ *     the path strings held by dbenv.  Every handle and pointer is
+ *     cleared once released, so calling db_appexit again on the same
+ *     DB_ENV (for example after db_appinit has already called it on a
+ *     failure) does not close or free anything twice.
+ *
+ *     Returns 0, or the first non-zero value returned by a close.
+ */
+int
+db_appexit(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret, t_ret;
+       char **p;
+
+       ret = 0;
+
+       /* Close subsystems; remember only the first failure. */
+       if (dbenv->tx_info) {
+               if ((t_ret = txn_close(dbenv->tx_info)) != 0 && ret == 0)
+                       ret = t_ret;
+               dbenv->tx_info = NULL;
+       }
+       if (dbenv->mp_info) {
+               if ((t_ret = memp_close(dbenv->mp_info)) != 0 && ret == 0)
+                       ret = t_ret;
+               dbenv->mp_info = NULL;
+       }
+       if (dbenv->lg_info) {
+               if ((t_ret = log_close(dbenv->lg_info)) != 0 && ret == 0)
+                       ret = t_ret;
+               dbenv->lg_info = NULL;
+       }
+       if (dbenv->lk_info) {
+               if ((t_ret = lock_close(dbenv->lk_info)) != 0 && ret == 0)
+                       ret = t_ret;
+               dbenv->lk_info = NULL;
+       }
+
+       /* Free allocated memory. */
+       if (dbenv->db_home != NULL) {
+               FREES(dbenv->db_home);
+               dbenv->db_home = NULL;
+       }
+       if ((p = dbenv->db_data_dir) != NULL) {
+               /* The array is NULL-terminated; free each entry first. */
+               for (; *p != NULL; ++p)
+                       FREES(*p);
+               FREE(dbenv->db_data_dir, dbenv->data_cnt * sizeof(char **));
+               dbenv->db_data_dir = NULL;
+               /* Keep the bookkeeping in step with the released array. */
+               dbenv->data_cnt = 0;
+               dbenv->data_next = 0;
+       }
+       if (dbenv->db_log_dir != NULL) {
+               FREES(dbenv->db_log_dir);
+               dbenv->db_log_dir = NULL;
+       }
+       if (dbenv->db_tmp_dir != NULL) {
+               FREES(dbenv->db_tmp_dir);
+               dbenv->db_tmp_dir = NULL;
+       }
+
+       return (ret);
+}
+
+/*
+ * DB_ADDSTR --
+ *     Append one optional path component to the name being built by
+ *     __db_appname.  Relies on the caller's locals: start (beginning of
+ *     the buffer), p (current write position), len (scratch) and slash
+ *     (non-zero when a separator must precede the next component).
+ */
+#define        DB_ADDSTR(str) {                                                \
+       if ((str) != NULL) {                                            \
+               /* If leading slash, start over. */                     \
+               if (__db_abspath(str)) {                                \
+                       p = start;                                      \
+                       slash = 0;                                      \
+               }                                                       \
+               /* Append to the current string. */                     \
+               len = strlen(str);                                      \
+               if (slash)                                              \
+                       *p++ = PATH_SEPARATOR[0];                       \
+               memcpy(p, str, len);                                    \
+               p += len;                                               \
+               /*                                                      \
+                * Only look at the last byte written if there is one;  \
+                * an empty first component leaves p == start, and      \
+                * p[-1] would then be outside the buffer.              \
+                */                                                     \
+               slash = p != start &&                                   \
+                   strchr(PATH_SEPARATOR, p[-1]) == NULL;              \
+       }                                                               \
+}
+
+/*
+ * __db_appname --
+ *     Given an optional DB environment, directory and file name and type
+ *     of call, build a path based on the db_appinit(3) rules, and return
+ *     it in allocated space.
+ *
+ *     When a temporary file is being created no name is returned; the
+ *     open descriptor is stored through fdp instead.  Otherwise the
+ *     allocated path is stored through namep.  Returns 0 on success or
+ *     an error value (EINVAL for an illegal dir/file combination, ENOMEM
+ *     when the path cannot be allocated, or whatever the temporary
+ *     directory lookup or temporary file creation returned).
+ *
+ * PUBLIC: int __db_appname __P((DB_ENV *,
+ * PUBLIC:    APPNAME, const char *, const char *, int *, char **));
+ */
+int
+__db_appname(dbenv, appname, dir, file, fdp, namep)
+       DB_ENV *dbenv;
+       APPNAME appname;
+       const char *dir, *file;
+       int *fdp;
+       char **namep;
+{
+       DB_ENV etmp;
+       size_t len;
+       int ret, slash, tmp_create, tmp_free;
+       const char *a, *b, *c;
+       int data_entry;
+       char *p, *start;
+
+       /* a, b and c are the (optional) leading path components, in order. */
+       a = b = c = NULL;
+       data_entry = -1;
+       tmp_create = tmp_free = 0;
+
+       /*
+        * We don't return a name when creating temporary files, just an fd.
+        * Default to error now.
+        */
+       if (fdp != NULL)
+               *fdp = -1;
+       if (namep != NULL)
+               *namep = NULL;
+
+       /*
+        * Absolute path names are never modified.  If the file is an absolute
+        * path, we're done.  If the directory is, simply append the file and
+        * return.
+        */
+       if (file != NULL && __db_abspath(file))
+               return ((*namep = (char *)strdup(file)) == NULL ? ENOMEM : 0);
+       if (dir != NULL && __db_abspath(dir)) {
+               a = dir;
+               goto done;
+       }
+
+       /*
+        * DB_ENV  DIR     APPNAME         RESULT
+        * -------------------------------------------
+        * null    null    none            <tmp>/file
+        * null    set     none            DIR/file
+        * set     null    none            DB_HOME/file
+        * set     set     none            DB_HOME/DIR/file
+        *
+        * DB_ENV  FILE    APPNAME         RESULT
+        * -------------------------------------------
+        * null    null    DB_APP_DATA     <tmp>/<create>
+        * null    set     DB_APP_DATA     ./file
+        * set     null    DB_APP_DATA     <tmp>/<create>
+        * set     set     DB_APP_DATA     DB_HOME/DB_DATA_DIR/file
+        *
+        * DB_ENV  DIR     APPNAME         RESULT
+        * -------------------------------------------
+        * null    null    DB_APP_LOG      <tmp>/file
+        * null    set     DB_APP_LOG      DIR/file
+        * set     null    DB_APP_LOG      DB_HOME/DB_LOG_DIR/file
+        * set     set     DB_APP_LOG      DB_HOME/DB_LOG_DIR/DIR/file
+        *
+        * DB_ENV          APPNAME         RESULT
+        * -------------------------------------------
+        * null            DB_APP_TMP      <tmp>/<create>
+        * set             DB_APP_TMP      DB_HOME/DB_TMP_DIR/<create>
+        */
+retry: switch (appname) {
+       case DB_APP_NONE:
+               if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+                       if (dir == NULL)
+                               goto tmp;
+                       a = dir;
+               } else {
+                       a = dbenv->db_home;
+                       b = dir;
+               }
+               break;
+       case DB_APP_DATA:
+               if (dir != NULL) {
+                       __db_err(dbenv,
+                           "DB_APP_DATA: illegal directory specification");
+                       return (EINVAL);
+               }
+
+               if (file == NULL) {
+                       tmp_create = 1;
+                       goto tmp;
+               }
+               if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT))
+                       a = PATH_DOT;
+               else {
+                       a = dbenv->db_home;
+                       /*
+                        * Try the data directories one per pass through
+                        * "retry".  Once the list is exhausted, fall back
+                        * to the first entry and stop retrying
+                        * (data_entry == -1 disables the existence check
+                        * below).
+                        */
+                       if (dbenv->db_data_dir != NULL &&
+                           (b = dbenv->db_data_dir[++data_entry]) == NULL) {
+                               data_entry = -1;
+                               b = dbenv->db_data_dir[0];
+                       }
+               }
+               break;
+       case DB_APP_LOG:
+               if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+                       if (dir == NULL)
+                               goto tmp;
+                       a = dir;
+               } else {
+                       a = dbenv->db_home;
+                       b = dbenv->db_log_dir;
+                       c = dir;
+               }
+               break;
+       case DB_APP_TMP:
+               if (dir != NULL || file != NULL) {
+                       __db_err(dbenv,
+                   "DB_APP_TMP: illegal directory or file specification");
+                       return (EINVAL);
+               }
+
+               tmp_create = 1;
+               if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT))
+                       goto tmp;
+               else {
+                       a = dbenv->db_home;
+                       b = dbenv->db_tmp_dir;
+               }
+               break;
+       }
+
+       /* Reference a file from the appropriate temporary directory. */
+       if (0) {
+tmp:           if (dbenv == NULL || !F_ISSET(dbenv, DB_APP_INIT)) {
+                       /* No usable environment: look the directory up. */
+                       memset(&etmp, 0, sizeof(etmp));
+                       if ((ret = __db_tmp_dir(&etmp, DB_USE_ENVIRON)) != 0)
+                               return (ret);
+                       tmp_free = 1;
+                       a = etmp.db_tmp_dir;
+               } else
+                       a = dbenv->db_tmp_dir;
+       }
+
+       /*
+        * Each component needs its own length plus one byte, for either the
+        * separator that follows it or the terminating nul.
+        */
+done:  len =
+           (a == NULL ? 0 : strlen(a) + 1) +
+           (b == NULL ? 0 : strlen(b) + 1) +
+           (c == NULL ? 0 : strlen(c) + 1) +
+           (file == NULL ? 0 : strlen(file) + 1);
+
+       /*
+        * Allocate one spare byte: if every component is NULL, len is 0 and
+        * the terminating nul written below would otherwise land in a
+        * zero-length allocation.
+        */
+       if ((start = (char *)malloc(len + 1)) == NULL) {
+               __db_err(dbenv, "%s", strerror(ENOMEM));
+               if (tmp_free)
+                       FREES(etmp.db_tmp_dir);
+               return (ENOMEM);
+       }
+
+       /*
+        * Assemble the path.  The third component, c, must be appended too:
+        * it is counted in len and carries DIR for the documented
+        * DB_HOME/DB_LOG_DIR/DIR/file case.
+        */
+       slash = 0;
+       p = start;
+       DB_ADDSTR(a);
+       DB_ADDSTR(b);
+       DB_ADDSTR(c);
+       DB_ADDSTR(file);
+       *p = '\0';
+
+       /*
+        * If we're opening a data file, see if it exists.  If it does,
+        * return it, otherwise, try and find another one to open.
+        */
+       if (data_entry != -1 && __db_exists(start, NULL) != 0) {
+               FREES(start);
+               a = b = c = NULL;
+               goto retry;
+       }
+
+       /* Discard any space allocated to find the temp directory. */
+       if (tmp_free)
+               FREES(etmp.db_tmp_dir);
+
+       /* Create the file if so requested. */
+       if (tmp_create) {
+               /* Only the descriptor is handed back; the name is dropped. */
+               ret = __db_tmp_open(dbenv, start, fdp);
+               FREES(start);
+       } else {
+               *namep = start;
+               ret = 0;
+       }
+       return (ret);
+}
+
+/*
+ * __db_home --
+ *     Find the database home.
+ *
+ *     The db_home argument is the default.  When the flags allow the
+ *     environment to be consulted, a DB_HOME environment variable takes
+ *     precedence over it; an empty DB_HOME is rejected with EINVAL.  The
+ *     chosen value, if any, is copied into dbenv->db_home.  Returns 0 on
+ *     success (including when no home is known) or ENOMEM.
+ */
+static int
+__db_home(dbenv, db_home, flags)
+       DB_ENV *dbenv;
+       const char *db_home;
+       int flags;
+{
+       const char *home, *env_home;
+       int use_env;
+
+       /* Work out whether the environment may be consulted at all. */
+       use_env = LF_ISSET(DB_USE_ENVIRON) ? 1 : 0;
+#ifdef HAVE_GETUID
+       if (!use_env && LF_ISSET(DB_USE_ENVIRON_ROOT) && getuid() == 0)
+               use_env = 1;
+#endif
+
+       /* Start from the caller's value; let DB_HOME override it. */
+       home = db_home;
+       if (use_env && (env_home = getenv("DB_HOME")) != NULL) {
+               if (env_home[0] == '\0') {
+                       __db_err(dbenv,
+                           "illegal DB_HOME environment variable");
+                       return (EINVAL);
+               }
+               home = env_home;
+       }
+
+       /* Having no home at all is not an error. */
+       if (home == NULL)
+               return (0);
+
+       dbenv->db_home = (char *)strdup(home);
+       if (dbenv->db_home == NULL) {
+               __db_err(dbenv, "%s", strerror(ENOMEM));
+               return (ENOMEM);
+       }
+       return (0);
+}
+
+/*
+ * __db_parse --
+ *     Parse a single NAME VALUE pair.
+ *
+ *     NAME and VALUE are separated by spaces and/or tabs.  DB_DATA_DIR
+ *     appends VALUE to the NULL-terminated dbenv->db_data_dir array;
+ *     DB_LOG_DIR and DB_TMP_DIR replace the corresponding string.  Any
+ *     other NAME is ignored.  Returns 0 on success, EINVAL if either
+ *     NAME or VALUE is missing, and ENOMEM on allocation failure.
+ */
+static int
+__db_parse(dbenv, s)
+       DB_ENV *dbenv;
+       char *s;
+{
+       int ret;
+       char *local_s, *name, *value, **p, *tp, **new_dirs;
+
+       ret = 0;
+
+       /*
+        * We need to strdup the argument in case the caller passed us
+        * static data.
+        */
+       if ((local_s = (char *)strdup(s)) == NULL)
+               return (ENOMEM);
+
+       /* Pull out the first two non-empty tokens; strsep edits local_s. */
+       tp = local_s;
+       while ((name = strsep(&tp, " \t")) != NULL && *name == '\0');
+       if (name == NULL)
+               goto illegal;
+       while ((value = strsep(&tp, " \t")) != NULL && *value == '\0');
+       if (value == NULL) {
+illegal:       ret = EINVAL;
+               __db_err(dbenv, "illegal name-value pair: %s", s);
+               goto err;
+       }
+
+#define        DATA_INIT_CNT   20                      /* Start with 20 data slots. */
+       if (!strcmp(name, "DB_DATA_DIR")) {
+               if (dbenv->db_data_dir == NULL) {
+                       if ((dbenv->db_data_dir = (char **)calloc(DATA_INIT_CNT,
+                           sizeof(char **))) == NULL)
+                               goto nomem;
+                       dbenv->data_cnt = DATA_INIT_CNT;
+               } else if (dbenv->data_next == dbenv->data_cnt - 1) {
+                       /*
+                        * Only one free slot is left and it must stay NULL
+                        * as the terminator, so double the array.  Keep the
+                        * old pointer and count until realloc has succeeded
+                        * so a failure neither leaks the array nor leaves
+                        * data_cnt describing memory we do not have.
+                        */
+                       if ((new_dirs =
+                           (char **)realloc(dbenv->db_data_dir,
+                           dbenv->data_cnt * 2 * sizeof(char **))) == NULL)
+                               goto nomem;
+                       /*
+                        * realloc does not clear the new half; readers of
+                        * this array stop at the first NULL entry.
+                        */
+                       memset(new_dirs + dbenv->data_cnt,
+                           0, dbenv->data_cnt * sizeof(char **));
+                       dbenv->db_data_dir = new_dirs;
+                       dbenv->data_cnt *= 2;
+               }
+               p = &dbenv->db_data_dir[dbenv->data_next++];
+       } else if (!strcmp(name, "DB_LOG_DIR")) {
+               if (dbenv->db_log_dir != NULL)
+                       FREES(dbenv->db_log_dir);
+               p = &dbenv->db_log_dir;
+       } else if (!strcmp(name, "DB_TMP_DIR")) {
+               if (dbenv->db_tmp_dir != NULL)
+                       FREES(dbenv->db_tmp_dir);
+               p = &dbenv->db_tmp_dir;
+       } else
+               goto err;               /* Unknown name: ignored, ret is 0. */
+
+       if ((*p = (char *)strdup(value)) == NULL) {
+nomem:         ret = ENOMEM;
+               __db_err(dbenv, "%s", strerror(ENOMEM));
+       }
+
+err:   FREES(local_s);
+       return (ret);
+}
+
+#ifdef macintosh
+#include <TFileSpec.h>
+
+static char *sTempFolder;
+#endif
+
+/*
+ * __db_tmp_dir --
+ *     Set the temporary directory path.
+ *
+ *     When the flags allow it, the TMPDIR, TEMP, TMP and TempFolder
+ *     environment variables are consulted in that order; an empty value
+ *     is rejected with EINVAL.  Otherwise (or if none is set) a built-in
+ *     list of directories is probed.  The result is copied into
+ *     dbenv->db_tmp_dir.  Returns 0 on success or ENOMEM.
+ */
+static int
+__db_tmp_dir(dbenv, flags)
+       DB_ENV *dbenv;
+       int flags;
+{
+       static const char * list[] = {  /* Ordered: see db_appinit(3). */
+               "/var/tmp",
+               "/usr/tmp",
+               "/temp",                /* WIN32. */
+               "/tmp",
+               "C:/temp",              /* WIN32. */
+               "C:/tmp",               /* WIN32. */
+               NULL
+       };
+       const char **lp, *p;
+
+       /*
+        * Use the environment if it's permitted and initialized.  The
+        * root-only check calls getuid(), so honor HAVE_GETUID as
+        * __db_home does; HAVE_GETEUID is still accepted so builds that
+        * only define that macro behave as before.
+        */
+       p = NULL;
+#if defined(HAVE_GETUID) || defined(HAVE_GETEUID)
+       if (LF_ISSET(DB_USE_ENVIRON) ||
+           (LF_ISSET(DB_USE_ENVIRON_ROOT) && getuid() == 0)) {
+#else
+       if (LF_ISSET(DB_USE_ENVIRON)) {
+#endif
+               if ((p = getenv("TMPDIR")) != NULL && p[0] == '\0') {
+                       __db_err(dbenv, "illegal TMPDIR environment variable");
+                       return (EINVAL);
+               }
+               /* WIN32 */
+               if (p == NULL && (p = getenv("TEMP")) != NULL && p[0] == '\0') {
+                       __db_err(dbenv, "illegal TEMP environment variable");
+                       return (EINVAL);
+               }
+               /* WIN32 */
+               if (p == NULL && (p = getenv("TMP")) != NULL && p[0] == '\0') {
+                       __db_err(dbenv, "illegal TMP environment variable");
+                       return (EINVAL);
+               }
+               /* Macintosh */
+               if (p == NULL &&
+                   (p = getenv("TempFolder")) != NULL && p[0] == '\0') {
+                       __db_err(dbenv,
+                           "illegal TempFolder environment variable");
+                       return (EINVAL);
+               }
+       }
+
+#ifdef macintosh
+       /* Get the path to the temporary folder. */
+       if (p == NULL) {
+               FSSpec spec;
+
+               if (!Special2FSSpec(kTemporaryFolderType,
+                   kOnSystemDisk, 0, &spec)) {
+                       p = FSp2FullPath(&spec);
+                       /* Don't copy into a failed allocation. */
+                       if ((sTempFolder = malloc(strlen(p) + 1)) == NULL) {
+                               __db_err(dbenv, "%s", strerror(ENOMEM));
+                               return (ENOMEM);
+                       }
+                       strcpy(sTempFolder, p);
+                       p = sTempFolder;
+               }
+       }
+#endif
+
+       /*
+        * Step through the list looking for a possibility.
+        *
+        * NOTE(review): p is assigned before each probe, so if no entry
+        * exists p is left at the last list entry rather than NULL and
+        * the NULL check below cannot fire on this path.  Left unchanged
+        * because callers appear to expect db_tmp_dir to be set -- confirm
+        * before altering.
+        */
+       if (p == NULL)
+               for (lp = list; *lp != NULL; ++lp)
+                       if (__db_exists(p = *lp, NULL) == 0)
+                               break;
+
+       if (p == NULL)
+               return (0);
+
+       if ((dbenv->db_tmp_dir = (char *)strdup(p)) == NULL) {
+               __db_err(dbenv, "%s", strerror(ENOMEM));
+               return (ENOMEM);
+       }
+       return (0);
+}
+
+/*
+ * __db_tmp_open --
+ *     Create a temporary file.
+ *
+ *     Builds a name of the form dir/XXXXXX with the X's replaced by
+ *     digits of the process ID, then tries to create it exclusively,
+ *     permuting the name each time the file already exists.  On success
+ *     the descriptor is stored through fdp and 0 is returned; otherwise
+ *     an error value is returned.
+ */
+static int
+__db_tmp_open(dbenv, dir, fdp)
+       DB_ENV *dbenv;
+       char *dir;
+       int *fdp;
+{
+#ifdef HAVE_SIGFILLSET
+       sigset_t set, oset;
+#endif
+       u_long pid;
+       size_t len;
+       int isdir, ret;
+       char *trv, buf[MAXPATHLEN];
+
+       /*
+        * Check the target directory; if you have six X's and it doesn't
+        * exist, this runs for a *very* long time.
+        */
+       if ((ret = __db_exists(dir, &isdir)) != 0) {
+               __db_err(dbenv, "%s: %s", dir, strerror(ret));
+               return (ret);
+       }
+       if (!isdir) {
+               __db_err(dbenv, "%s: %s", dir, strerror(EINVAL));
+               return (EINVAL);
+       }
+
+       /* Build the path. */
+#define        DB_TRAIL        "/XXXXXX"
+       if ((len = strlen(dir)) + sizeof(DB_TRAIL) > sizeof(buf)) {
+               /*
+                * Report the directory: buf has not been filled in yet, so
+                * printing it would read uninitialized memory.
+                */
+               __db_err(dbenv,
+                   "tmp_open: %s: %s", dir, strerror(ENAMETOOLONG));
+               return (ENAMETOOLONG);
+       }
+       (void)strcpy(buf, dir);
+       (void)strcpy(buf + len, DB_TRAIL);
+       buf[len] = PATH_SEPARATOR[0];                   /* WIN32 */
+
+       /*
+        * Replace the X's with the process ID.  Pid should be a pid_t,
+        * but we use unsigned long for portability.  The loop walks
+        * backward from the end of the name and stops at the first
+        * character that is not an X; trv is then moved to the first
+        * replaced character.
+        */
+       for (pid = getpid(),
+           trv = buf + len + sizeof(DB_TRAIL) - 1; *--trv == 'X'; pid /= 10)
+               switch (pid % 10) {
+               case 0: *trv = '0'; break;
+               case 1: *trv = '1'; break;
+               case 2: *trv = '2'; break;
+               case 3: *trv = '3'; break;
+               case 4: *trv = '4'; break;
+               case 5: *trv = '5'; break;
+               case 6: *trv = '6'; break;
+               case 7: *trv = '7'; break;
+               case 8: *trv = '8'; break;
+               case 9: *trv = '9'; break;
+               }
+       ++trv;
+
+       /*
+        * Try and open a file.  We block every signal we can get our hands
+        * on so that, if we're interrupted at the wrong time, the temporary
+        * file isn't left around -- of course, if we drop core in-between
+        * the calls we'll hang forever, but that's probably okay.  ;-}
+        */
+#ifdef HAVE_SIGFILLSET
+       (void)sigfillset(&set);
+#endif
+       for (;;) {
+#ifdef HAVE_SIGFILLSET
+               (void)sigprocmask(SIG_BLOCK, &set, &oset);
+#endif
+#define        DB_TEMPOPEN     DB_CREATE | DB_EXCL | DB_TEMPORARY
+               if ((ret = __db_fdopen(buf,
+                   DB_TEMPOPEN, DB_TEMPOPEN, S_IRUSR | S_IWUSR, fdp)) == 0) {
+#ifdef HAVE_SIGFILLSET
+                       (void)sigprocmask(SIG_SETMASK, &oset, NULL);
+#endif
+                       return (0);
+               }
+#ifdef HAVE_SIGFILLSET
+               (void)sigprocmask(SIG_SETMASK, &oset, NULL);
+#endif
+               /*
+                * XXX:
+                * If we don't get an EEXIST error, then there's something
+                * seriously wrong.  Unfortunately, if the implementation
+                * doesn't return EEXIST for O_CREAT and O_EXCL regardless
+                * of other possible errors, we've lost.
+                */
+               if (ret != EEXIST) {
+                       __db_err(dbenv,
+                           "tmp_open: %s: %s", buf, strerror(ret));
+                       return (ret);
+               }
+
+               /*
+                * Tricky little algorithm for backward compatibility.
+                * Assumes the ASCII ordering of lower-case characters.
+                * A 'z' wraps to 'a' and carries into the next character;
+                * a digit becomes 'a'; anything else is incremented.
+                * Running off the end of the name means every candidate
+                * has been tried.
+                */
+               for (;;) {
+                       if (*trv == '\0')
+                               return (EINVAL);
+                       if (*trv == 'z')
+                               *trv++ = 'a';
+                       else {
+                               if (isdigit(*trv))
+                                       *trv = 'a';
+                               else
+                                       ++*trv;
+                               break;
+                       }
+               }
+       }
+       /* NOTREACHED */
+}
diff --git a/db2/common/db_apprec.c b/db2/common/db_apprec.c
new file mode 100644 (file)
index 0000000..b22b0c5
--- /dev/null
@@ -0,0 +1,143 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_apprec.c  10.15 (Sleepycat) 7/27/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <time.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "db_am.h"
+#include "log.h"
+#include "txn.h"
+#include "common_ext.h"
+
+/*
+ * __db_apprec --
+ *     Perform recovery.
+ *
+ *     Makes three passes over the log, handing every record to
+ *     __db_dispatch: forward from the starting LSN with TXN_OPENFILES,
+ *     backward from the last record down to the starting LSN with
+ *     TXN_BACKWARD_ROLL, and forward again with TXN_FORWARD_ROLL.  It
+ *     then stores the maximum transaction id, the checkpoint LSN and the
+ *     current time in the transaction region and takes a checkpoint.
+ *
+ *     Returns 0 once all three passes have run, or the failing return
+ *     value if the transaction list cannot be set up, the first log
+ *     record cannot be read, or a dispatch call returns a negative value.
+ *
+ * PUBLIC: int __db_apprec __P((DB_ENV *, int));
+ */
+int
+__db_apprec(dbenv, flags)
+       DB_ENV *dbenv;
+       int flags;
+{
+       DBT data;
+       DB_LOG *lp;
+       DB_LSN ckp_lsn, first_lsn, lsn, tmp_lsn;
+       time_t now;
+       int first_flag, ret, tret;
+       void *txninfo;
+
+       /* A zero LSN means "no checkpoint seen yet" in the backward pass. */
+       ZERO_LSN(ckp_lsn);
+
+       /*
+        * Initialize the transaction list.
+        *
+        * NOTE(review): nothing in this function releases txninfo, on
+        * either the success or the error paths -- confirm whether that
+        * is handled elsewhere.
+        */
+       if ((ret = __db_txnlist_init(&txninfo)) != 0)
+               return (ret);
+
+       /*
+        * Read forward through the log opening the appropriate files
+        * so that we can call recovery routines.  In general, we start
+        * at the last checkpoint prior to the last checkpointed LSN.
+        * For catastrophic recovery, we begin at the first LSN that
+        * appears in any log file (log figures this out for us when
+        * we pass it the DB_FIRST flag).
+        */
+       lp = dbenv->lg_info;
+       if (LF_ISSET(DB_RECOVER_FATAL))
+               first_flag = DB_FIRST;
+       else
+               /* Fall back to the first record if no checkpoint is found. */
+               first_flag = __log_findckp(lp, &lsn) != 0 ? DB_FIRST : DB_SET;
+
+       memset(&data, 0, sizeof(data));
+       if ((ret = log_get(lp, &lsn, &data, first_flag)) != 0) {
+               __db_err(dbenv, "Failure: unable to get log record");
+               if (first_flag == DB_SET)
+                       __db_err(dbenv, "Retrieving LSN %lu %lu",
+                           (u_long)lsn.file, (u_long)lsn.offset);
+               else
+                       __db_err(dbenv, "Retrieving first LSN");
+               goto err;
+       }
+
+       /*
+        * Pass 1: forward from the starting record.  The loop ends when
+        * log_get returns non-zero; that value is not examined further.
+        */
+       first_lsn = lsn;
+       for (; ret == 0;
+           ret = log_get(dbenv->lg_info, &lsn, &data, DB_NEXT))
+               if ((tret = __db_dispatch(lp,
+                   &data, &lsn, TXN_OPENFILES, txninfo)) < 0) {
+                       ret = tret;
+                       goto msgerr;
+               }
+
+       /*
+        * Pass 2: backward from the last record while the LSN is still
+        * greater than the starting one.  The LSN is saved before the
+        * dispatch call; the first record for which dispatch returns a
+        * positive value becomes the recovery checkpoint LSN.
+        */
+       for (ret = log_get(lp, &lsn, &data, DB_LAST);
+           ret == 0 && log_compare(&lsn, &first_lsn) > 0;
+           ret = log_get(lp,&lsn, &data, DB_PREV)) {
+               tmp_lsn = lsn;
+               tret =
+                   __db_dispatch(lp, &data, &lsn, TXN_BACKWARD_ROLL, txninfo);
+               if (IS_ZERO_LSN(ckp_lsn) && tret > 0)
+                       ckp_lsn = tmp_lsn;
+               if (tret < 0) {
+                       ret = tret;
+                       goto msgerr;
+               }
+       }
+
+       /*
+        * Pass 3: forward again with DB_NEXT, continuing from wherever
+        * the backward pass stopped.
+        */
+       for (ret = log_get(lp, &lsn, &data, DB_NEXT);
+           ret == 0; ret = log_get(lp, &lsn, &data, DB_NEXT))
+               if ((tret = __db_dispatch(lp,
+                   &data, &lsn, TXN_FORWARD_ROLL, txninfo)) < 0) {
+                       ret = tret;
+                       goto msgerr;
+               }
+
+       /* Now close all the db files that are open. */
+       __log_close_files(lp);
+
+       /*
+        * Now set the maximum transaction id, set the last checkpoint lsn,
+        * and the current time.  Then take a checkpoint.
+        */
+       (void)time(&now);
+
+       dbenv->tx_info->region->last_txnid = ((__db_txnhead *)txninfo)->maxid;
+       dbenv->tx_info->region->last_ckp = ckp_lsn;
+       dbenv->tx_info->region->time_ckp = (u_int32_t) now;
+       /* The checkpoint's return value is not checked. */
+       txn_checkpoint(dbenv->tx_info, 0, 0);
+
+       if (dbenv->db_verbose) {
+               __db_err(lp->dbenv, "Recovery complete at %s", ctime(&now));
+               __db_err(lp->dbenv, "%s %lu %s [%lu][%lu]",
+                   "Maximum transaction id",
+                   (u_long)dbenv->tx_info->region->last_txnid,
+                   "Recovery checkpoint",
+                   (u_long)dbenv->tx_info->region->last_ckp.file,
+                   (u_long)dbenv->tx_info->region->last_ckp.offset);
+       }
+
+       return (0);
+
+       /* Dispatch failures report the LSN being processed, then fall out. */
+msgerr:        __db_err(dbenv, "Recovery function for LSN %lu %lu failed",
+           (u_long)lsn.file, (u_long)lsn.offset);
+
+err:   return (ret);
+}
diff --git a/db2/common/db_byteorder.c b/db2/common/db_byteorder.c
new file mode 100644 (file)
index 0000000..d49883e
--- /dev/null
@@ -0,0 +1,56 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_byteorder.c       10.3 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+/*
+ * __db_byteorder --
+ *     Decide whether data kept in byte order "lorder" has to be swapped on
+ *     this host, rejecting any value other than 0, 1234 or 4321.
+ *
+ *     Returns 0 when no swapping is needed (lorder is 0 or is the order
+ *     selected by WORDS_BIGENDIAN), DB_SWAPBYTES when lorder is the other
+ *     recognized order, and EINVAL (after reporting through __db_err) for
+ *     anything else.
+ *
+ * PUBLIC: int __db_byteorder __P((DB_ENV *, int));
+ */
+int
+__db_byteorder(dbenv, lorder)
+       DB_ENV *dbenv;
+       int lorder;
+{
+       int foreign;
+
+       /* The one recognized byte order that differs from the host's. */
+#if defined(WORDS_BIGENDIAN)
+       foreign = 1234;
+#else
+       foreign = 4321;
+#endif
+
+       if (lorder != 0 && lorder != 1234 && lorder != 4321) {
+               __db_err(dbenv,
+                   "illegal byte order, only big and little-endian supported");
+               return (EINVAL);
+       }
+       return (lorder == foreign ? DB_SWAPBYTES : 0);
+}
diff --git a/db2/common/db_err.c b/db2/common/db_err.c
new file mode 100644 (file)
index 0000000..3dc4ca0
--- /dev/null
@@ -0,0 +1,548 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_err.c     10.16 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+
+#ifdef __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+static int __db_rdonly __P((const DB_ENV *, const char *));
+
+/*
+ * __db_err --
+ *     Standard DB error routine.
+ *
+ * PUBLIC: #ifdef __STDC__
+ * PUBLIC: void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
+ * PUBLIC: #else
+ * PUBLIC: void __db_err();
+ * PUBLIC: #endif
+ */
+void
+#ifdef __STDC__
+__db_err(const DB_ENV *dbenv, const char *fmt, ...)
+#else
+__db_err(dbenv, fmt, va_alist)
+       const DB_ENV *dbenv;
+       const char *fmt;
+       va_dcl
+#endif
+{
+       va_list ap;
+       char errbuf[2048];      /* XXX: END OF THE STACK DON'T TRUST SPRINTF. */
+
+       if (dbenv == NULL)
+               return;
+
+#ifdef __STDC__
+       va_start(ap, fmt);
+#else
+       va_start(ap);
+#endif
+       if (dbenv->db_errcall != NULL) {
+               (void)vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
+               dbenv->db_errcall(dbenv->db_errpfx, errbuf);
+       }
+       if (dbenv->db_errfile != NULL) {
+               if (dbenv->db_errpfx != NULL)
+                       (void)fprintf(dbenv->db_errfile, "%s: ",
+                           dbenv->db_errpfx);
+               (void)vfprintf(dbenv->db_errfile, fmt, ap);
+               (void)fprintf(dbenv->db_errfile, "\n");
+               (void)fflush(dbenv->db_errfile);
+       }
+       va_end(ap);
+}
+
+/*
+ * XXX
+ * Provide ANSI C prototypes for the panic functions.  Some compilers, (e.g.,
+ * MS VC 4.2) get upset if they aren't here, even though the K&R declaration
+ * appears before the assignment in the __db_panic() call.
+ */
+static int __db_ecursor __P((DB *, DB_TXN *, DBC **));
+static int __db_edel __P((DB *, DB_TXN *, DBT *, int));
+static int __db_efd __P((DB *, int *));
+static int __db_egp __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int __db_estat __P((DB *, void *, void *(*)(size_t), int));
+static int __db_esync __P((DB *, int));
+
+/*
+ * __db_ecursor --
+ *     Cursor method installed by __db_panic(): ignores its arguments and
+ *     always fails with EPERM.
+ */
+static int
+__db_ecursor(a, b, c)
+       DB *a;
+       DB_TXN *b;
+       DBC **c;
+{
+       /* Arguments are deliberately unused; reference them to quiet lint. */
+       (void)a;
+       (void)b;
+       (void)c;
+
+       return (EPERM);
+}
+
+/*
+ * __db_edel --
+ *     Delete method installed by __db_panic(): ignores its arguments and
+ *     always fails with EPERM.
+ */
+static int
+__db_edel(a, b, c, d)
+       DB *a;
+       DB_TXN *b;
+       DBT *c;
+       int d;
+{
+       /* Arguments are deliberately unused; reference them to quiet lint. */
+       (void)a;
+       (void)b;
+       (void)c;
+       (void)d;
+
+       return (EPERM);
+}
+
+/*
+ * __db_efd --
+ *     File-descriptor method installed by __db_panic(): ignores its
+ *     arguments and always fails with EPERM.
+ */
+static int
+__db_efd(a, b)
+       DB *a;
+       int *b;
+{
+       /* Arguments are deliberately unused; reference them to quiet lint. */
+       (void)a;
+       (void)b;
+
+       return (EPERM);
+}
+
+/*
+ * __db_egp --
+ *     Get/put method installed by __db_panic() (one routine serves both
+ *     slots): ignores its arguments and always fails with EPERM.
+ */
+static int
+__db_egp(a, b, c, d, e)
+       DB *a;
+       DB_TXN *b;
+       DBT *c, *d;
+       int e;
+{
+       /* Arguments are deliberately unused; reference them to quiet lint. */
+       (void)a;
+       (void)b;
+       (void)c;
+       (void)d;
+       (void)e;
+
+       return (EPERM);
+}
+
+/*
+ * __db_estat --
+ *     Stat method installed by __db_panic(): ignores its arguments and
+ *     always fails with EPERM.
+ */
+static int
+__db_estat(a, b, c, d)
+       DB *a;
+       void *b;
+       void *(*c) __P((size_t));
+       int d;
+{
+       /* Arguments are deliberately unused; reference them to quiet lint. */
+       (void)a;
+       (void)b;
+       (void)c;
+       (void)d;
+
+       return (EPERM);
+}
+
+/*
+ * __db_esync --
+ *     Sync method installed by __db_panic(): ignores its arguments and
+ *     always fails with EPERM.
+ */
+static int
+__db_esync(a, b)
+       DB *a;
+       int b;
+{
+       /* Arguments are deliberately unused; reference them to quiet lint. */
+       (void)a;
+       (void)b;
+
+       return (EPERM);
+}
+
+/*
+ * __db_panic --
+ *     Lock out the tree due to unrecoverable error: every method slot of
+ *     the handle that is replaced here will fail with EPERM from now on.
+ *     Always returns EPERM itself.
+ *
+ * PUBLIC: int __db_panic __P((DB *));
+ */
+int
+__db_panic(dbp)
+       DB *dbp;
+{
+       /*
+        * XXX
+        * We should shut down all of the process's cursors, too.
+        *
+        * We should call mpool and have it shut down the file, so we get
+        * other processes sharing this file as well.
+        */
+
+       /* Data access: get and put share one failing routine. */
+       dbp->get = __db_egp;
+       dbp->put = __db_egp;
+       dbp->del = __db_edel;
+       dbp->cursor = __db_ecursor;
+
+       /* Housekeeping methods. */
+       dbp->fd = __db_efd;
+       dbp->stat = __db_estat;
+       dbp->sync = __db_esync;
+
+       return (EPERM);
+}
+
+/* Check for invalid flags. */
+#undef DB_CHECK_FLAGS
+#define        DB_CHECK_FLAGS(dbenv, name, flags, ok_flags)                    \
+       if ((flags) & ~(ok_flags))                                      \
+               return (__db_ferr(dbenv, name, 0));
+/* Check for invalid flag combinations. */
+#undef DB_CHECK_FCOMBO
+#define        DB_CHECK_FCOMBO(dbenv, name, flags, flag1, flag2)               \
+       if ((flags) & (flag1) && (flags) & (flag2))                     \
+               return (__db_ferr(dbenv, name, 1));
+
+/*
+ * __db_fchk --
+ *     General flags checking routine.  Returns 0 when every bit set in
+ *     flags is also set in ok_flags; otherwise reports the problem through
+ *     __db_ferr() and returns its result.
+ *
+ * PUBLIC: int __db_fchk __P((DB_ENV *, char *, int, int));
+ */
+int
+__db_fchk(dbenv, name, flags, ok_flags)
+       DB_ENV *dbenv;
+       const char *name;
+       int flags, ok_flags;
+{
+       /* Any bit outside the permitted set is an error. */
+       if (flags & ~ok_flags)
+               return (__db_ferr(dbenv, name, 0));
+
+       return (0);
+}
+
+/*
+ * __db_fcchk --
+ *     General combination flags checking routine.  Returns 0 unless flags
+ *     contains bits from both flag1 and flag2, in which case the problem is
+ *     reported through __db_ferr() and its result is returned.
+ *
+ * PUBLIC: int __db_fcchk __P((DB_ENV *, char *, int, int, int));
+ */
+int
+__db_fcchk(dbenv, name, flags, flag1, flag2)
+       DB_ENV *dbenv;
+       const char *name;
+       int flags, flag1, flag2;
+{
+       /* The two flags are mutually exclusive. */
+       if ((flags & flag1) && (flags & flag2))
+               return (__db_ferr(dbenv, name, 1));
+
+       return (0);
+}
+
+/*
+ * __db_cdelchk --
+ *     Common cursor delete argument checking routine.
+ *
+ *     Checks are made in this order: a read-only tree is rejected through
+ *     __db_rdonly(), any flag bit is rejected through __db_ferr(), and an
+ *     uninitialized cursor yields EINVAL.  Returns 0 otherwise.
+ *
+ * PUBLIC: int __db_cdelchk __P((const DB *, int, int, int));
+ */
+int
+__db_cdelchk(dbp, flags, isrdonly, isvalid)
+       const DB *dbp;
+       int flags, isrdonly, isvalid;
+{
+       /* Check for changes to a read-only tree. */
+       if (isrdonly)
+               return (__db_rdonly(dbp->dbenv, "c_del"));
+
+       /* dbc->c_del() accepts no flags at all. */
+       if (flags != 0)
+               return (__db_ferr(dbp->dbenv, "c_del", 0));
+
+       /* The cursor must be initialized. */
+       if (!isvalid)
+               return (EINVAL);
+
+       return (0);
+}
+
+/*
+ * __db_cgetchk --
+ *     Common cursor get argument checking routine.
+ *
+ *     Validates the c_get() operation code, the flag bits of the key and
+ *     data DBTs and, for DB_AM_THREAD handles, that the DBTs name a memory
+ *     policy.  Flag problems are reported through __db_ferr(); DB_CURRENT
+ *     on an uninitialized cursor yields EINVAL.  Returns 0 otherwise.
+ *
+ * PUBLIC: int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int));
+ */
+int
+__db_cgetchk(dbp, key, data, flags, isvalid)
+       const DB *dbp;
+       DBT *key, *data;
+       int flags, isvalid;
+{
+       int check_key;
+
+       /* Check for invalid dbc->c_get() function flags. */
+       switch (flags) {
+       case DB_SET:
+               check_key = 0;
+               break;
+       case DB_CURRENT:
+       case DB_FIRST:
+       case DB_LAST:
+       case DB_NEXT:
+       case DB_PREV:
+       case DB_SET_RANGE:
+               check_key = 1;
+               break;
+       case DB_SET_RECNO:
+       case DB_GET_RECNO:
+               /* Rejected unless DB_BT_RECNUM is set on the handle. */
+               if (F_ISSET(dbp, DB_BT_RECNUM)) {
+                       check_key = 1;
+                       break;
+               }
+               /* FALLTHROUGH */
+       default:
+               return (__db_ferr(dbp->dbenv, "c_get", 0));
+       }
+
+       /* Check for invalid key/data flags. */
+       if (key->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
+               return (__db_ferr(dbp->dbenv, "key", 0));
+       if (data->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
+               return (__db_ferr(dbp->dbenv, "data", 0));
+
+       /* Check dbt's for valid flags when multi-threaded. */
+       if (F_ISSET(dbp, DB_AM_THREAD)) {
+               if (!F_ISSET(data, DB_DBT_USERMEM | DB_DBT_MALLOC))
+                       return (__db_ferr(dbp->dbenv, "threaded data", 1));
+               if (check_key &&
+                   !F_ISSET(key, DB_DBT_USERMEM | DB_DBT_MALLOC))
+                       return (__db_ferr(dbp->dbenv, "threaded key", 1));
+       }
+
+       /* Only DB_CURRENT requires an already initialized cursor. */
+       if (flags == DB_CURRENT && !isvalid)
+               return (EINVAL);
+
+       return (0);
+}
+
+/*
+ * __db_cputchk --
+ *     Common cursor put argument checking routine.
+ *
+ *     Rejects writes to a read-only tree (through __db_rdonly()), operation
+ *     codes that are unknown or not permitted for the handle's type, and
+ *     unknown flag bits in the key/data DBTs (through __db_ferr()).  The key
+ *     DBT is only examined for DB_KEYFIRST and DB_KEYLAST.  Every other
+ *     operation is relative to the cursor's position, so it yields EINVAL
+ *     when the cursor is not initialized.  Returns 0 otherwise.
+ *
+ * PUBLIC: int __db_cputchk __P((const DB *,
+ * PUBLIC:    const DBT *, DBT *, int, int, int));
+ */
+int
+__db_cputchk(dbp, key, data, flags, isrdonly, isvalid)
+       const DB *dbp;
+       const DBT *key;
+       DBT *data;
+       int flags, isrdonly, isvalid;
+{
+       int check_key;
+
+       /* Check for changes to a read-only tree. */
+       if (isrdonly)
+               return (__db_rdonly(dbp->dbenv, "c_put"));
+
+       /* Check for invalid dbc->c_put() function flags. */
+       check_key = 0;
+       switch (flags) {
+       case DB_AFTER:
+       case DB_BEFORE:
+               if (dbp->type == DB_RECNO && !F_ISSET(dbp, DB_RE_RENUMBER))
+                       goto err;
+               if (dbp->type != DB_RECNO && !F_ISSET(dbp, DB_AM_DUP))
+                       goto err;
+               break;
+       case DB_CURRENT:
+               break;
+       case DB_KEYFIRST:
+       case DB_KEYLAST:
+               if (dbp->type == DB_RECNO)
+                       goto err;
+               check_key = 1;
+               break;
+       default:
+err:           return (__db_ferr(dbp->dbenv, "c_put", 0));
+       }
+
+       /*
+        * Check for invalid key/data flags.  The macro invocation is braced
+        * so that it cannot capture a later else.
+        */
+       if (check_key) {
+               DB_CHECK_FLAGS(dbp->dbenv, "key", key->flags,
+                   DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
+       }
+       DB_CHECK_FLAGS(dbp->dbenv, "data", data->flags,
+           DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL);
+
+       /*
+        * The cursor must be initialized for anything other than DB_KEYFIRST
+        * and DB_KEYLAST, return EINVAL for an invalid cursor, otherwise 0.
+        *
+        * (The test used to be inverted: it rejected DB_KEYFIRST/DB_KEYLAST
+        * on an uninitialized cursor and let the position-relative operations
+        * through.)
+        */
+       if (isvalid || flags == DB_KEYFIRST || flags == DB_KEYLAST)
+               return (0);
+
+       return (EINVAL);
+}
+
+/*
+ * __db_delchk --
+ *     Common delete argument checking routine.  A read-only tree is
+ *     rejected through __db_rdonly(), any flag bit through __db_ferr();
+ *     returns 0 otherwise.
+ *
+ * PUBLIC: int __db_delchk __P((const DB *, int, int));
+ */
+int
+__db_delchk(dbp, flags, isrdonly)
+       const DB *dbp;
+       int flags, isrdonly;
+{
+       /* Check for changes to a read-only tree. */
+       if (isrdonly)
+               return (__db_rdonly(dbp->dbenv, "delete"));
+
+       /* db->del() accepts no flags at all. */
+       if (flags != 0)
+               return (__db_ferr(dbp->dbenv, "delete", 0));
+
+       return (0);
+}
+
+/*
+ * __db_getchk --
+ *     Common get argument checking routine.  Every failure is reported
+ *     through __db_ferr() and its result returned; returns 0 when the
+ *     function flags and both DBTs are acceptable.
+ *
+ * PUBLIC: int __db_getchk __P((const DB *, const DBT *, DBT *, int));
+ */
+int
+__db_getchk(dbp, key, data, flags)
+       const DB *dbp;
+       const DBT *key;
+       DBT *data;
+       int flags;
+{
+       int ok_get;
+
+       /* DB_SET_RECNO is the only db->get() flag, and only with RECNUM. */
+       ok_get = F_ISSET(dbp, DB_BT_RECNUM) ? DB_SET_RECNO : 0;
+       if (flags & ~ok_get)
+               return (__db_ferr(dbp->dbenv, "get", 0));
+
+       /* The key DBT may not carry any flags. */
+       if (key->flags != 0)
+               return (__db_ferr(dbp->dbenv, "key", 0));
+
+       /* The data DBT may carry the memory-policy and partial flags... */
+       if (data->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
+               return (__db_ferr(dbp->dbenv, "data", 0));
+
+       /* ...but not both memory policies at once... */
+       if ((data->flags & DB_DBT_MALLOC) && (data->flags & DB_DBT_USERMEM))
+               return (__db_ferr(dbp->dbenv, "data", 1));
+
+       /* ...and a threaded handle requires one of them. */
+       if (F_ISSET(dbp, DB_AM_THREAD) &&
+           !F_ISSET(data, DB_DBT_MALLOC | DB_DBT_USERMEM))
+               return (__db_ferr(dbp->dbenv, "threaded data", 1));
+
+       return (0);
+}
+
+/*
+ * __db_putchk --
+ *     Common put argument checking routine.  Rejects writes to a read-only
+ *     tree (through __db_rdonly()), unknown function or DBT flags (through
+ *     __db_ferr()), and a partial put when isdup is set (EINVAL).  Returns
+ *     0 otherwise.
+ *
+ * PUBLIC: int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int));
+ */
+int
+__db_putchk(dbp, key, data, flags, isrdonly, isdup)
+       const DB *dbp;
+       DBT *key;
+       const DBT *data;
+       int flags, isrdonly, isdup;
+{
+       int ok_put;
+
+       /* Check for changes to a read-only tree. */
+       if (isrdonly)
+               return (__db_rdonly(dbp->dbenv, "put"));
+
+       /* DB_NOOVERWRITE is always allowed, DB_APPEND only for DB_RECNO. */
+       ok_put = DB_NOOVERWRITE;
+       if (dbp->type == DB_RECNO)
+               ok_put |= DB_APPEND;
+       if (flags & ~ok_put)
+               return (__db_ferr(dbp->dbenv, "put", 0));
+
+       /* The key DBT may not carry any flags. */
+       if (key->flags != 0)
+               return (__db_ferr(dbp->dbenv, "key", 0));
+
+       /* Check for invalid data flags and flag combinations. */
+       if (data->flags & ~(DB_DBT_MALLOC | DB_DBT_USERMEM | DB_DBT_PARTIAL))
+               return (__db_ferr(dbp->dbenv, "data", 0));
+       if ((data->flags & DB_DBT_MALLOC) && (data->flags & DB_DBT_USERMEM))
+               return (__db_ferr(dbp->dbenv, "data", 1));
+
+       /* Check for partial puts in the presence of duplicates. */
+       if (!isdup || !F_ISSET(data, DB_DBT_PARTIAL))
+               return (0);
+
+       __db_err(dbp->dbenv,
+"a partial put in the presence of duplicates requires a cursor operation");
+       return (EINVAL);
+}
+
+/*
+ * __db_statchk --
+ *     Common stat argument checking routine.  DB_RECORDCOUNT is the only
+ *     accepted flag, and on a DB_BTREE handle it additionally requires
+ *     DB_BT_RECNUM.  Violations are reported through __db_ferr(); returns
+ *     0 otherwise.
+ *
+ * PUBLIC: int __db_statchk __P((const DB *, int));
+ */
+int
+__db_statchk(dbp, flags)
+       const DB *dbp;
+       int flags;
+{
+       /* Check for invalid db->stat() function flags. */
+       if (flags & ~DB_RECORDCOUNT)
+               return (__db_ferr(dbp->dbenv, "stat", 0));
+
+       /* Nothing further to check unless a record count was requested. */
+       if (!LF_ISSET(DB_RECORDCOUNT))
+               return (0);
+
+       if (dbp->type == DB_BTREE && !F_ISSET(dbp, DB_BT_RECNUM))
+               return (__db_ferr(dbp->dbenv, "stat", 0));
+
+       return (0);
+}
+
+/*
+ * __db_syncchk --
+ *     Common sync argument checking routine.  Any flag bit is reported
+ *     through __db_ferr(); returns 0 when flags is 0.
+ *
+ * PUBLIC: int __db_syncchk __P((const DB *, int));
+ */
+int
+__db_syncchk(dbp, flags)
+       const DB *dbp;
+       int flags;
+{
+       /* db->sync() accepts no flags at all. */
+       if (flags != 0)
+               return (__db_ferr(dbp->dbenv, "sync", 0));
+
+       return (0);
+}
+
+/*
+ * __db_ferr --
+ *     Common flag errors.  Reports an illegal flag (combo == 0) or an
+ *     illegal flag combination (combo != 0) for the interface called
+ *     "name" through __db_err(), and always returns EINVAL.
+ *
+ * PUBLIC: int __db_ferr __P((const DB_ENV *, char *, int));
+ */
+int
+__db_ferr(dbenv, name, combo)
+       const DB_ENV *dbenv;
+       const char *name;
+       int combo;
+{
+       const char *what;
+
+       /* Optional word that turns "flag" into "flag combination". */
+       what = "";
+       if (combo)
+               what = "combination ";
+
+       __db_err(dbenv, "illegal flag %sspecified to %s", what, name);
+       return (EINVAL);
+}
+
+/*
+ * __db_rdonly --
+ *     Common readonly message.  Reports through __db_err() that the
+ *     operation called "name" tried to modify a read-only tree, and always
+ *     returns EACCES.
+ */
+static int
+__db_rdonly(dbenv, name)
+       const DB_ENV *dbenv;
+       const char *name;
+{
+       __db_err(dbenv, "%s: attempt to modify a read-only tree", name);
+       return (EACCES);
+}
diff --git a/db2/common/db_log2.c b/db2/common/db_log2.c
new file mode 100644 (file)
index 0000000..9af0111
--- /dev/null
@@ -0,0 +1,68 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_log2.c    10.3 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+/*
+ * __db_log2 --
+ *     Return the smallest i such that 2^i >= num, i.e. the base-2
+ *     logarithm of num rounded up.  0 and 1 both yield 0.
+ *
+ *     The loop is capped at 32 iterations: for num > 2^31 the 32-bit
+ *     "limit" would otherwise be shifted to 0 and the loop would never
+ *     terminate.  Such values yield 32.
+ *
+ * PUBLIC: u_int32_t __db_log2 __P((u_int32_t));
+ */
+u_int32_t
+__db_log2(num)
+       u_int32_t num;
+{
+       u_int32_t i, limit;
+
+       limit = 1;
+       for (i = 0; i < 32 && limit < num; limit = limit << 1, i++)
+               ;
+       return (i);
+}
diff --git a/db2/common/db_region.c b/db2/common/db_region.c
new file mode 100644 (file)
index 0000000..51f8f44
--- /dev/null
@@ -0,0 +1,565 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Harvard by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_region.c  10.12 (Sleepycat) 7/26/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+static int __db_rmap __P((DB_ENV *, int, size_t, void *));
+
+/*
+ * __db_rcreate --
+ *
+ * Common interface for creating a shared region.  Handles synchronization
+ * across multiple processes.
+ *
+ * The dbenv contains the environment for this process, including naming
+ * information.  The path argument represents the parameters passed to
+ * the open routines and may be either a file or a directory.  If it is
+ * a directory, it must exist.  If it is a file, then the file parameter
+ * must be NULL, otherwise, file is the name to be created inside the
+ * directory path.
+ *
+ * The function returns 0 on success, after storing the address of the
+ * mapped region through retp and the open file descriptor through fdp.
+ * The region is returned with its lock held: the lock is acquired below
+ * and not released in this function.  On failure a non-zero error value
+ * is returned and, if a descriptor had been obtained, the file is
+ * unlinked and closed.  Note that *fdp is assigned before the region is
+ * grown and mapped, so after a later failure it refers to a descriptor
+ * that has already been closed.
+ *
+ * PUBLIC: int __db_rcreate __P((DB_ENV *, APPNAME,
+ * PUBLIC:    const char *, const char *, int, size_t, int *, void *));
+ */
+int
+__db_rcreate(dbenv, appname, path, file, mode, size, fdp, retp)
+       DB_ENV *dbenv;
+       APPNAME appname;
+       const char *path, *file;
+       int mode, *fdp;
+       size_t size;
+       void *retp;
+{
+       RLAYOUT *rp;
+       int fd, ret;
+       char *name;
+
+       fd = -1;
+       rp = NULL;
+
+       /*
+        * Get the filename -- note, if it's a temporary file, it will
+        * be created by the underlying temporary file creation code,
+        * so we have to check the file descriptor to be sure it's an
+        * error.
+        */
+       if ((ret = __db_appname(dbenv, appname, path, file, &fd, &name)) != 0)
+               return (ret);
+
+       /*
+        * Now open the file. We need to make sure that multiple processes
+        * that attempt to create the region at the same time are properly
+        * ordered, so we open it O_EXCL and O_CREAT so two simultaneous
+        * attempts to create the region will return failure in one of the
+        * attempts.
+        *
+        * EEXIST is returned to the caller without an error message.
+        */
+       if (fd == -1 && (ret = __db_fdopen(name,
+           DB_CREATE | DB_EXCL, DB_CREATE | DB_EXCL, mode, &fd)) != 0) {
+               if (ret != EEXIST)
+                       __db_err(dbenv,
+                           "region create: %s: %s", name, strerror(ret));
+               goto err;
+       }
+       *fdp = fd;
+
+       /* Grow the region to the correct size. */
+       if ((ret = __db_rgrow(dbenv, fd, size)) != 0)
+               goto err;
+
+       /* Map the region in. */
+       if ((ret = __db_rmap(dbenv, fd, size, &rp)) != 0)
+               goto err;
+
+       /*
+        * Initialize the common information.
+        *
+        * !!!
+        * We have to order the region creates so that two processes don't try
+        * to simultaneously create the region and so that processes that are
+        * joining the region never see inconsistent data.  We'd like to play
+        * file permissions games, but we can't because WNT filesystems won't
+        * open a file mode 0.
+        *
+        * So, the process that's creating the region always acquires the lock
+        * before setting the version number.  Any process joining always
+        * checks the version number before attempting to acquire the lock.
+        *
+        * We have to check the version number first, because if the version
+        * number has not been written, it's possible that the mutex has not
+        * been initialized in which case an attempt to get it could lead to
+        * random behavior.  If the version number isn't there (the file size
+        * is too small) or it's 0, we know that the region is being created.
+        */
+       (void)__db_mutex_init(&rp->lock, MUTEX_LOCK_OFFSET(rp, &rp->lock));
+       (void)__db_mutex_lock(&rp->lock,
+           fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+       rp->refcnt = 1;
+       rp->size = size;
+       rp->flags = 0;
+       /* The version is written last, while the lock is held (see above). */
+       db_version(&rp->majver, &rp->minver, &rp->patch);
+
+       if (name != NULL)
+               FREES(name);
+
+       *(void **)retp = rp;
+       return (0);
+
+       /*
+        * Error exit.  Nothing jumps here after __db_rmap() has succeeded.
+        * NOTE(review): rp is therefore expected to be NULL here unless
+        * __db_rmap() can store a mapping and still fail -- confirm, since
+        * rp->size has not been written at that point.
+        */
+err:   if (fd != -1) {
+               if (rp != NULL)
+                       (void)__db_munmap(rp, rp->size);
+               (void)__db_unlink(name);
+               (void)__db_close(fd);
+       }
+       if (name != NULL)
+               FREES(name);
+       return (ret);
+}
+
+/*
+ * __db_ropen --
+ *     Construct the name of a file, open it and map it in.
+ *
+ *     On success returns 0, stores the descriptor through fdp and the
+ *     mapped region through retp, and has incremented the region's
+ *     reference count; the region lock is not held on return.  Returns
+ *     EAGAIN when the region looks incomplete (file smaller than RLAYOUT,
+ *     zero major version, size changed while joining) or is marked
+ *     DB_R_DELETED, and the error from the failing call otherwise.  On
+ *     failure the mapping and the descriptor are released; *fdp may then
+ *     refer to the closed descriptor.
+ *
+ *     With DB_MUTEXDEBUG set in flags the region lock is never taken.
+ *
+ * PUBLIC: int __db_ropen __P((DB_ENV *,
+ * PUBLIC:    APPNAME, const char *, const char *, int, int *, void *));
+ */
+int
+__db_ropen(dbenv, appname, path, file, flags, fdp, retp)
+       DB_ENV *dbenv;
+       APPNAME appname;
+       const char *path, *file;
+       int flags, *fdp;
+       void *retp;
+{
+       RLAYOUT *rp;
+       off_t size1, size2;
+       int fd, ret;
+       char *name;
+
+       fd = -1;
+       rp = NULL;
+
+       /* Get the filename. */
+       if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
+               return (ret);
+
+       /* Open the file. */
+       if ((ret = __db_fdopen(name, flags, DB_MUTEXDEBUG, 0, &fd)) != 0) {
+               __db_err(dbenv, "region open: %s: %s", name, strerror(ret));
+               goto err2;
+       }
+
+       *fdp = fd;
+
+       /*
+        * Map the file in.  We have to do things in a strange order so that
+        * we don't get into a situation where the file was just created and
+        * isn't yet initialized.  See the comment in __db_rcreate() above.
+        *
+        * XXX
+        * We'd like to test to see if the file is too big to mmap.  Since we
+        * don't know what size or type off_t's or size_t's are, or the largest
+        * unsigned integral type is, or what random insanity the local C
+        * compiler will perpetrate, doing the comparison in a portable way is
+        * flatly impossible.  Hope that mmap fails if the file is too large.
+        *
+        */
+       if ((ret = __db_stat(dbenv, name, fd, &size1, NULL)) != 0)
+               goto err2;
+
+       /* Check to make sure the first block has been written. */
+       if ((size_t) size1 < sizeof(RLAYOUT)) {
+               ret = EAGAIN;
+               goto err2;
+       }
+
+       /* Map in whatever is there. */
+       if ((ret = __db_rmap(dbenv, fd, size1, &rp)) != 0)
+               goto err2;
+
+       /*
+        * Check to make sure the region has been initialized.  We can't just
+        * grab the lock because the lock may not have been initialized yet.
+        */
+       if (rp->majver == 0) {
+               ret = EAGAIN;
+               goto err2;
+       }
+
+       /* Get the region lock. */
+       if (!LF_ISSET(DB_MUTEXDEBUG))
+               (void)__db_mutex_lock(&rp->lock,
+                   fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+       /*
+        * The file may have been half-written if we were descheduled between
+        * getting the size of the file and checking the major version.  Check
+        * to make sure we got the entire file.
+        */
+       if ((ret = __db_stat(dbenv, name, fd, &size2, NULL)) != 0)
+               goto err1;
+       if (size1 != size2) {
+               ret = EAGAIN;
+               goto err1;
+       }
+
+       /* The file may have just been deleted. */
+       if (F_ISSET(rp, DB_R_DELETED)) {
+               ret = EAGAIN;
+               goto err1;
+       }
+
+       /* Increment the reference count. */
+       ++rp->refcnt;
+
+       /* Release the lock. */
+       if (!LF_ISSET(DB_MUTEXDEBUG))
+               (void)__db_mutex_unlock(&rp->lock, fd);
+
+       FREES(name);
+
+       *(void **)retp = rp;
+       return (0);
+
+err1:  if (!LF_ISSET(DB_MUTEXDEBUG))
+               (void)__db_mutex_unlock(&rp->lock, fd);
+       /*
+        * Unmap exactly what was mapped above (size1 bytes).  The size field
+        * stored in the region cannot be trusted on these paths: when the
+        * region is still being created (majver == 0) it has not been
+        * written yet, and unmapping by it would leave the mapping in place.
+        * rp is only non-NULL after size1 has been obtained.
+        */
+err2:  if (rp != NULL)
+               (void)__db_munmap(rp, (size_t)size1);
+       if (fd != -1)
+               (void)__db_close(fd);
+       FREES(name);
+       return (ret);
+}
+
+/*
+ * __db_rclose --
+ *     Close a shared memory region.
+ *
+ *     Takes the region lock, drops one reference, releases the lock, unmaps
+ *     the region and closes fd.  Returns 0 when every step succeeded.
+ *     Otherwise the first failure is reported through __db_err() and its
+ *     error value returned; later steps are still attempted, except that a
+ *     failure to get the lock skips everything else (the region then stays
+ *     mapped and fd stays open).
+ *
+ * PUBLIC: int __db_rclose __P((DB_ENV *, int, void *));
+ */
+int
+__db_rclose(dbenv, fd, ptr)
+       DB_ENV *dbenv;
+       int fd;
+       void *ptr;
+{
+       RLAYOUT *rp;
+       int ret, t_ret;
+       const char *fail;
+
+       rp = ptr;
+       fail = NULL;            /* Name of the first step that failed. */
+
+       /* Get the lock. */
+       if ((ret = __db_mutex_lock(&rp->lock,
+           fd, dbenv == NULL ? NULL : dbenv->db_yield)) != 0) {
+               fail = "lock get";
+               goto err;
+       }
+
+       /* Decrement the reference count. */
+       --rp->refcnt;
+
+       /* Release the lock. */
+       if ((t_ret = __db_mutex_unlock(&rp->lock, fd)) != 0 && fail == NULL) {
+               ret = t_ret;
+               fail = "lock release";
+       }
+
+       /* Discard the region; rp->size is read before the unmap happens. */
+       if ((t_ret = __db_munmap(ptr, rp->size)) != 0 && fail == NULL) {
+               ret = t_ret;
+               fail = "munmap";
+       }
+
+       /* Close the underlying file descriptor. */
+       if ((t_ret = __db_close(fd)) != 0 && fail == NULL) {
+               ret = t_ret;
+               fail = "close";
+       }
+
+       if (fail == NULL)
+               return (0);
+
+err:   __db_err(dbenv, "region detach: %s: %s", fail, strerror(ret));
+       return (ret);
+}
+
+/*
+ * __db_runlink --
+ *     Remove a shared memory region.
+ *
+ *     Returns 0 when the region file is gone (including when it did not
+ *     exist to begin with).  With "force" set the file is simply unlinked.
+ *     Otherwise the region is joined, refused with EBUSY if anyone else
+ *     holds a reference or with ENOENT if it is already being deleted,
+ *     marked DB_R_DELETED, detached and unlinked.  Failures are reported
+ *     through __db_err() and returned as a non-zero error value.
+ *
+ * PUBLIC: int __db_runlink __P((DB_ENV *,
+ * PUBLIC:    APPNAME, const char *, const char *, int));
+ */
+int
+__db_runlink(dbenv, appname, path, file, force)
+       DB_ENV *dbenv;
+       APPNAME appname;
+       const char *path, *file;
+       int force;
+{
+       RLAYOUT *rp;
+       int cnt, fd, ret, t_ret;
+       char *name;
+
+       rp = NULL;
+
+       /* Get the filename. */
+       if ((ret = __db_appname(dbenv, appname, path, file, NULL, &name)) != 0)
+               return (ret);
+
+       /* If the file doesn't exist, we're done; don't leak the name. */
+       if (__db_exists(name, NULL)) {
+               FREES(name);
+               return (0);             /* XXX: ENOENT? */
+       }
+
+       /*
+        * If we're called with a force flag, try and unlink the file.  This
+        * may not succeed if the file is currently open, but there's nothing
+        * we can do about that.  There is a race condition between the check
+        * for existence above and the actual unlink.  If someone else snuck
+        * in and removed it before we do the remove, then we might get an
+        * ENOENT error.  If we get the ENOENT, we treat it as success, just
+        * as we do above.
+        */
+       if (force) {
+               if ((ret = __db_unlink(name)) != 0 && ret != ENOENT)
+                       goto err1;
+               FREES(name);
+               return (0);
+       }
+
+       /* Open and lock the region. */
+       if ((ret = __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
+               goto err1;
+       (void)__db_mutex_lock(&rp->lock,
+           fd, dbenv == NULL ? NULL : dbenv->db_yield);
+
+       /* If the region is currently being deleted, fail. */
+       if (F_ISSET(rp, DB_R_DELETED)) {
+               ret = ENOENT;           /* XXX: ENOENT? */
+               goto err2;
+       }
+
+       /*
+        * If the region is currently in use by someone else, fail.  Our own
+        * __db_ropen() above accounts for one reference.
+        */
+       if (rp->refcnt > 1) {
+               ret = EBUSY;
+               goto err2;
+       }
+
+       /* Set the delete flag. */
+       F_SET(rp, DB_R_DELETED);
+
+       /*
+        * Release the lock and close the region.  A failed close has to be
+        * returned as an error: jumping to the error exit with ret still 0
+        * would report success without the file having been removed.
+        */
+       (void)__db_mutex_unlock(&rp->lock, fd);
+       if ((ret = __db_rclose(dbenv, fd, rp)) != 0)
+               goto err1;
+
+       /*
+        * Unlink the region.  There's a race here -- other threads or
+        * processes might be opening the region while we're trying to
+        * remove it.  They'll fail, because we've set the DELETED flag,
+        * but they could still stop us from succeeding in the unlink.
+        */
+       for (cnt = 5; cnt > 0; --cnt) {
+               if ((ret = __db_unlink(name)) == 0)
+                       break;
+               (void)__db_sleep(0, 250000);
+       }
+       if (ret == 0) {
+               FREES(name);
+               return (0);
+       }
+
+       /*
+        * Not a clue.  Try to clear the DB_R_DELETED flag.  The unlink error
+        * stays in ret so that it is what gets reported and returned, whether
+        * or not the flag can be cleared.
+        *
+        * NOTE(review): __db_ropen() returns EAGAIN for a region that has
+        * DB_R_DELETED set, so this reopen looks unable to succeed as long as
+        * the flag set above is still visible -- confirm.
+        */
+       if ((t_ret =
+           __db_ropen(dbenv, appname, path, file, 0, &fd, &rp)) != 0)
+               goto err1;
+       (void)__db_mutex_lock(&rp->lock,
+           fd, dbenv == NULL ? NULL : dbenv->db_yield);
+       F_CLR(rp, DB_R_DELETED);
+       /* FALLTHROUGH */
+
+err2:  (void)__db_mutex_unlock(&rp->lock, fd);
+       (void)__db_rclose(dbenv, fd, rp);
+err1:  __db_err(dbenv, "region unlink: %s: %s", name, strerror(ret));
+       FREES(name);
+       return (ret);
+}
+
+/*
+ * DB creates all regions on 4K boundaries so that we don't make the
+ * underlying VM unhappy.
+ */
+#define        __DB_VMPAGESIZE (4 * 1024)
+
+/*
+ * __db_rgrow --
+ *     Extend a region by a specified amount.
+ *
+ *     fd is the open descriptor of the region's file and incr the number
+ *     of bytes to add.  Without MMAP_INIT_NEEDED incr is rounded up to a
+ *     multiple of __DB_VMPAGESIZE and only the last new page is written;
+ *     with it, every new page is written.  An incr of 0 writes nothing.
+ *     Returns 0 on success; on failure the error (EIO for a short write)
+ *     is reported through __db_err() and returned.
+ *
+ * PUBLIC: int __db_rgrow __P((DB_ENV *, int, size_t));
+ */
+int
+__db_rgrow(dbenv, fd, incr)
+       DB_ENV *dbenv;
+       int fd;
+       size_t incr;
+{
+#ifdef MMAP_INIT_NEEDED
+       size_t i;
+#endif
+       ssize_t nw;
+       int ret;
+       char buf[__DB_VMPAGESIZE];
+
+       /* Seek to the end of the region. */
+       if ((ret = __db_lseek(fd, 0, 0, 0, SEEK_END)) != 0)
+               goto err;
+
+       /* Write nuls to the new bytes. */
+       memset(buf, 0, sizeof(buf));
+
+       /*
+        * Historically, some systems required that all of the bytes of the
+        * region be written before you could mmap it and access it randomly.
+        */
+#ifdef MMAP_INIT_NEEDED
+       /* Extend the region by writing each new page. */
+       for (i = 0; i < incr; i += __DB_VMPAGESIZE) {
+               if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
+                       goto err;
+               if (nw != sizeof(buf))
+                       goto eio;
+       }
+#else
+       /*
+        * A zero-byte extension has nothing to write.  Return before the
+        * arithmetic below: incr is unsigned, so rounding 0 leaves 0 and
+        * "incr - __DB_VMPAGESIZE" would wrap around to a bogus offset.
+        */
+       if (incr == 0)
+               return (0);
+
+       /*
+        * Extend the region by writing the last page.
+        *
+        * Round off the increment to the next page boundary.
+        */
+       incr += __DB_VMPAGESIZE - 1;
+       incr -= incr % __DB_VMPAGESIZE;
+
+       /* Write the last page, not the page after the last. */
+       if ((ret = __db_lseek(fd, 0, 0, incr - __DB_VMPAGESIZE, SEEK_CUR)) != 0)
+               goto err;
+       if ((ret = __db_write(fd, buf, sizeof(buf), &nw)) != 0)
+               goto err;
+       if (nw != sizeof(buf))
+               goto eio;
+#endif
+       return (0);
+
+eio:   ret = EIO;
+err:   __db_err(dbenv, "region grow: %s", strerror(ret));
+       return (ret);
+}
+
+/*
+ * __db_rremap --
+ *     Unmap the old region and map in a new region of a new size.  If
+ *     the unmap fails, the error is reported and returned and no new
+ *     mapping is attempted.  Otherwise the result of __db_rmap() is
+ *     returned: 0 with the address of the new region stored through
+ *     retp, or a non-zero error.
+ *
+ * PUBLIC: int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
+ */
+int
+__db_rremap(dbenv, ptr, oldsize, newsize, fd, retp)
+       DB_ENV *dbenv;
+       void *ptr, *retp;
+       size_t oldsize, newsize;
+       int fd;
+{
+       int ret;
+
+       /* Drop the old mapping; only then is it safe to map the new one. */
+       ret = __db_munmap(ptr, oldsize);
+       if (ret == 0)
+               return (__db_rmap(dbenv, fd, newsize, retp));
+
+       __db_err(dbenv, "region remap: munmap: %s", strerror(ret));
+       return (ret);
+}
+
+/*
+ * __db_rmap --
+ *     Attach to a shared memory region.
+ *
+ *     Maps size bytes of fd and stores the mapped address through retp
+ *     (which is treated as a void **).  Returns 0 on success, or the
+ *     __db_mmap() error after reporting it through __db_err().
+ */
+static int
+__db_rmap(dbenv, fd, size, retp)
+       DB_ENV *dbenv;
+       int fd;
+       size_t size;
+       void *retp;
+{
+       RLAYOUT *region;
+       int ret;
+
+       ret = __db_mmap(fd, size, 0, 0, &region);
+       if (ret != 0) {
+               __db_err(dbenv, "region map: mmap %s", strerror(ret));
+               return (ret);
+       }
+
+       /* The size recorded in the region header only ever grows. */
+       if (size > region->size)
+               region->size = size;
+
+       *(void **)retp = region;
+       return (0);
+}
diff --git a/db2/common/db_salloc.c b/db2/common/db_salloc.c
new file mode 100644 (file)
index 0000000..f0202dd
--- /dev/null
@@ -0,0 +1,290 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_salloc.c  10.6 (Sleepycat) 7/5/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "common_ext.h"
+
+/*
+ * Implement shared memory region allocation, using simple first-fit algorithm.
+ * The model is that we take a "chunk" of shared memory store and begin carving
+ * it up into areas, similarly to how malloc works.  We do coalescing on free.
+ *
+ * The "len" field in the __data struct contains the length of the free region
+ * (less the size_t bytes that holds the length).  We use the address provided
+ * by the caller to find this length, which allows us to free a chunk without
+ * requiring that the caller pass in the length of the chunk they're freeing.
+ */
+/* Head of the free list; it sits at the very start of the managed area. */
+SH_LIST_HEAD(__head);
+/*
+ * Header of a chunk.  While the chunk is free both fields are in use;
+ * once it is handed out only "len" is kept, in the size_t immediately
+ * before the address returned to the caller.
+ */
+struct __data {
+       size_t len;                     /* Chunk bytes, not counting len. */
+       SH_LIST_ENTRY links;            /* Free list linkage. */
+};
+
+/*
+ * __db_shalloc_init --
+ *     Initialize the area as one large chunk.
+ *
+ *     The free list head is placed at the start of area and everything
+ *     after it becomes a single free chunk on that list.
+ *
+ * PUBLIC: void __db_shalloc_init __P((void *, size_t));
+ */
+void
+__db_shalloc_init(area, size)
+       void *area;
+       size_t size;
+{
+       struct __head *listp;
+       struct __data *chunkp;
+
+       /* The list head occupies the first bytes of the area. */
+       listp = (struct __head *)area;
+       SH_LIST_INIT(listp);
+
+       /*
+        * The one initial chunk starts right behind the head.  Its length
+        * excludes both the head and the chunk's own length field.
+        */
+       chunkp = (struct __data *)(listp + 1);
+       chunkp->len = size - (sizeof(struct __head) + sizeof(chunkp->len));
+       SH_LIST_INSERT_HEAD(listp, chunkp, links, __data);
+}
+
+/*
+ * __db_shalloc --
+ *     Allocate some space from the shared region.
+ *
+ *     p is the start of the managed area (the free list head), len the
+ *     number of bytes wanted and align the requested alignment.  On
+ *     success the address is stored through retp (treated as a void **)
+ *     and 0 is returned; if no free chunk is large enough, ENOMEM is
+ *     returned and *retp is left untouched.  Space is carved from the
+ *     END of the first chunk that fits.
+ *
+ * PUBLIC: int __db_shalloc __P((void *, size_t, size_t, void *));
+ */
+int
+__db_shalloc(p, len, align, retp)
+       void *p, *retp;
+       size_t len, align;
+{
+       struct __data *elp;
+       size_t *sp;
+       void *rp;
+
+       /*
+        * We never allocate less than the size of a struct __data, align
+        * to less than a size_t boundary, or align to something that's not
+        * a multiple of a size_t.
+        */
+       if (len < sizeof(struct __data))
+               len = sizeof(struct __data);
+       align = align <= sizeof(size_t) ?
+           sizeof(size_t) : ALIGN(align, sizeof(size_t));
+
+       /* Walk the list, looking for a slot. */
+       for (elp = SH_LIST_FIRST((struct __head *)p, __data);
+           elp != NULL;
+           elp = SH_LIST_NEXT(elp, links, __data)) {
+               /*
+                * Calculate the value of the returned pointer if we were to
+                * use this chunk.
+                *      + Find the end of the chunk.
+                *      + Subtract the memory the user wants.
+                *      + Find the closest previous correctly-aligned address.
+                *
+                * NOTE(review): the mask below only rounds down correctly
+                * when align is a power of two; presumably callers only
+                * ever pass such values -- confirm.
+                */
+               rp = (u_int8_t *)elp + sizeof(size_t) + elp->len;
+               rp = (u_int8_t *)rp - len;
+               rp = (u_int8_t *)((ALIGNTYPE)rp & ~(align - 1));
+
+               /*
+                * Rp may now point before elp->links, in which case the chunk
+                * was too small, and we have to try again.
+                */
+               if ((u_int8_t *)rp < (u_int8_t *)&elp->links)
+                       continue;
+
+               *(void **)retp = rp;
+
+               /*
+                * If there are at least 32 bytes of additional memory, divide
+                * the chunk into two chunks.  The size_t just below rp gets
+                * the length of the piece handed out (rp to the end of the
+                * old chunk); the free chunk shrinks by that length plus the
+                * new length field and stays on the list where it was.
+                */
+               if ((u_int8_t *)rp >= (u_int8_t *)&elp->links + 32) {
+                       sp = rp;
+                       *--sp = elp->len -
+                           ((u_int8_t *)rp - (u_int8_t *)&elp->links);
+                       elp->len -= *sp + sizeof(size_t);
+                       return (0);
+               }
+
+               /*
+                * Otherwise, we return the entire chunk, wasting some amount
+                * of space to keep the list compact.  However, because the
+                * address we're returning to the user may not be the address
+                * of the start of the chunk for alignment reasons, fill every
+                * size_t slot between the chunk's real length field and the
+                * returned address with a flag value, so that we can step
+                * back to the real length during free.
+                */
+#define        ILLEGAL_SIZE    1
+               SH_LIST_REMOVE(elp, links, __data);
+               for (sp = rp; (u_int8_t *)--sp >= (u_int8_t *)&elp->links;)
+                       *sp = ILLEGAL_SIZE;
+               return (0);
+       }
+
+       /* Nothing found large enough; need to figure out how to grow region. */
+       return (ENOMEM);
+}
+
+/*
+ * __db_shalloc_free --
+ *     Free a shared memory allocation.
+ *
+ *     regionp is the start of the managed area (the free list head) and
+ *     ptr an address previously returned by __db_shalloc().  The chunk
+ *     is put back on the address-ordered free list and merged with the
+ *     free chunk directly after and/or directly before it, if any.
+ *
+ * PUBLIC: void __db_shalloc_free __P((void *, void *));
+ */
+void
+__db_shalloc_free(regionp, ptr)
+       void *regionp, *ptr;
+{
+       struct __data *elp, *lastp, *newp;
+       struct __head *hp;
+       size_t free_size, *sp;
+       int merged;
+
+       /*
+        * Step back over flagged length fields to find the beginning of
+        * the object and its real size.  (The loop body is empty; all the
+        * work is done in the loop header.)
+        */
+       for (sp = (size_t *)ptr; sp[-1] == ILLEGAL_SIZE; --sp);
+       ptr = sp;
+
+       /* The chunk header starts one size_t before the user data. */
+       newp = (struct __data *)((u_int8_t *)ptr - sizeof(size_t));
+       free_size = newp->len;
+
+       /*
+        * Walk the list, looking for where this entry goes.
+        *
+        * We keep the free list sorted by address so that coalescing is
+        * trivial.
+        *
+        * XXX
+        * Probably worth profiling this to see how expensive it is.
+        */
+       hp = (struct __head *)regionp;
+       for (elp = SH_LIST_FIRST(hp, __data), lastp = NULL;
+           elp != NULL && (void *)elp < (void *)ptr;
+           lastp = elp, elp = SH_LIST_NEXT(elp, links, __data));
+
+       /*
+        * Elp is either NULL (we reached the end of the list), or the slot
+        * after the one that's being returned.  Lastp is either NULL (we're
+        * returning the first element of the list) or the element before the
+        * one being returned.
+        *
+        * Check for coalescing with the next element.  The next element is
+        * absorbed into the new one, which then takes its place in the list.
+        */
+       merged = 0;
+       if ((u_int8_t *)ptr + free_size == (u_int8_t *)elp) {
+               newp->len += elp->len + sizeof(size_t);
+               SH_LIST_REMOVE(elp, links, __data);
+               if (lastp != NULL)
+                       SH_LIST_INSERT_AFTER(lastp, newp, links, __data);
+               else
+                       SH_LIST_INSERT_HEAD(hp, newp, links, __data);
+               merged = 1;
+       }
+
+       /* Check for coalescing with the previous element. */
+       if (lastp != NULL && (u_int8_t *)lastp +
+           lastp->len + sizeof(size_t) == (u_int8_t *)newp) {
+               lastp->len += newp->len + sizeof(size_t);
+
+               /*
+                * If we have already put the new element into the list take
+                * it back off again because it's just been merged with the
+                * previous element.
+                */
+               if (merged)
+                       SH_LIST_REMOVE(newp, links, __data);
+               merged = 1;
+       }
+
+       /*
+        * No neighbour to merge with: link the chunk in at its sorted
+        * position.  Note that the else below pairs with the inner if.
+        */
+       if (!merged)
+               if (lastp == NULL)
+                       SH_LIST_INSERT_HEAD(hp, newp, links, __data);
+               else
+                       SH_LIST_INSERT_AFTER(lastp, newp, links, __data);
+}
+
+/*
+ * __db_shalloc_count --
+ *     Return the amount of memory on the free list.
+ *
+ *     The result is the sum of the len fields of all free chunks; the
+ *     length fields themselves are not counted.
+ *
+ * PUBLIC: size_t __db_shalloc_count __P((void *));
+ */
+size_t
+__db_shalloc_count(addr)
+       void *addr;
+{
+       struct __data *chunkp;
+       size_t total;
+
+       total = 0;
+       chunkp = SH_LIST_FIRST((struct __head *)addr, __data);
+       while (chunkp != NULL) {
+               total += chunkp->len;
+               chunkp = SH_LIST_NEXT(chunkp, links, __data);
+       }
+
+       return (total);
+}
+
+/*
+ * __db_shsizeof --
+ *     Return the size of a shalloc'd piece of memory.
+ *
+ *     ptr is an address returned by __db_shalloc(); the value returned
+ *     is the len field of the chunk header found in front of it.
+ *
+ * PUBLIC: size_t __db_shsizeof __P((void *));
+ */
+size_t
+__db_shsizeof(ptr)
+       void *ptr;
+{
+       size_t *lenp;
+
+       /*
+        * Slots holding the flag value are alignment padding, not a length;
+        * back up until the slot in front of us is the real length field.
+        */
+       lenp = (size_t *)ptr;
+       while (lenp[-1] == ILLEGAL_SIZE)
+               --lenp;
+
+       /* The chunk header begins one size_t before that address. */
+       return (((struct __data *)((u_int8_t *)lenp - sizeof(size_t)))->len);
+}
+
+#ifdef DEBUG
+/*
+ * __db_shalloc_dump --
+ *     Print the address and length of every chunk on the free list,
+ *     tab separated on a single line.  A NULL fp means stderr.
+ *
+ * PUBLIC: void __db_shalloc_dump __P((FILE *, void *));
+ */
+void
+__db_shalloc_dump(fp, addr)
+       FILE *fp;
+       void *addr;
+{
+       struct __data *chunkp;
+       FILE *out;
+
+       out = fp == NULL ? stderr : fp;
+
+       chunkp = SH_LIST_FIRST((struct __head *)addr, __data);
+       while (chunkp != NULL) {
+               fprintf(out,
+                   "%#lx: %lu\t", (u_long)chunkp, (u_long)chunkp->len);
+               chunkp = SH_LIST_NEXT(chunkp, links, __data);
+       }
+       fprintf(out, "\n");
+}
+#endif
diff --git a/db2/common/db_shash.c b/db2/common/db_shash.c
new file mode 100644 (file)
index 0000000..988de8a
--- /dev/null
@@ -0,0 +1,90 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_shash.c   10.3 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "common_ext.h"
+
+/*
+ * Powers-of-2 and close-by prime number pairs.
+ *
+ * The entries are in ascending order of "power" and the table ends with
+ * a {0, 0} sentinel; __db_tablesize() relies on both when it scans for
+ * the first power that is >= the requested number of buckets.
+ */
+static const struct {
+       int     power;
+       int     prime;
+} list[] = {
+       {  64,    67},
+       { 128,   131},
+       { 256,   257},
+       { 512,   521},
+       {1024,  1031},
+       {2048,  2053},
+       {4096,  4099},
+       {8192,  8191},
+       {0,        0}
+};
+
+/*
+ * __db_tablesize --
+ *     Choose a size for the hash table.
+ *
+ *     Returns the prime paired with the first power of two in the table
+ *     that is >= n_buckets.  Requests below 64 are treated as 64, and
+ *     requests beyond the largest power get the last prime in the table.
+ *
+ * PUBLIC: int __db_tablesize __P((int));
+ */
+int
+__db_tablesize(n_buckets)
+       int n_buckets;
+{
+       int slot;
+
+       /*
+        * We try to be clever about how big we make the hash tables.  Pick
+        * a prime number close to the "suggested" number of elements that
+        * will be in the hash table.  We shoot for minimum collisions (i.e.
+        * one element in each bucket).  We use 64 as the minimum table size.
+        *
+        * Ref: Sedgewick, Algorithms in C, "Hash Functions"
+        */
+       if (n_buckets < 64)
+               n_buckets = 64;
+
+       /* Stop at the first power that is big enough, or at the sentinel. */
+       slot = 0;
+       while (list[slot].power != 0 && list[slot].power < n_buckets)
+               ++slot;
+
+       /* Ran off the end: fall back to the last real entry. */
+       if (list[slot].power == 0)
+               --slot;
+
+       return (list[slot].prime);
+}
+
+/*
+ * __db_hashinit --
+ *     Initialize a hash table that resides in shared memory.
+ *
+ *     begin is treated as an array of nelements tail queue heads, each
+ *     of which is initialized in turn.
+ *
+ * PUBLIC: void __db_hashinit __P((void *, int));
+ */
+void
+__db_hashinit(begin, nelements)
+       void *begin;
+       int nelements;
+{
+       int remaining;
+       SH_TAILQ_HEAD(hash_head) *bucketp;
+
+       bucketp = (struct hash_head *)begin;
+
+       for (remaining = nelements; remaining > 0; --remaining) {
+               SH_TAILQ_INIT(bucketp);
+               ++bucketp;
+       }
+}
diff --git a/db2/compat.h b/db2/compat.h
new file mode 100644 (file)
index 0000000..5183bef
--- /dev/null
@@ -0,0 +1,10 @@
+/* Compatibility gunk for the db library.  */
+
+#include <sys/types.h>
+#include <errno.h>             /* EINVAL, and EFTYPE where the system has it.  */
+
+/* Map EFTYPE onto EINVAL, but only if <errno.h> has not already defined
+   it, so that a system-provided value is never redefined.  */
+#ifndef EFTYPE
+# define EFTYPE EINVAL
+#endif
+
+/* Emulate Solaris llseek().  */
+typedef loff_t offset_t;
+
+/* NOTE(review): declared as returning int although it takes a loff_t
+   offset; confirm this matches the C library's own declaration.  */
+extern int llseek (int fd, loff_t offset, int whence);
diff --git a/db2/config.h b/db2/config.h
new file mode 100644 (file)
index 0000000..ed1246d
--- /dev/null
@@ -0,0 +1,142 @@
+/* config.h.  Generated automatically by configure.  */
+/* config.h.in.  Generated automatically from configure.in by autoheader.  */
+
+/* ...but edited by hand to be used in GNU libc.  */
+#include <endian.h>
+#include <sys/stat.h>          /* To get _STATBUF_ST_BLKSIZE.  */
+
+/* Define to empty if the keyword does not work.  */
+/* #undef const */
+
+/* Define if your struct stat has st_blksize.  */
+#ifdef _STATBUF_ST_BLKSIZE
+# define HAVE_ST_BLKSIZE 1
+#endif
+
+/* Define to `int' if <sys/types.h> doesn't define.  */
+/* #undef mode_t */
+
+/* Define to `long' if <sys/types.h> doesn't define.  */
+/* #undef off_t */
+
+/* Define to `int' if <sys/types.h> doesn't define.  */
+/* #undef pid_t */
+
+/* Define to `unsigned' if <sys/types.h> doesn't define.  */
+/* #undef size_t */
+
+/* Define if you have the ANSI C header files.  */
+#define STDC_HEADERS 1
+
+/* Define if your processor stores words with the most significant
+   byte first (like Motorola and SPARC, unlike Intel and VAX).
+   The comparison uses __BIG_ENDIAN, from the same reserved namespace as
+   __BYTE_ORDER.  The unprefixed BIG_ENDIAN is only visible under some
+   feature-test macros, and an undefined name is silently treated as 0
+   inside #if, which would leave WORDS_BIGENDIAN unset on big-endian
+   machines.  */
+#if __BYTE_ORDER == __BIG_ENDIAN
+# define WORDS_BIGENDIAN 1
+#endif
+
+/* Define to `int' if <sys/types.h> doesn't define.  */
+/* #undef ssize_t */
+
+/* Define if you want a debugging version. */
+/* #undef DEBUG */
+
+/* Define if you have sigfillset (and sigprocmask). */
+#define HAVE_SIGFILLSET 1
+
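+/* Define if seeking to 64-bit file offsets requires the _llseek() call.
+   Left undefined here; the Linux sysdeps Makefile adds -DHAVE_LLSEEK to
+   CPPFLAGS when db2 is built.  */
+/* #undef HAVE_LLSEEK */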
+
+/* Define if seeking to 64-bit file offsets requires the _lseeki64() call. */
+/* #undef HAVE_LSEEKI */
+
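+/* Define if you have spinlocks.
+   Left undefined here, as are the HAVE_ASSEM_* macros below; the i386,
+   m68k/m68020 and sparc sysdeps Makefiles provide the spinlock macros
+   through CPPFLAGS when db2 is built.  */
+/* #undef HAVE_SPINLOCKS */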
+
+/* Define if you want to use mc68020/gcc assembly spinlocks. */
+/* #undef HAVE_ASSEM_MC68020_GCC */
+
+/* Define if you want to use sparc/gcc assembly spinlocks. */
+/* #undef HAVE_ASSEM_SPARC_GCC */
+
+/* Define if you want to use uts4/cc assembly spinlocks. */
+/* #undef HAVE_ASSEM_UTS4_CC */
+
+/* Define if you want to use x86/gcc assembly spinlocks. */
+/* #undef HAVE_ASSEM_X86_GCC */
+
+/* Define if you have the AIX _check_lock spinlocks. */
+/* #undef HAVE_FUNC_AIX */
+
+/* Define if you have the OSF1 or HPPA msemaphore spinlocks. */
+/* #undef HAVE_FUNC_MSEM */
+
+/* Define if you have the SGI abilock_t spinlocks. */
+/* #undef HAVE_FUNC_SGI */
+
+/* Define if you have the Solaris mutex_t spinlocks. */
+/* #undef HAVE_FUNC_SOLARIS */
+
+/* Define if your sprintf returns a pointer, not a length. */
+/* #undef SPRINTF_RET_CHARPNT */
+
+/* Define if you have the getcwd function.  */
+#define HAVE_GETCWD 1
+
+/* Define if you have the getopt function.  */
+#define HAVE_GETOPT 1
+
+/* Define if you have the getuid function.  */
+#define HAVE_GETUID 1
+
+/* Define if you have the memcmp function.  */
+#define HAVE_MEMCMP 1
+
+/* Define if you have the memcpy function.  */
+#define HAVE_MEMCPY 1
+
+/* Define if you have the memmove function.  */
+#define HAVE_MEMMOVE 1
+
+/* Define if you have the mmap function.  */
+#define HAVE_MMAP 1
+
+/* Define if you have the raise function.  */
+#define HAVE_RAISE 1
+
+/* Define if you have the select function.  */
+#define HAVE_SELECT 1
+
+/* Define if you have the snprintf function.  */
+#define HAVE_SNPRINTF 1
+
+/* Define if you have the strdup function.  */
+#define HAVE_STRDUP 1
+
+/* Define if you have the strerror function.  */
+#define HAVE_STRERROR 1
+
+/* Define if you have the strsep function.  */
+#define HAVE_STRSEP 1
+
+/* Define if you have the vsnprintf function.  */
+#define HAVE_VSNPRINTF 1
+
+/* Define if you have the <dirent.h> header file.  */
+#define HAVE_DIRENT_H 1
+
+/* Define if you have the <ndir.h> header file.  */
+/* #undef HAVE_NDIR_H */
+
+/* Define if you have the <sys/dir.h> header file.  */
+/* #undef HAVE_SYS_DIR_H */
+
+/* Define if you have the <sys/ndir.h> header file.  */
+/* #undef HAVE_SYS_NDIR_H */
+
+/* Define if you have the <sys/select.h> header file.  */
+#define HAVE_SYS_SELECT_H 1
+
+/* Define if you have the <sys/time.h> header file.  */
+#define HAVE_SYS_TIME_H 1
+
+/* Continue with the next config.h on the include search path, i.e. the
+   one this file shadows (#include_next is a GNU C extension).  */
+#include_next <config.h>
diff --git a/db2/db.h b/db2/db.h
new file mode 100644 (file)
index 0000000..3769579
--- /dev/null
+++ b/db2/db.h
@@ -0,0 +1,796 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)db.h.src    10.67 (Sleepycat) 8/25/97
+ */
+
+#ifndef _DB_H_
+#define        _DB_H_
+
+#ifndef __NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#endif
+
+/*
+ * XXX
+ * MacOS: ensure that Metrowerks C makes enumeration types int sized.
+ */
+#ifdef __MWERKS__
+#pragma enumsalwaysint on
+#endif
+
+/*
+ * XXX
+ * Handle function prototypes and the keyword "const".  This steps on name
+ * space that DB doesn't control, but all of the other solutions are worse.
+ */
+#undef __P
+#if defined(__STDC__) || defined(__cplusplus)
+#define        __P(protos)     protos          /* ANSI C prototypes */
+#else
+#define        const
+#define        __P(protos)     ()              /* K&R C preprocessor */
+#endif
+
+/*
+ * !!!
+ * DB needs basic information about specifically sized types.  If they're
+ * not provided by the system, typedef them here.
+ *
+ * We protect them against multiple inclusion using __BIT_TYPES_DEFINED__,
+ * as does BIND and Kerberos, since we don't know for sure what #include
+ * files the user is using.
+ *
+ * !!!
+ * We also provide the standard u_int, u_long etc., if they're not provided
+ * by the system.  This isn't completely necessary, but the example programs
+ * need them.
+ */
+#ifndef        __BIT_TYPES_DEFINED__
+#define        __BIT_TYPES_DEFINED__
+/* NOTE(review): no typedefs appear inside this guard in this copy of the
+   header; presumably <sys/types.h> already supplies u_int8_t, u_int16_t,
+   u_int32_t etc. on this system -- confirm.  */





+#endif
+
+
+
+
+
+
+#define        DB_VERSION_MAJOR        2
+#define        DB_VERSION_MINOR        3
+#define        DB_VERSION_PATCH        4
+#define        DB_VERSION_STRING       "Sleepycat Software: DB 2.3.4: (8/20/97)"
+
+typedef        u_int32_t       db_pgno_t;      /* Page number type. */
+typedef        u_int16_t       db_indx_t;      /* Page offset type. */
+#define        DB_MAX_PAGES    0xffffffff      /* >= # of pages in a file */
+
+typedef        u_int32_t       db_recno_t;     /* Record number type. */
+typedef size_t         DB_LOCK;        /* Object returned by lock manager. */
+#define        DB_MAX_RECORDS  0xffffffff      /* >= # of records in a tree */
+
+#define        DB_FILE_ID_LEN          20      /* DB file ID length. */
+
+/* Forward structure declarations, so applications get type checking. */
+struct __db;           typedef struct __db DB;
+#ifdef DB_DBM_HSEARCH
+                       typedef struct __db DBM;
+#endif
+struct __db_bt_stat;   typedef struct __db_bt_stat DB_BTREE_STAT;
+struct __db_dbt;       typedef struct __db_dbt DBT;
+struct __db_env;       typedef struct __db_env DB_ENV;
+struct __db_info;      typedef struct __db_info DB_INFO;
+struct __db_lockregion;        typedef struct __db_lockregion DB_LOCKREGION;
+struct __db_lockreq;   typedef struct __db_lockreq DB_LOCKREQ;
+struct __db_locktab;   typedef struct __db_locktab DB_LOCKTAB;
+struct __db_log;       typedef struct __db_log DB_LOG;
+struct __db_lsn;       typedef struct __db_lsn DB_LSN;
+struct __db_mpool;     typedef struct __db_mpool DB_MPOOL;
+struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
+struct __db_mpool_stat;        typedef struct __db_mpool_stat DB_MPOOL_STAT;
+struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE;
+struct __db_txn;       typedef struct __db_txn DB_TXN;
+struct __db_txn_active;        typedef struct __db_txn_active DB_TXN_ACTIVE;
+struct __db_txn_stat;  typedef struct __db_txn_stat DB_TXN_STAT;
+struct __db_txnmgr;    typedef struct __db_txnmgr DB_TXNMGR;
+struct __db_txnregion; typedef struct __db_txnregion DB_TXNREGION;
+struct __dbc;          typedef struct __dbc DBC;
+
+/*
+ * Key/data structure -- a Data-Base Thang.
+ *
+ * Describes one key or one data item: a pointer to the bytes, their
+ * length, and the fields and flags that control partial and
+ * user-memory transfers.
+ */
+struct __db_dbt {
+       void     *data;                 /* key/data */
+       u_int32_t size;                 /* key/data length */
+       u_int32_t ulen;                 /* RO: length of user buffer. */
+       u_int32_t dlen;                 /* RO: get/put record length. */
+       u_int32_t doff;                 /* RO: get/put record offset. */
+
+#define        DB_DBT_INTERNAL 0x01            /* Perform any mallocs using regular
+                                          malloc, not the user's malloc. */
+#define        DB_DBT_MALLOC   0x02            /* Return in allocated memory. */
+#define        DB_DBT_PARTIAL  0x04            /* Partial put/get. */
+#define        DB_DBT_USERMEM  0x08            /* Return in user's memory. */
+       u_int32_t flags;                /* Presumably an OR of the DB_DBT_*
+                                          values above -- TODO confirm. */
+};
+
+/*
+ * Database configuration and initialization.
+ */
+ /*
+  * Flags understood by both db_open(3) and db_appinit(3).
+  */
+#define        DB_CREATE               0x00001 /* O_CREAT: create file as necessary. */
+#define        DB_NOMMAP               0x00002 /* Don't mmap underlying file. */
+#define        DB_THREAD               0x00004 /* Free-thread DB package handles. */
+
+/*
+ * Flags understood by db_appinit(3).
+ *
+ * DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented.
+ */
+/*                             0x00007    COMMON MASK. */
+#define        DB_APP_INIT             0x00008 /* Appinit called, paths initialized. */
+#define        DB_INIT_LOCK            0x00010 /* Initialize locking. */
+#define        DB_INIT_LOG             0x00020 /* Initialize logging. */
+#define        DB_INIT_MPOOL           0x00040 /* Initialize mpool. */
+#define        DB_INIT_TXN             0x00080 /* Initialize transactions. */
+#define        DB_MPOOL_PRIVATE        0x00100 /* Mpool: private memory pool. */
+#define        DB_MUTEXDEBUG           0x00200 /* Do not get/set mutexes in regions. */
+#define        DB_RECOVER              0x00400 /* Run normal recovery. */
+#define        DB_RECOVER_FATAL        0x00800 /* Run catastrophic recovery. */
+#define        DB_TXN_NOSYNC           0x01000 /* Do not sync log on commit. */
+#define        DB_USE_ENVIRON          0x02000 /* Use the environment. */
+#define        DB_USE_ENVIRON_ROOT     0x04000 /* Use the environment if root. */
+
+/* CURRENTLY UNUSED LOCK FLAGS. */
+#define        DB_TXN_LOCK_2PL         0x00000 /* Two-phase locking. */
+#define        DB_TXN_LOCK_OPTIMISTIC  0x00000 /* Optimistic locking. */
+#define        DB_TXN_LOCK_MASK        0x00000 /* Lock flags mask. */
+
+/* CURRENTLY UNUSED LOG FLAGS. */
+#define        DB_TXN_LOG_REDO         0x00000 /* Redo-only logging. */
+#define        DB_TXN_LOG_UNDO         0x00000 /* Undo-only logging. */
+#define        DB_TXN_LOG_UNDOREDO     0x00000 /* Undo/redo write-ahead logging. */
+#define        DB_TXN_LOG_MASK         0x00000 /* Log flags mask. */
+
+/*
+ * Flags understood by db_open(3).
+ *
+ * DB_EXCL and DB_TEMPORARY are internal only, and not documented.
+ * DB_SEQUENTIAL is currently internal, but likely to be exported some day.
+ *
+ * The values start above 0x07fff so that they do not collide with the
+ * common flags or the db_appinit(3) flags defined earlier.
+ */
+/*                             0x00007    COMMON MASK. */
+/*                             0x07fff    ALREADY USED. */
+#define        DB_EXCL                 0x08000 /* O_EXCL: exclusive open. */
+#define        DB_RDONLY               0x10000 /* O_RDONLY: read-only. */
+#define        DB_SEQUENTIAL           0x20000 /* Indicate sequential access. */
+#define        DB_TEMPORARY            0x40000 /* Remove on last close. */
+#define        DB_TRUNCATE             0x80000 /* O_TRUNC: replace existing DB. */
+
+/*
+ * Deadlock detector modes; used in the DBENV structure to configure the
+ * locking subsystem.
+ */
+#define        DB_LOCK_NORUN           0x0
+#define        DB_LOCK_DEFAULT         0x1
+#define        DB_LOCK_OLDEST          0x2
+#define        DB_LOCK_RANDOM          0x3
+#define        DB_LOCK_YOUNGEST        0x4
+
+/*
+ * Database environment: error reporting hooks, the home/log/tmp/data
+ * paths, and, for each of the locking, logging, memory pool and
+ * transaction subsystems, its tunables together with the handle
+ * returned by that subsystem's open call.
+ */
+struct __db_env {
+       int              db_lorder;     /* Byte order. */
+
+                                       /* Error message callback. */
+       void (*db_errcall) __P((const char *, char *));
+       FILE            *db_errfile;    /* Error message file stream. */
+       const char      *db_errpfx;     /* Error message prefix. */
+       int              db_verbose;    /* Generate debugging messages. */
+
+       /* User paths. */
+       char            *db_home;       /* Database home. */
+       char            *db_log_dir;    /* Database log file directory. */
+       char            *db_tmp_dir;    /* Database tmp file directory. */
+
+       char           **db_data_dir;   /* Database data file directories. */
+       int              data_cnt;      /* Database data file slots. */
+       int              data_next;     /* Next Database data file slot. */
+
+       /* Locking. */
+       DB_LOCKTAB      *lk_info;       /* Return from lock_open(). */
+       u_int8_t        *lk_conflicts;  /* Two dimensional conflict matrix. */
+       int              lk_modes;      /* Number of lock modes in table. */
+       unsigned int     lk_max;        /* Maximum number of locks. */
+                                       /* NOTE(review): lk_detect presumably
+                                          takes one of the DB_LOCK_* detector
+                                          modes defined above -- confirm. */
+       u_int32_t        lk_detect;     /* Deadlock detect on every conflict. */
+       int (*db_yield) __P((void));    /* Yield function for threads. */
+
+       /* Logging. */
+       DB_LOG          *lg_info;       /* Return from log_open(). */
+       u_int32_t        lg_max;        /* Maximum file size. */
+
+       /* Memory pool. */
+       DB_MPOOL        *mp_info;       /* Return from memp_open(). */
+       size_t           mp_mmapsize;   /* Maximum file size for mmap. */
+       size_t           mp_size;       /* Bytes in the mpool cache. */
+
+       /* Transactions. */
+       DB_TXNMGR       *tx_info;       /* Return from txn_open(). */
+       unsigned int     tx_max;        /* Maximum number of transactions. */
+       int (*tx_recover)               /* Dispatch function for recovery. */
+           __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+
+       u_int32_t        flags;         /* Flags. */
+};
+
+/*******************************************************
+ * Access methods.
+ *******************************************************/
+/*
+ * Access method selector.  Numbering starts at 1, so 0 is not a valid
+ * DBTYPE value.
+ */
+typedef enum {
+       DB_BTREE=1,                     /* B+tree. */
+       DB_HASH,                        /* Extended Linear Hashing. */
+       DB_RECNO,                       /* Fixed and variable-length records. */
+       DB_UNKNOWN                      /* Figure it out on open. */
+} DBTYPE;
+
+#define        DB_BTREEVERSION 6               /* Current btree version. */
+#define        DB_BTREEOLDVER  6               /* Oldest btree version supported. */
+#define        DB_BTREEMAGIC   0x053162
+
+#define        DB_HASHVERSION  5               /* Current hash version. */
+#define        DB_HASHOLDVER   4               /* Oldest hash version supported. */
+#define        DB_HASHMAGIC    0x061561
+
+#define        DB_LOGVERSION   2               /* Current log version. */
+#define        DB_LOGOLDVER    2               /* Oldest log version supported. */
+#define        DB_LOGMAGIC     0x040988
+
+/*
+ * Per-database configuration: settings common to all access methods
+ * (byte order, cache and page size, allocator), followed by one group
+ * of settings for each of the btree, hash and recno access methods.
+ * NOTE(review): presumably handed to db_open(3) -- confirm.
+ */
+struct __db_info {
+       int              db_lorder;     /* Byte order. */
+       size_t           db_cachesize;  /* Underlying cache size. */
+       size_t           db_pagesize;   /* Underlying page size. */
+
+                                       /* Local heap allocation. */
+       void *(*db_malloc) __P((size_t));
+
+       /* Btree access method. */
+       int              bt_maxkey;     /* Maximum keys per page. */
+       int              bt_minkey;     /* Minimum keys per page. */
+       int (*bt_compare)               /* Comparison function. */
+           __P((const DBT *, const DBT *));
+       size_t (*bt_prefix)             /* Prefix function. */
+           __P((const DBT *, const DBT *));
+
+       /* Hash access method. */
+       unsigned int     h_ffactor;     /* Fill factor. */
+       unsigned int     h_nelem;       /* Number of elements. */
+       u_int32_t       (*h_hash)       /* Hash function. */
+           __P((const void *, u_int32_t));
+
+       /* Recno access method. */
+       int              re_pad;        /* Fixed-length padding byte. */
+       int              re_delim;      /* Variable-length delimiting byte. */
+       u_int32_t        re_len;        /* Length for fixed-length records. */
+       char            *re_source;     /* Source file name. */
+
+       /* The comment on each flag names the access method(s) it is for. */
+#define        DB_DELIMITER            0x0001  /* Recno: re_delim set. */
+#define        DB_DUP                  0x0002  /* Btree, Hash: duplicate keys. */
+#define        DB_FIXEDLEN             0x0004  /* Recno: fixed-length records. */
+#define        DB_PAD                  0x0008  /* Recno: re_pad set. */
+#define        DB_RECNUM               0x0010  /* Btree: record numbers. */
+#define        DB_RENUMBER             0x0020  /* Recno: renumber on insert/delete. */
+#define        DB_SNAPSHOT             0x0040  /* Recno: snapshot the input. */
+       u_int32_t        flags;
+};
+
+/*
+ * DB access method and cursor operation codes.  These are implemented as
+ * bit fields for future flexibility, but currently only a single one may
+ * be specified to any function.
+ */
+#define        DB_AFTER        0x000001        /* c_put() */
+#define        DB_APPEND       0x000002        /* put() */
+#define        DB_BEFORE       0x000004        /* c_put() */
+#define        DB_CHECKPOINT   0x000008        /* log_put(), log_get() */
+#define        DB_CURRENT      0x000010        /* c_get(), c_put(), log_get() */
+#define        DB_FIRST        0x000020        /* c_get(), log_get() */
+#define        DB_FLUSH        0x000040        /* log_put() */
+#define        DB_GET_RECNO    0x000080        /* c_get() */
+#define        DB_KEYFIRST     0x000100        /* c_put() */
+#define        DB_KEYLAST      0x000200        /* c_put() */
+#define        DB_LAST         0x000400        /* c_get(), log_get() */
+#define        DB_NEXT         0x000800        /* c_get(), log_get() */
+#define        DB_NOOVERWRITE  0x001000        /* put() */
+#define        DB_NOSYNC       0x002000        /* close() */
+#define        DB_PREV         0x004000        /* c_get(), log_get() */
+#define        DB_RECORDCOUNT  0x008000        /* stat() */
+#define        DB_SET          0x010000        /* c_get(), log_get() */
+#define        DB_SET_RANGE    0x020000        /* c_get() */
+#define        DB_SET_RECNO    0x040000        /* get(), c_get() */
+
+/* DB (user visible) error return codes. */
+#define        DB_INCOMPLETE           ( -1)   /* Sync didn't finish. */
+#define        DB_KEYEMPTY             ( -2)   /* The key/data pair was deleted or
+                                          was never created by the user. */
+#define        DB_KEYEXIST             ( -3)   /* The key/data pair already exists. */
+#define        DB_LOCK_DEADLOCK        ( -4)   /* Locker killed to resolve deadlock. */
+#define        DB_LOCK_NOTGRANTED      ( -5)   /* Lock unavailable, no-wait set. */
+#define        DB_LOCK_NOTHELD         ( -6)   /* Lock not held by locker. */
+#define        DB_NOTFOUND             ( -7)   /* Key/data pair not found (EOF). */
+
+/* DB (private) error return codes. */
+#define        DB_DELETED              ( -8)   /* Recovery file marked deleted. */
+#define        DB_NEEDSPLIT            ( -9)   /* Page needs to be split. */
+#define        DB_REGISTERED           (-10)   /* Entry was previously registered. */
+#define        DB_SWAPBYTES            (-11)   /* Database needs byte swapping. */
+
+struct __db_ilock {                    /* Internal DB access method lock. */
+       db_pgno_t       pgno;           /* Page being locked. */
+                                       /* File id. */
+       u_int8_t        fileid[DB_FILE_ID_LEN];
+};
+
+/* DB access method handle; allocated and initialized by db_open(). */
+struct __db {
+       void    *mutex;                 /* Synchronization for free threading. */
+       DBTYPE   type;                  /* DB access method. */
+       DB_ENV  *dbenv;                 /* DB_ENV structure. */
+       DB_ENV  *mp_dbenv;              /* DB_ENV for local mpool creation. */
+
+       DB      *master;                /* Original DB created by db_open(). */
+       void    *internal;              /* Access method private. */
+
+       DB_MPOOL        *mp;            /* The access method's mpool. */
+       DB_MPOOLFILE    *mpf;           /* The access method's mpool file. */
+
+       /*
+        * XXX
+        * Explicit representations of structures in queue.h.
+        *
+        * TAILQ_HEAD(curs_queue, __dbc);
+        */
+       struct {
+               struct __dbc *tqh_first;
+               struct __dbc **tqh_last;
+       } curs_queue;                   /* Tail queue of struct __dbc. */
+
+       /*
+        * XXX
+        * Explicit representations of structures in queue.h.
+        *
+        * LIST_HEAD(handleq, __db);
+        * LIST_ENTRY(__db);
+        */
+       struct {
+               struct __db *lh_first;
+       } handleq;                      /* List of handles for this DB. */
+       struct {
+               struct __db *le_next;
+               struct __db **le_prev;
+       } links;                        /* Links for the handle list. */
+
+       u_int32_t log_fileid;           /* Logging file id. */
+
+       DB_TXN   *txn;                  /* Current transaction. */
+       u_int32_t locker;               /* Default process' locker id. */
+       DBT       lock_dbt;             /* DBT referencing lock. */
+       struct __db_ilock lock;         /* Lock. */
+
+       size_t    pgsize;               /* Logical page size of file. */
+
+                                       /* Local heap allocation. */
+       void *(*db_malloc) __P((size_t));
+
+                                       /* Functions. */
+       int (*close)    __P((DB *, int));
+       int (*cursor)   __P((DB *, DB_TXN *, DBC **));
+       int (*del)      __P((DB *, DB_TXN *, DBT *, int));
+       int (*fd)       __P((DB *, int *));
+       int (*get)      __P((DB *, DB_TXN *, DBT *, DBT *, int));
+       int (*put)      __P((DB *, DB_TXN *, DBT *, DBT *, int));
+       int (*stat)     __P((DB *, void *, void *(*)(size_t), int));
+       int (*sync)     __P((DB *, int));
+
+#define        DB_AM_DUP       0x000001        /* DB_DUP (internal). */
+#define        DB_AM_INMEM     0x000002        /* In-memory; no sync on close. */
+#define        DB_AM_LOCKING   0x000004        /* Perform locking. */
+#define        DB_AM_LOGGING   0x000008        /* Perform logging. */
+#define        DB_AM_MLOCAL    0x000010        /* Database memory pool is local. */
+#define        DB_AM_PGDEF     0x000020        /* Page size was defaulted. */
+#define        DB_AM_RDONLY    0x000040        /* Database is readonly. */
+#define        DB_AM_RECOVER   0x000080        /* In recovery (do not log or lock). */
+#define        DB_AM_SWAP      0x000100        /* Pages need to be byte-swapped. */
+#define        DB_AM_THREAD    0x000200        /* DB is multi-threaded. */
+#define        DB_BT_RECNUM    0x000400        /* DB_RECNUM (internal) */
+#define        DB_HS_DIRTYMETA 0x000800        /* Hash: Metadata page modified. */
+#define        DB_RE_DELIMITER 0x001000        /* DB_DELIMITER (internal). */
+#define        DB_RE_FIXEDLEN  0x002000        /* DB_FIXEDLEN (internal). */
+#define        DB_RE_PAD       0x004000        /* DB_PAD (internal). */
+#define        DB_RE_RENUMBER  0x008000        /* DB_RENUMBER (internal). */
+#define        DB_RE_SNAPSHOT  0x010000        /* DB_SNAPSHOT (internal). */
+
+       u_int32_t flags;                /* Mask of the bits above. */
+};
+
+/* Cursor handle; c_get()/c_put() opcodes are listed at DB_AFTER et al. */
+struct __dbc {
+       DB *dbp;                        /* Related DB access method. */
+       DB_TXN   *txn;                  /* Associated transaction. */
+
+       /*
+        * XXX
+        * Explicit representations of structures in queue.h.
+        *
+        * TAILQ_ENTRY(__dbc);
+        */
+       struct {
+               struct __dbc *tqe_next;
+               struct __dbc **tqe_prev;
+       } links;                        /* Tail queue linkage. */
+
+       void     *internal;             /* Access method private. */
+
+       int (*c_close)  __P((DBC *));
+       int (*c_del)    __P((DBC *, int));
+       int (*c_get)    __P((DBC *, DBT *, DBT *, int));
+       int (*c_put)    __P((DBC *, DBT *, DBT *, int));
+};
+
+/* Btree/recno statistics structure. */
+struct __db_bt_stat {
+       u_int32_t bt_flags;             /* Open flags. */
+       u_int32_t bt_maxkey;            /* Maxkey value. */
+       u_int32_t bt_minkey;            /* Minkey value. */
+       u_int32_t bt_re_len;            /* Fixed-length record length. */
+       u_int32_t bt_re_pad;            /* Fixed-length record pad. */
+       u_int32_t bt_pagesize;          /* Page size. */
+       u_int32_t bt_levels;            /* Tree levels. */
+       u_int32_t bt_nrecs;             /* Number of records. */
+       u_int32_t bt_int_pg;            /* Internal pages. */
+       u_int32_t bt_leaf_pg;           /* Leaf pages. */
+       u_int32_t bt_dup_pg;            /* Duplicate pages. */
+       u_int32_t bt_over_pg;           /* Overflow pages. */
+       u_int32_t bt_free;              /* Pages on the free list. */
+       u_int32_t bt_freed;             /* Pages freed for reuse. */
+       u_int32_t bt_int_pgfree;        /* Bytes free in internal pages. */
+       u_int32_t bt_leaf_pgfree;       /* Bytes free in leaf pages. */
+       u_int32_t bt_dup_pgfree;        /* Bytes free in duplicate pages. */
+       u_int32_t bt_over_pgfree;       /* Bytes free in overflow pages. */
+       u_int32_t bt_pfxsaved;          /* Bytes saved by prefix compression. */
+       u_int32_t bt_split;             /* Total number of splits. */
+       u_int32_t bt_rootsplit;         /* Root page splits. */
+       u_int32_t bt_fastsplit;         /* Fast splits. */
+       u_int32_t bt_added;             /* Items added. */
+       u_int32_t bt_deleted;           /* Items deleted. */
+       u_int32_t bt_get;               /* Items retrieved. */
+       u_int32_t bt_cache_hit;         /* Hits in fast-insert code. */
+       u_int32_t bt_cache_miss;        /* Misses in fast-insert code. */
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int   db_appinit __P((const char *, char * const *, DB_ENV *, int));
+int   db_appexit __P((DB_ENV *));
+int   db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
+const char *db_version __P((int *, int *, int *));
+#if defined(__cplusplus)
+};
+#endif
+
+/*******************************************************
+ * Locking
+ *******************************************************/
+#define        DB_LOCKVERSION  1
+#define        DB_LOCKMAGIC    0x090193
+
+/* Flag values for lock_vec(). */
+#define        DB_LOCK_NOWAIT          0x01    /* Don't wait on unavailable lock. */
+
+/* Flag values for lock_detect(). */
+#define        DB_LOCK_CONFLICT        0x01    /* Run on any conflict. */
+
+/* Request types. */
+typedef enum {
+       DB_LOCK_DUMP,                   /* Display held locks. */
+       DB_LOCK_GET,                    /* Get the lock. */
+       DB_LOCK_PUT,                    /* Release the lock. */
+       DB_LOCK_PUT_ALL,                /* Release locker's locks. */
+       DB_LOCK_PUT_OBJ                 /* Release locker's locks on obj. */
+} db_lockop_t;
+
+/* Lock modes: simple R/W, plus intent modes for multi-granularity locking. */
+typedef enum {
+       DB_LOCK_NG=0,                   /* Not granted. */
+       DB_LOCK_READ,                   /* Shared/read. */
+       DB_LOCK_WRITE,                  /* Exclusive/write. */
+       DB_LOCK_IREAD,                  /* Intent to share/read. */
+       DB_LOCK_IWRITE,                 /* Intent to exclusive/write. */
+       DB_LOCK_IWR                     /* Intent to read and write. */
+} db_lockmode_t;
+
+/* Lock request structure. */
+struct __db_lockreq {
+       db_lockop_t      op;            /* Operation. */
+       db_lockmode_t    mode;          /* Requested mode. */
+       u_int32_t        locker;        /* Locker identity. */
+       DBT             *obj;           /* Object being locked. */
+       DB_LOCK          lock;          /* Lock returned. */
+};
+
+/*
+ * Commonly used conflict matrices.
+ *
+ * Standard Read/Write (or exclusive/shared) locks.
+ */
+#define        DB_LOCK_RW_N    3
+extern const u_int8_t db_rw_conflicts[];
+
+/* Multi-granularity locking. */
+#define        DB_LOCK_RIW_N   6
+extern const u_int8_t db_riw_conflicts[];
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int      lock_close __P((DB_LOCKTAB *));
+int      lock_detect __P((DB_LOCKTAB *, int, u_int32_t));
+int      lock_get __P((DB_LOCKTAB *,
+           u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
+int      lock_id __P((DB_LOCKTAB *, u_int32_t *));
+int      lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **));
+int      lock_put __P((DB_LOCKTAB *, DB_LOCK));
+int      lock_unlink __P((const char *, int, DB_ENV *));
+int      lock_vec __P((DB_LOCKTAB *,
+           u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **));
+#if defined(__cplusplus)
+};
+#endif
+
+/*******************************************************
+ * Logging.
+ *******************************************************/
+/* Flag values for log_archive(). */
+#define        DB_ARCH_ABS             0x001   /* Absolute pathnames. */
+#define        DB_ARCH_DATA            0x002   /* Data files. */
+#define        DB_ARCH_LOG             0x004   /* Log files. */
+
+/*
+ * A DB_LSN has two parts, a fileid which identifies a specific file, and an
+ * offset within that file.  The fileid is an unsigned 4-byte quantity that
+ * uniquely identifies a file within the log directory -- currently a simple
+ * counter inside the log.  The offset is also an unsigned 4-byte value.  The
+ * log manager guarantees the offset always fits in 4 bytes by switching to
+ * a new log file before the maximum length imposed by an unsigned 4-byte
+ * offset is reached.
+ */
+struct __db_lsn {
+       u_int32_t       file;           /* File ID. */
+       u_int32_t       offset;         /* File offset. */
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int     log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t)));
+int     log_close __P((DB_LOG *));
+int     log_compare __P((const DB_LSN *, const DB_LSN *));
+int     log_file __P((DB_LOG *, const DB_LSN *, char *, size_t));
+int     log_flush __P((DB_LOG *, const DB_LSN *));
+int     log_get __P((DB_LOG *, DB_LSN *, DBT *, int));
+int     log_open __P((const char *, int, int, DB_ENV *, DB_LOG **));
+int     log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+int     log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
+int     log_unlink __P((const char *, int, DB_ENV *));
+int     log_unregister __P((DB_LOG *, u_int32_t));
+#if defined(__cplusplus)
+};
+#endif
+
+/*******************************************************
+ * Mpool
+ *******************************************************/
+/* Flag values for memp_fget(). */
+#define        DB_MPOOL_CREATE         0x001   /* Create a page. */
+#define        DB_MPOOL_LAST           0x002   /* Return the last page. */
+#define        DB_MPOOL_NEW            0x004   /* Create a new page. */
+
+/* Flag values for memp_fput(), memp_fset(). */
+#define        DB_MPOOL_CLEAN          0x001   /* Clear modified bit. */
+#define        DB_MPOOL_DIRTY          0x002   /* Page is modified. */
+#define        DB_MPOOL_DISCARD        0x004   /* Don't cache the page. */
+
+/* Mpool statistics structure. */
+struct __db_mpool_stat {
+       size_t st_cachesize;            /* Cache size. */
+       unsigned long st_cache_hit;     /* Pages found in the cache. */
+       unsigned long st_cache_miss;    /* Pages not found in the cache. */
+       unsigned long st_map;           /* Pages from mapped files. */
+       unsigned long st_page_create;   /* Pages created in the cache. */
+       unsigned long st_page_in;       /* Pages read in. */
+       unsigned long st_page_out;      /* Pages written out. */
+       unsigned long st_ro_evict;      /* Read-only pages evicted. */
+       unsigned long st_rw_evict;      /* Read-write pages evicted. */
+       unsigned long st_hash_buckets;  /* Number of hash buckets. */
+       unsigned long st_hash_searches; /* Total hash chain searches. */
+       unsigned long st_hash_longest;  /* Longest hash chain searched. */
+       unsigned long st_hash_examined; /* Total hash entries searched. */
+};
+
+/* Mpool file statistics structure. */
+struct __db_mpool_fstat {
+       char *file_name;                /* File name. */
+       size_t st_pagesize;             /* Page size. */
+       unsigned long st_cache_hit;     /* Pages found in the cache. */
+       unsigned long st_cache_miss;    /* Pages not found in the cache. */
+       unsigned long st_map;           /* Pages from mapped files. */
+       unsigned long st_page_create;   /* Pages created in the cache. */
+       unsigned long st_page_in;       /* Pages read in. */
+       unsigned long st_page_out;      /* Pages written out. */
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int    memp_close __P((DB_MPOOL *));
+int    memp_fclose __P((DB_MPOOLFILE *));
+int    memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, unsigned long, void *));
+int    memp_fopen __P((DB_MPOOL *, const char *,
+           int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **));
+int    memp_fput __P((DB_MPOOLFILE *, void *, unsigned long));
+int    memp_fset __P((DB_MPOOLFILE *, void *, unsigned long));
+int    memp_fsync __P((DB_MPOOLFILE *));
+int    memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **));
+int    memp_register __P((DB_MPOOL *, int,
+           int (*)(db_pgno_t, void *, DBT *),
+           int (*)(db_pgno_t, void *, DBT *)));
+int    memp_stat __P((DB_MPOOL *,
+           DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t)));
+int    memp_sync __P((DB_MPOOL *, DB_LSN *));
+int    memp_unlink __P((const char *, int, DB_ENV *));
+#if defined(__cplusplus)
+};
+#endif
+
+/*******************************************************
+ * Transactions.
+ *******************************************************/
+#define        DB_TXNVERSION   1
+#define        DB_TXNMAGIC     0x041593
+
+/* Operations values to the tx_recover() function. */
+#define        DB_TXN_BACKWARD_ROLL    1       /* Read the log backwards. */
+#define        DB_TXN_FORWARD_ROLL     2       /* Read the log forwards. */
+#define        DB_TXN_OPENFILES        3       /* Read for open files. */
+#define        DB_TXN_REDO             4       /* Redo the operation. */
+#define        DB_TXN_UNDO             5       /* Undo the operation. */
+
+/* Internal transaction status values. */
+
+/* Transaction statistics structures: active-txn entry, then the totals. */
+struct __db_txn_active {
+       u_int32_t       txnid;          /* Transaction ID */
+       DB_LSN          lsn;            /* Lsn of the begin record */
+};
+
+struct __db_txn_stat {
+       DB_LSN          st_last_ckp;    /* lsn of the last checkpoint */
+       DB_LSN          st_pending_ckp; /* last checkpoint did not finish */
+       time_t          st_time_ckp;    /* time of last checkpoint */
+       u_int32_t       st_last_txnid;  /* last transaction id given out */
+       u_int32_t       st_maxtxns;     /* maximum number of active txns */
+       u_int32_t       st_naborts;     /* number of aborted transactions */
+       u_int32_t       st_nbegins;     /* number of begun transactions */
+       u_int32_t       st_ncommits;    /* number of committed transactions */
+       u_int32_t       st_nactive;     /* number of active transactions */
+       DB_TXN_ACTIVE   *st_txnarray;   /* array of active transactions */
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int      txn_abort __P((DB_TXN *));
+int      txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **));
+int      txn_checkpoint __P((const DB_TXNMGR *, long, long));
+int      txn_commit __P((DB_TXN *));
+int      txn_close __P((DB_TXNMGR *));
+u_int32_t txn_id __P((DB_TXN *));
+int      txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **));
+int      txn_prepare __P((DB_TXN *));
+int      txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t)));
+int      txn_unlink __P((const char *, int, DB_ENV *));
+#if defined(__cplusplus)
+};
+#endif
+
+#ifdef DB_DBM_HSEARCH
+/*******************************************************
+ * Dbm/Ndbm historic interfaces.
+ *******************************************************/
+#define        DBM_INSERT      0               /* Flags to dbm_store(). */
+#define        DBM_REPLACE     1
+
+/*
+ * The db(3) support for ndbm(3) always appends this suffix to the
+ * file name to avoid overwriting the user's original database.
+ */
+#define        DBM_SUFFIX      ".db"
+
+typedef struct {
+       char *dptr;
+       int dsize;
+} datum;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int     dbminit __P((char *));
+#if !defined(__cplusplus)
+int     delete __P((datum));
+#endif
+datum   fetch __P((datum));
+datum   firstkey __P((void));
+datum   nextkey __P((datum));
+int     store __P((datum, datum));
+
+/*
+ * !!!
+ * Don't prototype:
+ *
+ *      dbm_clearerr(DBM *db);
+ *      dbm_dirfno(DBM *db);
+ *      dbm_error(DBM *db);
+ *      dbm_pagfno(DBM *db);
+ *      dbm_rdonly(DBM *db);
+ *
+ * they weren't documented and were historically implemented as #define's.
+ */
+void    dbm_close __P((DBM *));
+int     dbm_delete __P((DBM *, datum));
+datum   dbm_fetch __P((DBM *, datum));
+datum   dbm_firstkey __P((DBM *));
+long    dbm_forder __P((DBM *, datum));
+datum   dbm_nextkey __P((DBM *));
+DBM    *dbm_open __P((const char *, int, int));
+int     dbm_store __P((DBM *, datum, datum, int));
+#if defined(__cplusplus)
+};
+#endif
+
+/*******************************************************
+ * Hsearch historic interface.
+ *******************************************************/
+typedef enum {
+       FIND, ENTER
+} ACTION;
+
+typedef struct entry {
+       char *key;
+       void *data;
+} ENTRY;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int     hcreate __P((unsigned int));
+void    hdestroy __P((void));
+ENTRY  *hsearch __P((ENTRY, ACTION));
+#if defined(__cplusplus)
+};
+#endif
+#endif /* DB_DBM_HSEARCH */
+
+/*
+ * XXX
+ * MacOS: Reset Metrowerks C enum sizes.
+ */
+#ifdef __MWERKS__
+#pragma enumsalwaysint reset
+#endif
+#endif /* !_DB_H_ */
diff --git a/db2/db/db.c b/db2/db/db.c
new file mode 100644 (file)
index 0000000..df3a9d2
--- /dev/null
@@ -0,0 +1,818 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db.c 10.37 (Sleepycat) 8/23/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "db_swap.h"
+#include "btree.h"
+#include "hash.h"
+#include "mp.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+static int db_close __P((DB *, int));
+static int db_fd __P((DB *, int *));
+
+/*
+ * If m_name is set in meta_flags, set dbp_name on dbp.  Otherwise, if the
+ * user's dbinfo argument already caused dbp_name to be set, report the
+ * mismatch and jump to `einval'.  Uses the caller's dbenv, fname and label.
+ */
+#define        DBINFO_FCHK(dbp, fn, meta_flags, m_name, dbp_name) {            \
+       if ((meta_flags) & (m_name))                                    \
+               F_SET(dbp, dbp_name);                                   \
+       else                                                            \
+               if (F_ISSET(dbp, dbp_name)) {                           \
+                       __db_err(dbenv,                                 \
+           "%s: %s specified in dbinfo argument but not set in file",  \
+                           fname, fn);                                 \
+                       goto einval;                                    \
+               }                                                       \
+}
+
+/*
+ * db_open --
+ *     Main library interface to the DB access methods.
+ */
+int
+db_open(fname, type, flags, mode, dbenv, dbinfo, dbpp)
+       const char *fname;
+       DBTYPE type;
+       int flags, mode;
+       DB_ENV *dbenv;
+       DB_INFO *dbinfo;
+       DB **dbpp;
+{
+       BTMETA *btm;
+       DB *dbp;
+       DBT pgcookie;
+       DB_ENV *envp, t_dbenv;
+       DB_PGINFO pginfo;
+       HASHHDR *hashm;
+       off_t io;
+       size_t cachesize;
+       ssize_t nr;
+       int fd, ftype, need_fileid, restore, ret, retry_cnt, swapped;
+       char *real_name, mbuf[512];
+
+       /* Validate arguments. */
+#ifdef HAVE_SPINLOCKS
+#define        OKFLAGS (DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_THREAD | DB_TRUNCATE)
+#else
+#define        OKFLAGS (DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_TRUNCATE)
+#endif
+       if ((ret = __db_fchk(dbenv, "db_open", flags, OKFLAGS)) != 0)
+               return (ret);
+
+       /* Initialize for error return. */
+       fd = -1;
+       need_fileid = 1;
+       real_name = NULL;
+
+       /* Allocate the DB structure, reference the DB_ENV structure. */
+       if ((dbp = (DB *)calloc(1, sizeof(DB))) == NULL) {
+               __db_err(dbenv, "%s", strerror(ENOMEM));
+               return (ENOMEM);
+       }
+       dbp->dbenv = dbenv;
+
+       /* Convert the dbinfo flags. */
+       if (dbinfo != NULL) {
+               /*
+                * !!!
+                * We can't check for illegal flags until we know what type
+                * of open we're doing.
+                */
+               if (F_ISSET(dbinfo, DB_DELIMITER))
+                       F_SET(dbp, DB_RE_DELIMITER);
+               if (F_ISSET(dbinfo, DB_DUP))
+                       F_SET(dbp, DB_AM_DUP);
+               if (F_ISSET(dbinfo, DB_FIXEDLEN))
+                       F_SET(dbp, DB_RE_FIXEDLEN);
+               if (F_ISSET(dbinfo, DB_PAD))
+                       F_SET(dbp, DB_RE_PAD);
+               if (F_ISSET(dbinfo, DB_RECNUM))
+                       F_SET(dbp, DB_BT_RECNUM);
+               if (F_ISSET(dbinfo, DB_RENUMBER))
+                       F_SET(dbp, DB_RE_RENUMBER);
+               if (F_ISSET(dbinfo, DB_SNAPSHOT))
+                       F_SET(dbp, DB_RE_SNAPSHOT);
+       }
+
+       /* Set based on the open(2) flags. */
+       if (LF_ISSET(DB_RDONLY))
+               F_SET(dbp, DB_AM_RDONLY);
+
+       /* Check threading fields. */
+       if (LF_ISSET(DB_THREAD)) {
+               if ((dbp->mutex =
+                   (db_mutex_t *)malloc(sizeof(db_mutex_t))) == NULL) {
+                       __db_err(dbenv, "%s", strerror(ENOMEM));
+                       ret = ENOMEM;
+                       goto err;
+               }
+               __db_mutex_init(dbp->mutex, 0);
+
+               F_SET(dbp, DB_AM_THREAD);
+       }
+
+       /*
+        * Always set the master and initialize the queues, so we can
+        * use these fields without checking the thread bit.
+        */
+       dbp->master = dbp;
+       LIST_INIT(&dbp->handleq);
+       LIST_INSERT_HEAD(&dbp->handleq, dbp, links);
+       TAILQ_INIT(&dbp->curs_queue);
+
+       /*
+        * Set based on the dbenv fields, although no logging or transactions
+        * are possible for temporary files.
+        */
+       if (dbp->dbenv != NULL) {
+               if (dbenv->lk_info != NULL)
+                       F_SET(dbp, DB_AM_LOCKING);
+               if (fname != NULL && dbenv->lg_info != NULL)
+                       F_SET(dbp, DB_AM_LOGGING);
+       }
+
+       /* Set the common fields. */
+       if (dbinfo == NULL) {
+               dbp->pgsize = 0;
+               dbp->db_malloc = NULL;
+       } else {
+               dbp->pgsize = dbinfo->db_pagesize;
+               dbp->db_malloc = dbinfo->db_malloc;
+       }
+
+       /* Fill in the default file mode. */
+       if (mode == 0)
+               mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+
+       /* Check if the user wants us to swap byte order. */
+       if (dbinfo != NULL)
+               switch (ret = __db_byteorder(dbenv, dbinfo->db_lorder)) {
+               case 0:
+                       break;
+               case DB_SWAPBYTES:
+                       F_SET(dbp, DB_AM_SWAP);
+                       break;
+               default:
+                       goto err;
+               }
+
+       /*
+        * If we have a file name, try and read the first page, figure out
+        * what type of file it is, and initialize everything we can based
+        * on that file's meta-data page.
+        *
+        * XXX
+        * We don't actually expect zero-length strings as arguments.  We
+        * do the check, permitting them, because scripting languages, e.g.,
+        * the Tcl test suite, doesn't know anything about passing NULL's.
+        */
+       if (fname != NULL && fname[0] != '\0') {
+               /* Get the real file name. */
+               if ((ret = __db_appname(dbenv,
+                    DB_APP_DATA, NULL, fname, NULL, &real_name)) != 0)
+                       goto err;
+
+               /*
+                * Open the backing file.  We need to make sure that multiple
+                * processes attempting to create the file at the same time
+                * are properly ordered so that only one of them creates the
+                * "unique" file id, so we open it O_EXCL and O_CREAT so two
+                * simultaneous attempts to create the region will return
+                * failure in one of the attempts.  If we're one of the ones
+                * that fail, we simply retry without the O_CREAT flag, which
+                * will require that the meta-data page exist.
+                */
+#undef OKFLAGS
+#define        OKFLAGS \
+    DB_CREATE | DB_NOMMAP | DB_RDONLY | DB_THREAD | DB_TRUNCATE
+               retry_cnt = 0;
+open_retry:    if (LF_ISSET(DB_CREATE)) {
+                       if ((ret = __db_fdopen(real_name, flags | DB_EXCL,
+                           OKFLAGS | DB_EXCL, mode, &fd)) != 0)
+                               if (ret == EEXIST) {
+                                       LF_CLR(DB_CREATE);
+                                       goto open_retry;
+                               } else {
+                                       __db_err(dbenv,
+                                           "%s: %s", fname, strerror(ret));
+                                       goto err;
+                               }
+               } else
+                       if ((ret = __db_fdopen(real_name,
+                           flags, OKFLAGS, mode, &fd)) != 0) {
+                               __db_err(dbenv, "%s: %s", fname, strerror(ret));
+                               goto err;
+                       }
+
+               /*
+                * Use the optimum I/O size as the pagesize if a pagesize not
+                * specified.  Some filesystems have 64K as their optimum I/O
+                * size, but as that results in impossibly large default cache
+                * sizes, we limit the default pagesize to 16K.
+                */
+               if (dbp->pgsize == 0) {
+                       if ((ret = __db_stat(dbp->dbenv,
+                           real_name, fd, NULL, &io)) != 0)
+                               goto err;
+                       if (io < 512)
+                               io = 512;
+                       if (io > 16 * 1024)
+                               io = 16 * 1024;
+                       dbp->pgsize = io;
+                       F_SET(dbp, DB_AM_PGDEF);
+               }
+
+               /*
+                * Try and read the first disk sector -- this code assumes
+                * that the meta-data for all access methods fits in 512
+                * bytes, and that no database will be smaller than that.
+                */
+               if ((ret = __db_read(fd, mbuf, sizeof(mbuf), &nr)) != 0)
+                       goto err;
+
+               /* The fd is no longer needed. */
+               (void)__db_close(fd);
+               fd = -1;
+
+               if (nr != sizeof(mbuf)) {
+                       if (nr != 0) {
+                               __db_err(dbenv,
+                                   "%s: unexpected file format", fname);
+                               goto einval;
+                       }
+                       /*
+                        * The only way we can reach here with the DB_CREATE
+                        * flag set is if we created the file.  If we didn't
+                        * create the file, there's a chance that someone else
+                        * is busily doing so.  Sleep and give them a chance,
+                        * because we need the metadata page they're going to
+                        * write.
+                        */
+                       if (!LF_ISSET(DB_CREATE) && retry_cnt++ < 3) {
+                               __db_sleep(1, 0);
+                               goto open_retry;
+                       }
+                       if (type == DB_UNKNOWN) {
+                               __db_err(dbenv,
+                                   "%s: DBTYPE of unknown with empty file",
+                                   fname);
+                               goto einval;
+                       }
+                       goto empty;
+               }
+
+               /*
+                * A found file overrides some user information.  We'll check
+                * for possible error conditions based on conflicts between
+                * the file and the user's arguments below.
+                */
+               swapped = 0;
+               F_CLR(dbp, DB_AM_SWAP);
+
+retry:         switch (((BTMETA *)mbuf)->magic) {
+               case DB_BTREEMAGIC:
+                       if (type != DB_BTREE &&
+                           type != DB_RECNO && type != DB_UNKNOWN)
+                               goto einval;
+
+                       btm = (BTMETA *)mbuf;
+                       if (swapped && (ret = __bam_mswap((PAGE *)btm)) != 0)
+                               goto err;
+
+                       if (btm->version < DB_BTREEOLDVER ||
+                           btm->version > DB_BTREEVERSION) {
+                               __db_err(dbenv,
+                                   "%s: unsupported btree version number %lu",
+                                   fname, (u_long)btm->version);
+                               goto einval;
+                       }
+                       dbp->pgsize = btm->pagesize;
+                       F_CLR(dbp, DB_AM_PGDEF);
+
+                       if ((ret = __db_fchk(dbenv,
+                           "db_open", btm->flags, BTM_MASK)) != 0)
+                               goto err;
+                       DBINFO_FCHK(dbp, "DB_DUP",
+                           btm->flags, BTM_DUP, DB_AM_DUP);
+                       if (F_ISSET(btm, BTM_RECNO)) {
+                               DBINFO_FCHK(dbp, "DB_FIXEDLEN",
+                                   btm->flags, BTM_FIXEDLEN, DB_RE_FIXEDLEN);
+                               DBINFO_FCHK(dbp, "DB_RENUMBER",
+                                   btm->flags, BTM_RENUMBER, DB_RE_RENUMBER);
+                               type = DB_RECNO;
+                       } else {
+                               DBINFO_FCHK(dbp, "DB_RECNUM",
+                                   btm->flags, BTM_RECNUM, DB_BT_RECNUM);
+                               type = DB_BTREE;
+                       }
+
+                       /* Copy the file's unique id. */
+                       need_fileid = 0;
+                       memcpy(dbp->lock.fileid, btm->uid, DB_FILE_ID_LEN);
+                       break;
+               case DB_HASHMAGIC:
+                       if (type != DB_HASH && type != DB_UNKNOWN)
+                               goto einval;
+
+                       hashm = (HASHHDR *)mbuf;
+                       if (swapped && (ret = __ham_mswap((PAGE *)hashm)) != 0)
+                               goto err;
+
+                       if (hashm->version < DB_HASHOLDVER ||
+                           hashm->version > DB_HASHVERSION) {
+                               __db_err(dbenv,
+                                   "%s: unsupported hash version number %lu",
+                                   fname, hashm->version);
+                               goto einval;
+                       }
+                       dbp->pgsize = hashm->pagesize;
+                       F_CLR(dbp, DB_AM_PGDEF);
+
+                       if ((ret = __db_fchk(dbenv,
+                           "db_open", hashm->flags, DB_HASH_DUP)) != 0)
+                               goto err;
+                       DBINFO_FCHK(dbp, "DB_DUP",
+                           hashm->flags, DB_HASH_DUP, DB_AM_DUP);
+                       type = DB_HASH;
+
+                       /* Copy the file's unique id. */
+                       need_fileid = 0;
+                       memcpy(dbp->lock.fileid, hashm->uid, DB_FILE_ID_LEN);
+                       break;
+               default:
+                       if (swapped) {
+                               __db_err(dbenv, "unrecognized file type");
+                               goto einval;
+                       }
+                       M_32_SWAP(((BTMETA *)mbuf)->magic);
+                       F_SET(dbp, DB_AM_SWAP);
+
+                       swapped = 1;
+                       goto retry;
+               }
+       } else {
+               fname = real_name = NULL;
+
+               if (type == DB_UNKNOWN) {
+                       __db_err(dbenv,
+                           "DBTYPE of unknown without existing file");
+                       goto einval;
+               }
+               F_SET(dbp, DB_AM_INMEM);
+       }
+
+empty: /*
+        * By the time we get here we've either set the type or we're taking
+        * it from the user.
+        */
+       dbp->type = type;
+
+       /*
+        * Set the page size to the best value for I/O to this file.  Don't
+        * overflow the page offset type.  The page size must be db_indx_t
+        * aligned and >= MIN_PAGE_SIZE.
+        *
+        * XXX
+        * Should we be checking for a page size that's not a multiple of 512?
+        */
+       if (dbp->pgsize == 0) {
+               F_SET(dbp, DB_AM_PGDEF);
+               dbp->pgsize = 8 * 1024;
+       }
+       if (dbp->pgsize < DB_MIN_PGSIZE ||
+           dbp->pgsize > DB_MAX_PGSIZE ||
+           dbp->pgsize & (sizeof(db_indx_t) - 1)) {
+               __db_err(dbenv, "illegal page size");
+               goto einval;
+       }
+
+       /*
+        * Set and/or correct the cache size; must be a multiple of the
+        * page size.
+        */
+       if (dbinfo == NULL || dbinfo->db_cachesize == 0)
+               cachesize = dbp->pgsize * DB_MINCACHE;
+       else {
+               cachesize = dbinfo->db_cachesize;
+               if (cachesize & (dbp->pgsize - 1))
+                       cachesize += (~cachesize & (dbp->pgsize - 1)) + 1;
+               if (cachesize < dbp->pgsize * DB_MINCACHE)
+                       cachesize = dbp->pgsize * DB_MINCACHE;
+               if (cachesize < 20 * 1024)
+                       cachesize = 20 * 1024;
+       }
+
+       /*
+        * If no mpool supplied by the application, attach to a local,
+        * created buffer pool.
+        *
+        * XXX
+        * If the user has a DB_ENV structure, we have to use a temporary
+        * one so that we don't step on their values.  If the user doesn't,
+        * we have to create one, and keep it around until the call to the
+        * memp_close() function.  This is all so the mpool functions get
+        * the error stuff right.
+        */
+       if (dbenv == NULL || dbenv->mp_info == NULL) {
+               F_SET(dbp, DB_AM_MLOCAL);
+
+               if (dbenv == NULL) {
+                       if ((dbp->mp_dbenv =
+                           (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) {
+                               ret = ENOMEM;
+                               goto err;
+                       }
+
+                       envp = dbp->mp_dbenv;
+                       restore = 0;
+               } else {
+                       t_dbenv = *dbenv;
+
+                       envp = dbenv;
+                       restore = 1;
+               }
+               envp->mp_size = cachesize;
+               F_SET(envp, DB_MPOOL_PRIVATE);
+               if ((ret = memp_open(NULL,
+                   DB_CREATE, S_IRUSR | S_IWUSR, envp, &dbp->mp)) != 0)
+                       goto err;
+               if (restore)
+                       *dbenv = t_dbenv;
+       } else
+               dbp->mp = dbenv->mp_info;
+
+       /* Register DB's pgin/pgout functions. */
+       if ((ret = memp_register(dbp->mp,
+           DB_FTYPE_BTREE, __bam_pgin, __bam_pgout)) != 0)
+               goto err;
+       if ((ret = memp_register(dbp->mp,
+           DB_FTYPE_HASH, __ham_pgin, __ham_pgout)) != 0)
+               goto err;
+
+       /*
+        * If we don't already have one, get a unique file ID.  If the file
+        * is a temporary file, then we have to create a unique file ID --
+        * no backing file will be created until the mpool cache is filled
+        * forcing it to go to disk.  The created ID must never match any
+        * potential real file ID -- we know it won't because real file IDs
+        * contain a time stamp after the dev/ino pair, and we're simply
+        * storing a 4-byte locker ID.
+        *
+        * XXX
+        * Store the file id in the locker structure -- we can get it from
+        * there as necessary, and it saves having two copies.
+        */
+       if (need_fileid)
+               if (fname == NULL) {
+                       memset(dbp->lock.fileid, 0, DB_FILE_ID_LEN);
+                       if (F_ISSET(dbp, DB_AM_LOCKING) &&
+                           (ret = lock_id(dbenv->lk_info,
+                           (u_int32_t *)dbp->lock.fileid)) != 0)
+                               goto err;
+               } else
+                       if ((ret = __db_fileid(dbenv,
+                           real_name, 1, dbp->lock.fileid)) != 0)
+                               goto err;
+
+       /* No further use for the real name. */
+       if (real_name != NULL)
+               FREES(real_name);
+       real_name = NULL;
+
+       /*
+        * Open a backing file in the memory pool.
+        *
+        * If we need to process the file's pages on I/O, set the file type.
+        * If it's a hash file, always call pgin and pgout routines.  This
+        * means that hash files can never be mapped into process memory.  If
+        * it's a btree file and requires swapping, we need to page the file
+        * in and out.  This has to be right -- we can't mmap files that are
+        * being paged in and out.
+        */
+       if (type == DB_HASH)
+               ftype = DB_FTYPE_HASH;
+       else
+               ftype = F_ISSET(dbp, DB_AM_SWAP) ? DB_FTYPE_BTREE : 0;
+       pginfo.db_pagesize = dbp->pgsize;
+       pginfo.needswap = F_ISSET(dbp, DB_AM_SWAP);
+       pgcookie.data = &pginfo;
+       pgcookie.size = sizeof(DB_PGINFO);
+
+       if ((ret = memp_fopen(dbp->mp, fname, ftype,
+           F_ISSET(dbp, DB_AM_RDONLY) ? DB_RDONLY : 0, 0, dbp->pgsize,
+           0, &pgcookie, dbp->lock.fileid, &dbp->mpf)) != 0)
+               goto err;
+
+       /* Get a log file id. */
+       if (F_ISSET(dbp, DB_AM_LOGGING) &&
+           (ret = log_register(dbenv->lg_info,
+           dbp, fname, type, &dbp->log_fileid)) != 0)
+               goto err;
+
+       /*
+        * Get a locker id for this DB, and build the lock cookie: the first
+        * db_pgno_t bytes are the page number, the next N bytes are the file
+        * id.
+        */
+       if (F_ISSET(dbp, DB_AM_LOCKING)) {
+               if ((ret = lock_id(dbenv->lk_info, &dbp->locker)) != 0)
+                       goto err;
+               dbp->lock_dbt.size = sizeof(dbp->lock);
+               dbp->lock_dbt.data = &dbp->lock;
+       }
+
+       /* Call the real open function. */
+       switch (type) {
+       case DB_BTREE:
+               if (dbinfo != NULL && (ret = __db_fchk(dbenv,
+                   "db_open", dbinfo->flags, DB_RECNUM | DB_DUP)) != 0)
+                       goto err;
+               if (dbinfo != NULL && (ret = __db_fcchk(dbenv,
+                   "db_open", dbinfo->flags, DB_DUP, DB_RECNUM)) != 0)
+                       goto err;
+               if ((ret = __bam_open(dbp, type, dbinfo)) != 0)
+                       goto err;
+               break;
+       case DB_HASH:
+               if (dbinfo != NULL && (ret = __db_fchk(dbenv,
+                   "db_open", dbinfo->flags, DB_DUP)) != 0)
+                       goto err;
+               if ((ret = __ham_open(dbp, dbinfo)) != 0)
+                       goto err;
+               break;
+       case DB_RECNO:
+#define        DB_INFO_FLAGS \
+       (DB_DELIMITER | DB_FIXEDLEN | DB_PAD | DB_RENUMBER | DB_SNAPSHOT)
+               if (dbinfo != NULL && (ret = __db_fchk(dbenv,
+                   "db_open", dbinfo->flags, DB_INFO_FLAGS)) != 0)
+                       goto err;
+               if ((ret = __ram_open(dbp, type, dbinfo)) != 0)
+                       goto err;
+               break;
+       default:
+               abort();
+       }
+
+       /* Call a local close routine. */
+       dbp->close = db_close;
+       dbp->fd = db_fd;
+
+       *dbpp = dbp;
+       return (0);
+
+einval:        ret = EINVAL;
+err:   /* Close the file descriptor. */
+       if (fd != -1)
+               (void)__db_close(fd);
+
+       /* Discard the log file id. */
+       if (dbp->log_fileid != 0)
+               (void)log_unregister(dbenv->lg_info, dbp->log_fileid);
+
+       /* Close the memory pool file. */
+       if (dbp->mpf != NULL)
+               (void)memp_fclose(dbp->mpf);
+
+       /* If the memory pool was local, close it. */
+       if (F_ISSET(dbp, DB_AM_MLOCAL) && dbp->mp != NULL)
+               (void)memp_close(dbp->mp);
+
+       /* If we allocated a DB_ENV, discard it. */
+       if (dbp->mp_dbenv != NULL)
+               FREE(dbp->mp_dbenv, sizeof(DB_ENV));
+
+       if (real_name != NULL)
+               FREES(real_name);
+       if (dbp != NULL)
+               FREE(dbp, sizeof(DB));
+
+       return (ret);
+}
+
+/*
+ * db_close --
+ *     Close a DB tree.
+ *
+ *     Tears the handle down in this order: sync (skipped when DB_NOSYNC
+ *     is given), close every cursor and run the access-method close
+ *     routine for each handle on dbp->handleq, sync and close the mpool
+ *     file, close the mpool when DB_AM_MLOCAL is set, then release the
+ *     mutex, the log file id, the private DB_ENV and finally every DB
+ *     structure on the handle queue followed by dbp itself.
+ *
+ *     Every step is attempted even when an earlier one failed.  Returns
+ *     0, or the first non-zero value produced by one of the steps; the
+ *     result of log_unregister() is ignored.  dbp has been handed to
+ *     FREE() by the time this returns, so callers must not touch it again.
+ */
+static int
+db_close(dbp, flags)
+       DB *dbp;
+       int flags;
+{
+       DBC *dbc;
+       DB *tdbp;
+       int ret, t_ret;
+
+       ret = 0;
+
+       /*
+        * Sync the underlying file, unless the caller asked for DB_NOSYNC.
+        * NOTE(review): LF_ISSET presumably tests the local flags argument;
+        * confirm against its definition.
+        */
+       if (!LF_ISSET(DB_NOSYNC) &&
+           (t_ret = dbp->sync(dbp, 0)) != 0 && ret == 0)
+               ret = t_ret;
+
+       /*
+        * Call the underlying access method close routine for all the
+        * cursors and handles.
+        *
+        * The inner loop re-reads the head of curs_queue on every pass, so
+        * it terminates only if c_close() unlinks the cursor it was given.
+        * Only the first error is kept in ret; later ones are dropped.
+        */
+       for (tdbp = LIST_FIRST(&dbp->handleq);
+           tdbp != NULL; tdbp = LIST_NEXT(tdbp, links)) {
+
+               while ((dbc = TAILQ_FIRST(&tdbp->curs_queue)) != NULL)
+                       if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
+                               ret = t_ret;
+
+               switch (tdbp->type) {
+               case DB_BTREE:
+                       if ((t_ret = __bam_close(tdbp)) != 0 && ret == 0)
+                               ret = t_ret;
+                       break;
+               case DB_HASH:
+                       if ((t_ret = __ham_close(tdbp)) != 0 && ret == 0)
+                               ret = t_ret;
+                       break;
+               case DB_RECNO:
+                       if ((t_ret = __ram_close(tdbp)) != 0 && ret == 0)
+                               ret = t_ret;
+                       break;
+               default:
+                       abort();
+               }
+
+       }
+
+       /* Sync the memory pool. */
+       if ((t_ret = memp_fsync(dbp->mpf)) != 0 && ret == 0)
+               ret = t_ret;
+
+       /* Close the memory pool file. */
+       if ((t_ret = memp_fclose(dbp->mpf)) != 0 && ret == 0)
+               ret = t_ret;
+
+       /* If the memory pool was local, close it. */
+       if (F_ISSET(dbp, DB_AM_MLOCAL) &&
+           (t_ret = memp_close(dbp->mp)) != 0 && ret == 0)
+               ret = t_ret;
+
+       /* Discard the mutex. */
+       if (dbp->mutex != NULL)
+               FREE(dbp->mutex, sizeof(db_mutex_t));
+
+       /* Discard the log file id; a failure here is deliberately ignored. */
+       if (F_ISSET(dbp, DB_AM_LOGGING))
+               (void)log_unregister(dbp->dbenv->lg_info, dbp->log_fileid);
+
+       /*
+        * Discard the lock cookie for all handles.
+        *
+        * Nothing is actually released here: outside of DEBUG builds the
+        * body of this loop is empty.
+        */
+       for (tdbp = LIST_FIRST(&dbp->handleq);
+           tdbp != NULL; tdbp = LIST_NEXT(tdbp, links))
+               if (F_ISSET(tdbp, DB_AM_LOCKING)) {
+#ifdef DEBUG
+                       DB_LOCKREQ request;
+
+                       /*
+                        * If we're running tests, display any locks currently
+                        * held.  It's possible that some applications may hold
+                        * locks for long periods, e.g., conference room locks,
+                        * but the DB tests should never close holding locks.
+                        *
+                        * A non-zero return from lock_vec() is reported as
+                        * EAGAIN rather than as lock_vec()'s own value, and
+                        * only when no earlier error has been recorded.
+                        */
+                       request.op = DB_LOCK_DUMP;
+                       if ((t_ret = lock_vec(tdbp->dbenv->lk_info,
+                           tdbp->locker, 0, &request, 1, NULL)) != 0 &&
+                           ret == 0)
+                               ret = EAGAIN;
+#endif
+               }
+
+       /* If we allocated a DB_ENV, discard it. */
+       if (dbp->mp_dbenv != NULL)
+               FREE(dbp->mp_dbenv, sizeof(DB_ENV));
+
+       /*
+        * Free all of the DB's.
+        *
+        * dbp is unlinked from the queue first, so the loop below frees
+        * only the other handles; dbp itself is freed last because the
+        * list head being walked lives inside it.
+        */
+       LIST_REMOVE(dbp, links);
+       while ((tdbp = LIST_FIRST(&dbp->handleq)) != NULL) {
+               LIST_REMOVE(tdbp, links);
+               FREE(tdbp, sizeof(*tdbp));
+       }
+       FREE(dbp, sizeof(*dbp));
+
+       return (ret);
+}
+
+/*
+ * db_fd --
+ *     Hand back, through fdp, the descriptor of the file behind a DB
+ *     handle so that the caller can flock it.  Returns 0 on success and
+ *     ENOENT when there is no descriptor to give.
+ */
+static int
+db_fd(dbp, fdp)
+       DB *dbp;
+       int *fdp;
+{
+       int backing_fd;
+
+       /* An in-memory database has no file, hence no descriptor. */
+       if (F_ISSET(dbp, DB_AM_INMEM))
+               return (ENOENT);
+
+       /*
+        * XXX
+        * Layering violation: the descriptor is read straight out of the
+        * mpool file handle.  The underlying file is not opened until it
+        * is needed, so the descriptor may not be initialized yet (-1).
+        * *fdp is written even in that case.
+        */
+       backing_fd = dbp->mpf->fd;
+       *fdp = backing_fd;
+       return (backing_fd == -1 ? ENOENT : 0);
+}
+
+/*
+ * __db_pgerr --
+ *     Report, through __db_err(), that the given page could not be
+ *     created or retrieved, then pass the handle to __db_panic() and
+ *     return whatever that returns.
+ *
+ * PUBLIC: int __db_pgerr __P((DB *, db_pgno_t));
+ */
+int
+__db_pgerr(dbp, pgno)
+       DB *dbp;
+       db_pgno_t pgno;
+{
+       u_long page;
+
+       /* Widen once so the value matches the %lu conversion. */
+       page = (u_long)pgno;
+       __db_err(dbp->dbenv, "unable to create/retrieve page %lu", page);
+       return (__db_panic(dbp));
+}
+
+/*
+ * __db_pgfmt --
+ *     Report, through __db_err(), that the given page has an illegal
+ *     type or format, then pass the handle to __db_panic() and return
+ *     whatever that returns.
+ *
+ * PUBLIC: int __db_pgfmt __P((DB *, db_pgno_t));
+ */
+int
+__db_pgfmt(dbp, pgno)
+       DB *dbp;
+       db_pgno_t pgno;
+{
+       u_long page;
+
+       /* Widen once so the value matches the %lu conversion. */
+       page = (u_long)pgno;
+       __db_err(dbp->dbenv, "page %lu: illegal page type or format", page);
+       return (__db_panic(dbp));
+}
diff --git a/db2/db/db.src b/db2/db/db.src
new file mode 100644 (file)
index 0000000..a3e2f7b
--- /dev/null
@@ -0,0 +1,154 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *     @(#)db.src      10.3 (Sleepycat) 8/18/97
+ */
+#include "config.h"
+
+PREFIX db
+
+/*
+ * addrem -- Add or remove an entry from a duplicate page.
+ *
+ * opcode:     identifies if this is an add or delete.
+ * fileid:     file identifier of the file being modified.
+ * pgno:       duplicate page number.
+ * indx:       location at which to insert or delete.
+ * nbytes:     number of bytes added/removed to/from the page.
+ * hdr:                header for the data item.
+ * dbt:                data that is deleted or is to be added.
+ * pagelsn:    former lsn of the page.
+ *
+ * If the hdr was NULL then, the dbt is a regular B_KEYDATA.
+ * If the dbt was NULL then the hdr is a complete item to be
+ * pasted on the page.
+ */
+BEGIN addrem
+ARG    opcode          u_int32_t       lu
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+ARG    indx            u_int32_t       lu
+ARG    nbytes          size_t          lu
+DBT    hdr             DBT             s
+DBT    dbt             DBT             s
+POINTER        pagelsn         DB_LSN *        lu
+END
+
+/*
+ * split -- Handles the split of a duplicate page.
+ *
+ * opcode:     defines whether we are splitting from or splitting onto
+ * fileid:     file identifier of the file being modified.
+ * pgno:       page number being split.
+ * pageimage:  entire page contents.
+ * pagelsn:    former lsn of the page.
+ */
+BEGIN split
+ARG    opcode          u_int32_t       lu
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+DBT    pageimage       DBT             s
+POINTER        pagelsn         DB_LSN *        lu
+END
+
+/*
+ * big -- Handles addition and deletion of big key/data items.
+ *
+ * opcode:     identifies get/put.
+ * fileid:     file identifier of the file being modified.
+ * pgno:       page onto which data is being added/removed.
+ * prev_pgno:  the page before the one we are logging.
+ * next_pgno:  the page after the one we are logging.
+ * dbt:                data being written onto the page.
+ * pagelsn:    former lsn of the orig_page.
+ * prevlsn:    former lsn of the prev_pgno.
+ * nextlsn:    former lsn of the next_pgno. This is not currently used, but
+ *             may be used later if we actually do overwrites of big key/
+ *             data items in place.
+ */
+BEGIN big
+ARG    opcode          u_int32_t       lu
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+ARG    prev_pgno       db_pgno_t       lu
+ARG    next_pgno       db_pgno_t       lu
+DBT    dbt             DBT             s
+POINTER        pagelsn         DB_LSN *        lu
+POINTER        prevlsn         DB_LSN *        lu
+POINTER        nextlsn         DB_LSN *        lu
+END
+
+/*
+ * ovref -- Handles increment of overflow page reference count.
+ *
+ * fileid:     identifies the file being modified.
+ * pgno:       page number being incremented.
+ * lsn:        the page's original lsn.
+ */
+BEGIN ovref
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+POINTER        lsn             DB_LSN *        lu
+END
+
+/*
+ * relink -- Handles relinking around a page.
+ *
+ * fileid:     identifies the file being modified.
+ * pgno:       the page being changed.
+ * lsn:        the page's original lsn.
+ * prev:       the previous page.
+ * lsn_prev:   the previous page's original lsn.
+ * next:       the next page.
+ * lsn_next:   the next page's original lsn.
+ */
+BEGIN relink
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+POINTER        lsn             DB_LSN *        lu
+ARG    prev            db_pgno_t       lu
+POINTER        lsn_prev        DB_LSN *        lu
+ARG    next            db_pgno_t       lu
+POINTER        lsn_next        DB_LSN *        lu
+END
+
+/*
+ * Addpage -- Handles adding a new duplicate page onto the end of
+ * an existing duplicate page.
+ * fileid:     identifies the file being changed.
+ * pgno:       page number to which a new page is being added.
+ * lsn:                lsn of pgno
+ * nextpgno:   new page number being added.
+ * nextlsn:    lsn of nextpgno.
+ */
+BEGIN addpage
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+POINTER        lsn             DB_LSN *        lu
+ARG    nextpgno        db_pgno_t       lu
+POINTER        nextlsn         DB_LSN *        lu
+END
+
+/*
+ * Debug -- log an operation upon entering an access method.
+ * op:         Operation (cursor, c_close, c_get, c_put, c_del,
+ *             get, put, delete).
+ * fileid:     identifies the file being acted upon.
+ * key:                key parameter
+ * data:       data parameter
+ * arg_flags:  flags parameter
+ */
+BEGIN debug
+DBT    op              DBT             s
+ARG    fileid          u_int32_t       lu
+DBT    key             DBT             s
+DBT    data            DBT             s
+ARG    arg_flags       u_int32_t       lu
+END
+
+/*
+ * noop -- do nothing, but get an LSN.
+ */
+BEGIN noop
+END
diff --git a/db2/db/db_auto.c b/db2/db/db_auto.c
new file mode 100644 (file)
index 0000000..4684f1a
--- /dev/null
@@ -0,0 +1,1462 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#include "config.h"
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * PUBLIC: int __db_addrem_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
+ * PUBLIC:     size_t, DBT *, DBT *, DB_LSN *));
+ */
+int __db_addrem_log(logp, txnid, ret_lsnp, flags,
+       opcode, fileid, pgno, indx, nbytes, hdr,
+       dbt, pagelsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t opcode;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       u_int32_t indx;
+       size_t nbytes;
+       DBT *hdr;
+       DBT *dbt;
+       DB_LSN * pagelsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_db_addrem;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(opcode)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(indx)
+           + sizeof(nbytes)
+           + sizeof(u_int32_t) + (hdr == NULL ? 0 : hdr->size)
+           + sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size)
+           + sizeof(*pagelsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &opcode, sizeof(opcode));
+       bp += sizeof(opcode);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       memcpy(bp, &indx, sizeof(indx));
+       bp += sizeof(indx);
+       memcpy(bp, &nbytes, sizeof(nbytes));
+       bp += sizeof(nbytes);
+       if (hdr == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &hdr->size, sizeof(hdr->size));
+               bp += sizeof(hdr->size);
+               memcpy(bp, hdr->data, hdr->size);
+               bp += hdr->size;
+       }
+       if (dbt == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &dbt->size, sizeof(dbt->size));
+               bp += sizeof(dbt->size);
+               memcpy(bp, dbt->data, dbt->size);
+               bp += dbt->size;
+       }
+       if (pagelsn != NULL)
+               memcpy(bp, pagelsn, sizeof(*pagelsn));
+       else
+               memset(bp, 0, sizeof(*pagelsn));
+       bp += sizeof(*pagelsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __db_addrem_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+/*
+ * __db_addrem_print --
+ *     Decode the db_addrem log record in dbtp->data and print it on
+ *     stdout.  Each byte of hdr and dbt is printed as itself when it is
+ *     printable or a newline, and in hex otherwise.
+ *
+ * Returns 0, or the error from __db_addrem_read() if decoding fails.
+ */
+int
+__db_addrem_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __db_addrem_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __db_addrem_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]db_addrem: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\topcode: %lu\n", (u_long)argp->opcode);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tindx: %lu\n", (u_long)argp->indx);
+       printf("\tnbytes: %lu\n", (u_long)argp->nbytes);
+       printf("\thdr: ");
+       for (i = 0; i < argp->hdr.size; i++) {
+               /*
+                * Read the byte as unsigned: isprint() is undefined for
+                * negative values, and a sign-extended byte would print
+                * as 0xffffffXX.
+                */
+               c = ((u_int8_t *)argp->hdr.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", (unsigned int)c);
+       }
+       printf("\n");
+       printf("\tdbt: ");
+       for (i = 0; i < argp->dbt.size; i++) {
+               /* Unsigned for the same reason as the hdr loop. */
+               c = ((u_int8_t *)argp->dbt.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", (unsigned int)c);
+       }
+       printf("\n");
+       printf("\tpagelsn: [%lu][%lu]\n",
+           (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+       printf("\n");
+       free(argp);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_addrem_read __P((void *, __db_addrem_args **));
+ */
+/*
+ * __db_addrem_read --
+ *     Unpack the db_addrem log record at recbuf into a newly allocated
+ *     __db_addrem_args, returned through argpp.  The structure and the
+ *     DB_TXN its txnid member points at come from a single malloc(), so
+ *     one free() of *argpp releases both.  The hdr and dbt data pointers
+ *     refer into recbuf; the bytes are not copied.
+ *
+ * Returns 0 on success, or ENOMEM (without storing to *argpp) if the
+ * allocation fails.
+ */
+int
+__db_addrem_read(recbuf, argpp)
+       void *recbuf;
+       __db_addrem_args **argpp;
+{
+       __db_addrem_args *rec;
+       u_int8_t *src;
+
+       rec = malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       /* The DB_TXN lives directly behind the args structure. */
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+
+       /* Common header: record type, transaction id, previous LSN. */
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Fixed-size fields. */
+       memcpy(&rec->opcode, src, sizeof(rec->opcode));
+       src += sizeof(rec->opcode);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->indx, src, sizeof(rec->indx));
+       src += sizeof(rec->indx);
+       memcpy(&rec->nbytes, src, sizeof(rec->nbytes));
+       src += sizeof(rec->nbytes);
+
+       /* Length-prefixed byte strings, referenced in place. */
+       memcpy(&rec->hdr.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->hdr.data = src;
+       src += rec->hdr.size;
+       memcpy(&rec->dbt.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->dbt.data = src;
+       src += rec->dbt.size;
+
+       memcpy(&rec->pagelsn, src, sizeof(rec->pagelsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_split_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t, db_pgno_t, DBT *,
+ * PUBLIC:     DB_LSN *));
+ */
+int __db_split_log(logp, txnid, ret_lsnp, flags,
+       opcode, fileid, pgno, pageimage, pagelsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t opcode;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DBT *pageimage;
+       DB_LSN * pagelsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_db_split;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(opcode)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size)
+           + sizeof(*pagelsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &opcode, sizeof(opcode));
+       bp += sizeof(opcode);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (pageimage == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &pageimage->size, sizeof(pageimage->size));
+               bp += sizeof(pageimage->size);
+               memcpy(bp, pageimage->data, pageimage->size);
+               bp += pageimage->size;
+       }
+       if (pagelsn != NULL)
+               memcpy(bp, pagelsn, sizeof(*pagelsn));
+       else
+               memset(bp, 0, sizeof(*pagelsn));
+       bp += sizeof(*pagelsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __db_split_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+/*
+ * __db_split_print --
+ *     Decode the db_split log record in dbtp->data and print it on
+ *     stdout.  Each byte of pageimage is printed as itself when it is
+ *     printable or a newline, and in hex otherwise.
+ *
+ * Returns 0, or the error from __db_split_read() if decoding fails.
+ */
+int
+__db_split_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __db_split_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __db_split_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]db_split: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\topcode: %lu\n", (u_long)argp->opcode);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tpageimage: ");
+       for (i = 0; i < argp->pageimage.size; i++) {
+               /*
+                * Read the byte as unsigned: isprint() is undefined for
+                * negative values, and a sign-extended byte would print
+                * as 0xffffffXX.
+                */
+               c = ((u_int8_t *)argp->pageimage.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", (unsigned int)c);
+       }
+       printf("\n");
+       printf("\tpagelsn: [%lu][%lu]\n",
+           (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+       printf("\n");
+       free(argp);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_split_read __P((void *, __db_split_args **));
+ */
+/*
+ * __db_split_read --
+ *     Unpack the db_split log record at recbuf into a newly allocated
+ *     __db_split_args, returned through argpp.  The structure and the
+ *     DB_TXN its txnid member points at come from a single malloc(), so
+ *     one free() of *argpp releases both.  The pageimage data pointer
+ *     refers into recbuf; the bytes are not copied.
+ *
+ * Returns 0 on success, or ENOMEM (without storing to *argpp) if the
+ * allocation fails.
+ */
+int
+__db_split_read(recbuf, argpp)
+       void *recbuf;
+       __db_split_args **argpp;
+{
+       __db_split_args *rec;
+       u_int8_t *src;
+
+       rec = malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       /* The DB_TXN lives directly behind the args structure. */
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+
+       /* Common header: record type, transaction id, previous LSN. */
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Fixed-size fields. */
+       memcpy(&rec->opcode, src, sizeof(rec->opcode));
+       src += sizeof(rec->opcode);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+
+       /* Length-prefixed page image, referenced in place. */
+       memcpy(&rec->pageimage.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->pageimage.data = src;
+       src += rec->pageimage.size;
+
+       memcpy(&rec->pagelsn, src, sizeof(rec->pagelsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_big_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t, db_pgno_t, db_pgno_t,
+ * PUBLIC:     db_pgno_t, DBT *, DB_LSN *, DB_LSN *,
+ * PUBLIC:     DB_LSN *));
+ */
+int __db_big_log(logp, txnid, ret_lsnp, flags,
+       opcode, fileid, pgno, prev_pgno, next_pgno, dbt,
+       pagelsn, prevlsn, nextlsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t opcode;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       db_pgno_t prev_pgno;
+       db_pgno_t next_pgno;
+       DBT *dbt;
+       DB_LSN * pagelsn;
+       DB_LSN * prevlsn;
+       DB_LSN * nextlsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_db_big;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(opcode)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(prev_pgno)
+           + sizeof(next_pgno)
+           + sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size)
+           + sizeof(*pagelsn)
+           + sizeof(*prevlsn)
+           + sizeof(*nextlsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &opcode, sizeof(opcode));
+       bp += sizeof(opcode);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       memcpy(bp, &prev_pgno, sizeof(prev_pgno));
+       bp += sizeof(prev_pgno);
+       memcpy(bp, &next_pgno, sizeof(next_pgno));
+       bp += sizeof(next_pgno);
+       if (dbt == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &dbt->size, sizeof(dbt->size));
+               bp += sizeof(dbt->size);
+               memcpy(bp, dbt->data, dbt->size);
+               bp += dbt->size;
+       }
+       if (pagelsn != NULL)
+               memcpy(bp, pagelsn, sizeof(*pagelsn));
+       else
+               memset(bp, 0, sizeof(*pagelsn));
+       bp += sizeof(*pagelsn);
+       if (prevlsn != NULL)
+               memcpy(bp, prevlsn, sizeof(*prevlsn));
+       else
+               memset(bp, 0, sizeof(*prevlsn));
+       bp += sizeof(*prevlsn);
+       if (nextlsn != NULL)
+               memcpy(bp, nextlsn, sizeof(*nextlsn));
+       else
+               memset(bp, 0, sizeof(*nextlsn));
+       bp += sizeof(*nextlsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __db_big_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+/*
+ * __db_big_print --
+ *     Decode the db_big log record in dbtp->data and print it on stdout.
+ *     Each byte of dbt is printed as itself when it is printable or a
+ *     newline, and in hex otherwise.
+ *
+ * Returns 0, or the error from __db_big_read() if decoding fails.
+ */
+int
+__db_big_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __db_big_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __db_big_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]db_big: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\topcode: %lu\n", (u_long)argp->opcode);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tprev_pgno: %lu\n", (u_long)argp->prev_pgno);
+       printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno);
+       printf("\tdbt: ");
+       for (i = 0; i < argp->dbt.size; i++) {
+               /*
+                * Read the byte as unsigned: isprint() is undefined for
+                * negative values, and a sign-extended byte would print
+                * as 0xffffffXX.
+                */
+               c = ((u_int8_t *)argp->dbt.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", (unsigned int)c);
+       }
+       printf("\n");
+       printf("\tpagelsn: [%lu][%lu]\n",
+           (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+       printf("\tprevlsn: [%lu][%lu]\n",
+           (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset);
+       printf("\tnextlsn: [%lu][%lu]\n",
+           (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset);
+       printf("\n");
+       free(argp);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_big_read __P((void *, __db_big_args **));
+ */
+/*
+ * __db_big_read --
+ *     Unpack the db_big log record at recbuf into a newly allocated
+ *     __db_big_args, returned through argpp.  The structure and the
+ *     DB_TXN its txnid member points at come from a single malloc(), so
+ *     one free() of *argpp releases both.  The dbt data pointer refers
+ *     into recbuf; the bytes are not copied.
+ *
+ * Returns 0 on success, or ENOMEM (without storing to *argpp) if the
+ * allocation fails.
+ */
+int
+__db_big_read(recbuf, argpp)
+       void *recbuf;
+       __db_big_args **argpp;
+{
+       __db_big_args *rec;
+       u_int8_t *src;
+
+       rec = malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       /* The DB_TXN lives directly behind the args structure. */
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+
+       /* Common header: record type, transaction id, previous LSN. */
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Fixed-size fields. */
+       memcpy(&rec->opcode, src, sizeof(rec->opcode));
+       src += sizeof(rec->opcode);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->prev_pgno, src, sizeof(rec->prev_pgno));
+       src += sizeof(rec->prev_pgno);
+       memcpy(&rec->next_pgno, src, sizeof(rec->next_pgno));
+       src += sizeof(rec->next_pgno);
+
+       /* Length-prefixed byte string, referenced in place. */
+       memcpy(&rec->dbt.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->dbt.data = src;
+       src += rec->dbt.size;
+
+       /* Trailing LSNs. */
+       memcpy(&rec->pagelsn, src, sizeof(rec->pagelsn));
+       src += sizeof(rec->pagelsn);
+       memcpy(&rec->prevlsn, src, sizeof(rec->prevlsn));
+       src += sizeof(rec->prevlsn);
+       memcpy(&rec->nextlsn, src, sizeof(rec->nextlsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_ovref_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *));
+ */
+int __db_ovref_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, lsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DB_LSN * lsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_db_ovref;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(*lsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (lsn != NULL)
+               memcpy(bp, lsn, sizeof(*lsn));
+       else
+               memset(bp, 0, sizeof(*lsn));
+       bp += sizeof(*lsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __db_ovref_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+/*
+ * __db_ovref_print --
+ *     Decode the db_ovref log record in dbtp->data and print its fields
+ *     on stdout.
+ *
+ * Returns 0, or the error from __db_ovref_read() if decoding fails.
+ */
+int
+__db_ovref_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __db_ovref_args *rec;
+       int ret;
+
+       /* Presumably here only to quiet unused-parameter warnings. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       ret = __db_ovref_read(dbtp->data, &rec);
+       if (ret != 0)
+               return (ret);
+
+       /* Header line: this record's LSN, type, txn id and previous LSN. */
+       printf("[%lu][%lu]db_ovref: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file, (u_long)lsnp->offset,
+           (u_long)rec->type,
+           (u_long)rec->txnid->txnid,
+           (u_long)rec->prev_lsn.file, (u_long)rec->prev_lsn.offset);
+
+       printf("\tfileid: %lu\n", (u_long)rec->fileid);
+       printf("\tpgno: %lu\n", (u_long)rec->pgno);
+       printf("\tlsn: [%lu][%lu]\n",
+           (u_long)rec->lsn.file, (u_long)rec->lsn.offset);
+       printf("\n");
+
+       free(rec);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_ovref_read __P((void *, __db_ovref_args **));
+ */
+/*
+ * __db_ovref_read --
+ *     Unpack the db_ovref log record at recbuf into a newly allocated
+ *     __db_ovref_args, returned through argpp.  The structure and the
+ *     DB_TXN its txnid member points at come from a single malloc(), so
+ *     one free() of *argpp releases both.
+ *
+ * Returns 0 on success, or ENOMEM (without storing to *argpp) if the
+ * allocation fails.
+ */
+int
+__db_ovref_read(recbuf, argpp)
+       void *recbuf;
+       __db_ovref_args **argpp;
+{
+       __db_ovref_args *rec;
+       u_int8_t *src;
+
+       rec = malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       /* The DB_TXN lives directly behind the args structure. */
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+
+       /* Common header: record type, transaction id, previous LSN. */
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Record-specific fields. */
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->lsn, src, sizeof(rec->lsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_relink_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
+ * PUBLIC:     DB_LSN *, db_pgno_t, DB_LSN *));
+ */
+int __db_relink_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, lsn, prev, lsn_prev, next,
+       lsn_next)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DB_LSN * lsn;
+       db_pgno_t prev;
+       DB_LSN * lsn_prev;
+       db_pgno_t next;
+       DB_LSN * lsn_next;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_db_relink;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(*lsn)
+           + sizeof(prev)
+           + sizeof(*lsn_prev)
+           + sizeof(next)
+           + sizeof(*lsn_next);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (lsn != NULL)
+               memcpy(bp, lsn, sizeof(*lsn));
+       else
+               memset(bp, 0, sizeof(*lsn));
+       bp += sizeof(*lsn);
+       memcpy(bp, &prev, sizeof(prev));
+       bp += sizeof(prev);
+       if (lsn_prev != NULL)
+               memcpy(bp, lsn_prev, sizeof(*lsn_prev));
+       else
+               memset(bp, 0, sizeof(*lsn_prev));
+       bp += sizeof(*lsn_prev);
+       memcpy(bp, &next, sizeof(next));
+       bp += sizeof(next);
+       if (lsn_next != NULL)
+               memcpy(bp, lsn_next, sizeof(*lsn_next));
+       else
+               memset(bp, 0, sizeof(*lsn_next));
+       bp += sizeof(*lsn_next);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __db_relink_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+/*
+ * __db_relink_print --
+ *     Decode the db_relink log record in dbtp->data and print its fields
+ *     on stdout.
+ *
+ * Returns 0, or the error from __db_relink_read() if decoding fails.
+ */
+int
+__db_relink_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __db_relink_args *rec;
+       int ret;
+
+       /* Presumably here only to quiet unused-parameter warnings. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       ret = __db_relink_read(dbtp->data, &rec);
+       if (ret != 0)
+               return (ret);
+
+       /* Header line: this record's LSN, type, txn id and previous LSN. */
+       printf("[%lu][%lu]db_relink: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file, (u_long)lsnp->offset,
+           (u_long)rec->type,
+           (u_long)rec->txnid->txnid,
+           (u_long)rec->prev_lsn.file, (u_long)rec->prev_lsn.offset);
+
+       printf("\tfileid: %lu\n", (u_long)rec->fileid);
+       printf("\tpgno: %lu\n", (u_long)rec->pgno);
+       printf("\tlsn: [%lu][%lu]\n",
+           (u_long)rec->lsn.file, (u_long)rec->lsn.offset);
+       printf("\tprev: %lu\n", (u_long)rec->prev);
+       printf("\tlsn_prev: [%lu][%lu]\n",
+           (u_long)rec->lsn_prev.file, (u_long)rec->lsn_prev.offset);
+       printf("\tnext: %lu\n", (u_long)rec->next);
+       printf("\tlsn_next: [%lu][%lu]\n",
+           (u_long)rec->lsn_next.file, (u_long)rec->lsn_next.offset);
+       printf("\n");
+
+       free(rec);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_relink_read __P((void *, __db_relink_args **));
+ */
+/*
+ * __db_relink_read --
+ *     Unpack the db_relink log record at recbuf into a newly allocated
+ *     __db_relink_args, returned through argpp.  The structure and the
+ *     DB_TXN its txnid member points at come from a single malloc(), so
+ *     one free() of *argpp releases both.
+ *
+ * Returns 0 on success, or ENOMEM (without storing to *argpp) if the
+ * allocation fails.
+ */
+int
+__db_relink_read(recbuf, argpp)
+       void *recbuf;
+       __db_relink_args **argpp;
+{
+       __db_relink_args *rec;
+       u_int8_t *src;
+
+       rec = malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       /* The DB_TXN lives directly behind the args structure. */
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+
+       /* Common header: record type, transaction id, previous LSN. */
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* The page itself, then its previous and next neighbours. */
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->lsn, src, sizeof(rec->lsn));
+       src += sizeof(rec->lsn);
+       memcpy(&rec->prev, src, sizeof(rec->prev));
+       src += sizeof(rec->prev);
+       memcpy(&rec->lsn_prev, src, sizeof(rec->lsn_prev));
+       src += sizeof(rec->lsn_prev);
+       memcpy(&rec->next, src, sizeof(rec->next));
+       src += sizeof(rec->next);
+       memcpy(&rec->lsn_next, src, sizeof(rec->lsn_next));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_addpage_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
+ * PUBLIC:     DB_LSN *));
+ */
+int __db_addpage_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, lsn, nextpgno, nextlsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       DB_LSN * lsn;
+       db_pgno_t nextpgno;
+       DB_LSN * nextlsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_db_addpage;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(*lsn)
+           + sizeof(nextpgno)
+           + sizeof(*nextlsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (lsn != NULL)
+               memcpy(bp, lsn, sizeof(*lsn));
+       else
+               memset(bp, 0, sizeof(*lsn));
+       bp += sizeof(*lsn);
+       memcpy(bp, &nextpgno, sizeof(nextpgno));
+       bp += sizeof(nextpgno);
+       if (nextlsn != NULL)
+               memcpy(bp, nextlsn, sizeof(*nextlsn));
+       else
+               memset(bp, 0, sizeof(*nextlsn));
+       bp += sizeof(*nextlsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __db_addpage_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__db_addpage_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __db_addpage_args *rec;
+       int status;
+
+       /*
+        * The three "notused" parameters are only assigned to, never read;
+        * presumably this is to quiet unused-parameter warnings.
+        */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       /* Decode the raw record; on success rec is heap-allocated. */
+       status = __db_addpage_read(dbtp->data, &rec);
+       if (status != 0)
+               return (status);
+
+       /* Header line: record LSN, record type, txn id, previous LSN. */
+       printf("[%lu][%lu]db_addpage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file, (u_long)lsnp->offset,
+           (u_long)rec->type, (u_long)rec->txnid->txnid,
+           (u_long)rec->prev_lsn.file, (u_long)rec->prev_lsn.offset);
+
+       /* One line per record-specific field. */
+       printf("\tfileid: %lu\n", (u_long)rec->fileid);
+       printf("\tpgno: %lu\n", (u_long)rec->pgno);
+       printf("\tlsn: [%lu][%lu]\n",
+           (u_long)rec->lsn.file, (u_long)rec->lsn.offset);
+       printf("\tnextpgno: %lu\n", (u_long)rec->nextpgno);
+       printf("\tnextlsn: [%lu][%lu]\n",
+           (u_long)rec->nextlsn.file, (u_long)rec->nextlsn.offset);
+       printf("\n");
+
+       /* The decoded record was a single allocation; release it. */
+       free(rec);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_addpage_read __P((void *, __db_addpage_args **));
+ */
+int
+__db_addpage_read(recbuf, argpp)
+       void *recbuf;
+       __db_addpage_args **argpp;
+{
+       __db_addpage_args *rec;
+       u_int8_t *cur;
+
+       /*
+        * A single allocation holds both the argument structure and the
+        * DB_TXN that its txnid member points at (placed directly after
+        * the structure), so the caller releases everything with one free().
+        */
+       rec = (__db_addpage_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /*
+        * Walk the packed buffer field by field.  memcpy is used for every
+        * field, so nothing here depends on the buffer's alignment.
+        */
+       cur = recbuf;
+
+       memcpy(&rec->type, cur, sizeof(rec->type));
+       cur += sizeof(rec->type);
+
+       memcpy(&rec->txnid->txnid, cur, sizeof(rec->txnid->txnid));
+       cur += sizeof(rec->txnid->txnid);
+
+       memcpy(&rec->prev_lsn, cur, sizeof(DB_LSN));
+       cur += sizeof(DB_LSN);
+
+       memcpy(&rec->fileid, cur, sizeof(rec->fileid));
+       cur += sizeof(rec->fileid);
+
+       memcpy(&rec->pgno, cur, sizeof(rec->pgno));
+       cur += sizeof(rec->pgno);
+
+       memcpy(&rec->lsn, cur, sizeof(rec->lsn));
+       cur += sizeof(rec->lsn);
+
+       memcpy(&rec->nextpgno, cur, sizeof(rec->nextpgno));
+       cur += sizeof(rec->nextpgno);
+
+       /* Last field: no need to advance the cursor afterwards. */
+       memcpy(&rec->nextlsn, cur, sizeof(rec->nextlsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_debug_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     DBT *, u_int32_t, DBT *, DBT *,
+ * PUBLIC:     u_int32_t));
+ */
+int __db_debug_log(logp, txnid, ret_lsnp, flags,
+       op, fileid, key, data, arg_flags)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       DBT *op;
+       u_int32_t fileid;
+       DBT *key;
+       DBT *data;
+       u_int32_t arg_flags;
+{
+       DBT rec;
+       DB_LSN no_lsn, *prev;
+       u_int32_t kind, tid, none;
+       u_int8_t *cur;
+       int status;
+
+       /*
+        * Header values: the record type, the transaction id (0 when there
+        * is no transaction) and the transaction's last LSN (all zeroes
+        * when there is no transaction).
+        */
+       kind = DB_db_debug;
+       none = 0;
+       if (txnid != NULL) {
+               tid = txnid->txnid;
+               prev = &txnid->last_lsn;
+       } else {
+               tid = 0;
+               no_lsn.file = 0;
+               no_lsn.offset = 0;
+               prev = &no_lsn;
+       }
+
+       /*
+        * Fixed part of the record plus one 32-bit length word for each of
+        * the three DBTs; the DBT payloads are added when present.
+        */
+       rec.size = sizeof(kind) + sizeof(tid) + sizeof(DB_LSN)
+           + sizeof(fileid) + sizeof(arg_flags)
+           + 3 * sizeof(u_int32_t);
+       if (op != NULL)
+               rec.size += op->size;
+       if (key != NULL)
+               rec.size += key->size;
+       if (data != NULL)
+               rec.size += data->size;
+
+       rec.data = malloc(rec.size);
+       if (rec.data == NULL)
+               return (ENOMEM);
+
+       /* Serialize the header. */
+       cur = rec.data;
+       memcpy(cur, &kind, sizeof(kind));
+       cur += sizeof(kind);
+       memcpy(cur, &tid, sizeof(tid));
+       cur += sizeof(tid);
+       memcpy(cur, prev, sizeof(DB_LSN));
+       cur += sizeof(DB_LSN);
+
+       /* op: length word followed by the bytes; a NULL DBT is length 0. */
+       if (op != NULL) {
+               memcpy(cur, &op->size, sizeof(op->size));
+               cur += sizeof(op->size);
+               memcpy(cur, op->data, op->size);
+               cur += op->size;
+       } else {
+               memcpy(cur, &none, sizeof(u_int32_t));
+               cur += sizeof(u_int32_t);
+       }
+
+       memcpy(cur, &fileid, sizeof(fileid));
+       cur += sizeof(fileid);
+
+       /* key: same encoding as op. */
+       if (key != NULL) {
+               memcpy(cur, &key->size, sizeof(key->size));
+               cur += sizeof(key->size);
+               memcpy(cur, key->data, key->size);
+               cur += key->size;
+       } else {
+               memcpy(cur, &none, sizeof(u_int32_t));
+               cur += sizeof(u_int32_t);
+       }
+
+       /* data: same encoding as op. */
+       if (data != NULL) {
+               memcpy(cur, &data->size, sizeof(data->size));
+               cur += sizeof(data->size);
+               memcpy(cur, data->data, data->size);
+               cur += data->size;
+       } else {
+               memcpy(cur, &none, sizeof(u_int32_t));
+               cur += sizeof(u_int32_t);
+       }
+
+       memcpy(cur, &arg_flags, sizeof(arg_flags));
+       cur += sizeof(arg_flags);
+#ifdef DEBUG
+       /* Sanity check: the bytes written must match the computed size. */
+       if ((u_int32_t)(cur - (u_int8_t *)rec.data) != rec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+
+       /*
+        * Hand the record to the log, then remember the returned LSN as
+        * the transaction's most recent one.
+        */
+       status = log_put(logp, ret_lsnp, &rec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(rec.data);
+       return (status);
+}
+
+/*
+ * PUBLIC: int __db_debug_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__db_debug_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __db_debug_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       /*
+        * Print a db_debug log record on stdout.  dbtp->data is the raw
+        * record and lsnp the record's LSN.  Returns 0 on success or the
+        * error returned by __db_debug_read.
+        *
+        * The "notused" parameters are only assigned to, never read;
+        * presumably this is to quiet unused-parameter warnings.
+        */
+       i = 0;
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __db_debug_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]db_debug: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+
+       /*
+        * The three byte strings are dumped character by character.  Each
+        * byte is fetched as an unsigned value: passing a negative char to
+        * isprint() is undefined, and a sign-extended byte would print as
+        * 0xffffffXX instead of 0xXX.
+        */
+       printf("\top: ");
+       for (i = 0; i < argp->op.size; i++) {
+               c = ((u_int8_t *)argp->op.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tkey: ");
+       for (i = 0; i < argp->key.size; i++) {
+               c = ((u_int8_t *)argp->key.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tdata: ");
+       for (i = 0; i < argp->data.size; i++) {
+               c = ((u_int8_t *)argp->data.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\targ_flags: %lu\n", (u_long)argp->arg_flags);
+       printf("\n");
+       /* argp is a single allocation made by __db_debug_read. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_debug_read __P((void *, __db_debug_args **));
+ */
+int
+__db_debug_read(recbuf, argpp)
+       void *recbuf;
+       __db_debug_args **argpp;
+{
+       __db_debug_args *rec;
+       u_int8_t *cur;
+
+       /*
+        * One allocation holds the argument structure followed by the
+        * DB_TXN that rec->txnid points at.
+        */
+       rec = (__db_debug_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Fixed header: type, transaction id, previous LSN. */
+       cur = recbuf;
+       memcpy(&rec->type, cur, sizeof(rec->type));
+       cur += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, cur, sizeof(rec->txnid->txnid));
+       cur += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, cur, sizeof(DB_LSN));
+       cur += sizeof(DB_LSN);
+
+       /*
+        * Each DBT is a 32-bit length followed by that many bytes.  The
+        * bytes are not copied: the DBT's data pointer refers straight
+        * into recbuf, so recbuf must stay valid while the result is used.
+        */
+       memcpy(&rec->op.size, cur, sizeof(u_int32_t));
+       cur += sizeof(u_int32_t);
+       rec->op.data = cur;
+       cur += rec->op.size;
+
+       memcpy(&rec->fileid, cur, sizeof(rec->fileid));
+       cur += sizeof(rec->fileid);
+
+       memcpy(&rec->key.size, cur, sizeof(u_int32_t));
+       cur += sizeof(u_int32_t);
+       rec->key.data = cur;
+       cur += rec->key.size;
+
+       memcpy(&rec->data.size, cur, sizeof(u_int32_t));
+       cur += sizeof(u_int32_t);
+       rec->data.data = cur;
+       cur += rec->data.size;
+
+       /* Last field: no need to advance the cursor afterwards. */
+       memcpy(&rec->arg_flags, cur, sizeof(rec->arg_flags));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_noop_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t));
+ */
+int __db_noop_log(logp, txnid, ret_lsnp, flags)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+{
+       DBT rec;
+       DB_LSN no_lsn, *prev;
+       u_int32_t kind, tid;
+       u_int8_t *cur;
+       int status;
+
+       /*
+        * A noop record consists of the common header only: record type,
+        * transaction id (0 without a transaction) and the transaction's
+        * last LSN (all zeroes without a transaction).
+        */
+       kind = DB_db_noop;
+       if (txnid != NULL) {
+               tid = txnid->txnid;
+               prev = &txnid->last_lsn;
+       } else {
+               tid = 0;
+               no_lsn.file = 0;
+               no_lsn.offset = 0;
+               prev = &no_lsn;
+       }
+
+       rec.size = sizeof(kind) + sizeof(tid) + sizeof(DB_LSN);
+       rec.data = malloc(rec.size);
+       if (rec.data == NULL)
+               return (ENOMEM);
+
+       /* Serialize the three header fields back to back. */
+       cur = rec.data;
+       memcpy(cur, &kind, sizeof(kind));
+       cur += sizeof(kind);
+       memcpy(cur, &tid, sizeof(tid));
+       cur += sizeof(tid);
+       memcpy(cur, prev, sizeof(DB_LSN));
+       cur += sizeof(DB_LSN);
+#ifdef DEBUG
+       /* Sanity check: the bytes written must match the computed size. */
+       if ((u_int32_t)(cur - (u_int8_t *)rec.data) != rec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+
+       /*
+        * Write the record, then remember the returned LSN as the
+        * transaction's most recent one.
+        */
+       status = log_put(logp, ret_lsnp, &rec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(rec.data);
+       return (status);
+}
+
+/*
+ * PUBLIC: int __db_noop_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__db_noop_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __db_noop_args *rec;
+       int status;
+
+       /*
+        * The three "notused" parameters are only assigned to, never read;
+        * presumably this is to quiet unused-parameter warnings.
+        */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       /* Decode the raw record; on success rec is heap-allocated. */
+       status = __db_noop_read(dbtp->data, &rec);
+       if (status != 0)
+               return (status);
+
+       /* A noop record has no payload, so only the header is printed. */
+       printf("[%lu][%lu]db_noop: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file, (u_long)lsnp->offset,
+           (u_long)rec->type, (u_long)rec->txnid->txnid,
+           (u_long)rec->prev_lsn.file, (u_long)rec->prev_lsn.offset);
+       printf("\n");
+
+       free(rec);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_noop_read __P((void *, __db_noop_args **));
+ */
+int
+__db_noop_read(recbuf, argpp)
+       void *recbuf;
+       __db_noop_args **argpp;
+{
+       __db_noop_args *rec;
+       u_int8_t *cur;
+
+       /*
+        * One allocation holds the argument structure followed by the
+        * DB_TXN that rec->txnid points at.
+        */
+       rec = (__db_noop_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* The record is just the header: type, transaction id, prev LSN. */
+       cur = recbuf;
+       memcpy(&rec->type, cur, sizeof(rec->type));
+       cur += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, cur, sizeof(rec->txnid->txnid));
+       cur += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, cur, sizeof(DB_LSN));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __db_init_print __P((DB_ENV *));
+ */
+int
+__db_init_print(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       /*
+        * Register the print routine for each db record type, in order.
+        * Once a registration fails, the remaining ones are skipped and
+        * that error is returned.
+        */
+       ret = __db_add_recovery(dbenv, __db_addrem_print, DB_db_addrem);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_split_print, DB_db_split);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv, __db_big_print, DB_db_big);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_ovref_print, DB_db_ovref);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_relink_print, DB_db_relink);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_addpage_print, DB_db_addpage);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_debug_print, DB_db_debug);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv, __db_noop_print, DB_db_noop);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __db_init_recover __P((DB_ENV *));
+ */
+int
+__db_init_recover(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       /*
+        * Register the recovery routine for each db record type, in order.
+        * Once a registration fails, the remaining ones are skipped and
+        * that error is returned.
+        */
+       ret = __db_add_recovery(dbenv, __db_addrem_recover, DB_db_addrem);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_split_recover, DB_db_split);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv, __db_big_recover, DB_db_big);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_ovref_recover, DB_db_ovref);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_relink_recover, DB_db_relink);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_addpage_recover, DB_db_addpage);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_debug_recover, DB_db_debug);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __db_noop_recover, DB_db_noop);
+       return (ret);
+}
+
diff --git a/db2/db/db_conv.c b/db2/db/db_conv.c
new file mode 100644 (file)
index 0000000..39527c6
--- /dev/null
@@ -0,0 +1,219 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_conv.c    10.4 (Sleepycat) 8/15/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "db_am.h"
+
+static int __db_convert __P((db_pgno_t, void *, int));
+
+/*
+ * __db_pgin, __db_pgout --
+ *
+ * PUBLIC: int __db_pgin __P((db_pgno_t, void *));
+ * PUBLIC: int __db_pgout __P((db_pgno_t, void *));
+ */
+int
+__db_pgin(pg, pp)
+       db_pgno_t pg;
+       void *pp;
+{
+       int rc;
+
+       /* A non-zero third argument selects the "page in" direction. */
+       rc = __db_convert(pg, pp, 1);
+       return (rc);
+}
+
+int
+__db_pgout(pg, pp)
+       db_pgno_t pg;
+       void *pp;
+{
+       int rc;
+
+       /* A zero third argument selects the "page out" direction. */
+       rc = __db_convert(pg, pp, 0);
+       return (rc);
+}
+
+/*
+ * __db_convert --
+ *     Actually convert a page.
+ */
+static int
+__db_convert(pg, pp, pgin)
+       db_pgno_t pg;                   /* Unused, but left for the future. */
+       void *pp;
+       int pgin;
+{
+       BINTERNAL *bi;
+       BKEYDATA *bk;
+       BOVERFLOW *bo;
+       HKEYDATA *hk;
+       PAGE *h;
+       RINTERNAL *ri;
+       db_indx_t i;
+       u_int8_t *p;
+
+       /*
+        * Byte-swap the page at pp in place.  pgin is non-zero when called
+        * from __db_pgin and zero when called from __db_pgout.  Returns 0
+        * on success and EINVAL for an unrecognized page type.
+        *
+        * The order of the swaps depends on the direction.  On the way in
+        * the header and each inp[] slot are swapped BEFORE they are used
+        * to walk the page; on the way out they are swapped AFTER their
+        * last use, so they are still usable while the items are located.
+        *
+        * NOTE: on the EINVAL path with pgin set, the header has already
+        * been swapped by the time the error is returned.
+        */
+       h = pp;
+       if (pgin) {
+               M_32_SWAP(h->lsn.file);
+               M_32_SWAP(h->lsn.offset);
+               M_32_SWAP(h->pgno);
+               M_32_SWAP(h->prev_pgno);
+               M_32_SWAP(h->next_pgno);
+               M_16_SWAP(h->entries);
+               M_16_SWAP(h->hf_offset);
+       }
+
+       switch (h->type) {
+       case P_HASH:
+               for (i = 0; i < NUM_ENT(h); i++) {
+                       if (pgin)
+                               M_16_SWAP(h->inp[i]);
+
+                       hk = GET_HKEYDATA(h, i);
+                       switch (hk->type) {
+                       case H_KEYDATA:
+                               break;
+                       case H_DUPLICATE:
+                       case H_OFFPAGE:
+                               /* Skip two bytes, then swap in place. */
+                               p = (u_int8_t *)hk + sizeof(u_int8_t);
+                               ++p;
+                               SWAP32(p);                      /* tlen */
+                               SWAP32(p);                      /* pgno */
+                               SWAP16(p);                      /* offset */
+                               SWAP16(p);                      /* len */
+                               break;
+                       }
+
+                       if (!pgin)
+                               M_16_SWAP(h->inp[i]);
+               }
+               break;
+       case P_LBTREE:
+       case P_LRECNO:
+       case P_DUPLICATE:
+               for (i = 0; i < NUM_ENT(h); i++) {
+                       if (pgin)
+                               M_16_SWAP(h->inp[i]);
+
+                       bk = GET_BKEYDATA(h, i);
+                       switch (bk->type) {
+                       case B_KEYDATA:
+                               M_16_SWAP(bk->len);
+                               break;
+                       case B_DUPLICATE:
+                       case B_OVERFLOW:
+                               bo = (BOVERFLOW *)bk;
+                               M_32_SWAP(bo->tlen);
+                               M_32_SWAP(bo->pgno);
+                               break;
+                       }
+
+                       if (!pgin)
+                               M_16_SWAP(h->inp[i]);
+               }
+               break;
+       case P_IBTREE:
+               for (i = 0; i < NUM_ENT(h); i++) {
+                       if (pgin)
+                               M_16_SWAP(h->inp[i]);
+
+                       bi = GET_BINTERNAL(h, i);
+                       switch (bi->type) {
+                       case B_KEYDATA:
+                               M_16_SWAP(bi->len);
+                               M_32_SWAP(bi->pgno);
+                               M_32_SWAP(bi->nrecs);
+                               break;
+                       case B_DUPLICATE:
+                       case B_OVERFLOW:
+                               bo = (BOVERFLOW *)bi;
+                               M_32_SWAP(bo->tlen);
+                               M_32_SWAP(bo->pgno);
+                               break;
+                       }
+
+                       if (!pgin)
+                               M_16_SWAP(h->inp[i]);
+               }
+               break;
+       case P_IRECNO:
+               for (i = 0; i < NUM_ENT(h); i++) {
+                       if (pgin)
+                               M_16_SWAP(h->inp[i]);
+
+                       ri = GET_RINTERNAL(h, i);
+                       M_32_SWAP(ri->pgno);
+                       M_32_SWAP(ri->nrecs);
+
+                       if (!pgin)
+                               M_16_SWAP(h->inp[i]);
+               }
+               /*
+                * Explicit break: this case used to fall through into the
+                * no-op cases below, which was harmless but would misfire
+                * as soon as a new case is added in between.
+                */
+               break;
+       case P_OVERFLOW:
+       case P_INVALID:
+               /* Nothing to do. */
+               break;
+       default:
+               return (EINVAL);
+       }
+
+       if (!pgin) {
+               /* Swap the header information. */
+               M_32_SWAP(h->lsn.file);
+               M_32_SWAP(h->lsn.offset);
+               M_32_SWAP(h->pgno);
+               M_32_SWAP(h->prev_pgno);
+               M_32_SWAP(h->next_pgno);
+               M_16_SWAP(h->entries);
+               M_16_SWAP(h->hf_offset);
+       }
+       return (0);
+}
diff --git a/db2/db/db_dispatch.c b/db2/db/db_dispatch.c
new file mode 100644 (file)
index 0000000..3d7b162
--- /dev/null
@@ -0,0 +1,270 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_dispatch.c        10.5 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * Data structures to manage the DB dispatch table.  The dispatch table
+ * is a dynamically allocated array of pointers to dispatch functions.
+ * The dispatch_size is the number of entries allocated in the current
+ * dispatch table; slots for which no function has been registered
+ * are NULL.
+ */
+static int (**dispatch_table) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+static u_int32_t dispatch_size = 0;
+
+/*
+ * __db_dispatch --
+ *
+ * This is the transaction dispatch function used by the db access methods.
+ * It is designed to handle the record format used by all the access
+ * methods (the one automatically generated by the db_{h,log,read}.sh
+ * scripts in the tools directory).  An application using a different
+ * recovery paradigm will supply a different dispatch function to txn_open.
+ *
+ * PUBLIC: int __db_dispatch __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__db_dispatch(logp, db, lsnp, redo, info)
+       DB_LOG *logp;           /* The log file. */
+       DBT *db;                /* The log record upon which to dispatch. */
+       DB_LSN *lsnp;           /* The lsn of the record being dispatched. */
+       int redo;               /* Redo this op (or undo it). */
+       void *info;
+{
+       u_int32_t rectype, txnid;
+       int mode;
+
+       /*
+        * Every record starts with a 32-bit record type followed by a
+        * 32-bit transaction id; copy them out without assuming alignment.
+        */
+       memcpy(&rectype, db->data, sizeof(rectype));
+       memcpy(&txnid, (u_int8_t *)db->data + sizeof(rectype), sizeof(txnid));
+
+       /*
+        * Decide whether this record is dispatched at all and, if so, which
+        * mode is handed to the recovery routine.  Records that are skipped
+        * return 0 right here.
+        */
+       mode = redo;
+       switch (redo) {
+       case TXN_REDO:
+       case TXN_UNDO:
+               break;
+       case TXN_OPENFILES:
+               if (rectype >= DB_txn_BEGIN)
+                       return (0);
+               break;
+       case TXN_BACKWARD_ROLL:
+               /*
+                * Running full recovery in the backward pass.  If we've
+                * seen this txnid before and added to it our commit list,
+                * then we do nothing during this pass.  If we've never
+                * seen it, then we call the appropriate recovery routine
+                * in "abort mode".
+                */
+               if (__db_txnlist_find(info, txnid) != DB_NOTFOUND)
+                       return (0);
+               mode = TXN_UNDO;
+               break;
+       case TXN_FORWARD_ROLL:
+               /*
+                * In the forward pass, if we haven't seen the transaction,
+                * do nothing, else recover it.
+                */
+               if (__db_txnlist_find(info, txnid) == DB_NOTFOUND)
+                       return (0);
+               mode = TXN_REDO;
+               break;
+       default:
+               abort();
+       }
+
+       /*
+        * The record type comes straight from the log, so do not trust it
+        * as a table index: a type outside the table, or one for which no
+        * routine was registered, is reported as EINVAL instead of reading
+        * past the table or calling through a NULL pointer.
+        */
+       if (dispatch_table == NULL ||
+           rectype >= dispatch_size || dispatch_table[rectype] == NULL)
+               return (EINVAL);
+
+       return ((dispatch_table[rectype])(logp, db, lsnp, mode, info));
+}
+
+/*
+ * __db_add_recovery --
+ *
+ * PUBLIC: int __db_add_recovery __P((DB_ENV *,
+ * PUBLIC:    int (*)(DB_LOG *, DBT *, DB_LSN *, int, void *), u_int32_t));
+ */
+int
+__db_add_recovery(dbenv, func, ndx)
+       DB_ENV *dbenv;
+       int (*func) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+       u_int32_t ndx;
+{
+       int (**newtab) __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+       u_int32_t i, newsize;
+
+       /*
+        * Register func as the dispatch routine for record type ndx.
+        * Returns 0 on success, DB_REGISTERED if a different routine is
+        * already registered for ndx, or ENOMEM if the table cannot be
+        * grown (in which case the existing table is left untouched).
+        */
+
+       /* Check if function is already registered. */
+       if (dispatch_table && ndx < dispatch_size &&
+           dispatch_table[ndx] != 0 && dispatch_table[ndx] != func)
+               return (DB_REGISTERED);
+
+       /* Check if we have to grow the table. */
+       if (ndx >= dispatch_size) {
+               /*
+                * Grow in DB_user_BEGIN sized steps until ndx fits.  A
+                * single step is not always enough: ndx may be more than
+                * one step beyond the current end of the table.
+                */
+               newsize = dispatch_size;
+               do {
+                       /* A wrapped (or zero) step can never reach ndx. */
+                       if ((u_int32_t)(newsize + DB_user_BEGIN) <= newsize) {
+                               __db_err(dbenv, "%s", strerror(ENOMEM));
+                               return (ENOMEM);
+                       }
+                       newsize += DB_user_BEGIN;
+               } while (ndx >= newsize);
+
+               /*
+                * Allocate into a temporary so that a failed realloc does
+                * not lose (and leak) the table that is already in use.
+                */
+               if (dispatch_table == NULL)
+                       newtab = (int (**)
+                           __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
+                           malloc(newsize * sizeof(dispatch_table[0]));
+               else
+                       newtab = (int (**)
+                           __P((DB_LOG *, DBT *, DB_LSN *, int, void *)))
+                           realloc(dispatch_table,
+                           newsize * sizeof(dispatch_table[0]));
+               if (newtab == NULL) {
+                       __db_err(dbenv, "%s", strerror(ENOMEM));
+                       return (ENOMEM);
+               }
+               dispatch_table = newtab;
+
+               /* Mark every newly added slot as unregistered. */
+               for (i = dispatch_size; i < newsize; ++i)
+                       dispatch_table[i] = NULL;
+               dispatch_size = newsize;
+       }
+
+       dispatch_table[ndx] = func;
+       return (0);
+}
+
+/*
+ * __db_txnlist_init --
+ *     Initialize transaction linked list.
+ *
+ * PUBLIC: int __db_txnlist_init __P((void *));
+ */
+int
+__db_txnlist_init(retp)
+       void *retp;
+{
+       __db_txnhead *list;
+
+       list = (struct __db_txnhead *)malloc(sizeof(struct __db_txnhead));
+       if (list == NULL)
+               return (ENOMEM);
+
+       /* Start with an empty list and no transaction id seen yet. */
+       LIST_INIT(&list->head);
+       list->maxid = 0;
+
+       /* retp is really a "void **": hand the new head back through it. */
+       *(void **)retp = list;
+       return (0);
+}
+
+/*
+ * __db_txnlist_add --
+ *     Add an element to our transaction linked list.
+ *
+ * PUBLIC: int __db_txnlist_add __P((void *, u_int32_t));
+ */
+int
+__db_txnlist_add(listp, txnid)
+       void *listp;
+       u_int32_t txnid;
+{
+       __db_txnhead *list;
+       __db_txnlist *ent;
+
+       ent = (__db_txnlist *)malloc(sizeof(__db_txnlist));
+       if (ent == NULL)
+               return (ENOMEM);
+       ent->txnid = txnid;
+
+       /* New entries go on the front of the list. */
+       list = (struct __db_txnhead *)listp;
+       LIST_INSERT_HEAD(&list->head, ent, links);
+
+       /* Keep track of the largest transaction id seen so far. */
+       if (list->maxid < txnid)
+               list->maxid = txnid;
+
+       return (0);
+}
+
+/*
+ * __db_txnlist_find --
+ *     Checks to see if txnid is in the txnid list, returns 0 if found,
+ *     DB_NOTFOUND if not found.
+ *
+ * PUBLIC: int __db_txnlist_find __P((void *, u_int32_t));
+ */
+int
+__db_txnlist_find(listp, txnid)
+       void *listp;
+       u_int32_t txnid;
+{
+       __db_txnhead *list;
+       __db_txnlist *ent;
+
+       /* Without a list, nothing can be found. */
+       list = (struct __db_txnhead *)listp;
+       if (list == NULL)
+               return (DB_NOTFOUND);
+
+       /*
+        * An id above the recorded maximum is reported as not found without
+        * searching the list.  Note the side effect: the id is recorded as
+        * the new maximum even though nothing is added to the list.
+        */
+       if (txnid > list->maxid) {
+               list->maxid = txnid;
+               return (DB_NOTFOUND);
+       }
+
+       /* Otherwise do a linear search; 0 means "found". */
+       ent = list->head.lh_first;
+       while (ent != NULL) {
+               if (ent->txnid == txnid)
+                       return (0);
+               ent = ent->links.le_next;
+       }
+
+       return (DB_NOTFOUND);
+}
+
+#ifdef DEBUG
+void
+__db_txnlist_print(listp)
+       void *listp;
+{
+       __db_txnhead *list;
+       __db_txnlist *ent;
+
+       /* Print the recorded maximum id, then every id on the list. */
+       list = (struct __db_txnhead *)listp;
+       printf("Maxid: %lu\n", (u_long)list->maxid);
+
+       ent = list->head.lh_first;
+       while (ent != NULL) {
+               printf("TXNID: %lu\n", (u_long)ent->txnid);
+               ent = ent->links.le_next;
+       }
+}
+#endif
diff --git a/db2/db/db_dup.c b/db2/db/db_dup.c
new file mode 100644 (file)
index 0000000..8d364d5
--- /dev/null
@@ -0,0 +1,680 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_dup.c     10.8 (Sleepycat) 7/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "btree.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+static int __db_addpage __P((DB *,
+    PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
+static int __db_dsplit __P((DB *,
+    PAGE **, db_indx_t *, u_int32_t, int (*)(DB *, u_int32_t, PAGE **)));
+
+/*
+ * __db_dput --
+ *     Put a duplicate item onto a duplicate page at the given index.
+ *
+ * PUBLIC: int __db_dput __P((DB *,
+ * PUBLIC:    DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
+ */
+int
+__db_dput(dbp, dbt, pp, indxp, newfunc)
+       DB *dbp;
+       DBT *dbt;
+       PAGE **pp;
+       db_indx_t *indxp;
+       int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+{
+       BOVERFLOW bo;
+       DBT *data_dbtp, hdr_dbt, *hdr_dbtp;
+       PAGE *pagep;
+       db_indx_t size, isize;
+       db_pgno_t pgno;
+       int ret;
+
+       /*
+        * Items larger than a quarter of a page are stored off-page; this
+        * is the access method independent threshold for doing so.
+        */
+       if (dbt->size <= 0.25 * dbp->pgsize) {
+               /* Small item: goes on the page itself, no header supplied. */
+               hdr_dbtp = NULL;
+               data_dbtp = dbt;
+               isize = BKEYDATA_SIZE(dbt->size);
+               size = BKEYDATA_PSIZE(dbt->size);
+       } else {
+               /* Big item: write it off-page and insert a BOVERFLOW entry. */
+               ret = __db_poff(dbp, dbt, &pgno, newfunc);
+               if (ret != 0)
+                       return (ret);
+
+               bo.deleted = 0;
+               bo.type = B_OVERFLOW;
+               bo.tlen = dbt->size;
+               bo.pgno = pgno;
+
+               isize = BOVERFLOW_SIZE;
+               hdr_dbt.data = &bo;
+               hdr_dbt.size = isize;
+               hdr_dbtp = &hdr_dbt;
+               data_dbtp = NULL;
+               size = BOVERFLOW_PSIZE;
+       }
+
+       /* Make room if the entry does not fit on the current page. */
+       pagep = *pp;
+       if (size > P_FREESPACE(pagep)) {
+               /*
+                * Inserting after the last entry of a page with no successor
+                * only needs a fresh page; every other case is a split.
+                */
+               if (*indxp != NUM_ENT(*pp) || NEXT_PGNO(*pp) != PGNO_INVALID)
+                       ret = __db_dsplit(dbp, pp, indxp, isize, newfunc);
+               else
+                       ret = __db_addpage(dbp, pp, indxp, newfunc);
+               if (ret != 0) {
+                       /* XXX: Pages not returned to free list. */
+                       return (ret);
+               }
+               pagep = *pp;
+       }
+
+       /*
+        * pagep now references the page on which to insert and *indxp is
+        * the location to insert.
+        */
+       ret = __db_pitem(dbp,
+           pagep, (u_int32_t)*indxp, isize, hdr_dbtp, data_dbtp);
+       if (ret != 0)
+               return (ret);
+
+       (void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+       return (0);
+}
+
+/*
+ * __db_drem --
+ *     Remove a duplicate at the given index on the given page.
+ *
+ * PUBLIC: int __db_drem __P((DB *,
+ * PUBLIC:    PAGE **, u_int32_t, int (*)(DB *, PAGE *)));
+ */
+int
+__db_drem(dbp, pp, indx, freefunc)
+       DB *dbp;
+       PAGE **pp;
+       u_int32_t indx;
+       int (*freefunc) __P((DB *, PAGE *));
+{
+       PAGE *pagep;
+       u_int32_t nbytes;
+       int ret;
+
+       pagep = *pp;
+
+       /*
+        * Work out how many bytes the entry occupies on the page.  For a
+        * big item, the off-page chain is deleted first.
+        */
+       if (GET_BKEYDATA(pagep, indx)->type == B_OVERFLOW) {
+               ret = __db_doff(dbp,
+                   GET_BOVERFLOW(pagep, indx)->pgno, freefunc);
+               if (ret != 0)
+                       return (ret);
+               nbytes = BOVERFLOW_SIZE;
+       } else
+               nbytes = BKEYDATA_SIZE(GET_BKEYDATA(pagep, indx)->len);
+
+       /* Remove the entry itself. */
+       ret = __db_ditem(dbp, pagep, indx, nbytes);
+       if (ret != 0)
+               return (ret);
+
+       /* Entries remain: the page stays, just mark it dirty. */
+       if (NUM_ENT(pagep) != 0) {
+               (void)memp_fset(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+               return (0);
+       }
+
+       /*
+        * The page is now empty, so it is unlinked and freed, and the pp
+        * parameter is set to reference the next, locked page in the
+        * duplicate chain, or to NULL if there is no such page.
+        *
+        * !!!
+        * __db_relink will set the dirty bit for us.
+        */
+       ret = __db_relink(dbp, pagep, pp, 0);
+       if (ret != 0)
+               return (ret);
+       return (freefunc(dbp, pagep));
+}
+
+/*
+ * __db_dend --
+ *     Find the last page in a set of offpage duplicates.
+ *
+ * PUBLIC: int __db_dend __P((DB *, db_pgno_t, PAGE **));
+ */
+int
+__db_dend(dbp, pgno, pagep)
+       DB *dbp;
+       db_pgno_t pgno;
+       PAGE **pagep;
+{
+       PAGE *h;
+       db_pgno_t next;
+       int ret;
+
+       /*
+        * This implements DB_KEYLAST.  pgno should be the page number of
+        * the first page of the duplicate chain; the chain is walked one
+        * page at a time and the last page is returned, still held, in
+        * *pagep.  Every earlier page is put back as soon as its successor
+        * is known.
+        */
+       for (;;) {
+               ret = memp_fget(dbp->mpf, &pgno, 0, &h);
+               if (ret != 0) {
+                       (void)__db_pgerr(dbp, pgno);
+                       return (ret);
+               }
+
+               next = NEXT_PGNO(h);
+               if (next == PGNO_INVALID) {
+                       *pagep = h;
+                       return (0);
+               }
+
+               (void)memp_fput(dbp->mpf, h, 0);
+               pgno = next;
+       }
+}
+
+/*
+ * __db_dsplit --
+ *     Split a page of duplicates, calculating the split point based
+ *     on an element of size "size" being added at "*indxp".
+ *     On entry hp contains a pointer to the page-pointer of the original
+ *     page.  On exit, it returns a pointer to the page containing "*indxp"
+ *     and "indxp" has been modified to reflect the index on the new page
+ *     where the element should be added.  The function returns with
+ *     the page on which the insert should happen, not yet put.
+ */
+static int
+__db_dsplit(dbp, hp, indxp, size, newfunc)
+       DB *dbp;
+       PAGE **hp;
+       db_indx_t *indxp;
+       u_int32_t size;
+       int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+{
+       PAGE *h, *np, *tp;
+       BKEYDATA *bk;
+       DBT page_dbt;
+       db_indx_t indx, nindex, oindex, sum;
+       db_indx_t halfbytes, i, lastsum;
+       int did_indx, ret, s;
+
+       /*
+        * h is the page being split, np the new page obtained from newfunc,
+        * and tp a malloc'ed scratch page used to compact what stays on h.
+        * Returns 0 on success, ENOMEM if the scratch page cannot be
+        * allocated, or the error from newfunc, the log calls or memp_fput.
+        */
+       h = *hp;
+       indx = *indxp;
+
+       /* Create a temporary page to do compaction onto. */
+       if ((tp = (PAGE *)malloc(dbp->pgsize)) == NULL)
+               return (ENOMEM);
+#ifdef DEBUG
+       memset(tp, 0xff, dbp->pgsize);
+#endif
+       /* Create new page for the split. */
+       if ((ret = newfunc(dbp, P_DUPLICATE, &np)) != 0) {
+               FREE(tp, dbp->pgsize);
+               return (ret);
+       }
+
+       /*
+        * Initialize both pages as empty duplicate pages.  Presumably the
+        * P_INIT arguments are (page, size, pgno, prev, next, level, type),
+        * which would link np between h and h's old successor and give tp
+        * h's page number and previous link -- TODO confirm against the
+        * macro definition.
+        */
+       P_INIT(np, dbp->pgsize, PGNO(np), PGNO(h), NEXT_PGNO(h), 0,
+           P_DUPLICATE);
+       P_INIT(tp, dbp->pgsize, PGNO(h), PREV_PGNO(h), PGNO(np), 0,
+           P_DUPLICATE);
+
+       /*
+        * Figure out the split point: walk the entries of h, accumulating
+        * their sizes (plus the size of the new element when its index is
+        * reached), and stop at the entry where the running total crosses
+        * half of the bytes in use on the page.
+        */
+       halfbytes = (dbp->pgsize - HOFFSET(h)) / 2;
+       did_indx = 0;
+       for (sum = 0, lastsum = 0, i = 0; i < NUM_ENT(h); i++) {
+               if (i == indx) {
+                       sum += size;
+                       if (lastsum < halfbytes && sum >= halfbytes) {
+                               /* We've crossed the halfway point. */
+                               /*
+                                * NOTE(review): both arms break out with
+                                * did_indx still 0, so the "!did_indx" block
+                                * after the loop overwrites *hp and *indxp.
+                                * For the else arm that yields indx - i - 1
+                                * with i == indx -- confirm this is intended.
+                                */
+                               if ((db_indx_t)(halfbytes - lastsum) <
+                                   (db_indx_t)(sum - halfbytes)) {
+                                       *hp = np;
+                                       *indxp = 0;
+                                       i--;
+                               } else
+                                       *indxp = i;
+                               break;
+                       }
+                       *indxp = i;
+                       lastsum = sum;
+                       did_indx = 1;
+               }
+               if (GET_BKEYDATA(h, i)->type == B_KEYDATA)
+                       sum += BKEYDATA_SIZE(GET_BKEYDATA(h, i)->len);
+               else
+                       sum += BOVERFLOW_SIZE;
+
+               if (lastsum < halfbytes && sum >= halfbytes) {
+                       /* We've crossed the halfway point. */
+                       /*
+                        * NOTE(review): lastsum is only updated in the
+                        * "i == indx" branch above, and i-- with i == 0
+                        * depends on how db_indx_t wraps -- confirm.
+                        */
+                       if ((db_indx_t)(halfbytes - lastsum) <
+                           (db_indx_t)(sum - halfbytes))
+                               i--;
+                       break;
+               }
+       }
+
+       /*
+        * Check if we have set the return values of the index pointer and
+        * page pointer.
+        */
+       if (!did_indx) {
+               *hp = np;
+               *indxp = indx - i - 1;
+       }
+
+       /* Log the full pre-split image of h. */
+       if (DB_LOGGING(dbp)) {
+               page_dbt.size = dbp->pgsize;
+               page_dbt.data = h;
+               if ((ret = __db_split_log(dbp->dbenv->lg_info,
+                   dbp->txn, &LSN(h), 0, DB_SPLITOLD, dbp->log_fileid,
+                   PGNO(h), &page_dbt, &LSN(h))) != 0) {
+                       /*
+                        * NOTE(review): tp is freed here but np, obtained
+                        * from newfunc above, is not released on this path.
+                        */
+                       FREE(tp, dbp->pgsize);
+                       return (ret);
+               }
+               LSN(tp) = LSN(h);
+       }
+
+       /*
+        * If it's a btree, adjust the cursors.
+        *
+        * i is the index of the last element to stay on the page.
+        */
+       if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
+               __bam_ca_split(dbp, PGNO(h), PGNO(h), PGNO(np), i + 1, 0);
+
+       /* Copy the entries after the split point onto the new page. */
+       for (nindex = 0, oindex = i + 1; oindex < NUM_ENT(h); oindex++) {
+               bk = GET_BKEYDATA(h, oindex);
+               if (bk->type == B_KEYDATA)
+                       s = BKEYDATA_SIZE(bk->len);
+               else
+                       s = BOVERFLOW_SIZE;
+
+               /* Item data grows downward from the end of the page. */
+               np->inp[nindex++] = HOFFSET(np) -= s;
+               memcpy((u_int8_t *)np + HOFFSET(np), bk, s);
+               NUM_ENT(np)++;
+       }
+
+       /*
+        * Now do data compaction by copying the remaining stuff onto the
+        * temporary page and then copying it back to the real page.
+        */
+       for (nindex = 0, oindex = 0; oindex <= i; oindex++) {
+               bk = GET_BKEYDATA(h, oindex);
+               if (bk->type == B_KEYDATA)
+                       s = BKEYDATA_SIZE(bk->len);
+               else
+                       s = BOVERFLOW_SIZE;
+
+               tp->inp[nindex++] = HOFFSET(tp) -= s;
+               memcpy((u_int8_t *)tp + HOFFSET(tp), bk, s);
+               NUM_ENT(tp)++;
+       }
+
+       /*
+        * This page (the temporary) should be only half full, so we do two
+        * memcpy's, one for the top of the page and one for the bottom of
+        * the page.  This way we avoid copying the middle which should be
+        * about half a page.
+        */
+       memcpy(h, tp, LOFFSET(tp));
+       memcpy((u_int8_t *)h + HOFFSET(tp),
+           (u_int8_t *)tp + HOFFSET(tp), dbp->pgsize - HOFFSET(tp));
+       FREE(tp, dbp->pgsize);
+
+       /* Log the post-split images of both pages. */
+       if (DB_LOGGING(dbp)) {
+               page_dbt.size = dbp->pgsize;
+               page_dbt.data = h;
+               if ((ret = __db_split_log(dbp->dbenv->lg_info,
+                   dbp->txn, &LSN(h), 0, DB_SPLITNEW, dbp->log_fileid,
+                   PGNO(h), &page_dbt, &LSN(h))) != 0)
+                       return (ret);
+
+               page_dbt.size = dbp->pgsize;
+               page_dbt.data = np;
+               if ((ret = __db_split_log(dbp->dbenv->lg_info,
+                   dbp->txn, &LSN(np), 0, DB_SPLITNEW, dbp->log_fileid,
+                   PGNO(np),  &page_dbt, &LSN(np))) != 0)
+                       return (ret);
+       }
+
+       /*
+        * Figure out if the location we're interested in is on the new
+        * page, and if so, reset the callers' pointer.  Push the other
+        * page back to the store.
+        */
+       if (*hp == h)
+               ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
+       else
+               ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
+
+       return (ret);
+}
+
+/*
+ * __db_ditem --
+ *     Remove an item from a page.
+ *
+ * PUBLIC:  int __db_ditem __P((DB *, PAGE *, int, u_int32_t));
+ */
+int
+__db_ditem(dbp, pagep, indx, nbytes)
+       DB *dbp;
+       PAGE *pagep;
+       int indx;
+       u_int32_t nbytes;
+{
+       DBT ldbt;
+       db_indx_t cnt, offset;
+       int ret;
+       u_int8_t *from;
+
+       /*
+        * Remove the nbytes-long item at index indx from pagep: log the
+        * removal if logging is enabled, close the gap in the item data,
+        * fix up the offset table and, for btree/recno, adjust the cursors.
+        * Returns 0, or the error from the log call.
+        */
+       if (DB_LOGGING(dbp)) {
+               /* The log record carries the bytes of the item being removed. */
+               ldbt.data = P_ENTRY(pagep, indx);
+               ldbt.size = nbytes;
+               if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn,
+                   &LSN(pagep), 0, DB_REM_DUP, dbp->log_fileid, PGNO(pagep),
+                   (u_int32_t)indx, nbytes, &ldbt, NULL, &LSN(pagep))) != 0)
+                       return (ret);
+       }
+
+       /*
+        * If there's only a single item on the page, we don't have to
+        * work hard.
+        *
+        * NOTE(review): this early return skips the __bam_ca_di cursor
+        * adjustment performed at the end of the function -- confirm that
+        * is intended.
+        */
+       if (NUM_ENT(pagep) == 1) {
+               NUM_ENT(pagep) = 0;
+               HOFFSET(pagep) = dbp->pgsize;
+               return (0);
+       }
+
+       /*
+        * Pack the remaining key/data items at the end of the page.  Use
+        * memmove(3), the regions may overlap.
+        *
+        * Everything stored between HOFFSET and the removed item's offset
+        * slides up by nbytes, over the removed item.
+        */
+       from = (u_int8_t *)pagep + HOFFSET(pagep);
+       memmove(from + nbytes, from, pagep->inp[indx] - HOFFSET(pagep));
+       HOFFSET(pagep) += nbytes;
+
+       /*
+        * Adjust the indices' offsets: every item that was stored below
+        * the removed one has just moved up by nbytes.
+        */
+       offset = pagep->inp[indx];
+       for (cnt = 0; cnt < NUM_ENT(pagep); ++cnt)
+               if (pagep->inp[cnt] < offset)
+                       pagep->inp[cnt] += nbytes;
+
+       /* Shift the indices down. */
+       --NUM_ENT(pagep);
+       if (indx != NUM_ENT(pagep))
+               memmove(&pagep->inp[indx], &pagep->inp[indx + 1],
+                   sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
+
+       /* If it's a btree, adjust the cursors. */
+       if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
+               __bam_ca_di(dbp, PGNO(pagep), indx, -1);
+
+       return (0);
+}
+
+/*
+ * __db_pitem --
+ *     Put an item on a page.
+ *
+ * PUBLIC: int __db_pitem
+ * PUBLIC:     __P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
+ */
+int
+__db_pitem(dbp, pagep, indx, nbytes, hdr, data)
+       DB *dbp;
+       PAGE *pagep;
+       u_int32_t indx;
+       u_int32_t nbytes;
+       DBT *hdr, *data;
+{
+       BKEYDATA bk;
+       DBT thdr;
+       int ret;
+       u_int8_t *p;
+
+       /*
+        * Put a single item onto a page.  The logic figuring out where to
+        * insert and whether it fits is handled in the caller.  All we do
+        * here is manage the page shuffling.  We cheat a little bit in that
+        * we don't want to copy the dbt on a normal put twice.  If hdr is
+        * NULL, we create a BKEYDATA structure on the page, otherwise, just
+        * copy the caller's information onto the page.
+        *
+        * This routine is also used to put entries onto the page where the
+        * entry is pre-built, e.g., during recovery.  In this case, the hdr
+        * will point to the entry, and the data argument will be NULL.
+        *
+        * !!!
+        * There's a tremendous potential for off-by-one errors here, since
+        * the passed in header sizes must be adjusted for the structure's
+        * placeholder for the trailing variable-length data field.
+        *
+        * Returns 0, or the error from the log call.  The log record is
+        * written with the caller's hdr pointer, i.e. before a NULL hdr is
+        * replaced by the locally built header below.
+        */
+       if (DB_LOGGING(dbp))
+               if ((ret = __db_addrem_log(dbp->dbenv->lg_info, dbp->txn,
+                   &LSN(pagep), 0, DB_ADD_DUP, dbp->log_fileid, PGNO(pagep),
+                   (u_int32_t)indx, nbytes, hdr, data, &LSN(pagep))) != 0)
+                       return (ret);
+
+       /* No caller-supplied header: build a BKEYDATA header for the data. */
+       if (hdr == NULL) {
+               bk.deleted = 0;
+               bk.type = B_KEYDATA;
+               bk.len = data == NULL ? 0 : data->size;
+
+               thdr.data = &bk;
+               thdr.size = SSZA(BKEYDATA, data);
+               hdr = &thdr;
+       }
+
+       /* Adjust the index table, then put the item on the page. */
+       if (indx != NUM_ENT(pagep))
+               memmove(&pagep->inp[indx + 1], &pagep->inp[indx],
+                   sizeof(db_indx_t) * (NUM_ENT(pagep) - indx));
+       /* The new item takes the nbytes just below the current HOFFSET. */
+       HOFFSET(pagep) -= nbytes;
+       pagep->inp[indx] = HOFFSET(pagep);
+       ++NUM_ENT(pagep);
+
+       /* Header first, then the data (if any) immediately after it. */
+       p = P_ENTRY(pagep, indx);
+       memcpy(p, hdr->data, hdr->size);
+       if (data != NULL)
+               memcpy(p + hdr->size, data->data, data->size);
+
+       /* If it's a btree, adjust the cursors. */
+       if (dbp->type == DB_BTREE || dbp->type == DB_RECNO)
+               __bam_ca_di(dbp, PGNO(pagep), indx, 1);
+
+       return (0);
+}
+
+/*
+ * __db_relink --
+ *     Relink around a deleted page.
+ *
+ * PUBLIC: int __db_relink __P((DB *, PAGE *, PAGE **, int));
+ */
+int
+__db_relink(dbp, pagep, new_next, needlock)
+       DB *dbp;
+       PAGE *pagep, **new_next;
+       int needlock;
+{
+       PAGE *np, *pp;
+       DB_LOCK npl, ppl;
+       DB_LSN *nlsnp, *plsnp;
+       int ret;
+
+       /*
+        * Unlink pagep from its doubly-linked page chain by pointing its
+        * neighbors at each other.  pagep's own links are not changed and
+        * pagep is not released here.
+        *
+        * If needlock is set, each neighbor is write-locked via __bam_lget
+        * while it is updated.  If new_next is non-NULL, the following page
+        * is returned through it still held (NULL if there is none);
+        * otherwise the following page is put back.
+        *
+        * Returns 0 on success, or the error from the failing lock, fetch,
+        * log or put call.
+        */
+       ret = 0;
+       np = pp = NULL;
+       npl = ppl = LOCK_INVALID;
+       nlsnp = plsnp = NULL;
+
+       /* Retrieve and lock the two pages. */
+       if (pagep->next_pgno != PGNO_INVALID) {
+               if (needlock && (ret = __bam_lget(dbp,
+                   0, pagep->next_pgno, DB_LOCK_WRITE, &npl)) != 0)
+                       goto err;
+               if ((ret = memp_fget(dbp->mpf,
+                   &pagep->next_pgno, 0, &np)) != 0) {
+                       (void)__db_pgerr(dbp, pagep->next_pgno);
+                       goto err;
+               }
+               nlsnp = &np->lsn;
+       }
+       if (pagep->prev_pgno != PGNO_INVALID) {
+               if (needlock && (ret = __bam_lget(dbp,
+                   0, pagep->prev_pgno, DB_LOCK_WRITE, &ppl)) != 0)
+                       goto err;
+               if ((ret = memp_fget(dbp->mpf,
+                   &pagep->prev_pgno, 0, &pp)) != 0) {
+                       /* Report the page that actually failed to load. */
+                       (void)__db_pgerr(dbp, pagep->prev_pgno);
+                       goto err;
+               }
+               plsnp = &pp->lsn;
+       }
+
+       /* Log the change. */
+       if (DB_LOGGING(dbp)) {
+               if ((ret = __db_relink_log(dbp->dbenv->lg_info, dbp->txn,
+                   &pagep->lsn, 0, dbp->log_fileid, pagep->pgno, &pagep->lsn,
+                   pagep->prev_pgno, plsnp, pagep->next_pgno, nlsnp)) != 0)
+                       goto err;
+               /* All pages touched by the relink share the new LSN. */
+               if (np != NULL)
+                       np->lsn = pagep->lsn;
+               if (pp != NULL)
+                       pp->lsn = pagep->lsn;
+       }
+
+       /*
+        * Modify and release the two pages.
+        *
+        * !!!
+        * The parameter new_next gets set to the page following the page we
+        * are removing.  If there is no following page, then new_next gets
+        * set to NULL.
+        */
+       if (np != NULL) {
+               np->prev_pgno = pagep->prev_pgno;
+               if (new_next == NULL)
+                       ret = memp_fput(dbp->mpf, np, DB_MPOOL_DIRTY);
+               else {
+                       /* Caller keeps the page: only mark it dirty. */
+                       *new_next = np;
+                       ret = memp_fset(dbp->mpf, np, DB_MPOOL_DIRTY);
+               }
+               if (ret != 0)
+                       goto err;
+               if (needlock)
+                       (void)__bam_lput(dbp, npl);
+       } else if (new_next != NULL)
+               *new_next = NULL;
+
+       if (pp != NULL) {
+               pp->next_pgno = pagep->next_pgno;
+               if ((ret = memp_fput(dbp->mpf, pp, DB_MPOOL_DIRTY)) != 0)
+                       goto err;
+               if (needlock)
+                       (void)__bam_lput(dbp, ppl);
+       }
+       return (0);
+
+       /* Error exit: release whatever pages and locks were acquired. */
+err:   if (np != NULL)
+               (void)memp_fput(dbp->mpf, np, 0);
+       if (needlock && npl != LOCK_INVALID)
+               (void)__bam_lput(dbp, npl);
+       if (pp != NULL)
+               (void)memp_fput(dbp->mpf, pp, 0);
+       if (needlock && ppl != LOCK_INVALID)
+               (void)__bam_lput(dbp, ppl);
+       return (ret);
+}
+
+/*
+ * __db_ddup --
+ *     Delete an offpage chain of duplicates.
+ *
+ * PUBLIC: int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
+ */
+int
+__db_ddup(dbp, pgno, freefunc)
+       DB *dbp;
+       db_pgno_t pgno;
+       int (*freefunc) __P((DB *, PAGE *));
+{
+       PAGE *pagep;
+       DBT tmp_dbt;
+       int ret;
+
+       /*
+        * Walk the chain starting at pgno, following next_pgno links, and
+        * hand every page to freefunc.  When logging is enabled, the full
+        * image of each page is logged before it is freed.
+        *
+        * Returns 0 on success, or the error from the failing fetch, log
+        * or freefunc call.
+        */
+       do {
+               if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) {
+                       (void)__db_pgerr(dbp, pgno);
+                       return (ret);
+               }
+
+               if (DB_LOGGING(dbp)) {
+                       tmp_dbt.data = pagep;
+                       tmp_dbt.size = dbp->pgsize;
+                       if ((ret = __db_split_log(dbp->dbenv->lg_info, dbp->txn,
+                           &LSN(pagep), 0, DB_SPLITOLD, dbp->log_fileid,
+                           PGNO(pagep), &tmp_dbt, &LSN(pagep))) != 0) {
+                               /*
+                                * The page was fetched above but never
+                                * reached freefunc; put it back so it is
+                                * not left held by this function.
+                                */
+                               (void)memp_fput(dbp->mpf, pagep, 0);
+                               return (ret);
+                       }
+               }
+               /* Save the link before the page is handed to freefunc. */
+               pgno = pagep->next_pgno;
+               if ((ret = freefunc(dbp, pagep)) != 0)
+                       return (ret);
+       } while (pgno != PGNO_INVALID);
+
+       return (0);
+}
+
+/*
+ * __db_addpage --
+ *     Create a new page and link it onto the next_pgno field of the
+ *     current page.
+ */
+static int
+__db_addpage(dbp, hp, indxp, newfunc)
+       DB *dbp;
+       PAGE **hp;
+       db_indx_t *indxp;
+       int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+{
+       PAGE *curp, *newpage;
+       int ret;
+
+       /* Get the page that will become the new tail of the chain. */
+       ret = newfunc(dbp, P_DUPLICATE, &newpage);
+       if (ret != 0)
+               return (ret);
+
+       curp = *hp;
+
+       /* Log the link-up; both pages end up carrying the same LSN. */
+       if (DB_LOGGING(dbp)) {
+               ret = __db_addpage_log(dbp->dbenv->lg_info,
+                   dbp->txn, &LSN(curp), 0, dbp->log_fileid,
+                   PGNO(curp), &LSN(curp), PGNO(newpage), &LSN(newpage));
+               if (ret != 0)
+                       return (ret);
+               LSN(newpage) = LSN(curp);
+       }
+
+       /* Chain the two pages together. */
+       PREV_PGNO(newpage) = PGNO(curp);
+       NEXT_PGNO(curp) = PGNO(newpage);
+
+       /*
+        * Put the old page back as dirty, then hand the caller the new
+        * page with the insertion index reset to its first slot.
+        */
+       ret = memp_fput(dbp->mpf, curp, DB_MPOOL_DIRTY);
+       if (ret != 0)
+               return (ret);
+
+       *hp = newpage;
+       *indxp = 0;
+       return (0);
+}
diff --git a/db2/db/db_overflow.c b/db2/db/db_overflow.c
new file mode 100644 (file)
index 0000000..2340e9e
--- /dev/null
@@ -0,0 +1,383 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_overflow.c        10.4 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * Big key/data code.
+ *
+ * Big key and data entries are stored on linked lists of pages.  The initial
+ * reference is a structure with the total length of the item and the page
+ * number where it begins.  Each entry in the linked list contains a pointer
+ * to the next page of data, and so on.
+ */
+
+/*
+ * __db_goff --
+ *     Get an offpage item.
+ *
+ * PUBLIC: int __db_goff __P((DB *, DBT *,
+ * PUBLIC:     u_int32_t, db_pgno_t, void **, u_int32_t *));
+ */
+int
+__db_goff(dbp, dbt, tlen, pgno, bpp, bpsz)
+       DB *dbp;
+       DBT *dbt;
+       u_int32_t tlen;
+       db_pgno_t pgno;
+       void **bpp;
+       u_int32_t *bpsz;
+{
+       PAGE *h;
+       db_indx_t bytes;
+       int ret;
+       u_int32_t curoff, needed, start;
+       u_int8_t *p, *src;
+       void *newbuf;
+
+       /*
+        * Copy the off-page item whose chain starts at page pgno, and whose
+        * total length is tlen, into dbt.  With DB_DBT_PARTIAL only dlen
+        * bytes starting at offset doff are requested.  Where the bytes go
+        * depends on dbt's flags: the user's own buffer (DB_DBT_USERMEM),
+        * freshly allocated memory (DB_DBT_MALLOC), or the reusable buffer
+        * *bpp of size *bpsz, which is grown as necessary.
+        *
+        * Returns 0 on success, ENOMEM if the user's buffer is too small
+        * (dbt->size is then set to the size needed) or an allocation
+        * fails, or the error from memp_fget.
+        *
+        * Check if the buffer is big enough; if it is not and we are
+        * allowed to malloc space, then we'll malloc it.  If we are
+        * not (DB_DBT_USERMEM), then we'll set the dbt and return
+        * appropriately.
+        */
+       if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
+               start = dbt->doff;
+               needed = dbt->dlen;
+       } else {
+               start = 0;
+               needed = tlen;
+       }
+
+       /*
+        * Allocate any necessary memory.
+        *
+        * XXX: Never allocate 0 bytes;
+        */
+       if (F_ISSET(dbt, DB_DBT_USERMEM)) {
+               if (needed > dbt->ulen) {
+                       dbt->size = needed;
+                       return (ENOMEM);
+               }
+       } else if (F_ISSET(dbt, DB_DBT_MALLOC)) {
+               dbt->data = dbp->db_malloc == NULL ?
+                   (void *)malloc(needed + 1) :
+                   (void *)dbp->db_malloc(needed + 1);
+               if (dbt->data == NULL)
+                       return (ENOMEM);
+       } else if (*bpsz == 0 || *bpsz < needed) {
+               /*
+                * Grow the reusable buffer.  The result goes through a
+                * temporary so that a failed realloc neither loses the old
+                * buffer nor leaves *bpp NULL while *bpsz is still non-zero.
+                */
+               newbuf = *bpp == NULL ?
+                   (void *)malloc(needed + 1) :
+                   (void *)realloc(*bpp, needed + 1);
+               if (newbuf == NULL)
+                       return (ENOMEM);
+               *bpp = newbuf;
+               *bpsz = needed + 1;
+               dbt->data = *bpp;
+       } else
+               dbt->data = *bpp;
+
+       /*
+        * Step through the linked list of pages, copying the data on each
+        * one into the buffer.  Never copy more than the total data length.
+        */
+       dbt->size = needed;
+       for (curoff = 0, p = dbt->data; pgno != P_INVALID && needed > 0;) {
+               if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
+                       (void)__db_pgerr(dbp, pgno);
+                       return (ret);
+               }
+               /* Check if we need any bytes from this page. */
+               if (curoff + OV_LEN(h) >= start) {
+                       src = (u_int8_t *)h + P_OVERHEAD;
+                       bytes = OV_LEN(h);
+                       /* Skip the part of this page before the start. */
+                       if (start > curoff) {
+                               src += start - curoff;
+                               bytes -= start - curoff;
+                       }
+                       if (bytes > needed)
+                               bytes = needed;
+                       memcpy(p, src, bytes);
+                       p += bytes;
+                       needed -= bytes;
+               }
+               curoff += OV_LEN(h);
+               pgno = h->next_pgno;
+               memp_fput(dbp->mpf, h, 0);
+       }
+       return (0);
+}
+
+/*
+ * __db_poff --
+ *     Put an offpage item.
+ *
+ * PUBLIC: int __db_poff __P((DB *, const DBT *, db_pgno_t *,
+ * PUBLIC:     int (*)(DB *, u_int32_t, PAGE **)));
+ */
+int
+__db_poff(dbp, dbt, pgnop, newfunc)
+       DB *dbp;
+       const DBT *dbt;
+       db_pgno_t *pgnop;
+       int (*newfunc) __P((DB *, u_int32_t, PAGE **));
+{
+       PAGE *pagep, *lastp;
+       DB_LSN new_lsn, null_lsn;
+       DBT tmp_dbt;
+       db_indx_t pagespace;
+       u_int32_t sz;
+       u_int8_t *p;
+       int ret;
+
+       /*
+        * Store dbt's data on a chain of overflow pages obtained from
+        * newfunc and return the page number of the first one in *pgnop.
+        * Returns 0 on success, or the error from newfunc or the log call.
+        *
+        * Allocate pages and copy the key/data item into them.  Calculate the
+        * number of bytes we get for pages we fill completely with a single
+        * item.
+        */
+       pagespace = P_MAXSPACE(dbp->pgsize);
+
+       /* lastp is the previously filled page, kept so it can be linked. */
+       lastp = NULL;
+       for (p = dbt->data,
+           sz = dbt->size; sz > 0; p += pagespace, sz -= pagespace) {
+               /*
+                * Reduce pagespace so we terminate the loop correctly and
+                * don't copy too much data.
+                */
+               if (sz < pagespace)
+                       pagespace = sz;
+
+               /*
+                * Allocate and initialize a new page and copy all or part of
+                * the item onto the page.  If sz is less than pagespace, we
+                * have a partial record.
+                *
+                * NOTE(review): the error returns below do not release
+                * pagep or lastp, nor the pages already chained.
+                */
+               if ((ret = newfunc(dbp, P_OVERFLOW, &pagep)) != 0)
+                       return (ret);
+               if (DB_LOGGING(dbp)) {
+                       /* Log this page's slice of the item. */
+                       tmp_dbt.data = p;
+                       tmp_dbt.size = pagespace;
+                       ZERO_LSN(null_lsn);
+                       if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
+                           &new_lsn, 0, DB_ADD_BIG, dbp->log_fileid,
+                           PGNO(pagep), lastp ? PGNO(lastp) : PGNO_INVALID,
+                           PGNO_INVALID, &tmp_dbt, &LSN(pagep),
+                           lastp == NULL ? &null_lsn : &LSN(lastp),
+                           &null_lsn)) != 0)
+                               return (ret);
+
+                       /* Move lsn onto page. */
+                       if (lastp)
+                               LSN(lastp) = new_lsn;
+                       LSN(pagep) = new_lsn;
+               }
+
+               /* Set up the overflow page and copy this slice onto it. */
+               P_INIT(pagep, dbp->pgsize,
+                   PGNO(pagep), PGNO_INVALID, PGNO_INVALID, 0, P_OVERFLOW);
+               OV_LEN(pagep) = pagespace;
+               OV_REF(pagep) = 1;
+               memcpy((u_int8_t *)pagep + P_OVERHEAD, p, pagespace);
+
+               /*
+                * If this is the first entry, update the user's info.
+                * Otherwise, update the entry on the last page filled
+                * in and release that page.
+                */
+               if (lastp == NULL)
+                       *pgnop = PGNO(pagep);
+               else {
+                       lastp->next_pgno = PGNO(pagep);
+                       pagep->prev_pgno = PGNO(lastp);
+                       (void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
+               }
+               lastp = pagep;
+       }
+       /*
+        * Release the final page of the chain.
+        *
+        * NOTE(review): if dbt->size is 0 the loop never runs, so *pgnop
+        * is not set and lastp is still NULL here -- confirm callers never
+        * pass an empty item.
+        */
+       (void)memp_fput(dbp->mpf, lastp, DB_MPOOL_DIRTY);
+       return (0);
+}
+
+/*
+ * __db_ioff --
+ *     Increment the reference count on an overflow page.
+ *
+ *     Fetches page pgno from dbp's memory pool, writes an ovref log
+ *     record when logging is enabled, bumps the page's reference count
+ *     and returns the page to the pool marked dirty.
+ *
+ *     Returns 0 on success, otherwise the error from memp_fget or
+ *     __db_ovref_log.  The page is handed back to the pool on every
+ *     path that fetched it.
+ *
+ * PUBLIC: int __db_ioff __P((DB *, db_pgno_t));
+ */
+int
+__db_ioff(dbp, pgno)
+       DB *dbp;
+       db_pgno_t pgno;
+{
+       PAGE *h;
+       int ret;
+
+       if ((ret = memp_fget(dbp->mpf, &pgno, 0, &h)) != 0) {
+               (void)__db_pgerr(dbp, pgno);
+               return (ret);
+       }
+
+       /*
+        * Log before bumping the count so a logging failure does not leave
+        * an incremented count behind, and release the page on that path
+        * instead of returning with it still pinned.
+        */
+       if (DB_LOGGING(dbp) && (ret = __db_ovref_log(dbp->dbenv->lg_info,
+           dbp->txn, &LSN(h), 0, dbp->log_fileid, h->pgno, &LSN(h))) != 0) {
+               (void)memp_fput(dbp->mpf, h, 0);
+               return (ret);
+       }
+
+       ++OV_REF(h);
+       (void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY);
+       return (0);
+}
+
+/*
+ * __db_doff --
+ *     Delete an offpage chain of overflow pages.
+ *
+ *     Walks the chain starting at pgno, following next_pgno.  A page of
+ *     type P_OVERFLOW whose reference count is greater than one only has
+ *     its count decremented, and the walk stops there.  Every other page
+ *     is logged (when logging is enabled) and passed to freefunc.
+ *
+ *     Returns 0 on success, otherwise the first error from memp_fget,
+ *     __db_big_log or freefunc.
+ *
+ * PUBLIC: int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
+ */
+int
+__db_doff(dbp, pgno, freefunc)
+       DB *dbp;
+       db_pgno_t pgno;
+       int (*freefunc) __P((DB *, PAGE *));
+{
+       PAGE *pagep;
+       DB_LSN null_lsn;
+       DBT tmp_dbt;
+       int ret;
+
+       do {
+               if ((ret = memp_fget(dbp->mpf, &pgno, 0, &pagep)) != 0) {
+                       (void)__db_pgerr(dbp, pgno);
+                       return (ret);
+               }
+
+               /*
+                * If it's an overflow page and it's referenced by more than
+                * one key/data item, decrement the reference count and return.
+                */
+               if (TYPE(pagep) == P_OVERFLOW && OV_REF(pagep) > 1) {
+                       --OV_REF(pagep);
+                       (void)memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY);
+                       return (0);
+               }
+
+               if (DB_LOGGING(dbp)) {
+                       tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD;
+                       tmp_dbt.size = OV_LEN(pagep);
+                       ZERO_LSN(null_lsn);
+                       if ((ret = __db_big_log(dbp->dbenv->lg_info, dbp->txn,
+                           &LSN(pagep), 0, DB_REM_BIG, dbp->log_fileid,
+                           PGNO(pagep), PREV_PGNO(pagep), NEXT_PGNO(pagep),
+                           &tmp_dbt, &LSN(pagep), &null_lsn, &null_lsn)) != 0) {
+                               /* Don't leave the page pinned on failure. */
+                               (void)memp_fput(dbp->mpf, pagep, 0);
+                               return (ret);
+                       }
+               }
+
+               /* Save the link first: the page is gone after freefunc. */
+               pgno = pagep->next_pgno;
+
+               /*
+                * NOTE(review): freefunc is presumed to take over the page
+                * reference even when it fails -- confirm against the
+                * access-method free routines.
+                */
+               if ((ret = freefunc(dbp, pagep)) != 0)
+                       return (ret);
+       } while (pgno != PGNO_INVALID);
+
+       return (0);
+}
+
+/*
+ * __db_moff --
+ *     Match on overflow pages.
+ *
+ * Given a starting page number and a key, return <0, 0, >0 to indicate if the
+ * key specified is less than, equal to or greater than the key stored on the
+ * overflow page chain.  Bytes are compared as unsigned values; when one key
+ * is a prefix of the other, the shorter key sorts first.
+ *
+ * A failure to fetch a page is reported through __db_pgerr and the function
+ * then returns 0; there is no separate error return.
+ *
+ * PUBLIC: int __db_moff __P((DB *, const DBT *, db_pgno_t));
+ */
+int
+__db_moff(dbp, dbt, pgno)
+       DB *dbp;
+       const DBT *dbt;
+       db_pgno_t pgno;
+{
+       PAGE *pagep;
+       u_int32_t cmp_bytes, key_left;
+       int ret;
+       u_int8_t *p1, *p2;
+
+       /* While there are both keys to compare. */
+       for (ret = 0, p1 = dbt->data,
+           key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) {
+               if (memp_fget(dbp->mpf, &pgno, 0, &pagep) != 0) {
+                       (void)__db_pgerr(dbp, pgno);
+                       return (0);     /* No system error return. */
+               }
+
+               /* Compare as much of the DBT as this page holds. */
+               cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left;
+               key_left -= cmp_bytes;
+               for (p2 =
+                   (u_int8_t *)pagep + P_OVERHEAD; cmp_bytes-- > 0; ++p1, ++p2)
+                       if (*p1 != *p2) {
+                               ret = (long)*p1 - (long)*p2;
+                               break;
+                       }
+               pgno = NEXT_PGNO(pagep);
+               (void)memp_fput(dbp->mpf, pagep, 0);
+               if (ret != 0)
+                       return (ret);
+       }
+
+       /*
+        * All compared bytes matched, so length decides.  The sign has to
+        * follow the same "DBT minus page key" direction as the byte
+        * comparison above: a longer DBT is the greater key.
+        */
+       if (key_left > 0)               /* DBT is longer than page key. */
+               return (1);
+       if (pgno != PGNO_INVALID)       /* DBT is shorter than page key. */
+               return (-1);
+       return (0);
+}
diff --git a/db2/db/db_pr.c b/db2/db/db_pr.c
new file mode 100644 (file)
index 0000000..c103b10
--- /dev/null
@@ -0,0 +1,785 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_pr.c      10.14 (Sleepycat) 8/17/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+#include "hash.h"
+#include "db_am.h"
+
+static void __db_proff __P((void *));
+static void __db_psize __P((DB_MPOOLFILE *));
+
+/*
+ * __db_loadme --
+ *     Force loading of this file.
+ *
+ *     The body does nothing useful; the function exists so that a
+ *     reference to it pulls this object file into the link.
+ *
+ * PUBLIC: void __db_loadme __P((void));
+ */
+void
+__db_loadme()
+{
+       /* Result deliberately discarded. */
+       (void)getpid();
+}
+
+static FILE *set_fp;
+
+/*
+ * 64K is the maximum page size, so by default we check for offsets
+ * larger than that, and, where possible, we refine the test.
+ */
+#define        PSIZE_BOUNDARY  (64 * 1024 + 1)
+static size_t set_psize = PSIZE_BOUNDARY;
+
+/*
+ * __db_prinit --
+ *     Initialize tree printing routines.
+ *
+ *     Returns the stream the print routines write to.  On the first
+ *     call (while no stream is set) the stream becomes fp, or stdout
+ *     when fp is NULL; once a stream is set the argument is ignored.
+ *
+ * PUBLIC: FILE *__db_prinit __P((FILE *));
+ */
+FILE *
+__db_prinit(fp)
+       FILE *fp;
+{
+       /* Already chosen: the caller's argument has no effect. */
+       if (set_fp != NULL)
+               return (set_fp);
+
+       if (fp == NULL)
+               fp = stdout;
+       set_fp = fp;
+       return (set_fp);
+}
+
+/*
+ * __db_dump --
+ *     Dump the tree to a file.
+ *
+ *     Prints the DB structure, the access-method specific information
+ *     and then every page.  With a non-NULL name the output goes to that
+ *     file (opened for writing) and the previous output stream is
+ *     restored afterwards; otherwise the current print stream is used.
+ *     all is passed through to __db_prtree.
+ *
+ *     Returns errno if the file cannot be opened, 0 otherwise; errors
+ *     from the individual print routines are not reported.
+ *
+ * PUBLIC: int __db_dump __P((DB *, char *, int));
+ */
+int
+__db_dump(dbp, name, all)
+       DB *dbp;
+       char *name;
+       int all;
+{
+       FILE *fp, *save_fp;
+       int to_file;
+
+       to_file = name != NULL;
+       save_fp = NULL;                         /* XXX: Shut the compiler up. */
+
+       /* Learn the real page size if nobody has done so yet. */
+       if (set_psize == PSIZE_BOUNDARY)
+               __db_psize(dbp->mpf);
+
+       if (!to_file)
+               fp = __db_prinit(NULL);
+       else {
+               fp = fopen(name, "w");
+               if (fp == NULL)
+                       return (errno);
+
+               /* Redirect the print routines for the duration of the dump. */
+               save_fp = set_fp;
+               set_fp = fp;
+       }
+
+       (void)__db_prdb(dbp);
+       if (dbp->type != DB_HASH)
+               (void)__db_prbtree(dbp);
+       else
+               (void)__db_prhash(dbp);
+       fprintf(fp, "%s\n", DB_LINE);
+       __db_prtree(dbp->mpf, all);
+
+       if (to_file) {
+               set_fp = save_fp;
+               (void)fclose(fp);
+       }
+       return (0);
+}
+
+/*
+ * __db_prdb --
+ *     Print out the DB structure information.
+ *
+ *     Writes one line to the print stream: the access method name
+ *     followed by the names of the flags set in dbp->flags.  Always
+ *     returns 0.
+ *
+ * PUBLIC: int __db_prdb __P((DB *));
+ */
+int
+__db_prdb(dbp)
+       DB *dbp;
+{
+       /* Flag bit to printable name; terminated by a zero mask. */
+       static const FN fn[] = {
+               { DB_AM_DUP,            "duplicates" },
+               { DB_AM_INMEM,          "in-memory" },
+               { DB_AM_LOCKING,        "locking" },
+               { DB_AM_LOGGING,        "logging" },
+               { DB_AM_MLOCAL,         "local mpool" },
+               { DB_AM_PGDEF,          "default page size" },
+               { DB_AM_RDONLY,         "read-only" },
+               { DB_AM_RECOVER,        "recover" },
+               { DB_AM_SWAP,           "needswap" },
+               { DB_AM_THREAD,         "thread" },
+               { DB_BT_RECNUM,         "btree:records" },
+               { DB_HS_DIRTYMETA,      "hash:dirty-meta" },
+               { DB_RE_DELIMITER,      "recno:delimiter" },
+               { DB_RE_FIXEDLEN,       "recno:fixed-length" },
+               { DB_RE_PAD,            "recno:pad" },
+               { DB_RE_RENUMBER,       "recno:renumber" },
+               { DB_RE_SNAPSHOT,       "recno:snapshot" },
+               { 0 },
+       };
+       FILE *fp;
+       const char *t;
+
+       fp = __db_prinit(NULL);
+
+       /* Anything other than the three known methods is "UNKNOWN". */
+       if (dbp->type == DB_BTREE)
+               t = "btree";
+       else if (dbp->type == DB_HASH)
+               t = "hash";
+       else if (dbp->type == DB_RECNO)
+               t = "recno";
+       else
+               t = "UNKNOWN";
+
+       fprintf(fp, "%s ", t);
+       __db_prflags(dbp->flags, fn);
+       fprintf(fp, "\n");
+
+       return (0);
+}
+
+/*
+ * __db_prbtree --
+ *     Print out the btree internal information.
+ *
+ *     Prints the on-page metadata (read under a read lock on the
+ *     metadata page), then the in-memory BTREE fields, the recno fields
+ *     when present, the page numbers on the btree stack and the overflow
+ *     size.
+ *
+ *     Returns 0 on success, otherwise the error from __bam_lget or
+ *     __bam_pget.
+ *
+ * PUBLIC: int __db_prbtree __P((DB *));
+ */
+int
+__db_prbtree(dbp)
+       DB *dbp;
+{
+       static const FN mfn[] = {
+               { BTM_DUP,      "duplicates" },
+               { BTM_RECNO,    "recno" },
+               { 0 },
+       };
+       BTMETA *mp;
+       BTREE *t;
+       DB_LOCK lock;
+       EPG *sp;
+       FILE *fp;
+       RECNO *rp;
+       db_pgno_t i;
+       int ret;
+
+       t = dbp->internal;
+       fp = __db_prinit(NULL);
+
+       (void)fprintf(fp, "%s\nOn-page metadata:\n", DB_LINE);
+       i = PGNO_METADATA;
+       if ((ret = __bam_lget(dbp, 0, PGNO_METADATA, DB_LOCK_READ, &lock)) != 0)
+               return (ret);
+
+       if ((ret = __bam_pget(dbp, (PAGE **)&mp, &i, 0)) != 0) {
+               /* Don't return still holding the metadata lock. */
+               (void)__bam_lput(dbp, lock);
+               return (ret);
+       }
+
+       (void)fprintf(fp, "magic %#lx\n", (u_long)mp->magic);
+       (void)fprintf(fp, "version %lu\n", (u_long)mp->version);
+       (void)fprintf(fp, "pagesize %lu\n", (u_long)mp->pagesize);
+       (void)fprintf(fp, "maxkey: %lu minkey: %lu\n",
+           (u_long)mp->maxkey, (u_long)mp->minkey);
+       (void)fprintf(fp, "free %lu\n", (u_long)mp->free);
+       (void)fprintf(fp, "flags %lu", (u_long)mp->flags);
+       __db_prflags(mp->flags, mfn);
+       (void)fprintf(fp, "\n");
+
+       /* Done with the metadata page: unpin it, then drop the lock. */
+       (void)memp_fput(dbp->mpf, mp, 0);
+       (void)__bam_lput(dbp, lock);
+
+       (void)fprintf(fp, "%s\nDB_INFO:\n", DB_LINE);
+       (void)fprintf(fp, "bt_maxkey: %lu bt_minkey: %lu\n",
+           (u_long)t->bt_maxkey, (u_long)t->bt_minkey);
+       (void)fprintf(fp, "bt_compare: %#lx bt_prefix: %#lx\n",
+           (u_long)t->bt_compare, (u_long)t->bt_prefix);
+       if ((rp = t->bt_recno) != NULL) {
+               (void)fprintf(fp,
+                   "re_delim: %#lx re_pad: %#lx re_len: %lu re_source: %s\n",
+                   (u_long)rp->re_delim, (u_long)rp->re_pad,
+                   (u_long)rp->re_len,
+                   rp->re_source == NULL ? "" : rp->re_source);
+               (void)fprintf(fp,
+                   "cmap: %#lx smap: %#lx emap: %#lx msize: %lu\n",
+                   (u_long)rp->re_cmap, (u_long)rp->re_smap,
+                   (u_long)rp->re_emap, (u_long)rp->re_msize);
+       }
+       (void)fprintf(fp, "stack:");
+       for (sp = t->bt_stack; sp < t->bt_sp; ++sp)
+               (void)fprintf(fp, " %lu", (u_long)sp->page->pgno);
+       (void)fprintf(fp, "\n");
+       (void)fprintf(fp, "ovflsize: %lu\n", (u_long)t->bt_ovflsize);
+       (void)fflush(fp);
+       return (0);
+}
+
+/*
+ * __db_prhash --
+ *     Print out the hash internal information.
+ *
+ *     Prints the in-memory statistics counters, then the fields of the
+ *     hash header page and its spares array.  If the header is not
+ *     currently held in t->hdr it is fetched for the duration of the call
+ *     and released (with t->hdr reset to NULL) before returning.
+ *
+ *     Returns 0 on success, otherwise the error from memp_fget.
+ *
+ * PUBLIC: int __db_prhash __P((DB *));
+ */
+int
+__db_prhash(dbp)
+       DB *dbp;
+{
+       FILE *fp;
+       HTAB *t;
+       int i, put_page, ret;
+       db_pgno_t pgno;
+
+       fp = __db_prinit(NULL);
+       t = dbp->internal;
+
+       fprintf(fp, "\thash_accesses    %lu\n", (u_long)t->hash_accesses);
+       fprintf(fp, "\thash_collisions  %lu\n", (u_long)t->hash_collisions);
+       fprintf(fp, "\thash_expansions  %lu\n", (u_long)t->hash_expansions);
+       fprintf(fp, "\thash_overflows   %lu\n", (u_long)t->hash_overflows);
+       fprintf(fp, "\thash_bigpages    %lu\n", (u_long)t->hash_bigpages);
+       fprintf(fp, "\n");
+
+       /* Borrow the header page only when nobody is holding it already. */
+       put_page = 0;
+       if (t->hdr == NULL) {
+               pgno = PGNO_METADATA;
+               ret = memp_fget(dbp->mpf, &pgno, 0, &t->hdr);
+               if (ret != 0)
+                       return (ret);
+               put_page = 1;
+       }
+
+       fprintf(fp, "\tmagic      %#lx\n", (u_long)t->hdr->magic);
+       fprintf(fp, "\tversion    %lu\n", (u_long)t->hdr->version);
+       fprintf(fp, "\tpagesize   %lu\n", (u_long)t->hdr->pagesize);
+       fprintf(fp, "\tovfl_point %lu\n", (u_long)t->hdr->ovfl_point);
+       fprintf(fp, "\tlast_freed %lu\n", (u_long)t->hdr->last_freed);
+       fprintf(fp, "\tmax_bucket %lu\n", (u_long)t->hdr->max_bucket);
+       fprintf(fp, "\thigh_mask  %#lx\n", (u_long)t->hdr->high_mask);
+       fprintf(fp, "\tlow_mask   %#lx\n", (u_long)t->hdr->low_mask);
+       fprintf(fp, "\tffactor    %lu\n", (u_long)t->hdr->ffactor);
+       fprintf(fp, "\tnelem      %lu\n", (u_long)t->hdr->nelem);
+       fprintf(fp, "\th_charkey  %#lx\n", (u_long)t->hdr->h_charkey);
+
+       i = 0;
+       while (i < NCACHED) {
+               fprintf(fp, "%lu ", (u_long)t->hdr->spares[i]);
+               ++i;
+       }
+       fprintf(fp, "\n");
+
+       (void)fflush(fp);
+
+       /* Give back only what this call fetched. */
+       if (put_page) {
+               (void)memp_fput(dbp->mpf, (PAGE *)t->hdr, 0);
+               t->hdr = NULL;
+       }
+       return (0);
+}
+
+/*
+ * __db_prtree --
+ *     Print out the entire tree.
+ *
+ *     Fetches pages in increasing page-number order starting at
+ *     PGNO_ROOT and prints every page whose type is not P_INVALID; all
+ *     is passed through to __db_prpage.  The walk ends at the first page
+ *     that cannot be fetched.
+ *
+ *     Returns 0 if every page printed cleanly, otherwise the first
+ *     non-zero value returned by __db_prpage.
+ *
+ * PUBLIC: int __db_prtree __P((DB_MPOOLFILE *, int));
+ */
+int
+__db_prtree(mpf, all)
+       DB_MPOOLFILE *mpf;
+       int all;
+{
+       PAGE *h;
+       db_pgno_t i;
+       int ret, t_ret;
+
+       if (set_psize == PSIZE_BOUNDARY)
+               __db_psize(mpf);
+
+       ret = 0;
+       for (i = PGNO_ROOT;; ++i) {
+               /*
+                * A failed fetch is how the loop learns it has run off the
+                * end of the file, so it is not an error to report.  Keep
+                * its status out of ret so that the first __db_prpage
+                * failure survives to the return.
+                *
+                * XXX: end-of-file and a real fetch error look the same here.
+                */
+               if (memp_fget(mpf, &i, 0, &h) != 0)
+                       break;
+               if (TYPE(h) != P_INVALID)
+                       if ((t_ret = __db_prpage(h, all)) != 0 && ret == 0)
+                               ret = t_ret;
+               (void)memp_fput(mpf, h, 0);
+       }
+       (void)fflush(__db_prinit(NULL));
+       return (ret);
+}
+
+/*
+ * __db_prnpage
+ *     -- Print out a specific page.
+ *
+ *     Fetches page pgno from mpf, prints it with all of its entries and
+ *     releases it again.  Returns the memp_fget error if the page cannot
+ *     be fetched, otherwise the value returned by __db_prpage.
+ *
+ * PUBLIC: int __db_prnpage __P((DB_MPOOLFILE *, db_pgno_t));
+ */
+int
+__db_prnpage(mpf, pgno)
+       DB_MPOOLFILE *mpf;
+       db_pgno_t pgno;
+{
+       PAGE *pagep;
+       int ret;
+
+       /* Learn the real page size if nobody has done so yet. */
+       if (set_psize == PSIZE_BOUNDARY)
+               __db_psize(mpf);
+
+       ret = memp_fget(mpf, &pgno, 0, &pagep);
+       if (ret != 0)
+               return (ret);
+
+       /* Always print the entries as well as the page header. */
+       ret = __db_prpage(pagep, 1);
+       (void)fflush(__db_prinit(NULL));
+
+       (void)memp_fput(mpf, pagep, 0);
+       return (ret);
+}
+
+/*
+ * __db_prpage
+ *     -- Print out a page.
+ *
+ *     Writes a description of page h to the print stream: the page
+ *     number and type, its LSN and the header fields that apply to that
+ *     type.  Overflow pages additionally get their reference count and
+ *     data printed and nothing else.  For other pages, if all is
+ *     non-zero, every entry on the page is printed as well.
+ *
+ *     Returns 1 for an unrecognized page type, EINVAL if any entry has an
+ *     out-of-range offset or an unrecognized item type, 0 otherwise.
+ *
+ * PUBLIC: int __db_prpage __P((PAGE *, int));
+ */
+int
+__db_prpage(h, all)
+       PAGE *h;
+       int all;
+{
+       BINTERNAL *bi;
+       BKEYDATA *bk;
+       HKEYDATA *hkd;
+       HOFFPAGE a_hkd;
+       FILE *fp;
+       RINTERNAL *ri;
+       db_indx_t dlen, len, i;
+       db_pgno_t pgno;
+       u_int8_t *p;
+       int deleted, ret;
+       const char *s;
+
+       bi = NULL;                              /* XXX: Shut the compiler up. */
+       bk = NULL;
+       hkd = NULL;
+       ri = NULL;
+
+       fp = __db_prinit(NULL);
+
+       /* Map the page type to a printable name; reject anything else. */
+       switch (TYPE(h)) {
+       case P_DUPLICATE:
+               s = "duplicate";
+               break;
+       case P_HASH:
+               s = "hash";
+               break;
+       case P_IBTREE:
+               s = "btree internal";
+               break;
+       case P_INVALID:
+               s = "invalid";
+               break;
+       case P_IRECNO:
+               s = "recno internal";
+               break;
+       case P_LBTREE:
+               s = "btree leaf";
+               break;
+       case P_LRECNO:
+               s = "recno leaf";
+               break;
+       case P_OVERFLOW:
+               s = "overflow";
+               break;
+       default:
+               fprintf(fp, "ILLEGAL PAGE TYPE: page: %lu type: %lu\n",
+                   (u_long)h->pgno, (u_long)TYPE(h));
+                       return (1);
+       }
+       fprintf(fp, "page %4lu: (%s)\n", (u_long)h->pgno, s);
+       fprintf(fp, "    lsn.file: %lu lsn.offset: %lu",
+           (u_long)LSN(h).file, (u_long)LSN(h).offset);
+       /* The record total is printed for internal pages and a recno leaf root. */
+       if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO ||
+           (TYPE(h) == P_LRECNO && h->pgno == PGNO_ROOT))
+               fprintf(fp, " total records: %4lu", (u_long)RE_NREC(h));
+       fprintf(fp, "\n");
+       if (TYPE(h) == P_LBTREE || TYPE(h) == P_LRECNO)
+               fprintf(fp, "    prev: %4lu next: %4lu",
+                   (u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h));
+       if (TYPE(h) == P_IBTREE || TYPE(h) == P_LBTREE)
+               fprintf(fp, " level: %2lu", (u_long)h->level);
+       /* Overflow pages have no entry index: print the raw data and stop. */
+       if (TYPE(h) == P_OVERFLOW) {
+               fprintf(fp, " ref cnt: %4lu ", (u_long)OV_REF(h));
+               __db_pr((u_int8_t *)h + P_OVERHEAD, OV_LEN(h));
+               return (0);
+       }
+       fprintf(fp, " entries: %4lu", (u_long)NUM_ENT(h));
+       fprintf(fp, " offset: %4lu\n", (u_long)HOFFSET(h));
+
+       if (!all || TYPE(h) == P_INVALID)
+               return (0);
+
+       ret = 0;
+       for (i = 0; i < NUM_ENT(h); i++) {
+               /*
+                * An entry must start at or after P_OVERHEAD and before
+                * set_psize; report and skip any that does not.
+                */
+               if (P_ENTRY(h, i) - (u_int8_t *)h < P_OVERHEAD ||
+                   (size_t)(P_ENTRY(h, i) - (u_int8_t *)h) >= set_psize) {
+                       fprintf(fp,
+                           "ILLEGAL PAGE OFFSET: indx: %lu of %lu\n",
+                           (u_long)i, (u_long)h->inp[i]);
+                       ret = EINVAL;
+                       continue;
+               }
+               /* First pass: locate the item and work out its deleted flag. */
+               deleted = 0;
+               switch (TYPE(h)) {
+               case P_HASH:
+                       hkd = GET_HKEYDATA(h, i);
+                       break;
+               case P_IBTREE:
+                       bi = GET_BINTERNAL(h, i);
+                       break;
+               case P_IRECNO:
+                       ri = GET_RINTERNAL(h, i);
+                       break;
+               case P_LBTREE:
+                       bk = GET_BKEYDATA(h, i);
+                       /*
+                        * For even indices the flag is read from the entry at
+                        * i + O_INDX -- presumably the data item paired with
+                        * this key; odd indices are never shown as deleted.
+                        */
+                       deleted = i % 2 == 0 &&
+                           GET_BKEYDATA(h, i + O_INDX)->deleted;
+                       break;
+               case P_LRECNO:
+               case P_DUPLICATE:
+                       bk = GET_BKEYDATA(h, i);
+                       deleted = GET_BKEYDATA(h, i)->deleted;
+                       break;
+               default:
+                       fprintf(fp,
+                           "ILLEGAL PAGE ITEM: %lu\n", (u_long)TYPE(h));
+                       ret = EINVAL;
+                       continue;
+               }
+               fprintf(fp, "   %s[%03lu] %4lu ",
+                   deleted ? "D" : " ", (u_long)i, (u_long)h->inp[i]);
+               /* Second pass: print the item according to page and item type. */
+               switch (TYPE(h)) {
+               case P_HASH:
+                       /* Item types not listed here print nothing further. */
+                       switch (hkd->type) {
+                       case H_OFFDUP:
+                               /* Copy the page number out rather than read it in place. */
+                               memcpy(&pgno,
+                                   (u_int8_t *)hkd + SSZ(HOFFDUP, pgno),
+                                   sizeof(db_pgno_t));
+                               fprintf(fp,
+                                   "%4lu [offpage dups]\n", (u_long)pgno);
+                               break;
+                       case H_DUPLICATE:
+                               /*
+                                * If this is the first item on a page, then
+                                * we cannot figure out how long it is, so
+                                * we only print the first one in the duplicate
+                                * set.
+                                */
+                               if (i != 0)
+                                       len = LEN_HKEYDATA(h, 0, i);
+                               else
+                                       len = 1;
+
+                               fprintf(fp, "Duplicates:\n");
+                               /*
+                                * Each element is walked as a db_indx_t length,
+                                * that many data bytes, and a further
+                                * db_indx_t that is stepped over unread.
+                                */
+                               for (p = hkd->data; p < hkd->data + len;) {
+                                       memcpy(&dlen, p, sizeof(db_indx_t));
+                                       p += sizeof(db_indx_t);
+                                       fprintf(fp, "\t\t");
+                                       __db_pr(p, dlen);
+                                       p += sizeof(db_indx_t) + dlen;
+                               }
+                               break;
+                       case H_KEYDATA:
+                               /*
+                                * NOTE(review): for the first item the data is
+                                * printed with %s, which relies on a NUL byte
+                                * following it -- confirm this is guaranteed.
+                                */
+                               if (i != 0)
+                                       __db_pr(hkd->data,
+                                           LEN_HKEYDATA(h, 0, i));
+                               else
+                                       fprintf(fp, "%s\n", hkd->data);
+                               break;
+                       case H_OFFPAGE:
+                               /* Work on a local copy of the off-page structure. */
+                               memcpy(&a_hkd, hkd, HOFFPAGE_SIZE);
+                               fprintf(fp,
+                                   "overflow: total len: %4lu page: %4lu\n",
+                                   (u_long)a_hkd.tlen, (u_long)a_hkd.pgno);
+                               break;
+                       }
+                       break;
+               case P_IBTREE:
+                       fprintf(fp, "count: %4lu pgno: %4lu ",
+                           (u_long)bi->nrecs, (u_long)bi->pgno);
+                       switch (bi->type) {
+                       case B_KEYDATA:
+                               __db_pr(bi->data, bi->len);
+                               break;
+                       case B_DUPLICATE:
+                       case B_OVERFLOW:
+                               __db_proff(bi->data);
+                               break;
+                       default:
+                               fprintf(fp, "ILLEGAL BINTERNAL TYPE: %lu\n",
+                                   (u_long)bi->type);
+                               ret = EINVAL;
+                               break;
+                       }
+                       break;
+               case P_IRECNO:
+                       fprintf(fp, "entries %4lu pgno %4lu\n",
+                           (u_long)ri->nrecs, (u_long)ri->pgno);
+                       break;
+               case P_LBTREE:
+               case P_LRECNO:
+               case P_DUPLICATE:
+                       switch (bk->type) {
+                       case B_KEYDATA:
+                               __db_pr(bk->data, bk->len);
+                               break;
+                       case B_DUPLICATE:
+                       case B_OVERFLOW:
+                               __db_proff(bk);
+                               break;
+                       default:
+                               fprintf(fp,
+                           "ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu\n",
+                                   (u_long)bk->type);
+                               ret = EINVAL;
+                               break;
+                       }
+                       break;
+               }
+       }
+       (void)fflush(fp);
+       return (ret);
+}
+
+/*
+ * __db_isbad
+ *     -- Decide if a page is corrupted.
+ *
+ *     Checks that the page type is a known one, that every entry starts
+ *     inside the page past the header, and that items on hash, btree
+ *     internal and duplicate pages have a known item type.  Each problem
+ *     found is reported on the print stream.
+ *
+ *     Returns 0 if no problem was found.  Otherwise aborts the process
+ *     when die is non-zero, or returns 1 when it is zero.
+ *
+ * PUBLIC: int __db_isbad __P((PAGE *, int));
+ */
+int
+__db_isbad(h, die)
+       PAGE *h;
+       int die;
+{
+       BINTERNAL *bi;
+       BKEYDATA *bk;
+       HKEYDATA *hkd;
+       FILE *fp;
+       db_indx_t i;
+
+       fp = __db_prinit(NULL);
+
+       /* The page type itself has to be one we know about. */
+       switch (TYPE(h)) {
+       case P_DUPLICATE:
+       case P_HASH:
+       case P_IBTREE:
+       case P_INVALID:
+       case P_IRECNO:
+       case P_LBTREE:
+       case P_LRECNO:
+       case P_OVERFLOW:
+               break;
+       default:
+               fprintf(fp, "ILLEGAL PAGE TYPE: page: %lu type: %lu\n",
+                   (u_long)h->pgno, (u_long)TYPE(h));
+               goto corrupt;
+       }
+
+       for (i = 0; i < NUM_ENT(h); i++) {
+               /* Entry must lie at or past P_OVERHEAD and below set_psize. */
+               if (P_ENTRY(h, i) - (u_int8_t *)h < P_OVERHEAD ||
+                   (size_t)(P_ENTRY(h, i) - (u_int8_t *)h) >= set_psize) {
+                       fprintf(fp,
+                           "ILLEGAL PAGE OFFSET: indx: %lu of %lu\n",
+                           (u_long)i, (u_long)h->inp[i]);
+                       goto corrupt;
+               }
+
+               switch (TYPE(h)) {
+               case P_IRECNO:
+               case P_LBTREE:
+               case P_LRECNO:
+                       /* No per-item check is made for these page types. */
+                       break;
+               case P_HASH:
+                       hkd = GET_HKEYDATA(h, i);
+                       switch (hkd->type) {
+                       case H_OFFDUP:
+                       case H_DUPLICATE:
+                       case H_KEYDATA:
+                       case H_OFFPAGE:
+                               break;
+                       default:
+                               fprintf(fp, "ILLEGAL HASH TYPE: %lu\n",
+                                   (u_long)hkd->type);
+                               goto corrupt;
+                       }
+                       break;
+               case P_IBTREE:
+                       bi = GET_BINTERNAL(h, i);
+                       switch (bi->type) {
+                       case B_KEYDATA:
+                       case B_DUPLICATE:
+                       case B_OVERFLOW:
+                               break;
+                       default:
+                               fprintf(fp, "ILLEGAL BINTERNAL TYPE: %lu\n",
+                                   (u_long)bi->type);
+                               goto corrupt;
+                       }
+                       break;
+               case P_DUPLICATE:
+                       bk = GET_BKEYDATA(h, i);
+                       switch (bk->type) {
+                       case B_KEYDATA:
+                       case B_DUPLICATE:
+                       case B_OVERFLOW:
+                               break;
+                       default:
+                               fprintf(fp,
+                           "ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu\n",
+                                   (u_long)bk->type);
+                               goto corrupt;
+                       }
+                       break;
+               default:
+                       /* A page type that should not carry entries at all. */
+                       fprintf(fp,
+                           "ILLEGAL PAGE ITEM: %lu\n", (u_long)TYPE(h));
+                       goto corrupt;
+               }
+       }
+       return (0);
+
+corrupt:
+       if (die) {
+               abort();
+               /* NOTREACHED */
+       }
+       return (1);
+}
+
+/*
+ * __db_pr --
+ *     Print out a data element.
+ *
+ *     Prints the length, then at most the first 20 bytes of the data:
+ *     printable characters and newlines as themselves, everything else
+ *     in hex.  Longer items are followed by "...".  The line is finished
+ *     with a newline unless the last byte printed already was one.
+ *
+ * PUBLIC: void __db_pr __P((u_int8_t *, u_int32_t));
+ */
+void
+__db_pr(p, len)
+       u_int8_t *p;
+       u_int32_t len;
+{
+       FILE *fp;
+       u_int8_t *end;
+       int lastch;
+
+       fp = __db_prinit(NULL);
+
+       fprintf(fp, "len: %3lu", (u_long)len);
+
+       /* Nothing to show: just finish the line. */
+       if (len == 0) {
+               fprintf(fp, "\n");
+               return;
+       }
+
+       fprintf(fp, " data: ");
+       lastch = '.';
+       end = p + (len > 20 ? 20 : len);
+       while (p < end) {
+               lastch = *p++;
+               if (lastch == '\n' || isprint(lastch))
+                       fprintf(fp, "%c", lastch);
+               else
+                       fprintf(fp, "%#x", (u_int)lastch);
+       }
+
+       /* Truncated output never counts as ending in a newline. */
+       if (len > 20) {
+               fprintf(fp, "...");
+               lastch = '.';
+       }
+       if (lastch != '\n')
+               fprintf(fp, "\n");
+}
+
+/*
+ * __db_proff --
+ *     Print out an off-page element.
+ *
+ *     vp is read as a BOVERFLOW.  An overflow item prints its total
+ *     length and page number, a duplicate item prints its page number,
+ *     and any other type prints nothing.
+ */
+static void
+__db_proff(vp)
+       void *vp;
+{
+       FILE *fp;
+       BOVERFLOW *bo;
+
+       fp = __db_prinit(NULL);
+       bo = vp;
+
+       if (bo->type == B_OVERFLOW)
+               fprintf(fp, "overflow: total len: %4lu page: %4lu\n",
+                   (u_long)bo->tlen, (u_long)bo->pgno);
+       else if (bo->type == B_DUPLICATE)
+               fprintf(fp, "duplicate: page: %4lu\n", (u_long)bo->pgno);
+}
+
+/*
+ * __db_prflags --
+ *     Print out flags values.
+ *
+ *     Walks the table fn up to its zero-mask terminator and prints the
+ *     name of every entry whose mask has a bit in common with flags, as
+ *     a parenthesized, comma-separated list.  Prints nothing at all when
+ *     no entry matches.
+ *
+ * PUBLIC: void __db_prflags __P((u_int32_t, const FN *));
+ */
+void
+__db_prflags(flags, fn)
+       u_int32_t flags;
+       FN const *fn;
+{
+       FILE *fp;
+       const FN *entry;
+       int nprinted;
+
+       fp = __db_prinit(NULL);
+
+       nprinted = 0;
+       entry = fn;
+       while (entry->mask != 0) {
+               if (entry->mask & flags) {
+                       /* The first name opens the list, later ones follow a comma. */
+                       fprintf(fp, "%s%s",
+                           nprinted == 0 ? " (" : ", ", entry->name);
+                       ++nprinted;
+               }
+               ++entry;
+       }
+       if (nprinted != 0)
+               fprintf(fp, ")");
+}
+
+/*
+ * __db_psize --
+ *     Get the page size.
+ *
+ *     Reads the metadata page of mpf and, if its magic number is the
+ *     btree or hash one, records its page size in set_psize.  In every
+ *     other case set_psize is left at PSIZE_BOUNDARY - 1, which differs
+ *     from PSIZE_BOUNDARY, so callers that test for that value will not
+ *     probe again.
+ */
+static void
+__db_psize(mpf)
+       DB_MPOOLFILE *mpf;
+{
+       BTMETA *meta;
+       db_pgno_t pgno;
+
+       /* Fallback used when the metadata page is missing or unrecognized. */
+       set_psize = PSIZE_BOUNDARY - 1;
+
+       pgno = PGNO_METADATA;
+       if (memp_fget(mpf, &pgno, 0, &meta) != 0)
+               return;
+
+       if (meta->magic == DB_BTREEMAGIC || meta->magic == DB_HASHMAGIC)
+               set_psize = meta->pagesize;
+
+       (void)memp_fput(mpf, meta, 0);
+}
diff --git a/db2/db/db_rec.c b/db2/db/db_rec.c
new file mode 100644 (file)
index 0000000..900b0ed
--- /dev/null
@@ -0,0 +1,623 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_rec.c     10.8 (Sleepycat) 8/22/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#endif
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "log.h"
+#include "hash.h"
+#include "btree.h"
+
+/*
+ * __db_addrem_recover --
+ *     Recovery function for DB_ADD_DUP / DB_REM_DUP log records.
+ *
+ * PUBLIC: int __db_addrem_recover
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * This log message is generated whenever we add or remove a duplicate
+ * to/from a duplicate page.  On recover, we just do the opposite.
+ *
+ * redo selects the direction: non-zero reapplies the logged operation,
+ * zero backs it out.  When the page is successfully returned to the pool
+ * (or, on undo, does not exist), *lsnp is set to argp->prev_lsn.
+ *
+ * NOTE(review): argp, file_dbp and mpf are never assigned in the visible
+ * code, so REC_INTRO presumably sets them up, and REC_CLOSE presumably
+ * returns ret -- confirm against the macro definitions.
+ */
+int
+__db_addrem_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __db_addrem_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int change, cmp_n, cmp_p, ret;
+
+       REC_PRINT(__db_addrem_print);
+       REC_INTRO(__db_addrem_read);
+
+       if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        */
+                       *lsnp = argp->prev_lsn;
+                       ret = 0;
+                       goto out;
+               } else
+                       if ((ret = memp_fget(mpf,
+                           &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
+                               goto out;
+       }
+
+       /*
+        * cmp_n compares *lsnp with the page's LSN; cmp_p compares the
+        * page's LSN with the LSN saved in the record (argp->pagelsn).
+        * A result of 0 is what enables the undo (cmp_n) or redo (cmp_p)
+        * branches below.
+        */
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+       change = 0;
+       if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_DUP) ||
+           (cmp_n == 0 && !redo && argp->opcode == DB_REM_DUP)) {
+
+               /* Need to redo an add, or undo a delete. */
+               if ((ret = __db_pitem(file_dbp, pagep, argp->indx, argp->nbytes,
+                   argp->hdr.size == 0 ? NULL : &argp->hdr,
+                   argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0)
+                       goto out;
+
+               change = DB_MPOOL_DIRTY;
+
+       } else if ((cmp_n == 0 && !redo && argp->opcode == DB_ADD_DUP) ||
+           (cmp_p == 0 && redo && argp->opcode == DB_REM_DUP)) {
+               /* Need to undo an add, or redo a delete. */
+               if ((ret = __db_ditem(file_dbp, pagep, argp->indx,
+                   argp->nbytes)) != 0)
+                       goto out;
+               change = DB_MPOOL_DIRTY;
+       }
+
+       /*
+        * Stamp a modified page: *lsnp after a redo, the saved pagelsn
+        * after an undo.  The else below pairs with the inner if (redo).
+        */
+       if (change)
+               if (redo)
+                       LSN(pagep) = *lsnp;
+               else
+                       LSN(pagep) = argp->pagelsn;
+
+       /* change doubles as the flag argument passed to memp_fput. */
+       if ((ret = memp_fput(mpf, pagep, change)) == 0)
+               *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __db_split_recover --
+ *     Recovery function for DB_SPLITOLD / DB_SPLITNEW log records; the
+ *     page is restored by copying the page image stored in the record.
+ *
+ * PUBLIC: int __db_split_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * redo selects the direction: non-zero reapplies the logged operation,
+ * zero backs it out.  When the page is successfully returned to the pool
+ * (or, on undo, does not exist), *lsnp is set to argp->prev_lsn.
+ *
+ * NOTE(review): argp, file_dbp and mpf are never assigned in the visible
+ * code, so REC_INTRO presumably sets them up, and REC_CLOSE presumably
+ * returns ret -- confirm against the macro definitions.
+ */
+int
+__db_split_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __db_split_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int change, cmp_n, cmp_p, ret;
+
+       REC_PRINT(__db_split_print);
+       REC_INTRO(__db_split_read);
+
+       if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        */
+                       *lsnp = argp->prev_lsn;
+                       ret = 0;
+                       goto out;
+               } else
+                       if ((ret = memp_fget(mpf,
+                           &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
+                               goto out;
+       }
+
+       /*
+        * There are two types of log messages here, one for the old page
+        * and one for the new pages created.  The original image in the
+        * SPLITOLD record is used for undo.  The image in the SPLITNEW
+        * is used for redo.  We should never have a case where there is
+        * a redo operation and the SPLITOLD record is on disk, but not
+        * the SPLITNEW record.  Therefore, we only redo NEW messages
+        * and only undo OLD messages.
+        */
+
+       change = 0;
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+       if (cmp_p == 0 && redo) {
+               if (argp->opcode == DB_SPLITNEW) {
+                       /* Need to redo the split described. */
+                       memcpy(pagep,
+                           argp->pageimage.data, argp->pageimage.size);
+               }
+               /* The LSN is updated whichever opcode the record has. */
+               LSN(pagep) = *lsnp;
+               change = DB_MPOOL_DIRTY;
+       } else if (cmp_n == 0 && !redo) {
+               if (argp->opcode == DB_SPLITOLD) {
+                       /* Put back the old image. */
+                       memcpy(pagep,
+                           argp->pageimage.data, argp->pageimage.size);
+               }
+               /* The LSN is updated whichever opcode the record has. */
+               LSN(pagep) = argp->pagelsn;
+               change = DB_MPOOL_DIRTY;
+       }
+       /* change doubles as the flag argument passed to memp_fput. */
+       if ((ret = memp_fput(mpf, pagep, change)) == 0)
+               *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __db_big_recover --
+ *     Recovery function for DB_ADD_BIG / DB_REM_BIG log records, which
+ *     add or remove an overflow page.
+ *
+ * PUBLIC: int __db_big_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * redo selects the direction: non-zero reapplies the logged operation,
+ * zero backs it out.  Up to three pages are examined: the page named in
+ * the record and, when their page numbers are valid, the previous and
+ * next pages.  *lsnp is the LSN every page is compared against, so it
+ * must stay untouched until the last page has been examined; it is set
+ * to argp->prev_lsn only once all pages are done.
+ *
+ * NOTE(review): argp, file_dbp and mpf are never assigned in the visible
+ * code, so REC_INTRO presumably sets them up, and REC_CLOSE presumably
+ * returns ret -- confirm against the macro definitions.
+ */
+int
+__db_big_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __db_big_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int change, cmp_n, cmp_p, ret;
+
+       REC_PRINT(__db_big_print);
+       REC_INTRO(__db_big_read);
+
+       if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        */
+                       ret = 0;
+                       goto ppage;
+               } else
+                       if ((ret = memp_fget(mpf,
+                           &argp->pgno, DB_MPOOL_CREATE, &pagep)) != 0)
+                               goto out;
+       }
+
+       /*
+        * There are three pages we need to check.  The one on which we are
+        * adding data, the previous one whose next_pointer may have
+        * been updated, and the next one whose prev_pointer may have
+        * been updated.
+        */
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+       change = 0;
+       if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_BIG) ||
+           (cmp_n == 0 && !redo && argp->opcode == DB_REM_BIG)) {
+               /* We are either redo-ing an add, or undoing a delete. */
+               P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno,
+                       argp->next_pgno, 0, P_OVERFLOW);
+               OV_LEN(pagep) = argp->dbt.size;
+               OV_REF(pagep) = 1;
+               memcpy((u_int8_t *)pagep + P_OVERHEAD, argp->dbt.data,
+                   argp->dbt.size);
+               PREV_PGNO(pagep) = argp->prev_pgno;
+               change = DB_MPOOL_DIRTY;
+       } else if ((cmp_n == 0 && !redo && argp->opcode == DB_ADD_BIG) ||
+           (cmp_p == 0 && redo && argp->opcode == DB_REM_BIG)) {
+               /*
+                * We are either undo-ing an add or redo-ing a delete.
+                * The page is about to be reclaimed in either case, so
+                * there really isn't anything to do here.
+                */
+               change = DB_MPOOL_DIRTY;
+       }
+       if (change)
+               LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+
+       if ((ret = memp_fput(mpf, pagep, change)) != 0)
+               goto out;
+
+       /* Now check the previous page. */
+ppage: if (argp->prev_pgno != PGNO_INVALID) {
+               change = 0;
+               if ((ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep)) != 0)
+                       if (!redo) {
+                               /*
+                                * We are undoing and the page doesn't exist.
+                                * That is equivalent to having a pagelsn of 0,
+                                * so we would not have to undo anything.  In
+                                * this case, don't bother creating a page.
+                                *
+                                * Do not overwrite *lsnp here: the next-page
+                                * check below still compares against it, and
+                                * it is set to prev_lsn after all pages have
+                                * been processed.
+                                */
+                               ret = 0;
+                               goto npage;
+                       } else
+                               if ((ret = memp_fget(mpf, &argp->prev_pgno,
+                                   DB_MPOOL_CREATE, &pagep)) != 0)
+                                       goto out;
+
+               cmp_n = log_compare(lsnp, &LSN(pagep));
+               cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
+
+               if ((cmp_p == 0 && redo && argp->opcode == DB_ADD_BIG) ||
+                   (cmp_n == 0 && !redo && argp->opcode == DB_REM_BIG)) {
+                       /* Redo add, undo delete. */
+                       NEXT_PGNO(pagep) = argp->pgno;
+                       change = DB_MPOOL_DIRTY;
+               } else if ((cmp_n == 0 &&
+                   !redo && argp->opcode == DB_ADD_BIG) ||
+                   (cmp_p == 0 && redo && argp->opcode == DB_REM_BIG)) {
+                       /* Redo delete, undo add. */
+                       NEXT_PGNO(pagep) = argp->next_pgno;
+                       change = DB_MPOOL_DIRTY;
+               }
+               if (change)
+                       LSN(pagep) = redo ? *lsnp : argp->prevlsn;
+               if ((ret = memp_fput(mpf, pagep, change)) != 0)
+                       goto out;
+       }
+
+       /* Now check the next page.  Can only be set on a delete. */
+npage: if (argp->next_pgno != PGNO_INVALID) {
+               change = 0;
+               if ((ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep)) != 0)
+                       if (!redo) {
+                               /*
+                                * We are undoing and the page doesn't exist.
+                                * That is equivalent to having a pagelsn of 0,
+                                * so we would not have to undo anything.  In
+                                * this case, don't bother creating a page.
+                                *
+                                * This is the last page, so *lsnp can be
+                                * updated before leaving.
+                                */
+                               *lsnp = argp->prev_lsn;
+                               ret = 0;
+                               goto out;
+                       } else
+                               if ((ret = memp_fget(mpf, &argp->next_pgno,
+                                   DB_MPOOL_CREATE, &pagep)) != 0)
+                                       goto out;
+
+               cmp_n = log_compare(lsnp, &LSN(pagep));
+               cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
+               if (cmp_p == 0 && redo) {
+                       PREV_PGNO(pagep) = PGNO_INVALID;
+                       change = DB_MPOOL_DIRTY;
+               } else if (cmp_n == 0 && !redo) {
+                       PREV_PGNO(pagep) = argp->pgno;
+                       change = DB_MPOOL_DIRTY;
+               }
+               if (change)
+                       LSN(pagep) = redo ? *lsnp : argp->nextlsn;
+               if ((ret = memp_fput(mpf, pagep, change)) != 0)
+                       goto out;
+       }
+
+       /* All pages handled without error. */
+       *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __db_ovref_recover --
+ *     Recovery function for __db_ioff().
+ *
+ * PUBLIC: int __db_ovref_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * Redo increments the page's overflow reference count and undo
+ * decrements it.  redo is non-zero to reapply the logged change and zero
+ * to back it out.  *lsnp is set to argp->prev_lsn once the page has been
+ * fetched, whatever memp_fput returns.
+ *
+ * NOTE(review): argp, file_dbp and mpf are never assigned in the visible
+ * code, so REC_INTRO presumably sets them up, and REC_CLOSE presumably
+ * returns ret -- confirm against the macro definitions.
+ */
+int
+__db_ovref_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __db_ovref_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int modified, ret;
+
+       REC_PRINT(__db_ovref_print);
+       REC_INTRO(__db_ovref_read);
+
+       if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+               (void)__db_pgerr(file_dbp, argp->pgno);
+               goto out;
+       }
+
+       modified = 0;
+       /*
+        * Redo applies when the page still carries the LSN saved in the
+        * record (argp->lsn, the value undo restores).  Comparing *lsnp
+        * with argp->lsn instead could never match, because *lsnp is the
+        * LSN stamped on the page by the redo itself.
+        */
+       if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) {
+               /* Need to redo update described. */
+               ++OV_REF(pagep);
+
+               pagep->lsn = *lsnp;
+               modified = 1;
+       } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
+               /* Need to undo update described. */
+               --OV_REF(pagep);
+
+               pagep->lsn = argp->lsn;
+               modified = 1;
+       }
+       ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0);
+
+       *lsnp = argp->prev_lsn;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __db_relink_recover --
+ *     Recovery function for relink.
+ *
+ * PUBLIC: int __db_relink_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * redo is non-zero to reapply the logged change and zero to back it out.
+ * Redo points the neighbours at each other (next->prev = argp->prev,
+ * prev->next = argp->next); undo points them back at argp->pgno and
+ * restores the page's own links.  A page that cannot be fetched is an
+ * error on redo and is skipped on undo.  On success *lsnp is set to
+ * argp->prev_lsn and 0 is stored in ret.
+ *
+ * Each redo test checks that the page still carries the LSN saved for
+ * it in the record (the value undo restores).  Comparing *lsnp with the
+ * saved LSN instead could never match, because *lsnp is the LSN stamped
+ * on the page by the redo itself.
+ *
+ * NOTE(review): argp, file_dbp and mpf are never assigned in the visible
+ * code, so REC_INTRO presumably sets them up, and REC_CLOSE presumably
+ * returns ret -- confirm against the macro definitions.
+ */
+int
+__db_relink_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __db_relink_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int modified, ret;
+
+       REC_PRINT(__db_relink_print);
+       REC_INTRO(__db_relink_read);
+
+       /*
+        * There are three pages we need to check -- the page, and the
+        * previous and next pages, if they existed.
+        */
+       if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0) {
+               if (redo) {
+                       (void)__db_pgerr(file_dbp, argp->pgno);
+                       goto out;
+               }
+               goto next;
+       }
+       modified = 0;
+       if (log_compare(&LSN(pagep), &argp->lsn) == 0 && redo) {
+               /* Redo the relink. */
+               pagep->lsn = *lsnp;
+               modified = 1;
+       } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
+               /* Undo the relink. */
+               pagep->next_pgno = argp->next;
+               pagep->prev_pgno = argp->prev;
+
+               pagep->lsn = argp->lsn;
+               modified = 1;
+       }
+       if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
+               (void)__db_panic(file_dbp);
+               goto out;
+       }
+
+next:  if ((ret = memp_fget(mpf, &argp->next, 0, &pagep)) != 0) {
+               if (redo) {
+                       (void)__db_pgerr(file_dbp, argp->next);
+                       goto out;
+               }
+               goto prev;
+       }
+       modified = 0;
+       if (log_compare(&LSN(pagep), &argp->lsn_next) == 0 && redo) {
+               /* Redo the relink. */
+               pagep->prev_pgno = argp->prev;
+
+               pagep->lsn = *lsnp;
+               modified = 1;
+       } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
+               /* Undo the relink. */
+               pagep->prev_pgno = argp->pgno;
+
+               pagep->lsn = argp->lsn_next;
+               modified = 1;
+       }
+       if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
+               (void)__db_panic(file_dbp);
+               goto out;
+       }
+
+prev:  if ((ret = memp_fget(mpf, &argp->prev, 0, &pagep)) != 0) {
+               if (redo) {
+                       (void)__db_pgerr(file_dbp, argp->prev);
+                       goto out;
+               }
+               goto done;
+       }
+       modified = 0;
+       if (log_compare(&LSN(pagep), &argp->lsn_prev) == 0 && redo) {
+               /* Redo the relink. */
+               pagep->next_pgno = argp->next;
+
+               pagep->lsn = *lsnp;
+               modified = 1;
+       } else if (log_compare(lsnp, &LSN(pagep)) == 0 && !redo) {
+               /* Undo the relink. */
+               pagep->next_pgno = argp->pgno;
+
+               pagep->lsn = argp->lsn_prev;
+               modified = 1;
+       }
+       if ((ret = memp_fput(mpf, pagep, modified ? DB_MPOOL_DIRTY : 0)) != 0) {
+               (void)__db_panic(file_dbp);
+               goto out;
+       }
+
+       /* Reached directly when undoing and the previous page is missing. */
+done:  *lsnp = argp->prev_lsn;
+       ret = 0;
+
+out:   REC_CLOSE;
+}
+
+/*
+ * __db_addpage_recover --
+ *     Recovery function for linking a new page after an existing one.
+ *
+ * PUBLIC: int __db_addpage_recover
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * redo is non-zero to reapply the logged change and zero to back it out.
+ * Redo sets old->next = argp->nextpgno and new->prev = argp->pgno; undo
+ * resets both links to PGNO_INVALID.  *lsnp is set to argp->prev_lsn at
+ * the out label only when ret is 0.
+ *
+ * NOTE(review): argp, file_dbp and mpf are never assigned in the visible
+ * code, so REC_INTRO presumably sets them up, and REC_CLOSE presumably
+ * returns ret -- confirm against the macro definitions.
+ */
+int
+__db_addpage_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __db_addpage_args *argp;
+       DB *file_dbp, *mdbp;
+       DB_MPOOLFILE *mpf;
+       PAGE *pagep;
+       int change, cmp_n, cmp_p, ret;
+
+       REC_PRINT(__db_addpage_print);
+       REC_INTRO(__db_addpage_read);
+
+       /*
+        * We need to check two pages: the old one and the new one onto
+        * which we're going to add duplicates.  Do the old one first.
+        */
+       /* Unlike the new page below, a missing old page is always an error. */
+       if ((ret = memp_fget(mpf, &argp->pgno, 0, &pagep)) != 0)
+               goto out;
+
+       change = 0;
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->lsn);
+       if (cmp_p == 0 && redo) {
+               NEXT_PGNO(pagep) = argp->nextpgno;
+
+               LSN(pagep) = *lsnp;
+               change = DB_MPOOL_DIRTY;
+       } else if (cmp_n == 0 && !redo) {
+               NEXT_PGNO(pagep) = PGNO_INVALID;
+
+               LSN(pagep) = argp->lsn;
+               change = DB_MPOOL_DIRTY;
+       }
+       if ((ret = memp_fput(mpf, pagep, change)) != 0)
+               goto out;
+
+       /* Now the new page; the else below pairs with if (!redo). */
+       if ((ret = memp_fget(mpf, &argp->nextpgno, 0, &pagep)) != 0)
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        */
+                       ret = 0;
+                       goto out;
+               } else
+                       if ((ret = memp_fget(mpf,
+                           &argp->nextpgno, DB_MPOOL_CREATE, &pagep)) != 0)
+                               goto out;
+
+       change = 0;
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
+       if (cmp_p == 0 && redo) {
+               PREV_PGNO(pagep) = argp->pgno;
+
+               LSN(pagep) = *lsnp;
+               change = DB_MPOOL_DIRTY;
+       } else if (cmp_n == 0 && !redo) {
+               PREV_PGNO(pagep) = PGNO_INVALID;
+
+               LSN(pagep) = argp->nextlsn;
+               change = DB_MPOOL_DIRTY;
+       }
+       ret = memp_fput(mpf, pagep, change);
+
+       /* Every exit path funnels through here. */
+out:   if (ret == 0)
+               *lsnp = argp->prev_lsn;
+       REC_CLOSE;
+}
+
+/*
+ * __db_debug_recover --
+ *     Recovery function for debug.
+ *
+ *     No page is touched: the visible code only sets *lsnp to
+ *     argp->prev_lsn and ret to 0, regardless of redo.
+ *
+ *     NOTE(review): argp is never assigned in the visible code, so
+ *     REC_NOOP_INTRO presumably sets it up, and REC_NOOP_CLOSE presumably
+ *     returns ret -- confirm against the macro definitions.
+ *
+ * PUBLIC: int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__db_debug_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __db_debug_args *argp;
+       int ret;
+
+       REC_PRINT(__db_debug_print);
+       REC_NOOP_INTRO(__db_debug_read);
+
+       *lsnp = argp->prev_lsn;
+       ret = 0;
+
+       REC_NOOP_CLOSE;
+}
+
+/*
+ * __db_noop_recover --
+ *     Recovery function for noop.
+ *
+ *     No page is touched: the visible code only sets *lsnp to
+ *     argp->prev_lsn and ret to 0, regardless of redo.
+ *
+ *     NOTE(review): argp is never assigned in the visible code, so
+ *     REC_NOOP_INTRO presumably sets it up, and REC_NOOP_CLOSE presumably
+ *     returns ret -- confirm against the macro definitions.
+ *
+ * PUBLIC: int __db_noop_recover
+ * PUBLIC:   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__db_noop_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __db_noop_args *argp;
+       int ret;
+
+       REC_PRINT(__db_noop_print);
+       REC_NOOP_INTRO(__db_noop_read);
+
+       *lsnp = argp->prev_lsn;
+       ret = 0;
+
+       REC_NOOP_CLOSE;
+}
diff --git a/db2/db/db_ret.c b/db2/db/db_ret.c
new file mode 100644 (file)
index 0000000..ddeb26e
--- /dev/null
@@ -0,0 +1,149 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_ret.c     10.5 (Sleepycat) 7/12/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+#include "hash.h"
+#include "db_am.h"
+
+/*
+ * __db_ret --
+ *     Build the return DBT for item indx on page h.  Off-page items are
+ *     handed to __db_goff(); on-page items are copied by __db_retcopy().
+ *
+ * PUBLIC: int __db_ret __P((DB *,
+ * PUBLIC:    PAGE *, u_int32_t, DBT *, void **, u_int32_t *));
+ */
+int
+__db_ret(dbp, h, indx, dbt, memp, memsize)
+       DB *dbp;
+       PAGE *h;
+       u_int32_t indx;
+       DBT *dbt;
+       void **memp;
+       u_int32_t *memsize;
+{
+       BKEYDATA *bkey;
+       BOVERFLOW *bovfl;
+       HKEYDATA *hkey;
+       HOFFPAGE hoff;
+       u_int32_t nbytes;
+       void *src;
+
+       switch (TYPE(h)) {
+       case P_HASH:
+               hkey = (HKEYDATA *)P_ENTRY(h, indx);
+               if (hkey->type == H_OFFPAGE) {
+                       /* Work from a local copy of the off-page header. */
+                       memcpy(&hoff, hkey, sizeof(HOFFPAGE));
+                       return (__db_goff(dbp, dbt,
+                           hoff.tlen, hoff.pgno, memp, memsize));
+               }
+               nbytes = LEN_HKEYDATA(h, dbp->pgsize, indx);
+               src = hkey->data;
+               break;
+       case P_DUPLICATE:
+       case P_LBTREE:
+       case P_LRECNO:
+               bkey = GET_BKEYDATA(h, indx);
+               if (bkey->type == B_OVERFLOW) {
+                       bovfl = (BOVERFLOW *)bkey;
+                       return (__db_goff(dbp, dbt,
+                           bovfl->tlen, bovfl->pgno, memp, memsize));
+               }
+               nbytes = bkey->len;
+               src = bkey->data;
+               break;
+       default:
+               /* Any other page type is reported as a format error. */
+               return (__db_pgfmt(dbp, h->pgno));
+       }
+
+       /* Internal DBTs never use the application's malloc function. */
+       if (F_ISSET(dbt, DB_DBT_INTERNAL))
+               return (__db_retcopy(dbt, src, nbytes, memp, memsize, NULL));
+       return (__db_retcopy(dbt,
+           src, nbytes, memp, memsize, dbp->db_malloc));
+}
+
+/*
+ * __db_retcopy --
+ *     Copy the returned data into the user's DBT, handling special flags.
+ *
+ *     dbt->size is always set to the (possibly partial) record length.
+ *     The destination depends on the DBT flags:
+ *       DB_DBT_MALLOC   a new buffer from db_malloc, or malloc if NULL;
+ *       DB_DBT_USERMEM  the caller's dbt->data; ENOMEM if ulen < length;
+ *       otherwise       the reusable buffer *memp (of *memsize bytes),
+ *                       grown as needed; EINVAL if memp or memsize is
+ *                       NULL.
+ *     Returns 0 on success, ENOMEM if memory cannot be obtained.  If
+ *     growing *memp fails, the old buffer is freed and *memp/*memsize
+ *     are reset to NULL/0.
+ *
+ * PUBLIC: int __db_retcopy __P((DBT *,
+ * PUBLIC:    void *, u_int32_t, void **, u_int32_t *, void *(*)(size_t)));
+ */
+int
+__db_retcopy(dbt, data, len, memp, memsize, db_malloc)
+       DBT *dbt;
+       void *data;
+       u_int32_t len;
+       void **memp;
+       u_int32_t *memsize;
+       void *(*db_malloc) __P((size_t));
+{
+       /* If returning a partial record, reset the length. */
+       if (F_ISSET(dbt, DB_DBT_PARTIAL)) {
+               data = (u_int8_t *)data + dbt->doff;
+               if (len > dbt->doff) {
+                       len -= dbt->doff;
+                       if (len > dbt->dlen)
+                               len = dbt->dlen;
+               } else
+                       len = 0;
+       }
+
+       /*
+        * Return the length of the returned record in the DBT size field.
+        * This satisfies the requirement that if we're using user memory
+        * and insufficient memory was provided, return the amount necessary
+        * in the size field.
+        */
+       dbt->size = len;
+
+       /*
+        * Allocate any necessary memory.
+        *
+        * XXX: Never allocate 0 bytes.
+        */
+       if (F_ISSET(dbt, DB_DBT_MALLOC)) {
+               dbt->data = db_malloc == NULL ?
+                   (void *)malloc(len + 1) :
+                   (void *)db_malloc(len + 1);
+               if (dbt->data == NULL)
+                       return (ENOMEM);
+       } else if (F_ISSET(dbt, DB_DBT_USERMEM)) {
+               if (dbt->ulen < len)
+                       return (ENOMEM);
+       } else if (memp == NULL || memsize == NULL) {
+               return (EINVAL);
+       } else {
+               if (*memsize == 0 || *memsize < len) {
+                       void *newp;
+
+                       newp = *memp == NULL ?
+                           (void *)malloc(len + 1) :
+                           (void *)realloc(*memp, len + 1);
+                       if (newp == NULL) {
+                               /*
+                                * A failed realloc leaves the old block
+                                * allocated; free it before dropping the
+                                * only pointer to it.  The NULL test keeps
+                                * to this code's habit of never handing
+                                * NULL to the allocator.
+                                */
+                               if (*memp != NULL)
+                                       free(*memp);
+                               *memp = NULL;
+                               *memsize = 0;
+                               return (ENOMEM);
+                       }
+                       *memp = newp;
+                       *memsize = len + 1;
+               }
+               dbt->data = *memp;
+       }
+
+       memcpy(dbt->data, data, len);
+       return (0);
+}
diff --git a/db2/db/db_thread.c b/db2/db/db_thread.c
new file mode 100644 (file)
index 0000000..e956e80
--- /dev/null
@@ -0,0 +1,125 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_thread.c  8.11 (Sleepycat) 8/18/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "shqueue.h"
+#include "db_am.h"
+
+static int __db_getlockid __P((DB *, DB *));
+
+/*
+ * __db_gethandle --
+ *     Called by db access method routines when the DB_THREAD flag is set.
+ *     Hands back a handle through dbpp: one taken from dbp's chain of
+ *     idle handles if any is queued, otherwise a freshly allocated copy
+ *     of dbp initialized by __db_getlockid() and am_func().  The chain is
+ *     accessed with dbp's mutex held.
+ *
+ * PUBLIC: int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **));
+ */
+int
+__db_gethandle(dbp, am_func, dbpp)
+       DB *dbp, **dbpp;
+       int (*am_func) __P((DB *, DB *));
+{
+       DB *handle;
+       int ret, t_ret;
+
+       ret = __db_mutex_lock((db_mutex_t *)dbp->mutex, -1,
+           dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield);
+       if (ret != 0)
+               return (ret);
+
+       handle = LIST_FIRST(&dbp->handleq);
+       if (handle != NULL) {
+               /* Reuse an idle handle. */
+               LIST_REMOVE(handle, links);
+               *dbpp = handle;
+               goto unlock;
+       }
+
+       /* Nothing queued: build a new handle as a copy of dbp. */
+       handle = (DB *)malloc(sizeof(*dbp));
+       if (handle == NULL) {
+               ret = ENOMEM;
+               goto unlock;
+       }
+       memcpy(handle, dbp, sizeof(*dbp));
+       handle->internal = NULL;
+       TAILQ_INIT(&handle->curs_queue);
+
+       /*
+        * Set the locker, the lock structure and the lock DBT, then call
+        * the access method specific dup function.  If either step fails
+        * the new handle is discarded and *dbpp is left untouched.
+        */
+       if ((ret = __db_getlockid(dbp, handle)) != 0 ||
+           (ret = am_func(dbp, handle)) != 0) {
+               FREE(handle, sizeof(*handle));
+               goto unlock;
+       }
+       *dbpp = handle;
+
+       /* An unlock failure is reported only if nothing failed earlier. */
+unlock:        t_ret = __db_mutex_unlock((db_mutex_t *)dbp->mutex, -1);
+       if (t_ret != 0 && ret == 0)
+               ret = t_ret;
+       return (ret);
+}
+
+/*
+ * __db_puthandle --
+ *     Put a DB handle back at the head of its master's handle chain so
+ *     that it can be reused later.
+ *
+ * PUBLIC: int __db_puthandle __P((DB *));
+ */
+int
+__db_puthandle(dbp)
+       DB *dbp;
+{
+       DB *owner;
+       int ret;
+
+       owner = dbp->master;
+
+       /* The chain is only modified while the master's mutex is held. */
+       ret = __db_mutex_lock((db_mutex_t *)owner->mutex, -1,
+           dbp->dbenv == NULL ? NULL : dbp->dbenv->db_yield);
+       if (ret == 0) {
+               LIST_INSERT_HEAD(&owner->handleq, dbp, links);
+               ret = __db_mutex_unlock((db_mutex_t *)owner->mutex, -1);
+       }
+       return (ret);
+}
+
+/*
+ * __db_getlockid --
+ *     Give new_dbp its own locker ID and copy the file lock information
+ *     from dbp.  Does nothing unless dbp has DB_AM_LOCKING set.
+ */
+static int
+__db_getlockid(dbp, new_dbp)
+       DB *dbp, *new_dbp;
+{
+       int ret;
+
+       if (!F_ISSET(dbp, DB_AM_LOCKING))
+               return (0);
+
+       ret = lock_id(dbp->dbenv->lk_info, &new_dbp->locker);
+       if (ret != 0)
+               return (ret);
+
+       /* The lock DBT must refer to the new handle's own lock structure. */
+       memcpy(new_dbp->lock.fileid, dbp->lock.fileid, DB_FILE_ID_LEN);
+       new_dbp->lock_dbt.data = &new_dbp->lock;
+       new_dbp->lock_dbt.size = sizeof(new_dbp->lock);
+       return (0);
+}
diff --git a/db2/db185/db185.c b/db2/db185/db185.c
new file mode 100644 (file)
index 0000000..933f55c
--- /dev/null
@@ -0,0 +1,472 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db185.c      8.13 (Sleepycat) 8/24/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db185_int.h"
+#include "common_ext.h"
+
+static int db185_close __P((DB185 *));
+static int db185_del __P((const DB185 *, const DBT185 *, u_int));
+static int db185_fd __P((const DB185 *));
+static int db185_get __P((const DB185 *, const DBT185 *, DBT185 *, u_int));
+static int db185_put __P((const DB185 *, DBT185 *, const DBT185 *, u_int));
+static int db185_seq __P((const DB185 *, DBT185 *, DBT185 *, u_int));
+static int db185_sync __P((const DB185 *, u_int));
+
+/*
+ * __dbopen --
+ *     DB 1.85 compatible open routine implemented on top of db_open().
+ *
+ *     Translates the 1.85 access method type and the optional BTREEINFO,
+ *     HASHINFO or RECNOINFO structure passed in openinfo into a DB_INFO,
+ *     opens the database, and wraps the resulting DB handle, together
+ *     with a cursor used for sequential operations, in a newly allocated
+ *     DB185 structure.  For recno, the file name is handed to db_open()
+ *     as the re_source backing file rather than as the database file.
+ *
+ *     Returns the DB185 handle, or NULL on failure.  On failure errno is
+ *     EINVAL for an unknown type or unsupported openinfo settings, and
+ *     otherwise the value returned by the failing db_open() or cursor
+ *     call.  If the initial calloc() fails, errno is not assigned here.
+ */
+DB185 *
+__dbopen(file, oflags, mode, type, openinfo)
+       const char *file;
+       int oflags, mode;
+       DBTYPE type;
+       const void *openinfo;
+{
+       const BTREEINFO *bi;
+       const HASHINFO *hi;
+       const RECNOINFO *ri;
+       DB *dbp;
+       DB185 *db185p;
+       DB_INFO dbinfo, *dbinfop;
+       int fd, s_errno;
+
+       if ((db185p = (DB185 *)calloc(1, sizeof(DB185))) == NULL)
+               return (NULL);
+       dbinfop = NULL;
+       memset(&dbinfo, 0, sizeof(dbinfo));
+
+       /*
+        * !!!
+        * The DBTYPE enum wasn't initialized in DB 185, so it's off-by-one
+        * from DB 2.0.
+        */
+       switch (type) {
+       case 0:                                 /* DB_BTREE */
+               type = DB_BTREE;
+               if ((bi = openinfo) != NULL) {
+                       dbinfop = &dbinfo;
+                       if (bi->flags & ~R_DUP)
+                               goto einval;
+                       if (bi->flags & R_DUP)
+                               dbinfop->flags |= DB_DUP;
+                       dbinfop->db_cachesize = bi->cachesize;
+                       dbinfop->bt_maxkey = bi->maxkeypage;
+                       dbinfop->bt_minkey = bi->minkeypage;
+                       dbinfop->db_pagesize = bi->psize;
+                       /*
+                        * !!!
+                        * Comparisons and prefix calls work because the DBT
+                        * structures in 1.85 and 2.0 have the same initial
+                        * fields.
+                        */
+                       dbinfop->bt_compare = bi->compare;
+                       dbinfop->bt_prefix = bi->prefix;
+                       dbinfop->db_lorder = bi->lorder;
+               }
+               break;
+       case 1:                                 /* DB_HASH */
+               type = DB_HASH;
+               if ((hi = openinfo) != NULL) {
+                       dbinfop = &dbinfo;
+                       dbinfop->db_pagesize = hi->bsize;
+                       dbinfop->h_ffactor = hi->ffactor;
+                       dbinfop->h_nelem = hi->nelem;
+                       dbinfop->db_cachesize = hi->cachesize;
+                       dbinfop->h_hash = hi->hash;
+                       dbinfop->db_lorder = hi->lorder;
+               }
+
+               break;
+       case 2:                                 /* DB_RECNO */
+               type = DB_RECNO;
+               dbinfop = &dbinfo;
+
+               /* DB 1.85 did renumbering by default. */
+               dbinfop->flags |= DB_RENUMBER;
+
+               /*
+                * !!!
+                * The file name given to DB 1.85 recno is the name of the DB
+                * 2.0 backing file.  If the file doesn't exist, create it if
+                * the user has the O_CREAT flag set, DB 1.85 did it for you,
+                * and DB 2.0 doesn't.
+                *
+                * !!!
+                * Note, the file name in DB 1.85 was a const -- we don't do
+                * that in DB 2.0, so do that cast.
+                */
+               if (file != NULL) {
+                       if (oflags & O_CREAT && __db_exists(file, NULL) != 0) {
+                               /*
+                                * Best effort: a failed open() is ignored,
+                                * but only a valid descriptor is closed.
+                                */
+                               if ((fd = open(file, oflags, mode)) != -1)
+                                       (void)close(fd);
+                       }
+                       dbinfop->re_source = (char *)file;
+                       file = NULL;
+               }
+
+               if ((ri = openinfo) != NULL) {
+                       /*
+                        * !!!
+                        * We can't support the bfname field.
+                        */
+#define        BFMSG   "DB: DB 1.85's recno bfname field is not supported.\n"
+                       if (ri->bfname != NULL) {
+                               (void)write(2, BFMSG, sizeof(BFMSG) - 1);
+                               goto einval;
+                       }
+
+                       if (ri->flags & ~(R_FIXEDLEN | R_NOKEY | R_SNAPSHOT))
+                               goto einval;
+                       if (ri->flags & R_FIXEDLEN) {
+                               dbinfop->flags |= DB_FIXEDLEN;
+                               if (ri->bval != 0) {
+                                       dbinfop->flags |= DB_PAD;
+                                       dbinfop->re_pad = ri->bval;
+                               }
+                       } else
+                               if (ri->bval != 0) {
+                                       dbinfop->flags |= DB_DELIMITER;
+                                       dbinfop->re_delim = ri->bval;
+                               }
+
+                       /*
+                        * !!!
+                        * We ignore the R_NOKEY flag, but that's okay, it was
+                        * only an optimization that was never implemented.
+                        */
+
+                       if (ri->flags & R_SNAPSHOT)
+                               dbinfop->flags |= DB_SNAPSHOT;
+
+                       dbinfop->db_cachesize = ri->cachesize;
+                       dbinfop->db_pagesize = ri->psize;
+                       dbinfop->db_lorder = ri->lorder;
+                       dbinfop->re_len = ri->reclen;
+               }
+               break;
+       default:
+               goto einval;
+       }
+
+       db185p->close = db185_close;
+       db185p->del = db185_del;
+       db185p->fd = db185_fd;
+       db185p->get = db185_get;
+       db185p->put = db185_put;
+       db185p->seq = db185_seq;
+       db185p->sync = db185_sync;
+
+       /*
+        * !!!
+        * Store the returned pointer to the real DB 2.0 structure in the
+        * internal pointer.  Ugly, but we're not going for pretty, here.
+        */
+       if ((errno = db_open(file,
+           type, __db_oflags(oflags), mode, NULL, dbinfop, &dbp)) != 0) {
+               /* Report db_open()'s error, whatever free() does to errno. */
+               s_errno = errno;
+               free(db185p);
+               errno = s_errno;
+               return (NULL);
+       }
+
+       /* Create the cursor used for sequential ops. */
+       if ((errno = dbp->cursor(dbp, NULL, &db185p->dbc)) != 0) {
+               s_errno = errno;
+               (void)dbp->close(dbp, 0);
+               free(db185p);
+               errno = s_errno;
+               return (NULL);
+       }
+
+       db185p->internal = dbp;
+       return (db185p);
+
+einval:        free(db185p);
+       errno = EINVAL;
+       return (NULL);
+}
+weak_alias (__dbopen, dbopen)
+
+/*
+ * db185_close --
+ *     DB 1.85 close method: close the underlying DB handle and free the
+ *     DB185 wrapper.  Returns 0 on success; on failure returns -1 with
+ *     errno set to the value returned by the DB close method.
+ */
+static int
+db185_close(db185p)
+       DB185 *db185p;
+{
+       DB *dbp;
+       int ret;
+
+       dbp = (DB *)db185p->internal;
+
+       ret = dbp->close(dbp, 0);
+
+       free(db185p);
+
+       /*
+        * Assign errno only after free(), so that both errno and the return
+        * value reflect the close status no matter what free() does.
+        */
+       errno = ret;
+       return (ret == 0 ? 0 : -1);
+}
+
+/*
+ * db185_del --
+ *     DB 1.85 del method.
+ *
+ *     With R_CURSOR the record referenced by the sequential cursor is
+ *     deleted and key185 is not examined; with no flags the record
+ *     matching key185 is deleted.  Any other flag yields EINVAL.
+ *
+ *     Returns 0 on success, 1 if the DB 2.0 call returned DB_NOTFOUND,
+ *     and -1 otherwise; errno holds the DB 2.0 return value (or EINVAL).
+ */
+static int
+db185_del(db185p, key185, flags)
+       const DB185 *db185p;
+       const DBT185 *key185;
+       u_int flags;
+{
+       DB *dbp;
+       DBT key;
+
+       dbp = (DB *)db185p->internal;
+
+       /* Validate the flags before touching any caller supplied data. */
+       if (flags & ~R_CURSOR)
+               goto einval;
+       if (flags & R_CURSOR)
+               errno = db185p->dbc->c_del(db185p->dbc, 0);
+       else {
+               /* The key is only needed for a keyed delete. */
+               memset(&key, 0, sizeof(key));
+               key.data = key185->data;
+               key.size = key185->size;
+               errno = dbp->del(dbp, NULL, &key, 0);
+       }
+
+       switch (errno) {
+       case 0:
+               return (0);
+       case DB_NOTFOUND:
+               return (1);
+       }
+       return (-1);
+
+einval:        errno = EINVAL;
+       return (-1);
+}
+
+/*
+ * db185_fd --
+ *     DB 1.85 fd method.  Returns the descriptor reported by the DB 2.0
+ *     fd method, or -1 with errno set to that method's non-zero return.
+ */
+static int
+db185_fd(db185p)
+       const DB185 *db185p;
+{
+       DB *dbp;
+       int fd;
+
+       dbp = (DB *)db185p->internal;
+
+       errno = dbp->fd(dbp, &fd);
+       if (errno != 0)
+               return (-1);
+       return (fd);
+}
+
+/*
+ * db185_get --
+ *     DB 1.85 get method.  No flags are accepted; any non-zero flags
+ *     value fails with EINVAL.
+ *
+ *     Returns 0 with data185 updated on success, 1 if the DB 2.0 get
+ *     returned DB_NOTFOUND, and -1 otherwise; errno holds the DB 2.0
+ *     return value (or EINVAL).
+ */
+static int
+db185_get(db185p, key185, data185, flags)
+       const DB185 *db185p;
+       const DBT185 *key185;
+       DBT185 *data185;
+       u_int flags;
+{
+       DB *dbp;
+       DBT key, data;
+       int ret;
+
+       dbp = (DB *)db185p->internal;
+
+       /* Build DB 2.0 DBTs from the 1.85 ones. */
+       memset(&key, 0, sizeof(key));
+       memset(&data, 0, sizeof(data));
+       key.data = key185->data;
+       key.size = key185->size;
+       data.data = data185->data;
+       data.size = data185->size;
+
+       if (flags != 0) {
+               errno = EINVAL;
+               return (-1);
+       }
+
+       ret = dbp->get(dbp, NULL, &key, &data, 0);
+       errno = ret;
+       if (ret == 0) {
+               data185->data = data.data;
+               data185->size = data.size;
+               return (0);
+       }
+       return (ret == DB_NOTFOUND ? 1 : -1);
+}
+
+/*
+ * db185_put --
+ *     DB 1.85 put method.
+ *
+ *     flags selects the operation: 0 (plain put), R_NOOVERWRITE,
+ *     R_CURSOR (replace at the sequential cursor), R_SETCURSOR (put, then
+ *     position the sequential cursor; btree and recno only), and
+ *     R_IAFTER/R_IBEFORE (insert relative to the given key through a
+ *     temporary cursor; recno only).  Anything else fails with EINVAL.
+ *
+ *     Returns 0 with key185 updated on success, 1 if the DB 2.0 call
+ *     returned DB_KEYEXIST, and -1 otherwise; errno holds the DB 2.0
+ *     return value (or EINVAL).
+ */
+static int
+db185_put(db185p, key185, data185, flags)
+       const DB185 *db185p;
+       DBT185 *key185;
+       const DBT185 *data185;
+       u_int flags;
+{
+       DB *dbp;
+       DBC *dbcp_put;
+       DBT key, data;
+       int positioned, s_errno;
+
+       dbp = (DB *)db185p->internal;
+
+       /* Build DB 2.0 DBTs from the 1.85 ones. */
+       memset(&key, 0, sizeof(key));
+       memset(&data, 0, sizeof(data));
+       key.data = key185->data;
+       key.size = key185->size;
+       data.data = data185->data;
+       data.size = data185->size;
+
+       if (flags == 0)
+               errno = dbp->put(dbp, NULL, &key, &data, 0);
+       else if (flags == R_NOOVERWRITE)
+               errno = dbp->put(dbp, NULL, &key, &data, DB_NOOVERWRITE);
+       else if (flags == R_CURSOR)
+               errno =
+                   db185p->dbc->c_put(db185p->dbc, &key, &data, DB_CURRENT);
+       else if (flags == R_SETCURSOR) {
+               if (dbp->type != DB_BTREE && dbp->type != DB_RECNO)
+                       goto einval;
+
+               /* Store the pair, then move the sequential cursor to it. */
+               errno = dbp->put(dbp, NULL, &key, &data, 0);
+               if (errno == 0)
+                       errno = db185p->dbc->c_get(db185p->dbc,
+                           &key, &data, DB_SET_RANGE);
+       } else if (flags == R_IAFTER || flags == R_IBEFORE) {
+               if (dbp->type != DB_RECNO)
+                       goto einval;
+
+               errno = dbp->cursor(dbp, NULL, &dbcp_put);
+               if (errno != 0)
+                       return (-1);
+
+               /* Position a private cursor on the key, then insert. */
+               errno = dbcp_put->c_get(dbcp_put, &key, &data, DB_SET);
+               positioned = errno == 0;
+               if (positioned) {
+                       /* The lookup overwrote data; restore the caller's. */
+                       memset(&data, 0, sizeof(data));
+                       data.data = data185->data;
+                       data.size = data185->size;
+                       errno = dbcp_put->c_put(dbcp_put, &key, &data,
+                           flags == R_IAFTER ? DB_AFTER : DB_BEFORE);
+               }
+
+               /* Closing the cursor must not change the reported error. */
+               s_errno = errno;
+               (void)dbcp_put->c_close(dbcp_put);
+               errno = s_errno;
+
+               /* A failed lookup is always an error, never "key exists". */
+               if (!positioned)
+                       return (-1);
+       } else
+               goto einval;
+
+       if (errno == 0) {
+               key185->data = key.data;
+               key185->size = key.size;
+               return (0);
+       }
+       return (errno == DB_KEYEXIST ? 1 : -1);
+
+einval:        errno = EINVAL;
+       return (-1);
+}
+
+/*
+ * db185_seq --
+ *     DB 1.85 seq method, implemented with the handle's sequential
+ *     cursor.
+ *
+ *     R_CURSOR, R_FIRST, R_LAST, R_NEXT and R_PREV are translated to
+ *     DB_SET_RANGE, DB_FIRST, DB_LAST, DB_NEXT and DB_PREV; R_LAST and
+ *     R_PREV are accepted for btree and recno only.  Anything else fails
+ *     with EINVAL.
+ *
+ *     Returns 0 with key185 and data185 updated on success, 1 if the
+ *     cursor get returned DB_NOTFOUND, and -1 otherwise; errno holds the
+ *     DB 2.0 return value (or EINVAL).
+ */
+static int
+db185_seq(db185p, key185, data185, flags)
+       const DB185 *db185p;
+       DBT185 *key185, *data185;
+       u_int flags;
+{
+       DB *dbp;
+       DBC *cursor;
+       DBT key, data;
+       u_int op;
+       int ordered;
+
+       dbp = (DB *)db185p->internal;
+       cursor = db185p->dbc;
+
+       /* Build DB 2.0 DBTs from the 1.85 ones. */
+       memset(&key, 0, sizeof(key));
+       memset(&data, 0, sizeof(data));
+       key.data = key185->data;
+       key.size = key185->size;
+       data.data = data185->data;
+       data.size = data185->size;
+
+       /* Backward positioning is limited to btree and recno. */
+       ordered = dbp->type == DB_BTREE || dbp->type == DB_RECNO;
+
+       if (flags == R_CURSOR)
+               op = DB_SET_RANGE;
+       else if (flags == R_FIRST)
+               op = DB_FIRST;
+       else if (flags == R_NEXT)
+               op = DB_NEXT;
+       else if (flags == R_LAST && ordered)
+               op = DB_LAST;
+       else if (flags == R_PREV && ordered)
+               op = DB_PREV;
+       else {
+               errno = EINVAL;
+               return (-1);
+       }
+
+       errno = cursor->c_get(cursor, &key, &data, op);
+       if (errno == DB_NOTFOUND)
+               return (1);
+       if (errno != 0)
+               return (-1);
+
+       key185->data = key.data;
+       key185->size = key.size;
+       data185->data = data.data;
+       data185->size = data.size;
+       return (0);
+}
+
+/*
+ * db185_sync --
+ *     DB 1.85 sync method.  Only a flags value of 0 is supported;
+ *     R_RECNOSYNC writes a diagnostic to descriptor 2 and, like any other
+ *     non-zero value, fails with EINVAL.
+ *
+ *     Returns 0 on success, or -1 with errno set to the DB 2.0 sync
+ *     return value (or EINVAL).
+ */
+static int
+db185_sync(db185p, flags)
+       const DB185 *db185p;
+       u_int flags;
+{
+       DB *dbp;
+
+       dbp = (DB *)db185p->internal;
+
+       if (flags == R_RECNOSYNC) {
+               /*
+                * !!!
+                * We can't support the R_RECNOSYNC flag.
+                */
+#define        RSMSG   "DB: DB 1.85's R_RECNOSYNC sync flag is not supported.\n"
+               (void)write(2, RSMSG, sizeof(RSMSG) - 1);
+               errno = EINVAL;
+               return (-1);
+       }
+       if (flags != 0) {
+               errno = EINVAL;
+               return (-1);
+       }
+
+       errno = dbp->sync(dbp, 0);
+       return (errno == 0 ? 0 : -1);
+}
diff --git a/db2/db185/db185_int.h b/db2/db185/db185_int.h
new file mode 100644 (file)
index 0000000..656dfdd
--- /dev/null
@@ -0,0 +1,137 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)db185_int.h 8.4 (Sleepycat) 7/27/97
+ */
+
+#ifndef _DB185_H_
+#define        _DB185_H_
+
+/* Routine flags. */
+#define        R_CURSOR        1               /* del, put, seq */
+#define        __R_UNUSED      2               /* UNUSED */
+#define        R_FIRST         3               /* seq */
+#define        R_IAFTER        4               /* put (RECNO) */
+#define        R_IBEFORE       5               /* put (RECNO) */
+#define        R_LAST          6               /* seq (BTREE, RECNO) */
+#define        R_NEXT          7               /* seq */
+#define        R_NOOVERWRITE   8               /* put */
+#define        R_PREV          9               /* seq (BTREE, RECNO) */
+#define        R_SETCURSOR     10              /* put (RECNO) */
+#define        R_RECNOSYNC     11              /* sync (RECNO) */
+
+typedef struct {
+       void    *data;                  /* data */
+       size_t   size;                  /* data length */
+} DBT185;
+
+/*
+ * Access method description structure handed back to DB 1.85 callers.
+ *
+ * __dbopen() allocates it zero-filled, points the method members at the
+ * db185_* wrappers, stores the DB 2.0 handle in `internal' and the
+ * sequential cursor in `dbc'.
+ *
+ * NOTE(review): no assignment to `type' is visible in __dbopen(), so it
+ * appears to keep its zeroed value -- confirm whether any caller reads it.
+ */
+typedef struct __db185 {
+       DBTYPE type;                    /* Underlying db type. */
+       int (*close)    __P((struct __db185 *));
+       int (*del)      __P((const struct __db185 *, const DBT185 *, u_int));
+       int (*get)
+           __P((const struct __db185 *, const DBT185 *, DBT185 *, u_int));
+       int (*put)
+           __P((const struct __db185 *, DBT185 *, const DBT185 *, u_int));
+       int (*seq)
+           __P((const struct __db185 *, DBT185 *, DBT185 *, u_int));
+       int (*sync)     __P((const struct __db185 *, u_int));
+       void *internal;                 /* Access method private (the DB *). */
+       int (*fd)       __P((const struct __db185 *));
+
+       /*
+        * !!!
+        * Added to the end of the DB 1.85 DB structure, it's needed to
+        * hold the DB 2.0 cursor used for DB 1.85 sequential operations.
+        */
+       DBC *dbc;                       /* DB 1.85 sequential cursor. */
+} DB185;
+
+/*
+ * Structure used to pass parameters to the btree routines.
+ *
+ * __dbopen() copies these settings into a DB_INFO when the type is btree
+ * and openinfo is non-NULL; any flags bit other than R_DUP makes the open
+ * fail with EINVAL.
+ */
+typedef struct {
+#define        R_DUP           0x01    /* duplicate keys */
+       u_long  flags;
+       u_int   cachesize;      /* bytes to cache */
+       int     maxkeypage;     /* maximum keys per page */
+       int     minkeypage;     /* minimum keys per page */
+       u_int   psize;          /* page size */
+       int     (*compare)      /* comparison function */
+           __P((const DBT *, const DBT *));
+       size_t  (*prefix)       /* prefix function */
+           __P((const DBT *, const DBT *));
+       int     lorder;         /* byte order */
+} BTREEINFO;
+
+/*
+ * Structure used to pass parameters to the hashing routines.
+ *
+ * __dbopen() copies every member into the corresponding DB_INFO member
+ * when the type is hash and openinfo is non-NULL.
+ */
+typedef struct {
+       u_int   bsize;          /* bucket size */
+       u_int   ffactor;        /* fill factor */
+       u_int   nelem;          /* number of elements */
+       u_int   cachesize;      /* bytes to cache */
+       u_int32_t               /* hash function */
+               (*hash) __P((const void *, size_t));
+       int     lorder;         /* byte order */
+} HASHINFO;
+
+/*
+ * Structure used to pass parameters to the record routines.
+ *
+ * As interpreted by __dbopen(): flags outside R_FIXEDLEN, R_NOKEY and
+ * R_SNAPSHOT, or a non-NULL bfname, make the open fail with EINVAL;
+ * R_NOKEY is accepted but ignored; a non-zero bval is used as the pad
+ * byte when R_FIXEDLEN is set and as the record delimiter otherwise.
+ */
+typedef struct {
+#define        R_FIXEDLEN      0x01    /* fixed-length records */
+#define        R_NOKEY         0x02    /* key not required */
+#define        R_SNAPSHOT      0x04    /* snapshot the input */
+       u_long  flags;
+       u_int   cachesize;      /* bytes to cache */
+       u_int   psize;          /* page size */
+       int     lorder;         /* byte order */
+       size_t  reclen;         /* record length (fixed-length records) */
+       u_char  bval;           /* delimiting byte (variable-length records) */
+       char    *bfname;        /* btree file name (not supported) */
+} RECNOINFO;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+DB185 *dbopen __P((const char *, int, int, DBTYPE, const void *));
+#if defined(__cplusplus)
+};
+#endif
+#endif /* !_DB185_H_ */
diff --git a/db2/db_185.h b/db2/db_185.h
new file mode 100644 (file)
index 0000000..650d365
--- /dev/null
@@ -0,0 +1,171 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)db_185.h.src        8.3 (Sleepycat) 7/27/97
+ */
+
+#ifndef _DB_185_H_
+#define        _DB_185_H_
+
+#include <sys/types.h>
+
+#include <limits.h>
+
+/*
+ * XXX
+ * Handle function prototypes and the keyword "const".  This steps on name
+ * space that DB doesn't control, but all of the other solutions are worse.
+ */
+#undef __P
+#if defined(__STDC__) || defined(__cplusplus)
+#define        __P(protos)     protos          /* ANSI C prototypes */
+#else
+#define        const
+#define        __P(protos)     ()              /* K&R C preprocessor */
+#endif
+
+#define        RET_ERROR       -1              /* Return values. */
+#define        RET_SUCCESS      0
+#define        RET_SPECIAL      1
+
+#ifndef        __BIT_TYPES_DEFINED__
+#define        __BIT_TYPES_DEFINED__
+
+
+
+
+
+#endif
+
+#define        MAX_PAGE_NUMBER 0xffffffff      /* >= # of pages in a file */
+typedef u_int32_t      pgno_t;
+#define        MAX_PAGE_OFFSET 65535           /* >= # of bytes in a page */
+typedef u_int16_t      indx_t;
+#define        MAX_REC_NUMBER  0xffffffff      /* >= # of records in a tree */
+typedef u_int32_t      recno_t;
+
+/* Key/data structure -- a Data-Base Thang. */
+typedef struct {
+       void    *data;                  /* data */
+       size_t   size;                  /* data length */
+} DBT;
+
+/* Routine flags. */
+#define        R_CURSOR        1               /* del, put, seq */
+#define        __R_UNUSED      2               /* UNUSED */
+#define        R_FIRST         3               /* seq */
+#define        R_IAFTER        4               /* put (RECNO) */
+#define        R_IBEFORE       5               /* put (RECNO) */
+#define        R_LAST          6               /* seq (BTREE, RECNO) */
+#define        R_NEXT          7               /* seq */
+#define        R_NOOVERWRITE   8               /* put */
+#define        R_PREV          9               /* seq (BTREE, RECNO) */
+#define        R_SETCURSOR     10              /* put (RECNO) */
+#define        R_RECNOSYNC     11              /* sync (RECNO) */
+
+typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
+
+/* Access method description structure. */
+typedef struct __db {
+       DBTYPE type;                    /* Underlying db type. */
+       int (*close)    __P((struct __db *));
+       int (*del)      __P((const struct __db *, const DBT *, u_int));
+       int (*get)      __P((const struct __db *, const DBT *, DBT *, u_int));
+       int (*put)      __P((const struct __db *, DBT *, const DBT *, u_int));
+       int (*seq)      __P((const struct __db *, DBT *, DBT *, u_int));
+       int (*sync)     __P((const struct __db *, u_int));
+       void *internal;                 /* Access method private. */
+       int (*fd)       __P((const struct __db *));
+} DB;
+
+#define        BTREEMAGIC      0x053162
+#define        BTREEVERSION    3
+
+/* Structure used to pass parameters to the btree routines. */
+typedef struct {
+#define        R_DUP           0x01    /* duplicate keys */
+       u_long  flags;
+       u_int   cachesize;      /* bytes to cache */
+       int     maxkeypage;     /* maximum keys per page */
+       int     minkeypage;     /* minimum keys per page */
+       u_int   psize;          /* page size */
+       int     (*compare)      /* comparison function */
+           __P((const DBT *, const DBT *));
+       size_t  (*prefix)       /* prefix function */
+           __P((const DBT *, const DBT *));
+       int     lorder;         /* byte order */
+} BTREEINFO;
+
+#define        HASHMAGIC       0x061561
+#define        HASHVERSION     2
+
+/* Structure used to pass parameters to the hashing routines. */
+typedef struct {
+       u_int   bsize;          /* bucket size */
+       u_int   ffactor;        /* fill factor */
+       u_int   nelem;          /* number of elements */
+       u_int   cachesize;      /* bytes to cache */
+       u_int32_t               /* hash function */
+               (*hash) __P((const void *, size_t));
+       int     lorder;         /* byte order */
+} HASHINFO;
+
+/*
+ * Structure used to pass parameters to the record routines; handed to
+ * dbopen() through its openinfo argument.
+ */
+typedef struct {
+#define        R_FIXEDLEN      0x01    /* fixed-length records */
+#define        R_NOKEY         0x02    /* key not required */
+#define        R_SNAPSHOT      0x04    /* snapshot the input */
+       u_long  flags;
+       u_int   cachesize;      /* bytes to cache */
+       u_int   psize;          /* page size */
+       int     lorder;         /* byte order */
+       size_t  reclen;         /* record length (fixed-length records) */
+       u_char  bval;           /* delimiting byte (variable-length records) */
+       char    *bfname;        /* btree file name */
+} RECNOINFO;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+DB *__dbopen __P((const char *, int, int, DBTYPE, const void *));
+DB *dbopen __P((const char *, int, int, DBTYPE, const void *));
+
+#if defined(__cplusplus)
+};
+#endif
+#endif /* !_DB_185_H_ */
diff --git a/db2/db_int.h b/db2/db_int.h
new file mode 100644 (file)
index 0000000..23fb106
--- /dev/null
@@ -0,0 +1,332 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)db_int.h.src        10.28 (Sleepycat) 8/20/97
+ */
+
+#ifndef _DB_INTERNAL_H_
+#define        _DB_INTERNAL_H_
+
+#include "db.h"                                /* Standard DB include file. */
+#include "queue.h"
+#include "os_ext.h"
+
+/*******************************************************
+ * General purpose constants and macros.
+ *******************************************************/
+#define        UINT32_T_MAX    0xffffffff      /* Maximum 32 bit unsigned. */
+#define        UINT16_T_MAX        0xffff      /* Maximum 16 bit unsigned. */
+
+#define        DB_MIN_PGSIZE   0x000200        /* Minimum page size. */
+#define        DB_MAX_PGSIZE   0x010000        /* Maximum page size. */
+
+#define        DB_MINCACHE     10              /* Minimum cached pages */
+
+/*
+ * Aligning items to particular sizes or in pages or memory.  ALIGNP is a
+ * separate macro, as we've had to cast the pointer to different integral
+ * types on different architectures.
+ *
+ * We cast pointers into unsigned longs when manipulating them because C89
+ * guarantees that u_long is the largest available integral type and further,
+ * to never generate overflows.  However, neither C89 nor C9X requires that
+ * any integer type be large enough to hold a pointer, although C9X created
+ * the intptr_t type, which is guaranteed to hold a pointer but may or may
+ * not exist.  At some point in the future, we should test for intptr_t and
+ * use it where available.
+ */
+#undef ALIGNTYPE
+#define        ALIGNTYPE               u_long
+#undef ALIGNP
+#define        ALIGNP(value, bound)    ALIGN((ALIGNTYPE)value, bound)
+#undef ALIGN
+#define        ALIGN(value, bound)     (((value) + (bound) - 1) & ~((bound) - 1))
+
+/*
+ * There are several on-page structures that are declared to have a number of
+ * fields followed by a variable length array of items.  The structure size
+ * without including the variable length array or the address of the first of
+ * those elements can be found using SSZ.
+ *
+ * This macro can also be used to find the offset of a structure element in a
+ * structure.  This is used in various places to copy structure elements from
+ * unaligned memory references, e.g., pointers into a packed page.
+ *
+ * There are two versions because compilers object if you take the address of
+ * an array.
+ */
+#undef SSZ
+#define SSZ(name, field)       ((int)&(((name *)0)->field))
+
+#undef SSZA
+#define SSZA(name, field)      ((int)&(((name *)0)->field[0]))
+
+/* Free and free-string macros that overwrite memory during debugging. */
+#ifdef DEBUG
+#undef FREE
+#define        FREE(p, len) {                                                  \
+       memset(p, 0xff, len);                                           \
+       free(p);                                                        \
+}
+#undef FREES
+#define        FREES(p) {                                                      \
+       FREE(p, strlen(p));                                             \
+}
+#else
+#undef FREE
+#define        FREE(p, len) {                                                  \
+       free(p);                                                        \
+}
+#undef FREES
+#define        FREES(p) {                                                      \
+       free(p);                                                        \
+}
+#endif
+
+/* Structure used to print flag values. */
+typedef struct __fn {
+       u_int32_t mask;                 /* Flag value. */
+       const char *name;               /* Flag name. */
+} FN;
+
+/*
+ * Set, clear and test flags.
+ *
+ * The F_* forms operate on the `flags' member of the object that `p' points
+ * to; the LF_* forms operate on a variable named `flags' that is in scope at
+ * the point of use.  Every expansion is fully parenthesized so that it can
+ * be used as an operand of a larger expression (e.g. in a ?: arm).
+ */
+#define        F_SET(p, f)     ((p)->flags |= (f))
+#define        F_CLR(p, f)     ((p)->flags &= ~(f))
+#define        F_ISSET(p, f)   ((p)->flags & (f))
+#define        LF_SET(f)       (flags |= (f))
+#define        LF_CLR(f)       (flags &= ~(f))
+#define        LF_ISSET(f)     (flags & (f))
+
+/* Display separator string. */
+#undef DB_LINE
+#define        DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+/*******************************************************
+ * Files.
+ *******************************************************/
+#ifndef MAXPATHLEN             /* Maximum path length. */
+#ifdef PATH_MAX
+#define        MAXPATHLEN      PATH_MAX
+#else
+#define        MAXPATHLEN      1024
+#endif
+#endif
+
+#define        PATH_DOT        "."     /* Current working directory. */
+#define        PATH_SEPARATOR  "/"     /* Path separator character. */
+
+#ifndef S_IRUSR                        /* UNIX specific file permissions. */
+#define        S_IRUSR 0000400         /* R for owner */
+#define        S_IWUSR 0000200         /* W for owner */
+#define        S_IRGRP 0000040         /* R for group */
+#define        S_IWGRP 0000020         /* W for group */
+#define        S_IROTH 0000004         /* R for other */
+#define        S_IWOTH 0000002         /* W for other */
+#endif
+
+#ifndef S_ISDIR                        /* UNIX specific: directory test. */
+/* `m' is parenthesized so that an expression argument is masked as a whole. */
+#define        S_ISDIR(m)      (((m) & 0170000) == 0040000)
+#endif
+
+/*******************************************************
+ * Mutex support.
+ *******************************************************/
+typedef unsigned char tsl_t;
+
+
+
+/*
+ * !!!
+ * Various systems require different alignments for mutexes (the worst we've
+ * seen so far is 16-bytes on some HP architectures).  The mutex (tsl_t) must
+ * be first in the db_mutex_t structure, which must itself be first in the
+ * region.  This ensures the alignment is as returned by mmap(2), which should
+ * be sufficient.  All other mutex users must ensure proper alignment locally.
+ */
+#define        MUTEX_ALIGNMENT 1
+
+/*
+ * The offset of a mutex in memory.
+ */
+#define        MUTEX_LOCK_OFFSET(a, b) ((off_t)((u_int8_t *)b - (u_int8_t *)a))
+
+typedef struct _db_mutex_t {
+#ifdef HAVE_SPINLOCKS
+       tsl_t   tsl_resource;           /* Resource test and set. */
+#ifdef DEBUG
+       u_long  pid;                    /* Lock holder: 0 or process pid. */
+#endif
+#else
+       off_t   off;                    /* Backing file offset. */
+       u_long  pid;                    /* Lock holder: 0 or process pid. */
+#endif
+#ifdef MUTEX_STATISTICS
+       u_long  mutex_set_wait;         /* Blocking mutex: required waiting. */
+       u_long  mutex_set_nowait;       /* Blocking mutex: without waiting. */
+#endif
+} db_mutex_t;
+
+#include "mutex_ext.h"
+
+/*******************************************************
+ * Access methods.
+ *******************************************************/
+/* Lock/unlock a DB thread. */
+#define        DB_THREAD_LOCK(dbp)                                             \
+       (F_ISSET(dbp, DB_AM_THREAD) ?                                   \
+           __db_mutex_lock((db_mutex_t *)(dbp)->mutex,  -1,            \
+               (dbp)->dbenv == NULL ? NULL : (dbp)->dbenv->db_yield) : 0)
+#define        DB_THREAD_UNLOCK(dbp)                                           \
+       (F_ISSET(dbp, DB_AM_THREAD) ?                                   \
+           __db_mutex_unlock((db_mutex_t *)(dbp)->mutex,  -1) : 0)
+
+/* Btree/recno local statistics structure. */
+struct __db_bt_lstat;  typedef struct __db_bt_lstat DB_BTREE_LSTAT;
+struct __db_bt_lstat {
+       u_int32_t bt_freed;             /* Pages freed for reuse. */
+       u_int32_t bt_pfxsaved;          /* Bytes saved by prefix compression. */
+       u_int32_t bt_split;             /* Total number of splits. */
+       u_int32_t bt_rootsplit;         /* Root page splits. */
+       u_int32_t bt_fastsplit;         /* Fast splits. */
+       u_int32_t bt_added;             /* Items added. */
+       u_int32_t bt_deleted;           /* Items deleted. */
+       u_int32_t bt_get;               /* Items retrieved. */
+       u_int32_t bt_cache_hit;         /* Hits in fast-insert code. */
+       u_int32_t bt_cache_miss;        /* Misses in fast-insert code. */
+};
+
+/*******************************************************
+ * Environment.
+ *******************************************************/
+/* Type passed to __db_appname(). */
+typedef enum {
+       DB_APP_NONE=0,                  /* No type (region). */
+       DB_APP_DATA,                    /* Data file. */
+       DB_APP_LOG,                     /* Log file. */
+       DB_APP_TMP                      /* Temporary file. */
+} APPNAME;
+
+/*******************************************************
+ * Regions.
+ *******************************************************/
+/*
+ * The shared memory regions share an initial structure so that the general
+ * region code can handle races between the region being deleted and other
+ * processes waiting on the region mutex.
+ *
+ * !!!
+ * Note, the mutex must be the first entry in the region; see comment above.
+ */
+typedef struct _rlayout {
+       db_mutex_t lock;                /* Region mutex. */
+       u_int32_t  refcnt;              /* Region reference count. */
+       size_t     size;                /* Region length. */
+       int        majver;              /* Major version number. */
+       int        minver;              /* Minor version number. */
+       int        patch;               /* Patch version number. */
+
+#define        DB_R_DELETED    0x01            /* Region was deleted. */
+       u_int32_t  flags;
+} RLAYOUT;
+
+/*******************************************************
+ * Mpool.
+ *******************************************************/
+/*
+ * File types for DB access methods.  Negative numbers are reserved to DB.
+ */
+#define        DB_FTYPE_BTREE          -1      /* Btree. */
+#define        DB_FTYPE_HASH           -2      /* Hash. */
+
+/* Structure used as the DB pgin/pgout pgcookie. */
+typedef struct __dbpginfo {
+       size_t  db_pagesize;            /* Underlying page size. */
+       int     needswap;               /* If swapping required. */
+} DB_PGINFO;
+
+/*******************************************************
+ * Log.
+ *******************************************************/
+/* Initialize an LSN to 'zero'. */
+#define        ZERO_LSN(LSN) {                                                 \
+       (LSN).file = 0;                                                 \
+       (LSN).offset = 0;                                               \
+}
+
+/* Return 1 if LSN is a 'zero' lsn, otherwise return 0. */
+#define        IS_ZERO_LSN(LSN)        ((LSN).file == 0)
+
+/* Test if we need to log a change. */
+#define        DB_LOGGING(dbp) \
+       (F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
+
+#ifdef DEBUG
+/*
+ * Debugging macro to log operations.
+ *     If DEBUG_WOP is defined, log operations that modify the database.
+ *     If DEBUG_ROP is defined, log operations that read the database.
+ *
+ * D dbp
+ * T txn
+ * O operation (string)
+ * K key
+ * A data
+ * F flags
+ */
+#define        LOG_OP(D, T, O, K, A, F) {                                      \
+       DB_LSN _lsn;                                                    \
+       DBT _op;                                                        \
+       if (DB_LOGGING((D))) {                                          \
+               memset(&_op, 0, sizeof(_op));                           \
+               _op.data = O;                                           \
+               _op.size = strlen(O) + 1;                               \
+               (void)__db_debug_log((D)->dbenv->lg_info,               \
+                   T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F);       \
+       }                                                               \
+}
+#ifdef DEBUG_ROP
+#define        DEBUG_LREAD(D, T, O, K, A, F)   LOG_OP(D, T, O, K, A, F)
+#else
+#define        DEBUG_LREAD(D, T, O, K, A, F)
+#endif
+#ifdef DEBUG_WOP
+#define        DEBUG_LWRITE(D, T, O, K, A, F)  LOG_OP(D, T, O, K, A, F)
+#else
+#define        DEBUG_LWRITE(D, T, O, K, A, F)
+#endif
+#else
+#define        DEBUG_LREAD(D, T, O, K, A, F)
+#define        DEBUG_LWRITE(D, T, O, K, A, F)
+#endif /* DEBUG */
+
+/*******************************************************
+ * Transactions and recovery.
+ *******************************************************/
+/*
+ * The locker id space is divided between the transaction manager and the lock
+ * manager.  Lockid's start at 0 and go to MAX_LOCKER_ID.  Txn Id's start at
+ * MAX_LOCKER_ID + 1 and go up to MAX_TXNID.
+ */
+#define        MAX_LOCKER_ID   0x0fffffff
+#define        MAX_TXNID       0xffffffff
+
+/*
+ * Out of band value for a lock.  The locks are returned to callers as offsets
+ * into the lock regions.  Since the RLAYOUT structure begins all regions, an
+ * offset of 0 is guaranteed not to be a valid lock.
+ */
+#define        LOCK_INVALID    0
+
+/* The structure allocated for every transaction. */
+struct __db_txn {
+       DB_TXNMGR       *mgrp;          /* Pointer to transaction manager. */
+       DB_TXN          *parent;        /* Pointer to transaction's parent. */
+       DB_LSN          last_lsn;       /* Lsn of last log write. */
+       u_int32_t       txnid;          /* Unique transaction id. */
+       size_t          off;            /* Detail structure within region. */
+       TAILQ_ENTRY(__db_txn) links;
+};
+#endif /* !_DB_INTERNAL_H_ */
diff --git a/db2/dbm/dbm.c b/db2/dbm/dbm.c
new file mode 100644 (file)
index 0000000..8daa980
--- /dev/null
@@ -0,0 +1,410 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ *     Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)dbm.c        10.5 (Sleepycat) 7/19/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/param.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#endif
+
+#define        DB_DBM_HSEARCH
+#include "db_int.h"
+
+#include "db_page.h"
+#include "hash.h"
+
+/*
+ *
+ * This package provides dbm and ndbm compatible interfaces to DB.
+ *
+ * The DBM routines, which call the NDBM routines.
+ */
+static DBM *__cur_db;
+
+static void __db_no_open __P((void));
+
+/* Provide prototypes here since there are none in db.h.  */
+int     dbm_error __P((DBM *));
+int     dbm_clearerr __P((DBM *));
+int     dbm_dirfno __P((DBM *));
+int     dbm_pagfno __P((DBM *));
+
+/*
+ * dbminit --
+ *     Make `file' the current database for the old-style dbm calls.  A
+ *     database that is already current is closed first.  The open is tried
+ *     with O_CREAT | O_RDWR and, if that fails, again with O_RDONLY.
+ *
+ * Returns:
+ *      0 on success
+ *     -1 on failure
+ */
+int
+dbminit(file)
+       char *file;
+{
+       if (__cur_db != NULL)
+               (void)dbm_close(__cur_db);
+
+       /* First choice: a read/write handle. */
+       __cur_db = dbm_open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+
+       /* Second choice: a read-only handle. */
+       if (__cur_db == NULL)
+               __cur_db = dbm_open(file, O_RDONLY, 0);
+
+       return (__cur_db == NULL ? -1 : 0);
+}
+
+/*
+ * fetch --
+ *     Old-style dbm lookup of `key' in the current database.
+ *
+ * Returns:
+ *     The result of dbm_fetch() on the current database, or an all-zero
+ *     datum (after printing a diagnostic) if no database is open.
+ */
+datum
+fetch(key)
+       datum key;
+{
+       datum item;
+
+       if (__cur_db == NULL) {
+               __db_no_open();
+               /*
+                * The datum is returned by value, so clear all of it rather
+                * than only dptr; otherwise dsize is handed back unset.
+                */
+               memset(&item, 0, sizeof(item));
+               return (item);
+       }
+       return (dbm_fetch(__cur_db, key));
+}
+
+/*
+ * firstkey --
+ *     Old-style dbm request for the first key of the current database.
+ *
+ * Returns:
+ *     The result of dbm_firstkey() on the current database, or an all-zero
+ *     datum (after printing a diagnostic) if no database is open.
+ */
+datum
+firstkey()
+{
+       datum item;
+
+       if (__cur_db == NULL) {
+               __db_no_open();
+               /*
+                * The datum is returned by value, so clear all of it rather
+                * than only dptr; otherwise dsize is handed back unset.
+                */
+               memset(&item, 0, sizeof(item));
+               return (item);
+       }
+       return (dbm_firstkey(__cur_db));
+}
+
+/*
+ * nextkey --
+ *     Old-style dbm request for the key following the last one returned.
+ *     The `key' argument is accepted for interface compatibility but is not
+ *     looked at here.
+ *
+ * Returns:
+ *     The result of dbm_nextkey() on the current database, or an all-zero
+ *     datum (after printing a diagnostic) if no database is open.
+ */
+datum
+nextkey(key)
+       datum key;
+{
+       datum item;
+
+       if (__cur_db == NULL) {
+               __db_no_open();
+               /*
+                * The datum is returned by value, so clear all of it rather
+                * than only dptr; otherwise dsize is handed back unset.
+                */
+               memset(&item, 0, sizeof(item));
+               return (item);
+       }
+       return (dbm_nextkey(__cur_db));
+}
+
+/*
+ * delete --
+ *     Old-style dbm removal of `key' from the current database, followed
+ *     by a sync of the database when the removal succeeded.
+ *
+ * Returns:
+ *     -1 if no database is open, otherwise the status of dbm_delete() or,
+ *     when that is 0, the status of the sync.
+ */
+int
+delete(key)
+       datum key;
+{
+       DB *dbp;
+       int ret;
+
+       if (__cur_db == NULL) {
+               __db_no_open();
+               return (-1);
+       }
+
+       if ((ret = dbm_delete(__cur_db, key)) != 0)
+               return (ret);
+
+       /* The delete went through; flush it and report the sync status. */
+       dbp = (DB *)__cur_db;
+       return (dbp->sync(dbp, 0));
+}
+
+/*
+ * store --
+ *     Old-style dbm store of the key/dat pair in the current database
+ *     (using DBM_REPLACE), followed by a sync of the database when the
+ *     store succeeded.
+ *
+ * Returns:
+ *     -1 if no database is open, otherwise the status of dbm_store() or,
+ *     when that is 0, the status of the sync.
+ */
+int
+store(key, dat)
+       datum key, dat;
+{
+       DB *dbp;
+       int ret;
+
+       if (__cur_db == NULL) {
+               __db_no_open();
+               return (-1);
+       }
+
+       if ((ret = dbm_store(__cur_db, key, dat, DBM_REPLACE)) != 0)
+               return (ret);
+
+       /* The store went through; flush it and report the sync status. */
+       dbp = (DB *)__cur_db;
+       return (dbp->sync(dbp, 0));
+}
+
+/*
+ * __db_no_open --
+ *     Complain on stderr that a dbm call was made with no database open.
+ */
+static void
+__db_no_open()
+{
+       (void)fputs("dbm: no open database.\n", stderr);
+}
+
+/*
+ * This package provides dbm and ndbm compatible interfaces to DB.
+ *
+ * The NDBM routines, which call the DB routines.
+ */
+/*
+ * dbm_open --
+ *     Open the hash database named by `file' with DBM_SUFFIX appended.
+ *
+ * Returns:
+ *     *DBM on success
+ *      NULL on failure, with errno set (ENAMETOOLONG if the suffixed name
+ *      does not fit in MAXPATHLEN bytes, otherwise the db_open() status)
+ */
+DBM *
+dbm_open(file, oflags, mode)
+       const char *file;
+       int oflags, mode;
+{
+       DB *dbp;
+       DB_INFO dbinfo;
+       char path[MAXPATHLEN];
+       int len;
+
+       memset(&dbinfo, 0, sizeof(dbinfo));
+       dbinfo.db_pagesize = 4096;
+       dbinfo.h_ffactor = 40;
+       dbinfo.h_nelem = 1;
+
+       /*
+        * Refuse a name that had to be truncated: opening the shortened path
+        * would silently operate on a different file than the one requested.
+        */
+       len = snprintf(path, sizeof(path), "%s%s", file, DBM_SUFFIX);
+       if (len < 0 || (size_t)len >= sizeof(path)) {
+               errno = ENAMETOOLONG;
+               return (NULL);
+       }
+
+       if ((errno = db_open(path,
+           DB_HASH, __db_oflags(oflags), mode, NULL, &dbinfo, &dbp)) != 0)
+               return (NULL);
+       return ((DBM *)dbp);
+}
+
+/*
+ * dbm_close --
+ *     Close the database through its own close method.  The status of
+ *     that call is discarded.
+ *
+ * Returns:
+ *     Nothing.
+ */
+void
+dbm_close(db)
+       DBM *db;
+{
+       (void)(db->close)(db, 0);
+}
+
+/*
+ * dbm_fetch --
+ *     Look up `key' through the database's get method.
+ *
+ * Returns:
+ *     datum holding the data on success
+ *     datum with NULL dptr and zero dsize on failure
+ */
+datum
+dbm_fetch(db, key)
+       DBM *db;
+       datum key;
+{
+       DBT dbkey, dbdata;
+       datum result;
+
+       memset(&dbkey, 0, sizeof(dbkey));
+       memset(&dbdata, 0, sizeof(dbdata));
+       dbkey.data = key.dptr;
+       dbkey.size = key.dsize;
+
+       /* Any non-zero status from get is reported as an empty datum. */
+       if (db->get((DB *)db, NULL, &dbkey, &dbdata, 0) != 0) {
+               result.dptr = NULL;
+               result.dsize = 0;
+               return (result);
+       }
+
+       result.dptr = dbdata.data;
+       result.dsize = dbdata.size;
+       return (result);
+}
+
+/*
+ * dbm_firstkey --
+ *     Fetch the first key, using the first cursor on the handle's cursor
+ *     queue or, if the queue is empty, a newly created one.
+ *
+ * Returns:
+ *     datum holding the key on success
+ *     datum with NULL dptr and zero dsize on failure
+ */
+datum
+dbm_firstkey(db)
+       DBM *db;
+{
+       DBC *cursor;
+       DBT dbkey, dbdata;
+       datum result;
+
+       cursor = TAILQ_FIRST(&db->curs_queue);
+       if (cursor == NULL) {
+               /* No cursor yet: create one, recording the status in errno. */
+               errno = db->cursor(db, NULL, &cursor);
+               if (errno != 0) {
+                       memset(&result, 0, sizeof(result));
+                       return (result);
+               }
+       }
+
+       memset(&dbkey, 0, sizeof(dbkey));
+       memset(&dbdata, 0, sizeof(dbdata));
+       if (cursor->c_get(cursor, &dbkey, &dbdata, DB_FIRST) == 0) {
+               result.dptr = dbkey.data;
+               result.dsize = dbkey.size;
+       } else {
+               result.dptr = NULL;
+               result.dsize = 0;
+       }
+       return (result);
+}
+
+/*
+ * dbm_nextkey --
+ *     Fetch the next key, using the first cursor on the handle's cursor
+ *     queue or, if the queue is empty, a newly created one.
+ *
+ * Returns:
+ *     datum holding the key on success
+ *     datum with NULL dptr and zero dsize on failure
+ */
+datum
+dbm_nextkey(db)
+       DBM *db;
+{
+       DBC *cursor;
+       DBT dbkey, dbdata;
+       datum result;
+
+       cursor = TAILQ_FIRST(&db->curs_queue);
+       if (cursor == NULL) {
+               /* No cursor yet: create one, recording the status in errno. */
+               errno = db->cursor(db, NULL, &cursor);
+               if (errno != 0) {
+                       memset(&result, 0, sizeof(result));
+                       return (result);
+               }
+       }
+
+       memset(&dbkey, 0, sizeof(dbkey));
+       memset(&dbdata, 0, sizeof(dbdata));
+       if (cursor->c_get(cursor, &dbkey, &dbdata, DB_NEXT) == 0) {
+               result.dptr = dbkey.data;
+               result.dsize = dbkey.size;
+       } else {
+               result.dptr = NULL;
+               result.dsize = 0;
+       }
+       return (result);
+}
+
+/*
+ * dbm_delete --
+ *     Remove `key' through the database's del method.
+ *
+ * Returns:
+ *      0 on success
+ *     <0 failure (errno is set)
+ */
+int
+dbm_delete(db, key)
+       DBM *db;
+       datum key;
+{
+       DB *dbp;
+       DBT dbkey;
+       int ret;
+
+       dbp = (DB *)db;
+       memset(&dbkey, 0, sizeof(dbkey));
+       dbkey.data = key.dptr;
+       dbkey.size = key.dsize;
+
+       ret = dbp->del(dbp, NULL, &dbkey, 0);
+       if (ret == 0)
+               return (0);
+
+       if (ret > 0) {
+               /* A positive status is stored in errno and becomes -1. */
+               errno = ret;
+               return (-1);
+       }
+
+       /* A negative status is passed through with errno set to ENOENT. */
+       errno = ENOENT;
+       return (ret);
+}
+
+/*
+ * dbm_store --
+ *     Store the key/data pair through the database's put method.  With
+ *     DBM_INSERT the put is issued with DB_NOOVERWRITE; any other flags
+ *     value requests a plain put.
+ *
+ * Returns:
+ *      0 on success
+ *     <0 failure (a positive status from put is stored in errno and
+ *        reported as -1, matching dbm_delete(); a negative status is
+ *        passed through unchanged)
+ *
+ * NOTE(review): the original contract also promises "1 if DBM_INSERT and
+ * entry exists".  Nothing here maps put's key-exists status to 1 -- confirm
+ * against db.h which status that is before adding the mapping.
+ */
+int
+dbm_store(db, key, data, flags)
+       DBM *db;
+       datum key, data;
+       int flags;
+{
+       DBT _key, _data;
+       int ret;
+
+       memset(&_key, 0, sizeof(DBT));
+       memset(&_data, 0, sizeof(DBT));
+       _key.data = key.dptr;
+       _key.size = key.dsize;
+       _data.data = data.dptr;
+       _data.size = data.dsize;
+
+       ret = db->put((DB *)db,
+           NULL, &_key, &_data, (flags == DBM_INSERT) ? DB_NOOVERWRITE : 0);
+       if (ret > 0) {
+               /* Do not leak a raw positive status to "< 0" checking callers. */
+               errno = ret;
+               ret = -1;
+       }
+       return (ret);
+}
+
+/*
+ * dbm_error --
+ *     Return the local_errno value kept in the handle's HTAB.
+ */
+int
+dbm_error(db)
+       DBM *db;
+{
+       return (((HTAB *)db->internal)->local_errno);
+}
+
+/*
+ * dbm_clearerr --
+ *     Reset the local_errno value kept in the handle's HTAB.  Always
+ *     returns 0.
+ */
+int
+dbm_clearerr(db)
+       DBM *db;
+{
+       ((HTAB *)db->internal)->local_errno = 0;
+       return (0);
+}
+
+/*
+ * XXX
+ * We only have a single file descriptor that we can return, not two.  Return
+ * the same one for both files.  Hopefully, the user is using it for locking
+ * and picked one to use at random.
+ *
+ * Returns the descriptor stored by the handle's fd method, or -1 if that
+ * method stores nothing.
+ */
+int
+dbm_dirfno(db)
+       DBM *db;
+{
+       int fd;
+
+       /*
+        * The status of db->fd is not examined, so start from -1: a call
+        * that fails without storing a descriptor must not hand back an
+        * indeterminate value.
+        */
+       fd = -1;
+       (void)db->fd(db, &fd);
+       return (fd);
+}
+
+/*
+ * dbm_pagfno --
+ *     Return the descriptor stored by the handle's fd method (the same
+ *     one dbm_dirfno() reports), or -1 if that method stores nothing.
+ */
+int
+dbm_pagfno(db)
+       DBM *db;
+{
+       int fd;
+
+       /*
+        * The status of db->fd is not examined, so start from -1: a call
+        * that fails without storing a descriptor must not hand back an
+        * indeterminate value.
+        */
+       fd = -1;
+       (void)db->fd(db, &fd);
+       return (fd);
+}
diff --git a/db2/hash/hash.c b/db2/hash/hash.c
new file mode 100644 (file)
index 0000000..6d8c400
--- /dev/null
@@ -0,0 +1,1440 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash.c       10.25 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "shqueue.h"
+#include "db_int.h"
+#include "db_page.h"
+#include "db_am.h"
+#include "db_ext.h"
+#include "hash.h"
+#include "log.h"
+
+static int  __ham_c_close __P((DBC *));
+static int  __ham_c_del __P((DBC *, int));
+static int  __ham_c_get __P((DBC *, DBT *, DBT *, int));
+static int  __ham_c_put __P((DBC *, DBT *, DBT *, int));
+static int  __ham_c_init __P((DB *, DB_TXN *, DBC **));
+static int  __ham_cursor __P((DB *, DB_TXN *, DBC **));
+static int  __ham_delete __P((DB *, DB_TXN *, DBT *, int));
+static int  __ham_dup_return __P((HTAB *, HASH_CURSOR *, DBT *, int));
+static int  __ham_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static void __ham_init_htab __P((HTAB *));
+static int  __ham_lookup __P((HTAB *,
+               HASH_CURSOR *, const DBT *, u_int32_t, db_lockmode_t));
+static int  __ham_overwrite __P((HTAB *, HASH_CURSOR *, DBT *));
+static int  __ham_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+static int  __ham_sync __P((DB *, int));
+
+/************************** INTERFACE ROUTINES ***************************/
+/* OPEN/CLOSE */
+
+/*
+ * __ham_open --
+ *     Set up the hash access method for a DB handle: allocate the HTAB,
+ *     install the hash methods in dbp, read the meta-data page (initializing
+ *     it when the file is new), create the default cursor and allocate the
+ *     split buffer.
+ *
+ *     Returns 0 on success and a non-zero error value on failure.  After
+ *     the HTAB has been attached to dbp, every failure path tears the
+ *     partially built state down through __ham_close().
+ *
+ * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
+ */
+int
+__ham_open(dbp, dbinfo)
+       DB *dbp;
+       DB_INFO *dbinfo;
+{
+       DB_ENV *dbenv;
+       DBC *curs;
+       HTAB *hashp;
+       int file_existed, ret;
+
+       dbenv = dbp->dbenv;
+
+       if ((hashp = (HTAB *)calloc(1, sizeof(HTAB))) == NULL)
+               return (ENOMEM);
+       hashp->dbp = dbp;
+
+       /* Set the hash function if specified by the user. */
+       if (dbinfo != NULL && dbinfo->h_hash != NULL)
+               hashp->hash = dbinfo->h_hash;
+
+       /*
+        * Initialize the remaining fields of the dbp.  The type, close and
+        * fd functions are all set in db_open.
+        */
+       dbp->internal = hashp;
+       dbp->cursor = __ham_cursor;
+       dbp->del = __ham_delete;
+       dbp->get = __ham_get;
+       dbp->put = __ham_put;
+       dbp->sync = __ham_sync;
+
+       /* If locking is turned on, lock the meta data page. */
+       if (F_ISSET(dbp, DB_AM_LOCKING)) {
+               dbp->lock.pgno = BUCKET_INVALID;
+               if ((ret = lock_get(dbenv->lk_info, dbp->locker,
+                   0, &dbp->lock_dbt, DB_LOCK_READ, &hashp->hlock)) != 0) {
+                       if (ret < 0)
+                               ret = EAGAIN;
+                       goto out;
+               }
+       }
+
+       /*
+        * Now, we can try to read the meta-data page and figure out
+        * if we set up locking and get the meta-data page properly.
+        * If this is a new file, initialize it, and put it back dirty.
+        */
+       if ((ret = __ham_get_page(hashp->dbp, 0, (PAGE **)&hashp->hdr)) != 0)
+               goto out;
+
+       /* Initialize the hashp structure */
+       if (hashp->hdr->magic == DB_HASHMAGIC) {
+               file_existed = 1;
+               /* File exists, verify the data in the header. */
+               if (hashp->hash == NULL)
+                       hashp->hash =
+                           hashp->hdr->version < 5 ? __ham_func4 : __ham_func5;
+               if (hashp->hash(CHARKEY, sizeof(CHARKEY)) !=
+                   hashp->hdr->h_charkey) {
+                       __db_err(hashp->dbp->dbenv,
+                           "hash: incompatible hash function");
+                       ret = EINVAL;
+                       goto out;
+               }
+               if (F_ISSET(hashp->hdr, DB_HASH_DUP))
+                       F_SET(dbp, DB_AM_DUP);
+       } else {
+               /*
+                * File does not exist, we must initialize the header.  If
+                * locking is enabled that means getting a write lock first.
+                */
+               file_existed = 0;
+               if (F_ISSET(dbp, DB_AM_LOCKING)) {
+                       /*
+                        * Forget the read lock as soon as it has been given
+                        * back, so that the error path does not release it a
+                        * second time if the write lock is refused.
+                        */
+                       if ((ret =
+                           lock_put(dbenv->lk_info, hashp->hlock)) == 0) {
+                               hashp->hlock = 0;
+                               ret = lock_get(dbenv->lk_info, dbp->locker, 0,
+                                   &dbp->lock_dbt, DB_LOCK_WRITE,
+                                   &hashp->hlock);
+                       }
+                       if (ret != 0) {
+                               if (ret < 0)
+                                       ret = EAGAIN;
+                               goto out;
+                       }
+               }
+
+               hashp->hdr->nelem = dbinfo != NULL ? dbinfo->h_nelem : 0;
+               hashp->hdr->ffactor =
+                   dbinfo != NULL && dbinfo->h_ffactor ? dbinfo->h_ffactor : 0;
+               __ham_init_htab(hashp);
+               if (F_ISSET(dbp, DB_AM_DUP))
+                       F_SET(hashp->hdr, DB_HASH_DUP);
+               if ((ret = __ham_dirty_page(hashp, (PAGE *)hashp->hdr)) != 0)
+                       goto out;
+       }
+
+       /*
+        * Initialize the default cursor.  The status has to be checked:
+        * queueing `curs' after a failed init would link an unset pointer.
+        */
+       if ((ret = __ham_c_init(dbp, NULL, &curs)) != 0)
+               goto out;
+       TAILQ_INSERT_TAIL(&dbp->curs_queue, curs, links);
+
+       /* Allocate memory for our split buffer. */
+       if ((hashp->split_buf = (PAGE *)malloc(dbp->pgsize)) == NULL) {
+               ret = ENOMEM;
+               goto out;
+       }
+
+#ifdef NO_STATISTICS_FOR_DB_ERR
+       __db_err(dbp->dbenv,
+           "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
+           "TABLE POINTER   ", (long)hashp,
+           "BUCKET SIZE     ", (long)hashp->hdr->pagesize,
+           "FILL FACTOR     ", (long)hashp->hdr->ffactor,
+           "MAX BUCKET      ", (long)hashp->hdr->max_bucket,
+           "OVFL POINT      ", (long)hashp->hdr->ovfl_point,
+           "LAST FREED      ", (long)hashp->hdr->last_freed,
+           "HIGH MASK       ", (long)hashp->hdr->high_mask,
+           "LOW  MASK       ", (long)hashp->hdr->low_mask,
+           "NELEM           ", (long)hashp->hdr->nelem,
+           "FLAGS           ", (long)hashp->hdr->flags);
+#endif
+
+       /*
+        * Release the meta data page.  The pointer is dropped right away so
+        * that __ham_close() does not put the page again if the lock release
+        * below fails.
+        */
+       (void)__ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
+       hashp->hdr = NULL;
+       if (F_ISSET(dbp, DB_AM_LOCKING) &&
+           (ret = lock_put(dbenv->lk_info, hashp->hlock)) != 0) {
+               if (ret < 0)
+                       ret = EAGAIN;
+               goto out;
+       }
+
+       hashp->hlock = 0;
+       /* Sync the file so that we know that the meta data goes to disk. */
+       if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
+               goto out;
+       return (0);
+
+out:   (void)__ham_close(dbp);
+       return (ret);
+}
+
+/*
+ * __ham_close --
+ *     Tear down the hash-specific state attached to a DB handle: free the
+ *     split buffer, put back the meta-data page and lock if they are still
+ *     held, then free the HTAB and clear dbp->internal.
+ *
+ *     Returns 0, or the first non-zero value reported by __ham_put_page
+ *     or lock_put.
+ *
+ * PUBLIC: int  __ham_close __P((DB *));
+ */
+int
+__ham_close(dbp)
+       DB *dbp;
+{
+       HTAB *hashp;
+       int ret, rc;
+
+       DEBUG_LWRITE(dbp, NULL, "ham_close", NULL, NULL, 0);
+       ret = 0;
+       hashp = (HTAB *)dbp->internal;
+
+       /* The split page buffer belongs to this table alone. */
+       if (hashp->split_buf)
+               FREE(hashp->split_buf, dbp->pgsize);
+
+       /* Hand back the meta-data page if it is still pinned. */
+       if (hashp->hdr) {
+               rc = __ham_put_page(hashp->dbp, (PAGE *)hashp->hdr, 0);
+               if (rc != 0 && ret == 0)
+                       ret = rc;
+       }
+
+       /* Same for the meta-data lock. */
+       if (hashp->hlock) {
+               rc = lock_put(hashp->dbp->dbenv->lk_info, hashp->hlock);
+               if (rc != 0 && ret == 0)
+                       ret = rc;
+       }
+
+       FREE(hashp, sizeof(HTAB));
+       dbp->internal = NULL;
+       return (ret);
+}
+
+/************************** LOCAL CREATION ROUTINES **********************/
+/*
+ * __ham_init_htab --
+ *     Fill in the meta-data header (hashp->hdr) of a hash table: page
+ *     size, LSN, magic and version, h_charkey, the initial bucket count
+ *     with its masks and spares slots, and the file id.  The element
+ *     estimate (nelem) and fill factor (ffactor) are read from the header
+ *     itself.  If no hash function is set on hashp one is chosen from the
+ *     header version.  Nothing is returned.
+ */
+static void
+__ham_init_htab(hashp)
+       HTAB *hashp;
+{
+       u_int32_t est;
+       int32_t shift, nbuckets;
+
+       est = hashp->hdr->nelem;
+       hashp->hdr->pagesize = hashp->dbp->pgsize;
+       ZERO_LSN(hashp->hdr->lsn);
+       hashp->hdr->magic = DB_HASHMAGIC;
+       hashp->hdr->version = DB_HASHVERSION;
+
+       /* No hash function supplied: select one by header version. */
+       if (hashp->hash == NULL) {
+               if (hashp->hdr->version < 5)
+                       hashp->hash = __ham_func4;
+               else
+                       hashp->hash = __ham_func5;
+       }
+       hashp->hdr->h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
+
+       /*
+        * Default to 2^2 buckets.  When both an element estimate and a
+        * fill factor are present, size for ceil(est / ffactor) buckets
+        * instead, never fewer than 2.
+        */
+       shift = 2;
+       if (est != 0 && hashp->hdr->ffactor != 0) {
+               est = (est - 1) / hashp->hdr->ffactor + 1;
+               if (est < 2)
+                       est = 2;
+               shift = __db_log2(est);
+       }
+       nbuckets = 1 << shift;
+
+       hashp->hdr->spares[shift] = 0;
+       hashp->hdr->spares[shift + 1] = 0;
+       hashp->hdr->ovfl_point = shift;
+       hashp->hdr->last_freed = PGNO_INVALID;
+
+       hashp->hdr->high_mask = nbuckets - 1;
+       hashp->hdr->max_bucket = hashp->hdr->high_mask;
+       hashp->hdr->low_mask = (nbuckets >> 1) - 1;
+       memcpy(hashp->hdr->uid, hashp->dbp->lock.fileid, DB_FILE_ID_LEN);
+}
+
+/********************** DESTROY/CLOSE ROUTINES ************************/
+
+
+/*
+ * __ham_sync --
+ *     Write modified pages to disk.
+ *
+ * Returns:
+ *      0 on success; also for a read-only handle (nothing is written)
+ *        and when memp_fsync reports DB_INCOMPLETE
+ *      otherwise the non-zero value from __db_syncchk or memp_fsync
+ */
+static int
+__ham_sync(dbp, flags)
+       DB *dbp;
+       int flags;
+{
+       int ret;
+
+       DEBUG_LWRITE(dbp, NULL, "ham_sync", NULL, NULL, flags);
+
+       /* Bail out if the common sync check fails. */
+       ret = __db_syncchk(dbp, flags);
+       if (ret != 0)
+               return (ret);
+
+       if (F_ISSET(dbp, DB_AM_RDONLY))
+               return (0);
+
+       /* DB_INCOMPLETE is not treated as a failure here. */
+       ret = memp_fsync(dbp->mpf);
+       return (ret == DB_INCOMPLETE ? 0 : ret);
+}
+
+/*******************************SEARCH ROUTINES *****************************/
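+/*
+ * Return convention of the access routines below:
+ *
+ *      0 on SUCCESS
+ *      a DB_* status (e.g. DB_NOTFOUND, DB_KEYEXIST) for an external
+ *        condition (e.g. key not found)
+ *      any other non-zero value for an internal ERROR (e.g. out of
+ *        memory)
+ *
+ * An earlier form of this comment listed 1 and -1 for the two error
+ * classes; the routines here return the values described above.
+ */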
+
+/*
+ * __ham_get --
+ *     Look up key with the handle's first cursor and return its data
+ *     item (the first one, DB_FIRST, if there are duplicates) in data.
+ *
+ *     Returns 0 on success, DB_NOTFOUND when the lookup succeeds but the
+ *     cursor is not marked H_OK, or a non-zero value passed up from
+ *     __db_getchk, __db_gethandle, __ham_lookup, __ham_dup_return or
+ *     __ham_item_done.
+ */
+static int
+__ham_get(dbp, txn, key, data, flags)
+       DB *dbp;
+       DB_TXN *txn;
+       DBT *key;
+       DBT *data;
+       int flags;
+{
+       DB *ldbp;
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       int ret, t_ret;
+
+       DEBUG_LREAD(dbp, txn, "ham_get", key, NULL, flags);
+       if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
+               return (ret);
+
+       /* With DB_AM_THREAD, operate on a handle from __db_gethandle. */
+       ldbp = dbp;
+       if (F_ISSET(dbp, DB_AM_THREAD) &&
+           (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
+               return (ret);
+
+       hashp = (HTAB *)ldbp->internal;
+       SET_LOCKER(ldbp, txn);
+       GET_META(ldbp, hashp);
+
+       hashp->hash_accesses++;
+       hcp = (HASH_CURSOR *)TAILQ_FIRST(&ldbp->curs_queue)->internal;
+       if ((ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ)) == 0) {
+               if (F_ISSET(hcp, H_OK))
+                       ret = __ham_dup_return(hashp, hcp, data, DB_FIRST);
+               else /* Key was not found */
+                       ret = DB_NOTFOUND;
+       }
+
+       /* Release what the cursor holds; keep the first error seen. */
+       if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
+               ret = t_ret;
+       RELEASE_META(ldbp, hashp);
+       if (F_ISSET(dbp, DB_AM_THREAD))
+               __db_puthandle(ldbp);
+       return (ret);
+}
+
+/*
+ * __ham_put --
+ *     Store data under key using the handle's first cursor.
+ *
+ *     If the lookup reports DB_NOTFOUND a new pair is added; for a
+ *     partial put with a non-zero doff the data is first padded with
+ *     doff zero bytes.  If the key exists the outcome depends on flags
+ *     and DB_AM_DUP: DB_KEYEXIST for DB_NOOVERWRITE, a new duplicate
+ *     (DB_KEYLAST), or an overwrite.  The table is expanded afterwards
+ *     if H_EXPAND is set on the cursor.
+ *
+ *     Returns 0 on success, otherwise a non-zero value from the checks
+ *     or helpers called below.
+ */
+static int
+__ham_put(dbp, txn, key, data, flags)
+       DB *dbp;
+       DB_TXN *txn;
+       DBT *key;
+       DBT *data;
+       int flags;
+{
+       DB *ldbp;
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       DBT tmp_val, *myval;
+       int ret, t_ret;
+       u_int32_t nbytes;
+
+       DEBUG_LWRITE(dbp, txn, "ham_put", key, data, flags);
+       if ((ret = __db_putchk(dbp, key, data,
+           flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
+               return (ret);
+
+       /* With DB_AM_THREAD, operate on a handle from __db_gethandle. */
+       ldbp = dbp;
+       if (F_ISSET(dbp, DB_AM_THREAD) &&
+           (ret = __db_gethandle(dbp, __ham_hdup, &ldbp)) != 0)
+               return (ret);
+
+       hashp = (HTAB *)ldbp->internal;
+       SET_LOCKER(ldbp, txn);
+       GET_META(ldbp, hashp);
+       hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
+
+       /*
+        * Page space the pair needs: HOFFPAGE_PSIZE for an item that
+        * ISBIG() reports as big, HKEYDATA_PSIZE(size) otherwise.
+        */
+       nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
+           HKEYDATA_PSIZE(key->size)) +
+           (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
+           HKEYDATA_PSIZE(data->size));
+
+       hashp->hash_accesses++;
+       ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
+
+       if (ret == DB_NOTFOUND) {
+               /* New key. */
+               ret = 0;
+               /*
+                * The lookup left a page in seek_found_page that differs
+                * from the cursor's current page: release what the cursor
+                * holds and point it at that page instead.
+                */
+               if (hcp->seek_found_page != PGNO_INVALID &&
+                   hcp->seek_found_page != hcp->pgno) {
+                       if ((ret = __ham_item_done(hashp, hcp, 0)) != 0)
+                               goto out;
+                       hcp->pgno = hcp->seek_found_page;
+                       hcp->bndx = NDX_INVALID;
+               }
+
+               if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
+                       /*
+                        * Doing a partial put, but the key does not exist
+                        * and we are not beginning the write at 0.  We
+                        * must create a data item padded up to doff and
+                        * then write the new bytes represented by val.
+                        */
+                       ret = __ham_init_dbt(&tmp_val, data->size + data->doff,
+                           &hcp->big_data, &hcp->big_datalen);
+                       if (ret == 0) {
+                               memset(tmp_val.data, 0, data->doff);
+                               memcpy((u_int8_t *)tmp_val.data + data->doff,
+                                   data->data, data->size);
+                               myval = &tmp_val;
+                       }
+               } else
+                       myval = (DBT *)data;
+
+               /* myval is only set when ret == 0, which guards its use. */
+               if (ret == 0)
+                       ret = __ham_add_el(hashp, hcp, key, myval, H_KEYDATA);
+       } else if (ret == 0 && F_ISSET(hcp, H_OK)) {
+               /* Key already present. */
+               if (flags == DB_NOOVERWRITE)
+                       ret = DB_KEYEXIST;
+               else if (F_ISSET(ldbp, DB_AM_DUP))
+                       ret = __ham_add_dup(hashp, hcp, data, DB_KEYLAST);
+               else
+                       ret = __ham_overwrite(hashp, hcp, data);
+       }
+
+       /* Free up all the cursor pages. */
+       if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+               ret = t_ret;
+       /* Now check if we have to grow. */
+out:   if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
+               ret = __ham_expand_table(hashp);
+               F_CLR(hcp, H_EXPAND);
+       }
+
+       /*
+        * NOTE(review): on the non-goto path __ham_item_done has already
+        * run just above the out: label, so it runs twice.  Presumably the
+        * second call finds nothing left to release -- confirm.
+        */
+       if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+               ret = t_ret;
+       RELEASE_META(ldbp, hashp);
+       if (F_ISSET(dbp, DB_AM_THREAD))
+               __db_puthandle(ldbp);
+       return (ret);
+}
+
+/*
+ * __ham_cursor --
+ *     Create a new hash cursor on dbp for transaction txnid, return it
+ *     through dbcp and append it to the handle's cursor queue.
+ *
+ *     Returns 0, or the error from __ham_c_init (in which case nothing
+ *     is queued).
+ */
+static int
+__ham_cursor(dbp, txnid, dbcp)
+       DB *dbp;
+       DB_TXN *txnid;
+       DBC **dbcp;
+{
+       int ret;
+
+       DEBUG_LWRITE(dbp, txnid, "ham_cursor", NULL, NULL, 0);
+
+       ret = __ham_c_init(dbp, txnid, dbcp);
+       if (ret == 0) {
+               /* The queue is modified between the handle's lock calls. */
+               DB_THREAD_LOCK(dbp);
+               TAILQ_INSERT_TAIL(&dbp->curs_queue, *dbcp, links);
+               DB_THREAD_UNLOCK(dbp);
+       }
+       return (ret);
+}
+
+/*
+ * __ham_c_init --
+ *     Allocate a zero-filled DBC together with its HASH_CURSOR, fill in
+ *     the cursor method pointers, owning handle and transaction, and
+ *     initialize the hash cursor with __ham_item_init.  When dbcp is
+ *     non-NULL the new DBC is stored in *dbcp.
+ *
+ *     Returns 0 on success, ENOMEM if either allocation fails.
+ */
+static int
+__ham_c_init(dbp, txnid, dbcp)
+       DB *dbp;
+       DB_TXN *txnid;
+       DBC **dbcp;
+{
+       DBC *dbc;
+       HASH_CURSOR *hcp;
+
+       dbc = (DBC *)calloc(sizeof(DBC), 1);
+       if (dbc == NULL)
+               return (ENOMEM);
+
+       hcp = (HASH_CURSOR *)calloc(sizeof(struct cursor_t), 1);
+       if (hcp == NULL) {
+               /* Don't leak the outer cursor. */
+               FREE(dbc, sizeof(DBC));
+               return (ENOMEM);
+       }
+
+       /* Tie the generic and the hash-specific cursor together. */
+       dbc->internal = hcp;
+       hcp->db_cursor = dbc;
+
+       /* Owning handle and transaction. */
+       dbc->dbp = dbp;
+       dbc->txn = txnid;
+
+       /* Cursor method pointers. */
+       dbc->c_close = __ham_c_close;
+       dbc->c_del = __ham_c_del;
+       dbc->c_get = __ham_c_get;
+       dbc->c_put = __ham_c_put;
+
+       __ham_item_init(hcp);
+
+       if (dbcp != NULL)
+               *dbcp = dbc;
+       return (0);
+}
+
+/*
+ * __ham_delete --
+ *     Delete the key/data pair for key using the handle's first cursor.
+ *
+ *     Returns 0 on success, DB_NOTFOUND when the lookup succeeds but the
+ *     cursor is not marked H_OK, or a non-zero value from __db_delchk,
+ *     __db_gethandle, __ham_lookup, __ham_del_pair or __ham_item_done.
+ */
+static int
+__ham_delete(dbp, txn, key, flags)
+       DB *dbp;
+       DB_TXN *txn;
+       DBT *key;
+       int flags;
+{
+       DB *ldbp;
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       int ret, t_ret;
+
+       DEBUG_LWRITE(dbp, txn, "ham_delete", key, NULL, flags);
+       ret = __db_delchk(dbp, flags, F_ISSET(dbp, DB_AM_RDONLY));
+       if (ret != 0)
+               return (ret);
+
+       /* With DB_AM_THREAD, operate on a handle from __db_gethandle. */
+       ldbp = dbp;
+       if (F_ISSET(dbp, DB_AM_THREAD)) {
+               ret = __db_gethandle(dbp, __ham_hdup, &ldbp);
+               if (ret != 0)
+                       return (ret);
+       }
+
+       hashp = (HTAB *)ldbp->internal;
+       SET_LOCKER(ldbp, txn);
+       GET_META(ldbp, hashp);
+       hcp = TAILQ_FIRST(&ldbp->curs_queue)->internal;
+
+       hashp->hash_accesses++;
+       ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_WRITE);
+       if (ret == 0) {
+               if (F_ISSET(hcp, H_OK))
+                       ret = __ham_del_pair(hashp, hcp);
+               else
+                       ret = DB_NOTFOUND;
+       }
+
+       /* Release what the cursor holds; keep the first error seen. */
+       t_ret = __ham_item_done(hashp, hcp, ret == 0);
+       if (t_ret != 0 && ret == 0)
+               ret = t_ret;
+
+       RELEASE_META(ldbp, hashp);
+       if (F_ISSET(dbp, DB_AM_THREAD))
+               __db_puthandle(ldbp);
+       return (ret);
+}
+
+/* ****************** CURSORS ********************************** */
+/*
+ * __ham_c_close --
+ *     Close a hash cursor: release what it holds via __ham_item_done,
+ *     free its big key/data buffers, unlink it from its DB handle's
+ *     cursor queue and free both cursor structures.
+ *
+ *     Returns the value of __ham_item_done, or the error from
+ *     __db_gethandle if no handle could be obtained.
+ */
+static int
+__ham_c_close(cursor)
+       DBC *cursor;
+{
+       DB *ldbp;
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       int ret;
+
+       DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_close", NULL, NULL, 0);
+
+       /*
+        * A handle is always fetched.  It is only really needed when one
+        * of the cursor's pagep, dpagep or lock fields is non-NULL, but
+        * that is the normal case, and testing for it here would couple
+        * cursor close to the ham_item_done functionality.  We accept the
+        * overhead instead.
+        */
+       ldbp = cursor->dbp;
+       if (F_ISSET(cursor->dbp, DB_AM_THREAD)) {
+               ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp);
+               if (ret != 0)
+                       return (ret);
+       }
+       hcp = (HASH_CURSOR *)cursor->internal;
+       hashp = (HTAB *)ldbp->internal;
+       ret = __ham_item_done(hashp, hcp, 0);
+
+       if (hcp->big_data)
+               FREE(hcp->big_data, hcp->big_datalen);
+       if (hcp->big_key)
+               FREE(hcp->big_key, hcp->big_keylen);
+
+       /*
+        * Every cursor except the default ones is linked off the master
+        * handle, so the cursor is removed from cursor->dbp, not from the
+        * local handle.  When the whole file is being closed the THREAD
+        * bit has been cleared, master and local are identical, and the
+        * removal still hits the correct queue.
+        */
+       DB_THREAD_LOCK(cursor->dbp);
+       TAILQ_REMOVE(&cursor->dbp->curs_queue, cursor, links);
+       DB_THREAD_UNLOCK(cursor->dbp);
+
+       if (F_ISSET(cursor->dbp, DB_AM_THREAD))
+               __db_puthandle(ldbp);
+
+       FREE(hcp, sizeof(HASH_CURSOR));
+       FREE(cursor, sizeof(DBC));
+       return (ret);
+}
+
+/*
+ * __ham_c_del --
+ *     Delete the item the cursor currently references.
+ *
+ *     Three situations are handled: the item is a duplicate stored on a
+ *     separate duplicate page (dpgno valid), a duplicate stored on the
+ *     main page, or a plain key/data pair.  On any failure the cursor is
+ *     restored to the state it had on entry.
+ *
+ *     Returns 0 on success, DB_NOTFOUND if the cursor is already marked
+ *     H_DELETED, or a non-zero value from the checks or helpers below --
+ *     including a failure of the final __ham_item_done.
+ */
+static int
+__ham_c_del(cursor, flags)
+       DBC *cursor;
+       int flags;
+{
+       DB *ldbp;
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       HASH_CURSOR save_curs;
+       db_pgno_t ppgno, chg_pgno;
+       int ret, t_ret;
+
+       DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_del", NULL, NULL, flags);
+       ldbp = cursor->dbp;
+       if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
+           (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
+               return (ret);
+       hashp = (HTAB *)ldbp->internal;
+       hcp = (HASH_CURSOR *)cursor->internal;
+       /* Snapshot of the cursor, restored at the end if anything fails. */
+       save_curs = *hcp;
+       /*
+        * NOTE(review): the two early returns below do not call
+        * __db_puthandle for a handle obtained above under DB_AM_THREAD --
+        * confirm whether that is intended.
+        */
+       if ((ret = __db_cdelchk(ldbp, flags,
+           F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
+               return (ret);
+       if (F_ISSET(hcp, H_DELETED))
+               return (DB_NOTFOUND);
+
+       SET_LOCKER(hashp->dbp, cursor->txn);
+       GET_META(hashp->dbp, hashp);
+       hashp->hash_accesses++;
+       if ((ret = __ham_get_cpage(hashp, hcp, DB_LOCK_WRITE)) != 0)
+               goto out;
+       if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
+               /* Read the previous page number before the item is removed. */
+               ppgno = PREV_PGNO(hcp->dpagep);
+
+               /* Remove item from duplicate page. */
+               chg_pgno = hcp->dpgno;
+               if ((ret = __db_drem(hashp->dbp,
+                   &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
+                       goto out;
+
+               /*
+                * There are 4 cases.
+                * 1. We removed an item on a page, but nothing else changed.
+                * 2. We removed the last item on a page, but there is a
+                *    following page of duplicates.
+                * 3. We removed the last item on a page, this page was the
+                *    last page in a duplicate set, but there were dups before
+                *    it.
+                * 4. We removed the last item on a page, removing the last
+                *    duplicate.
+                * In case 1 hcp->dpagep is unchanged.
+                * In case 2 hcp->dpagep comes back pointing to the next dup
+                *     page.
+                * In case 3 hcp->dpagep comes back NULL.
+                * In case 4 hcp->dpagep comes back NULL.
+                */
+               if (hcp->dpagep == NULL) {
+                       if (ppgno != PGNO_INVALID) {            /* Case 3 */
+                               hcp->dpgno = ppgno;
+                               if ((ret = __ham_get_cpage(hashp, hcp,
+                                   DB_LOCK_READ)) != 0)
+                                       goto out;
+                               hcp->dndx = NUM_ENT(hcp->dpagep);
+                               F_SET(hcp, H_DELETED);
+                       } else {                                /* Case 4 */
+                               ret = __ham_del_pair(hashp, hcp);
+                               hcp->dpgno = PGNO_INVALID;
+                               /*
+                                * Delpair updated the cursor queue, so we
+                                * don't have to do that here.
+                                */
+                               chg_pgno = PGNO_INVALID;
+                       }
+               } else if (PGNO(hcp->dpagep) != hcp->dpgno) {
+                       hcp->dndx = 0;                          /* Case 2 */
+                       hcp->dpgno = PGNO(hcp->dpagep);
+                       /*
+                        * The removed page had no predecessor, so the
+                        * duplicate page number stored in the main page's
+                        * data entry is rewritten to the new first page.
+                        */
+                       if (ppgno == PGNO_INVALID)
+                               memcpy(P_ENTRY(hcp->pagep,
+                                   H_DATAINDEX(hcp->bndx)) +
+                                   SSZ(HOFFDUP, pgno), &hcp->dpgno,
+                                   sizeof(db_pgno_t));
+                       F_SET(hcp, H_DELETED);
+               } else                                  /* Case 1 */
+                       F_SET(hcp, H_DELETED);
+               if (chg_pgno != PGNO_INVALID)
+                       __ham_c_update(hashp, hcp, chg_pgno, 0, 0, 1);
+       } else if (F_ISSET(hcp, H_ISDUP)) {                     /* on page */
+               /* The current duplicate is the entire data item: drop the pair. */
+               if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
+                   LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx))
+                       ret = __ham_del_pair(hashp, hcp);
+               else {
+                       DBT repldbt;
+
+                       /*
+                        * Cut the duplicate out with a partial replace:
+                        * dlen bytes at doff are replaced by zero bytes.
+                        */
+                       repldbt.flags = 0;
+                       F_SET(&repldbt, DB_DBT_PARTIAL);
+                       repldbt.doff = hcp->dup_off;
+                       repldbt.dlen = DUP_SIZE(hcp->dup_len);
+                       repldbt.size = 0;
+                       ret = __ham_replpair(hashp, hcp, &repldbt, 0);
+                       hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
+                       __ham_c_update(hashp, hcp, hcp->pgno,
+                           DUP_SIZE(hcp->dup_len), 0, 1);
+                       F_SET(hcp, H_DELETED);
+               }
+
+       } else
+               /* Not a duplicate */
+               ret = __ham_del_pair(hashp, hcp);
+
+       /*
+        * Release what the cursor holds.  If that fails and nothing failed
+        * before, report it, so the cursor is restored and the caller sees
+        * the error.
+        */
+out:   if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+               ret = t_ret;
+       if (ret != 0)
+               *hcp = save_curs;
+       RELEASE_META(hashp->dbp, hashp);
+       if (F_ISSET(cursor->dbp, DB_AM_THREAD))
+               __db_puthandle(ldbp);
+       return (ret);
+}
+
+/*
+ * __ham_c_get --
+ *     Position the cursor according to flags (DB_FIRST, DB_LAST, DB_NEXT,
+ *     DB_PREV, DB_SET, DB_SET_RANGE, DB_CURRENT) and return the key/data
+ *     pair found there.  For DB_SET and DB_SET_RANGE the key is an input
+ *     and is not written back.
+ *
+ *     When a bucket is exhausted (H_NOMORE) the scan moves on to the
+ *     previous or next bucket until an item is found or the buckets run
+ *     out.  On any non-zero return the cursor is restored to the state
+ *     it had on entry.
+ *
+ *     Returns 0 on success, DB_NOTFOUND when no item is found,
+ *     DB_KEYEMPTY for DB_CURRENT on a cursor marked H_DELETED, or a
+ *     non-zero value from the checks or helpers below -- including a
+ *     failure of the final __ham_item_done.
+ */
+static int
+__ham_c_get(cursor, key, data, flags)
+       DBC *cursor;
+       DBT *key;
+       DBT *data;
+       int flags;
+{
+       DB *ldbp;
+       HTAB *hashp;
+       HASH_CURSOR *hcp, save_curs;
+       int get_key, ret, t_ret;
+
+       DEBUG_LREAD(cursor->dbp, cursor->txn, "ham_c_get",
+           flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
+           NULL, flags);
+       ldbp = cursor->dbp;
+       if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
+           (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
+               return (ret);
+       hashp = (HTAB *)(ldbp->internal);
+       hcp = (HASH_CURSOR *)cursor->internal;
+       /* Snapshot of the cursor, restored at the end if anything fails. */
+       save_curs = *hcp;
+       /*
+        * NOTE(review): this early return does not call __db_puthandle for
+        * a handle obtained above under DB_AM_THREAD -- confirm whether
+        * that is intended.
+        */
+       if ((ret =
+           __db_cgetchk(hashp->dbp, key, data, flags, IS_VALID(hcp))) != 0)
+               return (ret);
+
+       SET_LOCKER(hashp->dbp, cursor->txn);
+       GET_META(hashp->dbp, hashp);
+       hashp->hash_accesses++;
+
+       hcp->seek_size = 0;
+
+       ret = 0;
+       get_key = 1;
+       switch (flags) {
+       case DB_PREV:
+               if (hcp->bucket != BUCKET_INVALID) {
+                       ret = __ham_item_prev(hashp, hcp, DB_LOCK_READ);
+                       break;
+               }
+               /* FALL THROUGH */
+       case DB_LAST:
+               ret = __ham_item_last(hashp, hcp, DB_LOCK_READ);
+               break;
+       case DB_FIRST:
+               ret = __ham_item_first(hashp, hcp, DB_LOCK_READ);
+               break;
+       case DB_NEXT:
+               if (hcp->bucket == BUCKET_INVALID)
+                       hcp->bucket = 0;
+               ret = __ham_item_next(hashp, hcp, DB_LOCK_READ);
+               break;
+       case DB_SET:
+       case DB_SET_RANGE:
+               ret = __ham_lookup(hashp, hcp, key, 0, DB_LOCK_READ);
+               /* The caller supplied the key; don't overwrite it. */
+               get_key = 0;
+               break;
+       case DB_CURRENT:
+               if (F_ISSET(hcp, H_DELETED)) {
+                       ret = DB_KEYEMPTY;
+                       goto out;
+               }
+
+               ret = __ham_item(hashp, hcp, DB_LOCK_READ);
+               break;
+       }
+
+       /*
+        * Must always enter this loop to do error handling and
+        * check for big key/data pair.
+        */
+       while (1) {
+               if (ret != 0 && ret != DB_NOTFOUND)
+                       goto out1;
+               else if (F_ISSET(hcp, H_OK)) {
+                       /* Get the key. */
+                       if (get_key && (ret = __db_ret(hashp->dbp, hcp->pagep,
+                           H_KEYINDEX(hcp->bndx), key, &hcp->big_key,
+                           &hcp->big_keylen)) != 0)
+                               goto out1;
+
+                       ret = __ham_dup_return(hashp, hcp, data, flags);
+                       break;
+               } else if (!F_ISSET(hcp, H_NOMORE)) {
+                       /* Neither H_OK nor H_NOMORE: treated as fatal. */
+                       abort();
+                       break;
+               }
+
+               /*
+                * Ran out of entries in a bucket; change buckets.
+                */
+               switch (flags) {
+                       case DB_LAST:
+                       case DB_PREV:
+                               ret = __ham_item_done(hashp, hcp, 0);
+                               if (hcp->bucket == 0) {
+                                       ret = DB_NOTFOUND;
+                                       goto out1;
+                               }
+                               hcp->bucket--;
+                               hcp->bndx = NDX_INVALID;
+                               if (ret == 0)
+                                       ret = __ham_item_prev(hashp,
+                                           hcp, DB_LOCK_READ);
+                               break;
+                       case DB_FIRST:
+                       case DB_NEXT:
+                               ret = __ham_item_done(hashp, hcp, 0);
+                               hcp->bndx = NDX_INVALID;
+                               hcp->bucket++;
+                               hcp->pgno = PGNO_INVALID;
+                               hcp->pagep = NULL;
+                               if (hcp->bucket > hashp->hdr->max_bucket) {
+                                       ret = DB_NOTFOUND;
+                                       goto out1;
+                               }
+                               if (ret == 0)
+                                       ret = __ham_item_next(hashp,
+                                           hcp, DB_LOCK_READ);
+                               break;
+                       case DB_SET:
+                       case DB_SET_RANGE:
+                               /* Key not found. */
+                               ret = DB_NOTFOUND;
+                               goto out1;
+               }
+       }
+       /*
+        * Release what the cursor holds.  If that fails and nothing failed
+        * before, report it, so the cursor is restored and the caller sees
+        * the error.
+        */
+out1:  if ((t_ret = __ham_item_done(hashp, hcp, 0)) != 0 && ret == 0)
+               ret = t_ret;
+out:   if (ret)
+               *hcp = save_curs;
+       RELEASE_META(hashp->dbp, hashp);
+       if (F_ISSET(cursor->dbp, DB_AM_THREAD))
+               __db_puthandle(ldbp);
+       return (ret);
+}
+
+/*
+ * __ham_c_put --
+ *     Store data through a cursor.  For DB_KEYFIRST/DB_KEYLAST the cursor
+ *     is first positioned on key with __ham_lookup; for DB_BEFORE,
+ *     DB_AFTER and DB_CURRENT the cursor's current item is used.  The
+ *     data then either overwrites the current item (DB_CURRENT on a
+ *     handle without DB_AM_DUP) or is added as a duplicate.  The table
+ *     is expanded afterwards if H_EXPAND is set on the cursor.
+ *
+ *     On any failure after the checks the cursor is restored to the
+ *     state it had on entry.  Returns 0 on success, DB_NOTFOUND if the
+ *     cursor is marked H_DELETED, or a non-zero value from the checks or
+ *     helpers below.
+ */
+static int
+__ham_c_put(cursor, key, data, flags)
+       DBC *cursor;
+       DBT *key;
+       DBT *data;
+       int flags;
+{
+       DB *ldbp;
+       HTAB *hashp;
+       HASH_CURSOR *hcp, save_curs;
+       int ret, t_ret;
+       u_int32_t nbytes;
+
+       DEBUG_LWRITE(cursor->dbp, cursor->txn, "ham_c_put",
+           flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
+           NULL, flags);
+       ldbp = cursor->dbp;
+       if (F_ISSET(cursor->dbp, DB_AM_THREAD) &&
+           (ret = __db_gethandle(cursor->dbp, __ham_hdup, &ldbp)) != 0)
+               return (ret);
+       hashp = (HTAB *)(ldbp->internal);
+       hcp = (HASH_CURSOR *)cursor->internal;
+       /* Snapshot of the cursor, restored at the end if anything fails. */
+       save_curs = *hcp;
+
+       /*
+        * NOTE(review): the two early returns below do not call
+        * __db_puthandle for a handle obtained above under DB_AM_THREAD --
+        * confirm whether that is intended.
+        */
+       if ((ret = __db_cputchk(hashp->dbp, key, data, flags,
+           F_ISSET(ldbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
+               return (ret);
+       if (F_ISSET(hcp, H_DELETED))
+               return (DB_NOTFOUND);
+
+       SET_LOCKER(hashp->dbp, cursor->txn);
+       GET_META(hashp->dbp, hashp);
+       ret = 0;
+
+       switch (flags) {
+       case DB_KEYLAST:
+       case DB_KEYFIRST:
+               /*
+                * Page space the pair needs: HOFFPAGE_PSIZE for an item
+                * that ISBIG() reports as big, HKEYDATA_PSIZE(size)
+                * otherwise.
+                */
+               nbytes = (ISBIG(hashp, key->size) ? HOFFPAGE_PSIZE :
+                   HKEYDATA_PSIZE(key->size)) +
+                   (ISBIG(hashp, data->size) ? HOFFPAGE_PSIZE :
+                   HKEYDATA_PSIZE(data->size));
+               ret = __ham_lookup(hashp, hcp, key, nbytes, DB_LOCK_WRITE);
+               break;
+       case DB_BEFORE:
+       case DB_AFTER:
+       case DB_CURRENT:
+               ret = __ham_item(hashp, hcp, DB_LOCK_WRITE);
+               break;
+       }
+
+       /* Any non-zero ret from positioning skips the store entirely. */
+       if (ret == 0) {
+               if (flags == DB_CURRENT && !F_ISSET(ldbp, DB_AM_DUP))
+                       ret = __ham_overwrite(hashp, hcp, data);
+               else
+                       ret = __ham_add_dup(hashp, hcp, data, flags);
+       }
+
+       if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
+               ret = __ham_expand_table(hashp);
+               F_CLR(hcp, H_EXPAND);
+       }
+
+       /* Release what the cursor holds; keep the first error seen. */
+       if ((t_ret = __ham_item_done(hashp, hcp, ret == 0)) != 0 && ret == 0)
+               ret = t_ret;
+       if (ret != 0)
+               *hcp = save_curs;
+       RELEASE_META(hashp->dbp, hashp);
+       if (F_ISSET(cursor->dbp, DB_AM_THREAD))
+               __db_puthandle(ldbp);
+       return (ret);
+}
+
+/********************************* UTILITIES ************************/
+
+/*
+ * __ham_expand_table --
+ *     Grow the table by one bucket: mark the meta-data dirty, log the
+ *     change when logging is on, bump max_bucket, adjust the spares
+ *     array and the masks when a new doubling starts, and split the
+ *     corresponding old bucket into the new one.
+ *
+ *     Returns 0 on success, ENOSPC when the new bucket's page would
+ *     exceed MAX_PAGES, or a non-zero value from DIRTY_META,
+ *     __ham_splitmeta_log or __ham_split_page.
+ *
+ * PUBLIC: int __ham_expand_table __P((HTAB *));
+ */
+int
+__ham_expand_table(hashp)
+       HTAB *hashp;
+{
+       u_int32_t old_bucket, new_bucket;
+       u_int32_t spare_ndx;
+       int ret;
+
+       ret = 0;
+       DIRTY_META(hashp, ret);
+       if (ret)
+               return (ret);
+
+       if (DB_LOGGING(hashp->dbp)) {
+               DB_LSN new_lsn;
+
+               /* Log the pre-change values before the header is touched. */
+               if ((ret = __ham_splitmeta_log(hashp->dbp->dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+                   hashp->dbp->log_fileid,
+                   hashp->hdr->max_bucket, hashp->hdr->ovfl_point,
+                   hashp->hdr->spares[hashp->hdr->ovfl_point],
+                   &hashp->hdr->lsn)) != 0)
+                       return (ret);
+
+               hashp->hdr->lsn = new_lsn;
+       }
+
+       hashp->hash_expansions++;
+       new_bucket = ++hashp->hdr->max_bucket;
+       /* The bucket to split is the new bucket number under the low mask. */
+       old_bucket = (hashp->hdr->max_bucket & hashp->hdr->low_mask);
+
+       /*
+        * If the split point is increasing (hdr.max_bucket's log base 2
+        * increases), make sure that we have enough extra pages, then
+        * copy the current contents of the spare split bucket to the
+        * next bucket.
+        */
+       spare_ndx = __db_log2(hashp->hdr->max_bucket + 1);
+       if (spare_ndx > hashp->hdr->ovfl_point) {
+               /*
+                * We are about to shift the split point.  Make sure that
+                * if the next doubling is going to be big (more than 8
+                * pages), we have some extra pages around.
+                */
+               /*
+                * NOTE(review): any result of __ham_init_ovflpages is not
+                * examined here -- confirm it cannot fail.
+                */
+               if (hashp->hdr->spares[hashp->hdr->ovfl_point] == 0 &&
+                   new_bucket >= 8)
+                       __ham_init_ovflpages(hashp);
+
+               hashp->hdr->spares[spare_ndx] =
+                   hashp->hdr->spares[hashp->hdr->ovfl_point];
+               hashp->hdr->ovfl_point = spare_ndx;
+       }
+
+       if (new_bucket > hashp->hdr->high_mask) {
+               /* Starting a new doubling */
+               hashp->hdr->low_mask = hashp->hdr->high_mask;
+               hashp->hdr->high_mask = new_bucket | hashp->hdr->low_mask;
+       }
+
+       /*
+        * NOTE(review): max_bucket, the spares and the masks have already
+        * been updated when this check fails -- confirm the header is not
+        * left inconsistent on the ENOSPC path.
+        */
+       if (BUCKET_TO_PAGE(hashp, new_bucket) > MAX_PAGES(hashp)) {
+               __db_err(hashp->dbp->dbenv,
+                   "hash: Cannot allocate new bucket.  Pages exhausted.");
+               return (ENOSPC);
+       }
+
+       /* Relocate records to the new bucket */
+       return (__ham_split_page(hashp, old_bucket, new_bucket));
+}
+
+/*
+ * __ham_call_hash --
+ *     Hash the len bytes at k with the table's hash function and turn
+ *     the result into a bucket number: mask with high_mask, and if that
+ *     is greater than max_bucket, mask again with low_mask.
+ *
+ * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
+ */
+u_int32_t
+__ham_call_hash(hashp, k, len)
+       HTAB *hashp;
+       u_int8_t *k;
+       int32_t len;
+{
+       u_int32_t hashval, bucket;
+
+       hashval = (u_int32_t)hashp->hash(k, len);
+
+       /* Try the wide mask first. */
+       bucket = hashval & hashp->hdr->high_mask;
+       if (bucket <= hashp->hdr->max_bucket)
+               return (bucket);
+
+       /* Past the last bucket: fall back to the narrow mask. */
+       bucket &= hashp->hdr->low_mask;
+       return (bucket);
+}
+
+/*
+ * __ham_dup_return --
+ *     Check for duplicates, and call __db_ret appropriately.  Release
+ *     everything held by the cursor.
+ *
+ *     hcp must reference a pair on hcp->pagep (index hcp->bndx).  flags
+ *     is the cursor operation in progress; DB_LAST and DB_PREV select
+ *     the last duplicate of a set just entered, anything else the first.
+ *     The data item is returned through val.
+ *
+ *     Returns 0 on success or a non-zero value from __db_dend,
+ *     __ham_next_cpage or __db_ret.
+ */
+static int
+__ham_dup_return(hashp, hcp, val, flags)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       DBT *val;
+       int flags;
+{
+       HKEYDATA *hk;
+       PAGE *pp;
+       DBT *myval, tmp_val;
+       db_indx_t ndx;
+       db_pgno_t pgno;
+       u_int8_t type;
+       int indx, ret;
+       db_indx_t len;
+
+       /* Check for duplicate and return the first one. */
+       ndx = H_DATAINDEX(hcp->bndx);
+       type = GET_HKEYDATA(hcp->pagep, ndx)->type;
+       pp = hcp->pagep;
+       myval = val;
+
+       /*
+        * There are 3 cases:
+        * 1. We are not in duplicate, simply call db_ret.
+        * 2. We are looking at keys and stumbled onto a duplicate.
+        * 3. We are in the middle of a duplicate set. (ISDUP set)
+        */
+
+       /*
+        * Here we check for the case where we just stumbled onto a
+        * duplicate.  In this case, we do initialization and then
+        * let the normal duplicate code handle it.
+        */
+       if (!F_ISSET(hcp, H_ISDUP))
+               if (type == H_DUPLICATE) {
+                       /* Duplicates stored inside the data item on this page. */
+                       F_SET(hcp, H_ISDUP);
+                       hcp->dup_tlen = LEN_HDATA(hcp->pagep,
+                           hashp->hdr->pagesize, hcp->bndx);
+                       hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+                       if (flags == DB_LAST || flags == DB_PREV) {
+                               /*
+                                * Walk the length-prefixed duplicates to the
+                                * end of the set, then step back one so that
+                                * dup_off/dndx/len describe the last one.
+                                */
+                               hcp->dndx = 0;
+                               hcp->dup_off = 0;
+                               do {
+                                       memcpy(&len, hk->data + hcp->dup_off,
+                                           sizeof(db_indx_t));
+                                       hcp->dup_off += DUP_SIZE(len);
+                                       hcp->dndx++;
+                               } while (hcp->dup_off < hcp->dup_tlen);
+                               hcp->dup_off -= DUP_SIZE(len);
+                               hcp->dndx--;
+                       } else {
+                               /* First duplicate: its length is at offset 0. */
+                               memcpy(&len, hk->data, sizeof(db_indx_t));
+                               hcp->dup_off = 0;
+                               hcp->dndx = 0;
+                       }
+                       hcp->dup_len = len;
+               } else if (type == H_OFFDUP) {
+                       /* Duplicates on separate pages; the entry holds a page number. */
+                       F_SET(hcp, H_ISDUP);
+                       memcpy(&pgno,
+                           P_ENTRY(hcp->pagep, ndx) + SSZ(HOFFDUP, pgno),
+                           sizeof(db_pgno_t));
+                       if (flags == DB_LAST || flags == DB_PREV) {
+                               /*
+                                * NOTE(review): indx is assigned here but not
+                                * read anywhere in this function.
+                                */
+                               indx = (int)hcp->dndx;
+                               if ((ret = __db_dend(hashp->dbp,
+                                   pgno, &hcp->dpagep)) != 0)
+                                       return (ret);
+                               hcp->dpgno = PGNO(hcp->dpagep);
+                               hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
+                       } else if ((ret = __ham_next_cpage(hashp,
+                           hcp, pgno, 0, H_ISDUP)) != 0)
+                               return (ret);
+               }
+
+
+       /*
+        * Now, everything is initialized, grab a duplicate if
+        * necessary.
+        */
+       if (F_ISSET(hcp, H_ISDUP))
+               if (hcp->dpgno != PGNO_INVALID) {
+                       /* Off-page duplicate: read from the duplicate page. */
+                       pp = hcp->dpagep;
+                       ndx = hcp->dndx;
+               } else {
+                       /*
+                        * Copy the DBT in case we are retrieving into
+                        * user memory and we need the parameters for
+                        * it.
+                        */
+                       memcpy(&tmp_val, val, sizeof(*val));
+                       F_SET(&tmp_val, DB_DBT_PARTIAL);
+                       tmp_val.dlen = hcp->dup_len;
+                       /* Skip the length prefix in front of the duplicate. */
+                       tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
+                       myval = &tmp_val;
+               }
+
+
+       /*
+        * Finally, if we had a duplicate, pp, ndx, and myval should be
+        * set appropriately.
+        */
+       if ((ret = __db_ret(hashp->dbp, pp, ndx, myval, &hcp->big_data,
+           &hcp->big_datalen)) != 0)
+               return (ret);
+
+       /*
+        * In case we sent a temporary off to db_ret, set the real
+        * return values.
+        */
+       val->data = myval->data;
+       val->size = myval->size;
+
+       return (0);
+}
+
+/*
+ * __ham_overwrite --
+ *     Store nval over the data item the cursor currently references.
+ *
+ * If the database has DB_AM_DUP set, the value is handed to
+ * __ham_add_dup with DB_KEYLAST instead.  Otherwise the put is carried
+ * out by __ham_replpair: a caller-supplied partial put is passed through
+ * unchanged, and a full put is first rewritten as a partial put that
+ * starts at offset 0 and spans the whole length of the existing item.
+ */
+static int
+__ham_overwrite(hashp, hcp, nval)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       DBT *nval;
+{
+       DBT whole;
+       HKEYDATA *datap;
+
+       if (F_ISSET(hashp->dbp, DB_AM_DUP))
+               return (__ham_add_dup(hashp, hcp, nval, DB_KEYLAST));
+
+       /* Regular partial put: nothing to rewrite. */
+       if (F_ISSET(nval, DB_DBT_PARTIAL))
+               return (__ham_replpair(hashp, hcp, nval, 0));
+
+       /*
+        * Put/overwrite: work on a copy of the caller's DBT so that the
+        * PARTIAL flag, doff and dlen set here do not leak back to it.
+        */
+       memcpy(&whole, nval, sizeof(*nval));
+       F_SET(&whole, DB_DBT_PARTIAL);
+       whole.doff = 0;
+
+       /* dlen is the full length of the item being replaced. */
+       datap = H_PAIRDATA(hcp->pagep, hcp->bndx);
+       if (datap->type == H_OFFPAGE)
+               memcpy(&whole.dlen,
+                   (u_int8_t *)datap + SSZ(HOFFPAGE, tlen),
+                   sizeof(u_int32_t));
+       else
+               whole.dlen = LEN_HDATA(hcp->pagep,
+                   hashp->hdr->pagesize,hcp->bndx);
+
+       return (__ham_replpair(hashp, hcp, &whole, 0));
+}
+
+/*
+ * __ham_lookup --
+ *     Given a key and a cursor, set the cursor to the page/ndx on which
+ *     the key resides.
+ *
+ * If the key is found, the cursor's H_OK flag is set and the pagep, bndx,
+ * pgno (dpagep, dndx, dpgno) fields are set.  If the key is not found,
+ * H_OK is not set.  In that case, if sought is non-zero the cursor is
+ * returned as the scan left it, indicating where an add might take place;
+ * if sought is zero the cursor is handed to __ham_item_done and none of
+ * its position fields are valid.
+ */
+static int
+__ham_lookup(hashp, hcp, key, sought, mode)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       const DBT *key;
+       u_int32_t sought;
+       db_lockmode_t mode;
+{
+       HKEYDATA *kp;
+       db_pgno_t ovfl_pgno;
+       u_int32_t ovfl_len;
+       int done_ret, ret;
+
+       /*
+        * Reset the cursor and record the space being sought, so that we
+        * look for room to add an item while cycling through the pages.
+        */
+       ret = __ham_item_reset(hashp, hcp);
+       if (ret != 0)
+               return (ret);
+       hcp->seek_size = sought;
+       hcp->bucket = __ham_call_hash(hashp, (u_int8_t *)key->data, key->size);
+
+       for (;;) {
+               ret = __ham_item_next(hashp, hcp, mode);
+               if (ret != 0)
+                       return (ret);
+
+               if (F_ISSET(hcp, H_NOMORE))
+                       break;
+
+               kp = H_PAIRKEY(hcp->pagep, hcp->bndx);
+
+               /*
+                * Keys are never duplicated, only data items are, so a
+                * duplicate type in the key slot is a page format error.
+                */
+               if (kp->type == H_DUPLICATE || kp->type == H_OFFDUP)
+                       return (__db_pgfmt(hashp->dbp, PGNO(hcp->pagep)));
+
+               if (kp->type == H_KEYDATA) {
+                       /* On-page key: compare length, then bytes. */
+                       if (key->size == LEN_HKEY(hcp->pagep,
+                           hashp->hdr->pagesize, hcp->bndx) &&
+                           memcmp(key->data, kp->data, key->size) == 0) {
+                               F_SET(hcp, H_OK);
+                               return (0);
+                       }
+               } else if (kp->type == H_OFFPAGE) {
+                       /*
+                        * Off-page key: only call __db_moff when the
+                        * stored total length equals the key's size.
+                        */
+                       memcpy(&ovfl_len,
+                           (u_int8_t *)kp + SSZ(HOFFPAGE, tlen),
+                           sizeof(u_int32_t));
+                       if (ovfl_len == key->size) {
+                               memcpy(&ovfl_pgno,
+                                   (u_int8_t *)kp + SSZ(HOFFPAGE, pgno),
+                                   sizeof(db_pgno_t));
+                               if (__db_moff(hashp->dbp,
+                                   key, ovfl_pgno) == 0) {
+                                       F_SET(hcp, H_OK);
+                                       return (0);
+                               }
+                       }
+               }
+
+               /* Every examined entry that did not match is a collision. */
+               hashp->hash_collisions++;
+       }
+
+       /*
+        * Item was not found, adjust cursor properly.  When space was being
+        * sought the cursor is kept as is for the caller.
+        */
+       if (sought != 0)
+               return (ret);
+
+       done_ret = __ham_item_done(hashp, hcp, 0);
+       if (done_ret != 0 && ret == 0)
+               ret = done_ret;
+       return (ret);
+}
+
+/*
+ * __ham_init_dbt --
+ *     Initialize a dbt using some possibly already allocated storage
+ *     for items.
+ *
+ * dbt is zeroed, then pointed at the buffer *bufp with its size set to
+ * `size'.  *bufp/*sizep describe a reusable buffer and its capacity; the
+ * buffer is allocated (when *bufp is NULL) or grown (otherwise) only when
+ * its capacity is smaller than `size'.
+ *
+ * Returns 0 on success.  Returns ENOMEM if the buffer could not be
+ * allocated or grown; in that case any previous buffer has been freed,
+ * *bufp is NULL and *sizep is 0.
+ *
+ * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
+ */
+int
+__ham_init_dbt(dbt, size, bufp, sizep)
+       DBT *dbt;
+       u_int32_t size;
+       void **bufp;
+       u_int32_t *sizep;
+{
+       void *newbuf;
+
+       memset(dbt, 0, sizeof(*dbt));
+       if (*sizep < size) {
+               newbuf = *bufp == NULL ? malloc(size) : realloc(*bufp, size);
+               if (newbuf == NULL) {
+                       /*
+                        * A failed realloc leaves the old block allocated.
+                        * Release it before clearing the caller's pointer,
+                        * otherwise the old buffer would be leaked.
+                        */
+                       free(*bufp);
+                       *bufp = NULL;
+                       *sizep = 0;
+                       return (ENOMEM);
+               }
+               *bufp = newbuf;
+               *sizep = size;
+       }
+       dbt->data = *bufp;
+       dbt->size = size;
+       return (0);
+}
+
+/*
+ * Adjust the cursor after an insert or delete.  The cursor passed is
+ * the one that was operated upon; we just need to check any of the
+ * others.
+ *
+ * chg_pgno is compared against PGNO_INVALID and against the operating
+ * cursor's pgno/dpgno to decide whether a page was deleted, and against
+ * the other cursors' pgno/dpgno to decide which of them are affected.
+ * len indicates the length of the item added/deleted
+ * add indicates if the item indicated by the cursor has just been
+ * added (add == 1) or deleted (add == 0).
+ * dup indicates if the addition occurred into a duplicate set.
+ *
+ * The hashp argument is not referenced; the cursor queue that is walked
+ * is found through hcp's own DB handle (its master).
+ *
+ * PUBLIC: void __ham_c_update __P((HTAB *,
+ * PUBLIC:    HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
+ */
+void
+__ham_c_update(hashp, hcp, chg_pgno, len, add, dup)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       db_pgno_t chg_pgno;
+       u_int32_t len;
+       int add;
+       int dup;
+{
+       DBC *cp;
+       HTAB *hp;
+       HASH_CURSOR *lcp;
+       int page_deleted;
+
+       /*
+        * Regular adds are always at the end of a given page,
+        * so we never have to adjust anyone's cursor after
+        * a regular add.
+        */
+       if (!dup && add)
+               return;
+
+       /*
+        * A page is treated as deleted when chg_pgno is valid but is no
+        * longer the page (or duplicate page, for dup) that the operating
+        * cursor references.
+        */
+       page_deleted = chg_pgno != PGNO_INVALID &&
+           ((!dup && chg_pgno != hcp->pgno) ||
+           (dup && chg_pgno != hcp->dpgno));
+
+       /* The queue walked below belongs to the master handle. */
+       hp = hcp->db_cursor->dbp->master->internal;
+       DB_THREAD_LOCK(hp->dbp);
+
+       for (cp = TAILQ_FIRST(&hp->dbp->curs_queue); cp != NULL;
+           cp = TAILQ_NEXT(cp, links)) {
+               /* Skip the cursor that performed the operation. */
+               if (cp->internal == hcp)
+                       continue;
+
+               lcp = (HASH_CURSOR *)cp->internal;
+
+               /*
+                * Skip cursors that are not on the changed page.  For
+                * duplicates, which page number is compared depends on
+                * whether the operating cursor (hcp, not lcp) is flagged
+                * H_DELETED.
+                */
+               if (!dup && lcp->pgno != chg_pgno)
+                       continue;
+
+               if (dup && F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
+                       continue;
+
+               if (dup && !F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
+                       continue;
+
+               /*
+                * The page went away: move this cursor to wherever the
+                * operating cursor now points and clear its H_ISDUP flag.
+                */
+               if (page_deleted) {
+                       if (dup) {
+                               lcp->dpgno = hcp->dpgno;
+                               lcp->dndx = hcp->dndx;
+                       } else {
+                               lcp->pgno = hcp->pgno;
+                               lcp->bndx = hcp->bndx;
+                               lcp->bucket = hcp->bucket;
+                       }
+                       F_CLR(lcp, H_ISDUP);
+                       continue;
+               }
+
+               /*
+                * Same page, page still present.  The early return above
+                * means the !dup cases here are always deletes: cursors
+                * past the removed item shift down by one, a cursor on the
+                * removed item is flagged H_DELETED.
+                */
+               if (!dup && lcp->bndx > hcp->bndx)
+                       lcp->bndx--;
+               else if (!dup && lcp->bndx == hcp->bndx)
+                       F_SET(lcp, H_DELETED);
+               else if (dup && lcp->bndx == hcp->bndx) {
+                       /* Assign dpgno in case there was page conversion. */
+                       lcp->dpgno = hcp->dpgno;
+                       if (add && lcp->dndx >= hcp->dndx )
+                               lcp->dndx++;
+                       else if (!add && lcp->dndx > hcp->dndx)
+                               lcp->dndx--;
+                       else if (!add && lcp->dndx == hcp->dndx)
+                               F_SET(lcp, H_DELETED);
+
+                       /* Now adjust on-page information. */
+                       if (lcp->dpgno == PGNO_INVALID)
+                               /*
+                                * Note: the else below pairs with this
+                                * inner if (add), not with the dpgno test.
+                                * dndx has already been adjusted above when
+                                * it is compared here.
+                                */
+                               if (add) {
+                                       lcp->dup_tlen += len;
+                                       if (lcp->dndx > hcp->dndx)
+                                               lcp->dup_off += len;
+                               } else {
+                                       lcp->dup_tlen -= len;
+                                       if (lcp->dndx > hcp->dndx)
+                                               lcp->dup_off -= len;
+                               }
+               }
+       }
+       DB_THREAD_UNLOCK(hp->dbp);
+}
+
+/*
+ * __ham_hdup --
+ *     This function gets called when we create a duplicate handle for a
+ *     threaded DB.  It should create the private part of the DB structure.
+ *
+ * A fresh HTAB is allocated and attached to new->internal.  It takes the
+ * hash function from orig's HTAB and gets its own split buffer of
+ * orig->pgsize bytes and zeroed statistics.  A cursor is then created
+ * with __ham_c_init and appended to new's cursor queue.
+ *
+ * Returns 0 on success, ENOMEM if an allocation fails, or the error
+ * returned by __ham_c_init.
+ *
+ * NOTE(review): on failure after the HTAB has been allocated, it stays
+ * attached to new->internal and is not freed here; presumably the caller
+ * tears down `new' -- confirm against the caller.
+ *
+ * PUBLIC: int  __ham_hdup __P((DB *, DB *));
+ */
+int
+__ham_hdup(orig, new)
+       DB *orig, *new;
+{
+       HTAB *hashp;
+       DBC *curs;
+       int ret;
+
+       if ((hashp = (HTAB *)malloc(sizeof(HTAB))) == NULL)
+               return (ENOMEM);
+
+       new->internal = hashp;
+
+       hashp->dbp = new;
+       hashp->hlock = 0;
+       hashp->hdr = NULL;
+       hashp->hash = ((HTAB *)orig->internal)->hash;
+       if ((hashp->split_buf = (PAGE *)malloc(orig->pgsize)) == NULL)
+               return (ENOMEM);
+       hashp->local_errno = 0;
+       hashp->hash_accesses = 0;
+       hashp->hash_collisions = 0;
+       hashp->hash_expansions = 0;
+       hashp->hash_overflows = 0;
+       hashp->hash_bigpages = 0;
+
+       /*
+        * Initialize the cursor queue.  Only link the cursor in when it
+        * was actually created: if __ham_c_init fails, curs may never
+        * have been assigned and must not be inserted into the queue.
+        */
+       if ((ret = __ham_c_init(new, NULL, &curs)) != 0)
+               return (ret);
+       TAILQ_INSERT_TAIL(&new->curs_queue, curs, links);
+       return (0);
+}
diff --git a/db2/hash/hash.src b/db2/hash/hash.src
new file mode 100644 (file)
index 0000000..04a98d3
--- /dev/null
@@ -0,0 +1,211 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)hash.src    10.1 (Sleepycat) 4/12/97
+ */
+
+#include "config.h"
+
+/*
+ * This is the source file used to create the logging functions for the
+ * hash package.  Each access method (or set of routines wishing to register
+ * record types with the transaction system) should have a file like this.
+ * Each type of log record and its parameters is defined.  The basic
+ * format of a record definition is:
+ *
+ * BEGIN       <RECORD_TYPE>
+ * ARG|DBT|POINTER     <variable name> <variable type> <printf format>
+ * ...
+ * END
+ * ARG         the argument is a simple parameter of the type specified.
+ * DBT         the argument is a DBT (db.h) containing a length and pointer.
+ * POINTER     the argument is a pointer to the data type specified; the
+ *             entire type should be logged.
+ *
+ * There is a set of shell scripts of the form xxx.sh that generate C
+ * code and/or header files to process these.  (This is probably better
+ * done in a single PERL script, but for now, this works.)
+ *
+ * The DB recovery system requires the following three fields appear in
+ * every record, and will assign them to the per-record-type structures
+ * as well as making them the first parameters to the appropriate logging
+ * call.
+ * rectype:    record-type, identifies the structure and log/read call
+ * txnid:      transaction id, a DBT in this implementation
+ * prev:       the last LSN for this transaction
+ */
+
+/*
+ * Use the argument of PREFIX as the prefix for all record types,
+ * routines, id numbers, etc.
+ */
+PREFIX ham
+
+/*
+ * HASH-insdel: used for hash to insert/delete a pair of entries onto a master
+ * page. The pair might be regular key/data pairs or they might be the
+ * structures that refer to off page items, duplicates or offpage duplicates.
+ *  opcode - PUTPAIR/DELPAIR + big masks
+ *  fileid - identifies the file referenced
+ *  pgno - page within file
+ *  ndx - index on the page of the item being added (item index)
+ *  pagelsn - lsn on the page before the update
+ *  key - the key being inserted
+ *  data - the data being inserted
+ */
+BEGIN insdel
+ARG    opcode          u_int32_t       lu
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+ARG    ndx             u_int32_t       lu
+POINTER        pagelsn         DB_LSN *        lu
+DBT    key             DBT             s
+DBT    data            DBT             s
+END
+
+/*
+ * Used to add and remove overflow pages.
+ * prev_pgno is the previous page that is going to get modified to
+ *     point to this one.  If this is the first page in a chain
+ *     then prev_pgno should be PGNO_INVALID.
+ * new_pgno is the page being allocated.
+ * next_pgno is the page that follows this one.  On allocation,
+ *     this should be PGNO_INVALID.  For deletes, it may exist.
+ * pagelsn is the old lsn on the page.
+ */
+BEGIN newpage
+ARG    opcode          u_int32_t       lu
+ARG    fileid          u_int32_t       lu
+ARG    prev_pgno       db_pgno_t       lu
+POINTER        prevlsn         DB_LSN *        lu
+ARG    new_pgno        db_pgno_t       lu
+POINTER        pagelsn         DB_LSN *        lu
+ARG    next_pgno       db_pgno_t       lu
+POINTER        nextlsn         DB_LSN *        lu
+END
+
+/*
+ * Splitting requires two types of log messages.  The first
+ * logs the meta-data of the split.  The second logs the
+ * data on the original page.  To redo the split, we have
+ * to visit the new page (pages) and add the items back
+ * on the page if they are not yet there.
+ * For the meta-data split
+ *     bucket: max_bucket in table before split
+ *     ovflpoint: overflow point before split.
+ *     spares: spares[ovflpoint] before split.
+ */
+BEGIN splitmeta
+ARG    fileid          u_int32_t       lu
+ARG    bucket          u_int32_t       lu
+ARG    ovflpoint       u_int32_t       lu
+ARG    spares          u_int32_t       lu
+POINTER        metalsn         DB_LSN *        lu
+END
+
+BEGIN splitdata
+ARG    fileid          u_int32_t       lu
+ARG    opcode          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+DBT    pageimage       DBT             s
+POINTER        pagelsn         DB_LSN *        lu
+END
+
+/*
+ * HASH-replace: is used for hash to handle partial puts that only
+ * affect a single master page.
+ *  fileid - identifies the file referenced
+ *  pgno - page within file
+ *  ndx - index on the page of the item being modified (item index)
+ *  pagelsn - lsn on the page before the update
+ *  off - offset in the old item where the new item is going.
+ *  olditem - DBT that describes the part of the item being replaced.
+ *  newitem - DBT of the new item.
+ *  makedup - this was a replacement that made an item a duplicate.
+ */
+BEGIN replace
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+ARG    ndx             u_int32_t       lu
+POINTER        pagelsn         DB_LSN *        lu
+ARG    off             int32_t         ld
+DBT    olditem         DBT             s
+DBT    newitem         DBT             s
+ARG    makedup         u_int32_t       lu
+END
+
+/*
+ * HASH-newpgno: is used to record getting/deleting a new page number.
+ * This doesn't require much data modification, just modifying the
+ * meta-data.
+ * pgno is the page being allocated/freed.
+ * free_pgno is the next_pgno on the free list.
+ * old_type was the type of a page being deallocated.
+ * old_pgno was the next page number before the deallocation.  We use it
+ *     to indicate whether we incremented the spares count or not
+ *     during this allocation.
+ */
+BEGIN newpgno
+ARG    opcode          u_int32_t       lu
+ARG    fileid          u_int32_t       lu
+ARG    pgno            db_pgno_t       lu
+ARG    free_pgno       db_pgno_t       lu
+ARG    old_type        u_int32_t       lu
+ARG    old_pgno        db_pgno_t       lu
+ARG    new_type        u_int32_t       lu
+POINTER        pagelsn         DB_LSN *        lu
+POINTER metalsn                DB_LSN *        lu
+END
+
+/*
+ * ovfl: initialize a set of overflow pages.
+ */
+BEGIN ovfl
+ARG    fileid          u_int32_t       lu
+ARG    start_pgno      db_pgno_t       lu
+ARG    npages          u_int32_t       lu
+ARG    free_pgno       db_pgno_t       lu
+POINTER        metalsn         DB_LSN *        lu
+END
diff --git a/db2/hash/hash_auto.c b/db2/hash/hash_auto.c
new file mode 100644 (file)
index 0000000..f8ab80c
--- /dev/null
@@ -0,0 +1,1343 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#include "config.h"
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "hash.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * PUBLIC: int __ham_insdel_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
+ * PUBLIC:     DB_LSN *, DBT *, DBT *));
+ */
+int __ham_insdel_log(logp, txnid, ret_lsnp, flags,
+       opcode, fileid, pgno, ndx, pagelsn, key,
+       data)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t opcode;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       u_int32_t ndx;
+       DB_LSN * pagelsn;
+       DBT *key;
+       DBT *data;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_ham_insdel;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(opcode)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(ndx)
+           + sizeof(*pagelsn)
+           + sizeof(u_int32_t) + (key == NULL ? 0 : key->size)
+           + sizeof(u_int32_t) + (data == NULL ? 0 : data->size);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &opcode, sizeof(opcode));
+       bp += sizeof(opcode);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       memcpy(bp, &ndx, sizeof(ndx));
+       bp += sizeof(ndx);
+       if (pagelsn != NULL)
+               memcpy(bp, pagelsn, sizeof(*pagelsn));
+       else
+               memset(bp, 0, sizeof(*pagelsn));
+       bp += sizeof(*pagelsn);
+       if (key == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &key->size, sizeof(key->size));
+               bp += sizeof(key->size);
+               memcpy(bp, key->data, key->size);
+               bp += key->size;
+       }
+       if (data == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &data->size, sizeof(data->size));
+               bp += sizeof(data->size);
+               memcpy(bp, data->data, data->size);
+               bp += data->size;
+       }
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __ham_insdel_print --
+ *     Decode the ham_insdel record held in dbtp->data and print its
+ *     fields to stdout.  lsnp is the record's own LSN, printed in the
+ *     first line.  The remaining arguments are not used.
+ *
+ * Key and data bytes are printed as characters when printable (or a
+ * newline) and in hex otherwise.
+ *
+ * Returns 0 on success or the error returned by __ham_insdel_read.
+ *
+ * PUBLIC: int __ham_insdel_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_insdel_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __ham_insdel_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __ham_insdel_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]ham_insdel: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\topcode: %lu\n", (u_long)argp->opcode);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tndx: %lu\n", (u_long)argp->ndx);
+       printf("\tpagelsn: [%lu][%lu]\n",
+           (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+       printf("\tkey: ");
+       for (i = 0; i < argp->key.size; i++) {
+               /*
+                * Read the byte as unsigned: isprint() is undefined for
+                * negative values, and a sign-extended byte would print
+                * as 0xffffffNN instead of 0xNN.
+                */
+               c = ((u_int8_t *)argp->key.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tdata: ");
+       for (i = 0; i < argp->data.size; i++) {
+               /* Unsigned for the same reason as the key bytes above. */
+               c = ((u_int8_t *)argp->data.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\n");
+       free(argp);
+       return (0);
+}
+
+/*
+ * __ham_insdel_read --
+ *     Unpack a ham_insdel log record from recbuf into a newly allocated
+ *     __ham_insdel_args, returned through argpp.
+ *
+ * The args structure and the DB_TXN its txnid field points at come from
+ * a single malloc, so one free() of the result releases both.  The key
+ * and data DBTs are not copied: their data pointers refer into recbuf.
+ *
+ * Returns 0 on success, ENOMEM if the allocation fails.
+ *
+ * PUBLIC: int __ham_insdel_read __P((void *, __ham_insdel_args **));
+ */
+int
+__ham_insdel_read(recbuf, argpp)
+       void *recbuf;
+       __ham_insdel_args **argpp;
+{
+       __ham_insdel_args *rec;
+       u_int8_t *src;
+
+       rec = (__ham_insdel_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       /* The DB_TXN lives directly behind the args structure. */
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Common header. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid,  src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* Fixed-size fields. */
+       memcpy(&rec->opcode, src, sizeof(rec->opcode));
+       src += sizeof(rec->opcode);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->ndx, src, sizeof(rec->ndx));
+       src += sizeof(rec->ndx);
+       memcpy(&rec->pagelsn, src,  sizeof(rec->pagelsn));
+       src += sizeof(rec->pagelsn);
+
+       /* Length-prefixed items; the bytes stay in the record buffer. */
+       memcpy(&rec->key.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->key.data = src;
+       src += rec->key.size;
+
+       memcpy(&rec->data.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->data.data = src;
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __ham_newpage_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t, db_pgno_t, DB_LSN *,
+ * PUBLIC:     db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *));
+ */
+int __ham_newpage_log(logp, txnid, ret_lsnp, flags,
+       opcode, fileid, prev_pgno, prevlsn, new_pgno, pagelsn,
+       next_pgno, nextlsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t opcode;
+       u_int32_t fileid;
+       db_pgno_t prev_pgno;
+       DB_LSN * prevlsn;
+       db_pgno_t new_pgno;
+       DB_LSN * pagelsn;
+       db_pgno_t next_pgno;
+       DB_LSN * nextlsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_ham_newpage;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(opcode)
+           + sizeof(fileid)
+           + sizeof(prev_pgno)
+           + sizeof(*prevlsn)
+           + sizeof(new_pgno)
+           + sizeof(*pagelsn)
+           + sizeof(next_pgno)
+           + sizeof(*nextlsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &opcode, sizeof(opcode));
+       bp += sizeof(opcode);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &prev_pgno, sizeof(prev_pgno));
+       bp += sizeof(prev_pgno);
+       if (prevlsn != NULL)
+               memcpy(bp, prevlsn, sizeof(*prevlsn));
+       else
+               memset(bp, 0, sizeof(*prevlsn));
+       bp += sizeof(*prevlsn);
+       memcpy(bp, &new_pgno, sizeof(new_pgno));
+       bp += sizeof(new_pgno);
+       if (pagelsn != NULL)
+               memcpy(bp, pagelsn, sizeof(*pagelsn));
+       else
+               memset(bp, 0, sizeof(*pagelsn));
+       bp += sizeof(*pagelsn);
+       memcpy(bp, &next_pgno, sizeof(next_pgno));
+       bp += sizeof(next_pgno);
+       if (nextlsn != NULL)
+               memcpy(bp, nextlsn, sizeof(*nextlsn));
+       else
+               memset(bp, 0, sizeof(*nextlsn));
+       bp += sizeof(*nextlsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __ham_newpage_print --
+ *     Decode the ham_newpage record held in dbtp->data and print its
+ *     fields to stdout.  lsnp is the record's own LSN, printed in the
+ *     first line.  The remaining arguments are not used.
+ *
+ * Returns 0 on success or the error returned by __ham_newpage_read.
+ *
+ * PUBLIC: int __ham_newpage_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_newpage_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __ham_newpage_args *rec;
+       int ret;
+
+       /* Touch the unused parameters, as the generated code does. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       ret = __ham_newpage_read(dbtp->data, &rec);
+       if (ret != 0)
+               return (ret);
+
+       /* Header line: record LSN, type, transaction and previous LSN. */
+       printf("[%lu][%lu]ham_newpage: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)rec->type,
+           (u_long)rec->txnid->txnid,
+           (u_long)rec->prev_lsn.file,
+           (u_long)rec->prev_lsn.offset);
+
+       /* One line per record field. */
+       printf("\topcode: %lu\n", (u_long)rec->opcode);
+       printf("\tfileid: %lu\n", (u_long)rec->fileid);
+       printf("\tprev_pgno: %lu\n", (u_long)rec->prev_pgno);
+       printf("\tprevlsn: [%lu][%lu]\n",
+           (u_long)rec->prevlsn.file, (u_long)rec->prevlsn.offset);
+       printf("\tnew_pgno: %lu\n", (u_long)rec->new_pgno);
+       printf("\tpagelsn: [%lu][%lu]\n",
+           (u_long)rec->pagelsn.file, (u_long)rec->pagelsn.offset);
+       printf("\tnext_pgno: %lu\n", (u_long)rec->next_pgno);
+       printf("\tnextlsn: [%lu][%lu]\n",
+           (u_long)rec->nextlsn.file, (u_long)rec->nextlsn.offset);
+       printf("\n");
+
+       free(rec);
+       return (0);
+}
+
+/*
+ * __ham_newpage_read --
+ *     Unpack a ham_newpage log record from recbuf into a newly allocated
+ *     __ham_newpage_args, returned through argpp.
+ *
+ * The args structure and the DB_TXN its txnid field points at come from
+ * a single malloc, so one free() of the result releases both.
+ *
+ * Returns 0 on success, ENOMEM if the allocation fails.
+ *
+ * PUBLIC: int __ham_newpage_read __P((void *, __ham_newpage_args **));
+ */
+int
+__ham_newpage_read(recbuf, argpp)
+       void *recbuf;
+       __ham_newpage_args **argpp;
+{
+       __ham_newpage_args *rec;
+       u_int8_t *src;
+
+       rec = (__ham_newpage_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       /* The DB_TXN lives directly behind the args structure. */
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Common header. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid,  src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       memcpy(&rec->opcode, src, sizeof(rec->opcode));
+       src += sizeof(rec->opcode);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+
+       /* Three (page number, LSN) pairs: previous, new, next. */
+       memcpy(&rec->prev_pgno, src, sizeof(rec->prev_pgno));
+       src += sizeof(rec->prev_pgno);
+       memcpy(&rec->prevlsn, src,  sizeof(rec->prevlsn));
+       src += sizeof(rec->prevlsn);
+
+       memcpy(&rec->new_pgno, src, sizeof(rec->new_pgno));
+       src += sizeof(rec->new_pgno);
+       memcpy(&rec->pagelsn, src,  sizeof(rec->pagelsn));
+       src += sizeof(rec->pagelsn);
+
+       memcpy(&rec->next_pgno, src, sizeof(rec->next_pgno));
+       src += sizeof(rec->next_pgno);
+       memcpy(&rec->nextlsn, src,  sizeof(rec->nextlsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __ham_splitmeta_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t, u_int32_t, u_int32_t,
+ * PUBLIC:     DB_LSN *));
+ */
+int __ham_splitmeta_log(logp, txnid, ret_lsnp, flags,
+       fileid, bucket, ovflpoint, spares, metalsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       u_int32_t bucket;
+       u_int32_t ovflpoint;
+       u_int32_t spares;
+       DB_LSN * metalsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_ham_splitmeta;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(bucket)
+           + sizeof(ovflpoint)
+           + sizeof(spares)
+           + sizeof(*metalsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &bucket, sizeof(bucket));
+       bp += sizeof(bucket);
+       memcpy(bp, &ovflpoint, sizeof(ovflpoint));
+       bp += sizeof(ovflpoint);
+       memcpy(bp, &spares, sizeof(spares));
+       bp += sizeof(spares);
+       if (metalsn != NULL)
+               memcpy(bp, metalsn, sizeof(*metalsn));
+       else
+               memset(bp, 0, sizeof(*metalsn));
+       bp += sizeof(*metalsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __ham_splitmeta_print --
+ *     Decode the ham_splitmeta record held in dbtp->data and print it to
+ *     stdout: a header line with the record's LSN, type, transaction id
+ *     and previous LSN, then one line per field and a blank line.
+ *     Returns 0, or the error from __ham_splitmeta_read().  notused1,
+ *     notused3 and notused4 are ignored.
+ *
+ * PUBLIC: int __ham_splitmeta_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_splitmeta_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __ham_splitmeta_args *rec;
+       int ret;
+
+       /* Assign to the unused parameters so they are referenced. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       ret = __ham_splitmeta_read(dbtp->data, &rec);
+       if (ret != 0)
+               return (ret);
+
+       printf("[%lu][%lu]ham_splitmeta: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file, (u_long)lsnp->offset,
+           (u_long)rec->type, (u_long)rec->txnid->txnid,
+           (u_long)rec->prev_lsn.file, (u_long)rec->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)rec->fileid);
+       printf("\tbucket: %lu\n", (u_long)rec->bucket);
+       printf("\tovflpoint: %lu\n", (u_long)rec->ovflpoint);
+       printf("\tspares: %lu\n", (u_long)rec->spares);
+       printf("\tmetalsn: [%lu][%lu]\n",
+           (u_long)rec->metalsn.file, (u_long)rec->metalsn.offset);
+       printf("\n");
+
+       /* The decoded record and its embedded DB_TXN are one allocation. */
+       free(rec);
+       return (0);
+}
+
+/*
+ * __ham_splitmeta_read --
+ *     Unpack a flattened ham_splitmeta log record into a freshly malloc'd
+ *     __ham_splitmeta_args.  Fields are copied out of recbuf in the order
+ *     type, txnid, prev_lsn, fileid, bucket, ovflpoint, spares, metalsn.
+ *     The DB_TXN that txnid points at sits in the same allocation right
+ *     behind the argument structure (only its txnid member is filled in),
+ *     so a single free() of *argpp releases everything.  Returns 0, or
+ *     ENOMEM if the allocation fails, in which case *argpp is not
+ *     written.  The length of recbuf is not checked.
+ *
+ * PUBLIC: int __ham_splitmeta_read __P((void *, __ham_splitmeta_args **));
+ */
+int
+__ham_splitmeta_read(recbuf, argpp)
+       void *recbuf;
+       __ham_splitmeta_args **argpp;
+{
+       __ham_splitmeta_args *rec;
+       u_int8_t *src;
+
+       /* One allocation holds the arguments and the trailing DB_TXN. */
+       rec = (__ham_splitmeta_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->bucket, src, sizeof(rec->bucket));
+       src += sizeof(rec->bucket);
+       memcpy(&rec->ovflpoint, src, sizeof(rec->ovflpoint));
+       src += sizeof(rec->ovflpoint);
+       memcpy(&rec->spares, src, sizeof(rec->spares));
+       src += sizeof(rec->spares);
+       /* Last field: the read position is not needed afterwards. */
+       memcpy(&rec->metalsn, src, sizeof(rec->metalsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __ham_splitdata_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t, db_pgno_t, DBT *,
+ * PUBLIC:     DB_LSN *));
+ */
+int __ham_splitdata_log(logp, txnid, ret_lsnp, flags,
+       fileid, opcode, pgno, pageimage, pagelsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       u_int32_t opcode;
+       db_pgno_t pgno;
+       DBT *pageimage;
+       DB_LSN * pagelsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_ham_splitdata;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(opcode)
+           + sizeof(pgno)
+           + sizeof(u_int32_t) + (pageimage == NULL ? 0 : pageimage->size)
+           + sizeof(*pagelsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &opcode, sizeof(opcode));
+       bp += sizeof(opcode);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       if (pageimage == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &pageimage->size, sizeof(pageimage->size));
+               bp += sizeof(pageimage->size);
+               memcpy(bp, pageimage->data, pageimage->size);
+               bp += pageimage->size;
+       }
+       if (pagelsn != NULL)
+               memcpy(bp, pagelsn, sizeof(*pagelsn));
+       else
+               memset(bp, 0, sizeof(*pagelsn));
+       bp += sizeof(*pagelsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __ham_splitdata_print --
+ *     Decode the ham_splitdata record held in dbtp->data and print it to
+ *     stdout: a header line with the record's LSN, type, transaction id
+ *     and previous LSN, then one line per field and a blank line.  The
+ *     page image is dumped byte by byte: printable characters and
+ *     newlines as themselves, everything else as "%#x ".  Bytes are read
+ *     as u_int8_t so that values >= 0x80 are never handed to isprint()
+ *     as negative numbers (undefined where char is signed) and are not
+ *     printed sign-extended.  Returns 0, or the error from
+ *     __ham_splitdata_read().  notused1, notused3 and notused4 are
+ *     ignored.
+ *
+ * PUBLIC: int __ham_splitdata_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_splitdata_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __ham_splitdata_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __ham_splitdata_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]ham_splitdata: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\topcode: %lu\n", (u_long)argp->opcode);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tpageimage: ");
+       for (i = 0; i < argp->pageimage.size; i++) {
+               /* Unsigned read keeps c within isprint()'s valid domain. */
+               c = ((u_int8_t *)argp->pageimage.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tpagelsn: [%lu][%lu]\n",
+           (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+       printf("\n");
+       free(argp);
+       return (0);
+}
+
+/*
+ * __ham_splitdata_read --
+ *     Unpack a flattened ham_splitdata log record into a freshly malloc'd
+ *     __ham_splitdata_args.  Fields are taken from recbuf in the order
+ *     type, txnid, prev_lsn, fileid, opcode, pgno, pageimage, pagelsn.
+ *     The page image is not copied: pageimage.size is read from the
+ *     record and pageimage.data is pointed at the bytes inside recbuf, so
+ *     the result is only usable while recbuf is.  The DB_TXN that txnid
+ *     points at sits in the same allocation right behind the argument
+ *     structure (only its txnid member is filled in), so a single free()
+ *     of *argpp releases everything.  Returns 0, or ENOMEM if the
+ *     allocation fails, in which case *argpp is not written.  Neither
+ *     the length of recbuf nor the embedded image size is checked.
+ *
+ * PUBLIC: int __ham_splitdata_read __P((void *, __ham_splitdata_args **));
+ */
+int
+__ham_splitdata_read(recbuf, argpp)
+       void *recbuf;
+       __ham_splitdata_args **argpp;
+{
+       __ham_splitdata_args *rec;
+       u_int8_t *src;
+
+       /* One allocation holds the arguments and the trailing DB_TXN. */
+       rec = (__ham_splitdata_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->opcode, src, sizeof(rec->opcode));
+       src += sizeof(rec->opcode);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+
+       /* Length-prefixed blob: reference it in place, then step over it. */
+       memcpy(&rec->pageimage.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->pageimage.data = src;
+       src += rec->pageimage.size;
+
+       /* Last field: the read position is not needed afterwards. */
+       memcpy(&rec->pagelsn, src, sizeof(rec->pagelsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __ham_replace_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, u_int32_t, DB_LSN *,
+ * PUBLIC:     int32_t, DBT *, DBT *, u_int32_t));
+ */
+int __ham_replace_log(logp, txnid, ret_lsnp, flags,
+       fileid, pgno, ndx, pagelsn, off, olditem,
+       newitem, makedup)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       u_int32_t ndx;
+       DB_LSN * pagelsn;
+       int32_t off;
+       DBT *olditem;
+       DBT *newitem;
+       u_int32_t makedup;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_ham_replace;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(ndx)
+           + sizeof(*pagelsn)
+           + sizeof(off)
+           + sizeof(u_int32_t) + (olditem == NULL ? 0 : olditem->size)
+           + sizeof(u_int32_t) + (newitem == NULL ? 0 : newitem->size)
+           + sizeof(makedup);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       memcpy(bp, &ndx, sizeof(ndx));
+       bp += sizeof(ndx);
+       if (pagelsn != NULL)
+               memcpy(bp, pagelsn, sizeof(*pagelsn));
+       else
+               memset(bp, 0, sizeof(*pagelsn));
+       bp += sizeof(*pagelsn);
+       memcpy(bp, &off, sizeof(off));
+       bp += sizeof(off);
+       if (olditem == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &olditem->size, sizeof(olditem->size));
+               bp += sizeof(olditem->size);
+               memcpy(bp, olditem->data, olditem->size);
+               bp += olditem->size;
+       }
+       if (newitem == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &newitem->size, sizeof(newitem->size));
+               bp += sizeof(newitem->size);
+               memcpy(bp, newitem->data, newitem->size);
+               bp += newitem->size;
+       }
+       memcpy(bp, &makedup, sizeof(makedup));
+       bp += sizeof(makedup);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __ham_replace_print --
+ *     Decode the ham_replace record held in dbtp->data and print it to
+ *     stdout: a header line with the record's LSN, type, transaction id
+ *     and previous LSN, then one line per field and a blank line.  The
+ *     old and new items are dumped byte by byte: printable characters
+ *     and newlines as themselves, everything else as "%#x ".  Bytes are
+ *     read as u_int8_t so that values >= 0x80 are never handed to
+ *     isprint() as negative numbers (undefined where char is signed) and
+ *     are not printed sign-extended.  Returns 0, or the error from
+ *     __ham_replace_read().  notused1, notused3 and notused4 are ignored.
+ *
+ * PUBLIC: int __ham_replace_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_replace_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __ham_replace_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __ham_replace_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]ham_replace: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)argp->fileid);
+       printf("\tpgno: %lu\n", (u_long)argp->pgno);
+       printf("\tndx: %lu\n", (u_long)argp->ndx);
+       printf("\tpagelsn: [%lu][%lu]\n",
+           (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset);
+       printf("\toff: %ld\n", (long)argp->off);
+       printf("\tolditem: ");
+       for (i = 0; i < argp->olditem.size; i++) {
+               /* Unsigned read keeps c within isprint()'s valid domain. */
+               c = ((u_int8_t *)argp->olditem.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tnewitem: ");
+       for (i = 0; i < argp->newitem.size; i++) {
+               /* Unsigned read keeps c within isprint()'s valid domain. */
+               c = ((u_int8_t *)argp->newitem.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tmakedup: %lu\n", (u_long)argp->makedup);
+       printf("\n");
+       free(argp);
+       return (0);
+}
+
+/*
+ * __ham_replace_read --
+ *     Unpack a flattened ham_replace log record into a freshly malloc'd
+ *     __ham_replace_args.  Fields are taken from recbuf in the order
+ *     type, txnid, prev_lsn, fileid, pgno, ndx, pagelsn, off, olditem,
+ *     newitem, makedup.  The two items are not copied: each size is read
+ *     from the record and each data pointer is aimed at the bytes inside
+ *     recbuf, so the result is only usable while recbuf is.  The DB_TXN
+ *     that txnid points at sits in the same allocation right behind the
+ *     argument structure (only its txnid member is filled in), so a
+ *     single free() of *argpp releases everything.  Returns 0, or ENOMEM
+ *     if the allocation fails, in which case *argpp is not written.
+ *     Neither the length of recbuf nor the embedded item sizes are
+ *     checked.
+ *
+ * PUBLIC: int __ham_replace_read __P((void *, __ham_replace_args **));
+ */
+int
+__ham_replace_read(recbuf, argpp)
+       void *recbuf;
+       __ham_replace_args **argpp;
+{
+       __ham_replace_args *rec;
+       u_int8_t *src;
+
+       /* One allocation holds the arguments and the trailing DB_TXN. */
+       rec = (__ham_replace_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->ndx, src, sizeof(rec->ndx));
+       src += sizeof(rec->ndx);
+       memcpy(&rec->pagelsn, src, sizeof(rec->pagelsn));
+       src += sizeof(rec->pagelsn);
+       memcpy(&rec->off, src, sizeof(rec->off));
+       src += sizeof(rec->off);
+
+       /* Length-prefixed blobs: reference in place, then step over. */
+       memcpy(&rec->olditem.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->olditem.data = src;
+       src += rec->olditem.size;
+
+       memcpy(&rec->newitem.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->newitem.data = src;
+       src += rec->newitem.size;
+
+       /* Last field: the read position is not needed afterwards. */
+       memcpy(&rec->makedup, src, sizeof(rec->makedup));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __ham_newpgno_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, u_int32_t, db_pgno_t, db_pgno_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, u_int32_t, DB_LSN *,
+ * PUBLIC:     DB_LSN *));
+ */
+int __ham_newpgno_log(logp, txnid, ret_lsnp, flags,
+       opcode, fileid, pgno, free_pgno, old_type, old_pgno,
+       new_type, pagelsn, metalsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t opcode;
+       u_int32_t fileid;
+       db_pgno_t pgno;
+       db_pgno_t free_pgno;
+       u_int32_t old_type;
+       db_pgno_t old_pgno;
+       u_int32_t new_type;
+       DB_LSN * pagelsn;
+       DB_LSN * metalsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_ham_newpgno;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(opcode)
+           + sizeof(fileid)
+           + sizeof(pgno)
+           + sizeof(free_pgno)
+           + sizeof(old_type)
+           + sizeof(old_pgno)
+           + sizeof(new_type)
+           + sizeof(*pagelsn)
+           + sizeof(*metalsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &opcode, sizeof(opcode));
+       bp += sizeof(opcode);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &pgno, sizeof(pgno));
+       bp += sizeof(pgno);
+       memcpy(bp, &free_pgno, sizeof(free_pgno));
+       bp += sizeof(free_pgno);
+       memcpy(bp, &old_type, sizeof(old_type));
+       bp += sizeof(old_type);
+       memcpy(bp, &old_pgno, sizeof(old_pgno));
+       bp += sizeof(old_pgno);
+       memcpy(bp, &new_type, sizeof(new_type));
+       bp += sizeof(new_type);
+       if (pagelsn != NULL)
+               memcpy(bp, pagelsn, sizeof(*pagelsn));
+       else
+               memset(bp, 0, sizeof(*pagelsn));
+       bp += sizeof(*pagelsn);
+       if (metalsn != NULL)
+               memcpy(bp, metalsn, sizeof(*metalsn));
+       else
+               memset(bp, 0, sizeof(*metalsn));
+       bp += sizeof(*metalsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __ham_newpgno_print --
+ *     Decode the ham_newpgno record held in dbtp->data and print it to
+ *     stdout: a header line with the record's LSN, type, transaction id
+ *     and previous LSN, then one line per field and a blank line.
+ *     Returns 0, or the error from __ham_newpgno_read().  notused1,
+ *     notused3 and notused4 are ignored.
+ *
+ * PUBLIC: int __ham_newpgno_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_newpgno_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __ham_newpgno_args *rec;
+       int ret;
+
+       /* Assign to the unused parameters so they are referenced. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       ret = __ham_newpgno_read(dbtp->data, &rec);
+       if (ret != 0)
+               return (ret);
+
+       printf("[%lu][%lu]ham_newpgno: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file, (u_long)lsnp->offset,
+           (u_long)rec->type, (u_long)rec->txnid->txnid,
+           (u_long)rec->prev_lsn.file, (u_long)rec->prev_lsn.offset);
+       printf("\topcode: %lu\n", (u_long)rec->opcode);
+       printf("\tfileid: %lu\n", (u_long)rec->fileid);
+       printf("\tpgno: %lu\n", (u_long)rec->pgno);
+       printf("\tfree_pgno: %lu\n", (u_long)rec->free_pgno);
+       printf("\told_type: %lu\n", (u_long)rec->old_type);
+       printf("\told_pgno: %lu\n", (u_long)rec->old_pgno);
+       printf("\tnew_type: %lu\n", (u_long)rec->new_type);
+       printf("\tpagelsn: [%lu][%lu]\n",
+           (u_long)rec->pagelsn.file, (u_long)rec->pagelsn.offset);
+       printf("\tmetalsn: [%lu][%lu]\n",
+           (u_long)rec->metalsn.file, (u_long)rec->metalsn.offset);
+       printf("\n");
+
+       /* The decoded record and its embedded DB_TXN are one allocation. */
+       free(rec);
+       return (0);
+}
+
+/*
+ * __ham_newpgno_read --
+ *     Unpack a flattened ham_newpgno log record into a freshly malloc'd
+ *     __ham_newpgno_args.  Fields are copied out of recbuf in the order
+ *     type, txnid, prev_lsn, opcode, fileid, pgno, free_pgno, old_type,
+ *     old_pgno, new_type, pagelsn, metalsn.  The DB_TXN that txnid points
+ *     at sits in the same allocation right behind the argument structure
+ *     (only its txnid member is filled in), so a single free() of *argpp
+ *     releases everything.  Returns 0, or ENOMEM if the allocation fails,
+ *     in which case *argpp is not written.  The length of recbuf is not
+ *     checked.
+ *
+ * PUBLIC: int __ham_newpgno_read __P((void *, __ham_newpgno_args **));
+ */
+int
+__ham_newpgno_read(recbuf, argpp)
+       void *recbuf;
+       __ham_newpgno_args **argpp;
+{
+       __ham_newpgno_args *rec;
+       u_int8_t *src;
+
+       /* One allocation holds the arguments and the trailing DB_TXN. */
+       rec = (__ham_newpgno_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+       memcpy(&rec->opcode, src, sizeof(rec->opcode));
+       src += sizeof(rec->opcode);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->pgno, src, sizeof(rec->pgno));
+       src += sizeof(rec->pgno);
+       memcpy(&rec->free_pgno, src, sizeof(rec->free_pgno));
+       src += sizeof(rec->free_pgno);
+       memcpy(&rec->old_type, src, sizeof(rec->old_type));
+       src += sizeof(rec->old_type);
+       memcpy(&rec->old_pgno, src, sizeof(rec->old_pgno));
+       src += sizeof(rec->old_pgno);
+       memcpy(&rec->new_type, src, sizeof(rec->new_type));
+       src += sizeof(rec->new_type);
+       memcpy(&rec->pagelsn, src, sizeof(rec->pagelsn));
+       src += sizeof(rec->pagelsn);
+       /* Last field: the read position is not needed afterwards. */
+       memcpy(&rec->metalsn, src, sizeof(rec->metalsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __ham_ovfl_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t, db_pgno_t, u_int32_t, db_pgno_t,
+ * PUBLIC:     DB_LSN *));
+ */
+int __ham_ovfl_log(logp, txnid, ret_lsnp, flags,
+       fileid, start_pgno, npages, free_pgno, metalsn)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t fileid;
+       db_pgno_t start_pgno;
+       u_int32_t npages;
+       db_pgno_t free_pgno;
+       DB_LSN * metalsn;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_ham_ovfl;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(fileid)
+           + sizeof(start_pgno)
+           + sizeof(npages)
+           + sizeof(free_pgno)
+           + sizeof(*metalsn);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &fileid, sizeof(fileid));
+       bp += sizeof(fileid);
+       memcpy(bp, &start_pgno, sizeof(start_pgno));
+       bp += sizeof(start_pgno);
+       memcpy(bp, &npages, sizeof(npages));
+       bp += sizeof(npages);
+       memcpy(bp, &free_pgno, sizeof(free_pgno));
+       bp += sizeof(free_pgno);
+       if (metalsn != NULL)
+               memcpy(bp, metalsn, sizeof(*metalsn));
+       else
+               memset(bp, 0, sizeof(*metalsn));
+       bp += sizeof(*metalsn);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __ham_ovfl_print --
+ *     Decode the ham_ovfl record held in dbtp->data and print it to
+ *     stdout: a header line with the record's LSN, type, transaction id
+ *     and previous LSN, then one line per field and a blank line.
+ *     Returns 0, or the error from __ham_ovfl_read().  notused1,
+ *     notused3 and notused4 are ignored.
+ *
+ * PUBLIC: int __ham_ovfl_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__ham_ovfl_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __ham_ovfl_args *rec;
+       int ret;
+
+       /* Assign to the unused parameters so they are referenced. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       ret = __ham_ovfl_read(dbtp->data, &rec);
+       if (ret != 0)
+               return (ret);
+
+       printf("[%lu][%lu]ham_ovfl: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file, (u_long)lsnp->offset,
+           (u_long)rec->type, (u_long)rec->txnid->txnid,
+           (u_long)rec->prev_lsn.file, (u_long)rec->prev_lsn.offset);
+       printf("\tfileid: %lu\n", (u_long)rec->fileid);
+       printf("\tstart_pgno: %lu\n", (u_long)rec->start_pgno);
+       printf("\tnpages: %lu\n", (u_long)rec->npages);
+       printf("\tfree_pgno: %lu\n", (u_long)rec->free_pgno);
+       printf("\tmetalsn: [%lu][%lu]\n",
+           (u_long)rec->metalsn.file, (u_long)rec->metalsn.offset);
+       printf("\n");
+
+       /* The decoded record and its embedded DB_TXN are one allocation. */
+       free(rec);
+       return (0);
+}
+
+/*
+ * __ham_ovfl_read --
+ *     Unpack a flattened ham_ovfl log record into a freshly malloc'd
+ *     __ham_ovfl_args.  Fields are copied out of recbuf in the order
+ *     type, txnid, prev_lsn, fileid, start_pgno, npages, free_pgno,
+ *     metalsn.  The DB_TXN that txnid points at sits in the same
+ *     allocation right behind the argument structure (only its txnid
+ *     member is filled in), so a single free() of *argpp releases
+ *     everything.  Returns 0, or ENOMEM if the allocation fails, in which
+ *     case *argpp is not written.  The length of recbuf is not checked.
+ *
+ * PUBLIC: int __ham_ovfl_read __P((void *, __ham_ovfl_args **));
+ */
+int
+__ham_ovfl_read(recbuf, argpp)
+       void *recbuf;
+       __ham_ovfl_args **argpp;
+{
+       __ham_ovfl_args *rec;
+       u_int8_t *src;
+
+       /* One allocation holds the arguments and the trailing DB_TXN. */
+       rec = (__ham_ovfl_args *)malloc(sizeof(*rec) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+       memcpy(&rec->fileid, src, sizeof(rec->fileid));
+       src += sizeof(rec->fileid);
+       memcpy(&rec->start_pgno, src, sizeof(rec->start_pgno));
+       src += sizeof(rec->start_pgno);
+       memcpy(&rec->npages, src, sizeof(rec->npages));
+       src += sizeof(rec->npages);
+       memcpy(&rec->free_pgno, src, sizeof(rec->free_pgno));
+       src += sizeof(rec->free_pgno);
+       /* Last field: the read position is not needed afterwards. */
+       memcpy(&rec->metalsn, src, sizeof(rec->metalsn));
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * __ham_init_print --
+ *     Hand the print routine of each hash log record type (insdel,
+ *     newpage, splitmeta, splitdata, replace, newpgno, ovfl) to
+ *     __db_add_recovery(), keyed by the matching DB_ham_* record type.
+ *     The calls are made in that order and stop at the first one that
+ *     fails; its error is returned.  Returns 0 if every call succeeded.
+ *
+ * PUBLIC: int __ham_init_print __P((DB_ENV *));
+ */
+int
+__ham_init_print(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       /* Each step runs only while everything before it returned 0. */
+       ret = __db_add_recovery(dbenv, __ham_insdel_print, DB_ham_insdel);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_newpage_print, DB_ham_newpage);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_splitmeta_print, DB_ham_splitmeta);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_splitdata_print, DB_ham_splitdata);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_replace_print, DB_ham_replace);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_newpgno_print, DB_ham_newpgno);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_ovfl_print, DB_ham_ovfl);
+       return (ret);
+}
+
+/*
+ * __ham_init_recover --
+ *     Hand the recovery routine of each hash log record type (insdel,
+ *     newpage, splitmeta, splitdata, replace, newpgno, ovfl) to
+ *     __db_add_recovery(), keyed by the matching DB_ham_* record type.
+ *     The calls are made in that order and stop at the first one that
+ *     fails; its error is returned.  Returns 0 if every call succeeded.
+ *
+ * PUBLIC: int __ham_init_recover __P((DB_ENV *));
+ */
+int
+__ham_init_recover(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       /* Each step runs only while everything before it returned 0. */
+       ret = __db_add_recovery(dbenv, __ham_insdel_recover, DB_ham_insdel);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_newpage_recover, DB_ham_newpage);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_splitmeta_recover, DB_ham_splitmeta);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_splitdata_recover, DB_ham_splitdata);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_replace_recover, DB_ham_replace);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_newpgno_recover, DB_ham_newpgno);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __ham_ovfl_recover, DB_ham_ovfl);
+       return (ret);
+}
+
diff --git a/db2/hash/hash_conv.c b/db2/hash/hash_conv.c
new file mode 100644 (file)
index 0000000..22901af
--- /dev/null
@@ -0,0 +1,101 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_conv.c  10.3 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "hash.h"
+
+/*
+ * __ham_pgin, __ham_pgout --
+ *     Convert host-specific page layout to/from the host-independent
+ *     format stored on disk.
+ *
+ * PUBLIC: int __ham_pgin __P((db_pgno_t, void *, DBT *));
+ * PUBLIC: int __ham_pgout __P((db_pgno_t, void *, DBT *));
+ */
+int
+__ham_pgin(pg, pp, cookie)
+       db_pgno_t pg;
+       void *pp;
+       DBT *cookie;
+{
+       DB_PGINFO *info;
+       u_int32_t stored_pgno;
+
+       info = (DB_PGINFO *)cookie->data;
+
+       /* Page number as recorded on the page, in host byte order. */
+       stored_pgno = PGNO((PAGE *)pp);
+       if (info->needswap)
+               M_32_SWAP(stored_pgno);
+
+       /* The metadata page has its own layout and its own swap routine. */
+       if (pg == PGNO_METADATA)
+               return (info->needswap ? __ham_mswap(pp) : 0);
+
+       /*
+        * The page does not carry the page number it was requested
+        * under: set it up as an empty hash page instead of converting it.
+        */
+       if (pg != stored_pgno) {
+               P_INIT(pp, info->db_pagesize,
+                   pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+               return (0);
+       }
+
+       /* Ordinary page: convert only if the byte order differs. */
+       if (!info->needswap)
+               return (0);
+       return (__db_pgin(pg, pp));
+}
+
+int
+__ham_pgout(pg, pp, cookie)
+       db_pgno_t pg;
+       void *pp;
+       DBT *cookie;
+{
+       DB_PGINFO *info;
+
+       info = (DB_PGINFO *)cookie->data;
+
+       /* Byte swapping is the only conversion done on the way out. */
+       if (info->needswap) {
+               if (pg == PGNO_METADATA)
+                       return (__ham_mswap(pp));
+               return (__db_pgout(pg, pp));
+       }
+       return (0);
+}
+
+/*
+ * __ham_mswap --
+ *     Swap the bytes on the hash metadata page.
+ *
+ * PUBLIC: int __ham_mswap __P((void *));
+ */
+int
+__ham_mswap(pg)
+       void *pg;
+{
+       u_int8_t *cur;
+       int n;
+
+       cur = (u_int8_t *)pg;
+
+       /*
+        * The metadata page starts with 15 consecutive 32-bit words, in
+        * this order: lsn part 1, lsn part 2, pgno, magic, version,
+        * pagesize, ovfl_point, last_freed, max_bucket, high_mask,
+        * low_mask, ffactor, nelem, h_charkey, flags.
+        */
+       for (n = 0; n < 15; ++n) {
+               SWAP32(cur);
+       }
+
+       /* They are followed by the NCACHED spares entries. */
+       for (n = 0; n < NCACHED; ++n) {
+               SWAP32(cur);
+       }
+       return (0);
+}
diff --git a/db2/hash/hash_debug.c b/db2/hash/hash_debug.c
new file mode 100644 (file)
index 0000000..979ddd7
--- /dev/null
@@ -0,0 +1,96 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jeremy Rassen.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_debug.c 10.2 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifdef DEBUG
+/*
+ * PACKAGE:  hashing
+ *
+ * DESCRIPTION:
+ *     Debug routines.
+ *
+ * ROUTINES:
+ *
+ * External
+ *     __ham_dump_bucket
+ */
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "hash.h"
+
+/*
+ * __ham_dump_bucket --
+ *
+ * PUBLIC: #ifdef DEBUG
+ * PUBLIC: void __ham_dump_bucket __P((HTAB *, u_int32_t));
+ * PUBLIC: #endif
+ */
+void
+__ham_dump_bucket(hashp, bucket)
+       HTAB *hashp;
+       u_int32_t bucket;
+{
+       PAGE *pagep;
+       db_pgno_t cur;
+
+       /*
+        * Walk the bucket's page chain through next_pgno, printing each
+        * page.  A failed page fetch simply ends the walk.
+        */
+       cur = BUCKET_TO_PAGE(hashp, bucket);
+       while (cur != PGNO_INVALID) {
+               if (memp_fget(hashp->dbp->mpf, &cur, 0, &pagep) != 0)
+                       break;
+               (void)__db_prpage(pagep, 1);
+               /* Read the link before the page is handed back. */
+               cur = pagep->next_pgno;
+               (void)memp_fput(hashp->dbp->mpf, pagep, 0);
+       }
+}
+#endif /* DEBUG */
diff --git a/db2/hash/hash_dup.c b/db2/hash/hash_dup.c
new file mode 100644 (file)
index 0000000..059eec6
--- /dev/null
@@ -0,0 +1,544 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_dup.c   10.5 (Sleepycat) 7/27/97";
+#endif /* not lint */
+
+/*
+ * PACKAGE:  hashing
+ *
+ * DESCRIPTION:
+ *      Manipulation of duplicates for the hash package.
+ *
+ * ROUTINES:
+ *
+ * External
+ *      __ham_add_dup
+ * Internal
+ */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "hash.h"
+
+static int __ham_check_move __P((HTAB *, HASH_CURSOR *, int32_t));
+static int __ham_dup_convert __P((HTAB *, HASH_CURSOR *));
+static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *));
+
+/*
+ * Called from hash_access to add a duplicate key. nval is the new
+ * value that we want to add.  The flags correspond to the flag values
+ * to cursor_put indicating where to add the new element.
+ * There are 4 cases.
+ * Case 1: The existing duplicate set already resides on a separate page.
+ *        We can use common code for this.
+ * Case 2: The element is small enough to just be added to the existing set.
+ * Case 3: The element is large enough to be a big item, so we're going to
+ *        have to push the set onto a new page.
+ * Case 4: The element is large enough to push the duplicate set onto a
+ *        separate page.
+ *
+ * PUBLIC: int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
+ */
+int
+__ham_add_dup(hashp, hcp, nval, flags)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       DBT *nval;
+       int flags;
+{
+       DBT pval, tmp_val;
+       HKEYDATA *hk;
+       u_int32_t del_len, new_size;
+       int ret;
+
+       /*
+        * When the current element of an on-page duplicate set is being
+        * overwritten (DB_CURRENT with no off-page duplicate page), its
+        * length is credited against the space the new element needs.
+        */
+       if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID)
+               del_len = hcp->dup_len;
+       else
+               del_len = 0;
+
+       /*
+        * Make sure the pair lives on a page that can take the growth.
+        * __ham_check_move may relocate the pair, in which case it updates
+        * hcp->pagep, hcp->pgno and hcp->bndx.
+        */
+       if ((ret = __ham_check_move(hashp, hcp,
+           (int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0)
+               return (ret);
+
+       /*
+        * Check if resulting duplicate set is going to need to go
+        * onto a separate duplicate page.  If so, convert the
+        * duplicate set and add the new one.  After conversion,
+        * hcp->dndx is the first free ndx or the index of the
+        * current pointer into the duplicate set.
+        */
+       hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+       new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep,
+           hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
+
+       /*
+        * We convert to off-page duplicates if the item is a big item,
+        * the addition of the new item will make the set large, or
+        * if there isn't enough room on this page to add the next item.
+        */
+       if (hk->type != H_OFFDUP &&
+           (hk->type == H_OFFPAGE || ISBIG(hashp, new_size) ||
+           DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) {
+
+               if ((ret = __ham_dup_convert(hashp, hcp)) != 0)
+                       return (ret);
+               else
+                       /* The conversion rewrote the data item; re-read it. */
+                       hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+       }
+
+       /* There are two separate cases here: on page and off page. */
+       if (hk->type != H_OFFDUP) {
+               if (hk->type != H_DUPLICATE) {
+                       /*
+                        * First duplicate for this key: rewrite the existing
+                        * data item in duplicate format before adding to it.
+                        */
+                       hk->type = H_DUPLICATE;
+                       pval.flags = 0;
+                       pval.data = hk->data;
+                       pval.size = LEN_HDATA(hcp->pagep, hashp->hdr->pagesize,
+                           hcp->bndx);
+                       if ((ret = __ham_make_dup(&pval, &tmp_val, &hcp->big_data,
+                           &hcp->big_datalen)) != 0 ||
+                           (ret = __ham_replpair(hashp, hcp, &tmp_val, 1)) != 0)
+                               return (ret);
+               }
+
+               /* Now make the new entry a duplicate. */
+               if ((ret = __ham_make_dup(nval,
+                   &tmp_val, &hcp->big_data, &hcp->big_datalen)) != 0)
+                       return (ret);
+
+               /*
+                * doff/dlen describe the byte range of the on-page set that
+                * the new element replaces.  dlen is non-zero only for
+                * DB_CURRENT, which overwrites the existing element.
+                * NOTE(review): there is no default case; for any other
+                * flags value doff keeps the 0 that __ham_make_dup stored.
+                */
+               tmp_val.dlen = 0;
+               switch (flags) {                        /* On page. */
+               case DB_KEYFIRST:
+                       tmp_val.doff = 0;
+                       break;
+               case DB_KEYLAST:
+                       tmp_val.doff = LEN_HDATA(hcp->pagep,
+                           hashp->hdr->pagesize, hcp->bndx);
+                       break;
+               case DB_CURRENT:
+                       tmp_val.doff = hcp->dup_off;
+                       tmp_val.dlen = DUP_SIZE(hcp->dup_len);
+                       break;
+               case DB_BEFORE:
+                       tmp_val.doff = hcp->dup_off;
+                       break;
+               case DB_AFTER:
+                       tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len);
+                       break;
+               }
+               /* Add the duplicate. */
+               ret = __ham_replpair(hashp, hcp, &tmp_val, 0);
+               if (ret == 0)
+                       ret = __ham_dirty_page(hashp, hcp->pagep);
+               /*
+                * NOTE(review): the cursor update runs even when ret != 0;
+                * confirm that is intended.
+                */
+               __ham_c_update(hashp, hcp, hcp->pgno, tmp_val.size, 1, 1);
+               return (ret);
+       }
+
+       /* If we get here, then we're on duplicate pages. */
+       if (hcp->dpgno == PGNO_INVALID) {
+               /*
+                * The cursor is not on a duplicate page yet: take the page
+                * number from the HOFFDUP item.  memcpy is used rather than
+                * a direct field read.
+                */
+               memcpy(&hcp->dpgno,
+                   (u_int8_t *)hk + SSZ(HOFFDUP, pgno), sizeof(db_pgno_t));
+               hcp->dndx = 0;
+       }
+
+       switch (flags) {
+       case DB_KEYFIRST:
+               /*
+                * The only way that we are already on a dup page is
+                * if we just converted the on-page representation.
+                * In that case, we've only got one page of duplicates.
+                *
+                * NOTE(review): when dpagep is NULL this fetches through
+                * __db_dend, the same call DB_KEYLAST uses; confirm that
+                * it yields the right page for inserting at index 0.
+                */
+               if (hcp->dpagep == NULL && (ret =
+                   __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
+                       return (ret);
+               hcp->dndx = 0;
+               break;
+       case DB_KEYLAST:
+               if (hcp->dpagep == NULL && (ret =
+                   __db_dend(hashp->dbp, hcp->dpgno, &hcp->dpagep)) != 0)
+                       return (ret);
+               /* Append after the last entry of that page. */
+               hcp->dpgno = PGNO(hcp->dpagep);
+               hcp->dndx = NUM_ENT(hcp->dpagep);
+               break;
+       case DB_CURRENT:
+               /*
+                * Remove the current item; the __db_dput below then inserts
+                * the new value at the same index.
+                * NOTE(review): hcp->dpagep is used here without the NULL
+                * check the KEYFIRST/KEYLAST cases perform.
+                */
+               if ((ret = __db_ditem(hashp->dbp, hcp->dpagep, hcp->dndx,
+                   BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, hcp->dndx)->len)))
+                   != 0)
+                       return (ret);
+               break;
+       case DB_BEFORE: /* The default behavior is correct. */
+               break;
+       case DB_AFTER:
+               hcp->dndx++;
+               break;
+       }
+
+       /* Insert the new value at (hcp->dpagep, hcp->dndx). */
+       ret = __db_dput(hashp->dbp,
+           nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page);
+       hcp->pgno = PGNO(hcp->pagep);
+       /* As in the on-page case, this runs whatever ret is. */
+       __ham_c_update(hashp, hcp, hcp->pgno, nval->size, 1, 1);
+       return (ret);
+}
+
+/*
+ * Convert an on-page set of duplicates to an offpage set of duplicates.
+ */
+static int
+__ham_dup_convert(hashp, hcp)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+{
+       BOVERFLOW bo;
+       DBT dbt;
+       HOFFPAGE ho;
+       db_indx_t dndx, len;
+       db_pgno_t saved_dpgno;
+       int ret;
+       u_int8_t *p, *pend;
+
+       /*
+        * Moves the data item of the cursor's current pair (hcp->pagep,
+        * hcp->bndx) onto a freshly allocated duplicate page and replaces
+        * the on-page item with an HOFFDUP reference to that page.
+        *
+        * Returns 0 on success, with hcp->dpagep/hcp->dpgno naming the new
+        * duplicate page.  On failure the new page is deleted, hcp->dpagep
+        * is set to NULL, hcp->dpgno is restored to its value on entry and
+        * the error is returned.
+        */
+       saved_dpgno = hcp->dpgno;
+
+       /*
+        * Create a new page for the duplicates.
+        */
+       if ((ret =
+           __ham_overflow_page(hashp->dbp, P_DUPLICATE, &hcp->dpagep)) != 0)
+               return (ret);
+       hcp->dpagep->type = P_DUPLICATE;
+       hcp->dpgno = PGNO(hcp->dpagep);
+
+       /*
+        * Now put the duplicates onto the new page.
+        */
+       dbt.flags = 0;
+       switch (((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->type) {
+       case H_KEYDATA:
+               /* Simple case, one key on page; move it to dup page. */
+               dndx = 0;
+               dbt.size =
+                   LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
+               dbt.data =
+                   ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data;
+               ret = __db_pitem(hashp->dbp, hcp->dpagep,
+                   (u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt);
+               /* A failure to dirty the page must fail the conversion. */
+               if (ret == 0)
+                       ret = __ham_dirty_page(hashp, hcp->dpagep);
+               break;
+       case H_OFFPAGE:
+               /*
+                * The single data item is an off-page reference; store an
+                * equivalent BOVERFLOW reference on the dup page.
+                */
+               dndx = 0;
+               memcpy(&ho,
+                   P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE);
+               bo.deleted = 0;
+               bo.type = ho.type;
+               bo.pgno = ho.pgno;
+               bo.tlen = ho.tlen;
+               dbt.size = BOVERFLOW_SIZE;
+               dbt.data = &bo;
+
+               ret = __db_pitem(hashp->dbp, hcp->dpagep,
+                  (u_int32_t)dndx, dbt.size, &dbt, NULL);
+               if (ret == 0)
+                       ret = __ham_dirty_page(hashp, hcp->dpagep);
+               break;
+       case H_DUPLICATE:
+               /*
+                * On-page duplicate format: each element is a db_indx_t
+                * length, the data bytes, then the length again.
+                */
+               p = ((HKEYDATA *)H_PAIRDATA(hcp->pagep, hcp->bndx))->data;
+               pend = p +
+                   LEN_HDATA(hcp->pagep, hashp->hdr->pagesize, hcp->bndx);
+
+               for (dndx = 0; p < pend; dndx++) {
+                       /* memcpy is used to read the length prefix. */
+                       memcpy(&len, p, sizeof(db_indx_t));
+                       dbt.size = len;
+                       p += sizeof(db_indx_t);
+                       dbt.data = p;
+                       /* Skip the data and the trailing length copy. */
+                       p += len + sizeof(db_indx_t);
+                       ret = __db_dput(hashp->dbp, &dbt,
+                           &hcp->dpagep, &dndx, __ham_overflow_page);
+                       if (ret != 0)
+                               break;
+               }
+               break;
+       default:
+               ret = __db_pgfmt(hashp->dbp, (u_long)hcp->pgno);
+               break;
+       }
+       if (ret == 0) {
+               /*
+                * Now attach this to the source page in place of
+                * the old duplicate item.
+                */
+               __ham_move_offpage(hashp, hcp->pagep,
+                   (u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno);
+
+               /* Can probably just do a "put" here. */
+               ret = __ham_dirty_page(hashp, hcp->pagep);
+       } else {
+               /*
+                * NOTE(review): only hcp->dpagep is released here; if
+                * __db_dput chained further pages in the H_DUPLICATE case
+                * they are not visibly freed -- confirm.
+                */
+               (void)__ham_del_page(hashp->dbp, hcp->dpagep);
+               hcp->dpagep = NULL;
+               /* Don't leave the cursor naming the page just deleted. */
+               hcp->dpgno = saved_dpgno;
+       }
+       return (ret);
+}
+
+static int
+__ham_make_dup(notdup, dup, bufp, sizep)
+       const DBT *notdup;
+       DBT *dup;
+       void **bufp;
+       u_int32_t *sizep;
+{
+       db_indx_t elem_len, total;
+       int ret;
+       u_int8_t *buf;
+
+       /*
+        * Build in dup the duplicate-format image of notdup: a db_indx_t
+        * length, the data bytes, then the same length again.  The buffer
+        * is obtained through __ham_init_dbt using bufp/sizep.
+        */
+       elem_len = (db_indx_t)notdup->size;
+       total = DUP_SIZE(elem_len);
+       if ((ret = __ham_init_dbt(dup, total, bufp, sizep)) != 0)
+               return (ret);
+
+       /* Same flags as the source, plus partial-put semantics. */
+       dup->flags = notdup->flags;
+       F_SET(dup, DB_DBT_PARTIAL);
+       dup->doff = 0;
+       dup->dlen = notdup->size;
+
+       buf = dup->data;
+       memcpy(buf, &elem_len, sizeof(db_indx_t));
+       memcpy(buf + sizeof(db_indx_t), notdup->data, notdup->size);
+       memcpy(buf + sizeof(db_indx_t) + notdup->size,
+           &elem_len, sizeof(db_indx_t));
+
+       return (0);
+}
+
+static int
+__ham_check_move(hashp, hcp, add_len)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       int32_t add_len;
+{
+       DBT k, d;
+       DB_LSN new_lsn;
+       HKEYDATA *hk;
+       PAGE *next_pagep;
+       db_pgno_t next_pgno;
+       int rectype, ret;
+       u_int32_t new_datalen, old_len;
+
+       /*
+        * Check if we can do whatever we need to on this page.  If not,
+        * then we'll have to move the current element to a new page.
+        *
+        * add_len is the number of bytes by which the data item of the
+        * cursor's current pair is about to grow.  Returns 0 if the pair
+        * can stay where it is, or after it has been moved and the cursor
+        * (pagep, pgno, bndx) repointed at its new location.
+        */
+
+       hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+
+       /*
+        * If the item is already off page duplicates or an offpage item,
+        * then we know we can do whatever we need to do in-place
+        */
+       if (hk->type == H_OFFDUP || hk->type == H_OFFPAGE)
+               return (0);
+
+       old_len =
+           LEN_HITEM(hcp->pagep, hashp->hdr->pagesize, H_DATAINDEX(hcp->bndx));
+       new_datalen = old_len - HKEYDATA_SIZE(0) + add_len;
+
+       /*
+        * We need to add a new page under two conditions:
+        * 1. The addition makes the total data length cross the BIG
+        *    threshold and the OFFDUP structure won't fit on this page.
+        * 2. The addition does not make the total data cross the
+        *    threshold, but the new data won't fit on the page.
+        * If neither of these is true, then we can return.
+        */
+       if (ISBIG(hashp, new_datalen) && (old_len > HOFFDUP_SIZE ||
+           HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep)))
+               return (0);
+
+       if (!ISBIG(hashp, new_datalen) &&
+           add_len <= (int32_t)P_FREESPACE(hcp->pagep))
+               return (0);
+
+       /*
+        * If we get here, then we need to move the item to a new page.
+        * Check if there are more pages in the chain.
+        */
+
+       /* From here on new_datalen is the space the item will occupy. */
+       new_datalen = ISBIG(hashp, new_datalen) ?
+           HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen);
+
+       /*
+        * Walk the chain, holding at most one page at a time, until a page
+        * with enough free space turns up or the chain ends.  If the chain
+        * ends, next_pagep is left holding its last page.
+        */
+       next_pagep = NULL;
+       for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID;
+           next_pgno = NEXT_PGNO(next_pagep)) {
+               if (next_pagep != NULL &&
+                   (ret = __ham_put_page(hashp->dbp, next_pagep, 0)) != 0)
+                       return (ret);
+
+               if ((ret = __ham_get_page(hashp->dbp, next_pgno, &next_pagep)) != 0)
+                       return (ret);
+
+               if (P_FREESPACE(next_pagep) >= new_datalen)
+                       break;
+       }
+
+       /* No more pages, add one. */
+       if (next_pagep == NULL &&
+           (ret = __ham_add_ovflpage(hashp, hcp->pagep, 0, &next_pagep)) != 0)
+               return (ret);
+
+       /* Add new page at the end of the chain. */
+       if (P_FREESPACE(next_pagep) < new_datalen &&
+           (ret = __ham_add_ovflpage(hashp, next_pagep, 1, &next_pagep)) != 0)
+               return (ret);
+
+       /* Copy the item to the new page. */
+       if (DB_LOGGING(hashp->dbp)) {
+               /* Log the pair being inserted on the destination page. */
+               rectype = PUTPAIR;
+               k.flags = 0;
+               d.flags = 0;
+               if (H_PAIRKEY(hcp->pagep, hcp->bndx)->type == H_OFFPAGE) {
+                       rectype |= PAIR_KEYMASK;
+                       k.data = H_PAIRKEY(hcp->pagep, hcp->bndx);
+                       k.size = HOFFPAGE_SIZE;
+               } else {
+                       k.data = H_PAIRKEY(hcp->pagep, hcp->bndx)->data;
+                       k.size = LEN_HKEY(hcp->pagep,
+                           hashp->hdr->pagesize, hcp->bndx);
+               }
+
+               /*
+                * NOTE(review): the H_OFFPAGE branch below looks unreachable;
+                * the function already returned above for H_OFFPAGE items.
+                */
+               if (hk->type == H_OFFPAGE) {
+                       rectype |= PAIR_DATAMASK;
+                       d.data = H_PAIRDATA(hcp->pagep, hcp->bndx);
+                       d.size = HOFFPAGE_SIZE;
+               } else {
+                       d.data = H_PAIRDATA(hcp->pagep, hcp->bndx)->data;
+                       d.size = LEN_HDATA(hcp->pagep,
+                           hashp->hdr->pagesize, hcp->bndx);
+               }
+
+
+               /*
+                * NOTE(review): on failure this returns without putting
+                * next_pagep back -- confirm whether that leaks a page
+                * reference.
+                */
+               if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
+                   hashp->dbp->log_fileid, PGNO(next_pagep),
+                   (u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep),
+                   &k, &d)) != 0)
+                       return (ret);
+
+               /* Move lsn onto page. */
+               LSN(next_pagep) = new_lsn;      /* Structure assignment. */
+       }
+
+       /* Key first, then data. */
+       __ham_copy_item(hashp, hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep);
+       __ham_copy_item(hashp, hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep);
+
+       /* Now delete the pair from the current page. */
+       ret = __ham_del_pair(hashp, hcp);
+
+       /*
+        * Release the old page and repoint the cursor at the last pair on
+        * the destination page.  This happens even if the delete above
+        * failed; its error is what gets returned.
+        */
+       (void)__ham_put_page(hashp->dbp, hcp->pagep, 1);
+       hcp->pagep = next_pagep;
+       hcp->pgno = PGNO(hcp->pagep);
+       hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1;
+       F_SET(hcp, H_EXPAND);
+       return (ret);
+}
+
+/*
+ * Replace an onpage set of duplicates with the OFFDUP structure that
+ * references the duplicate page.
+ * XXX This is really just a special case of __onpage_replace; we should
+ * probably combine them.
+ * PUBLIC: void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
+ */
+void
+__ham_move_offpage(hashp, pagep, ndx, pgno)
+       HTAB *hashp;
+       PAGE *pagep;
+       u_int32_t ndx;
+       db_pgno_t pgno;
+{
+       DBT after, before;
+       HOFFDUP offdup;
+       db_indx_t slot;
+       int32_t delta;
+       u_int8_t *low;
+
+       /* The replacement item: an off-page duplicate reference to pgno. */
+       offdup.type = H_OFFDUP;
+       offdup.pgno = pgno;
+
+       if (DB_LOGGING(hashp->dbp)) {
+               /* Log the current item image and what replaces it. */
+               before.data = P_ENTRY(pagep, ndx);
+               before.size = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx);
+               after.data = &offdup;
+               after.size = HOFFDUP_SIZE;
+               (void)__ham_replace_log(hashp->dbp->dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &LSN(pagep), 0,
+                   hashp->dbp->log_fileid, PGNO(pagep), (u_int32_t)ndx,
+                   &LSN(pagep), -1, &before, &after, 0);
+       }
+
+       /* Size difference between the existing item and an HOFFDUP. */
+       delta = LEN_HITEM(pagep, hashp->hdr->pagesize, ndx) - HOFFDUP_SIZE;
+
+       if (delta != 0) {
+               /*
+                * Slide the bytes lying between the start of the item
+                * data area and this item by delta, then move the
+                * data-area offset to match.
+                */
+               low = (u_int8_t *)(pagep) + HOFFSET(pagep);
+               memmove(low + delta, low, pagep->inp[ndx] - HOFFSET(pagep));
+               HOFFSET(pagep) += delta;
+
+               /* This item and every later index entry shift as well. */
+               slot = ndx;
+               while (slot < NUM_ENT(pagep)) {
+                       pagep->inp[slot] += delta;
+                       slot++;
+               }
+       }
+
+       /* Finally store the off-page reference in the item's slot. */
+       memcpy(P_ENTRY(pagep, ndx), &offdup, HOFFDUP_SIZE);
+}
diff --git a/db2/hash/hash_func.c b/db2/hash/hash_func.c
new file mode 100644 (file)
index 0000000..2ef47af
--- /dev/null
@@ -0,0 +1,219 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ *     Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_func.c  10.6 (Sleepycat) 7/26/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "hash.h"
+
+/*
+ * __ham_func2 --
+ *     Phong Vo's linear congruential hash.
+ *
+ * PUBLIC: u_int32_t __ham_func2 __P((const void *, u_int32_t));
+ */
+#define        dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
+
+/*
+ * Hash the len bytes at key, starting from 0 and folding in one byte at a
+ * time with dcharhash.  Every byte counts, including NUL bytes; an empty
+ * key hashes to 0.
+ */
+u_int32_t
+__ham_func2(key, len)
+       const void *key;
+       u_int32_t len;
+{
+       const u_int8_t *e, *k;
+       u_int32_t h;
+
+       k = key;
+       e = k + len;
+       /*
+        * The old "NUL byte past the end" early exit is gone: k never
+        * moves past e in this loop, so it could not fire.
+        */
+       for (h = 0; k != e; ++k)
+               dcharhash(h, *k);
+       return (h);
+}
+
+/*
+ * __ham_func3 --
+ *     Ozan Yigit's original sdbm hash.
+ *
+ * Each byte of the key is folded in as n = byte + 65599 * n, starting from
+ * n = 0, so an empty key hashes to 0.
+ *
+ * PUBLIC: u_int32_t __ham_func3 __P((const void *, u_int32_t));
+ */
+u_int32_t
+__ham_func3(key, len)
+       const void *key;
+       u_int32_t len;
+{
+       const u_int8_t *cur, *end;
+       u_int32_t n;
+
+       cur = key;
+       end = cur + len;
+       for (n = 0; cur != end; ++cur)
+               n = *cur + 65599 * n;
+       return (n);
+}
+
+/*
+ * __ham_func4 --
+ *     Chris Torek's hash function.  Although this function performs only
+ *     slightly worse than __ham_func5 on strings, it performs horribly on
+ *     numbers.
+ *
+ * PUBLIC: u_int32_t __ham_func4 __P((const void *, u_int32_t));
+ */
+u_int32_t
+__ham_func4(key, len)
+       const void *key;
+       u_int32_t len;
+{
+       const u_int8_t *cur, *end;
+       u_int32_t h;
+
+       /*
+        * Fold each byte in as h = (h << 5) + h + byte, i.e. h * 33 + byte,
+        * starting from 0; an empty key hashes to 0.
+        */
+       cur = key;
+       end = cur + len;
+       for (h = 0; cur != end; ++cur)
+               h = (h << 5) + h + *cur;
+       return (h);
+}
+
+/*
+ * Fowler/Noll/Vo hash
+ *
+ * The basis of the hash algorithm was taken from an idea sent by email to the
+ * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and
+ * Glenn Fowler (gsf@research.att.com).  Landon Curt Noll (chongo@toad.com)
+ * later improved on their algorithm.
+ *
+ * The magic is in the interesting relationship between the special prime
+ * 16777619 (2^24 + 403) and 2^32 and 2^8.
+ *
+ * This hash produces the fewest collisions of any function that we've seen so
+ * far, and works well on both numbers and strings.
+ *
+ * PUBLIC: u_int32_t __ham_func5 __P((const void *, u_int32_t));
+ */
+u_int32_t
+__ham_func5(key, len)
+       const void *key;
+       u_int32_t len;
+{
+       const u_int8_t *cur, *end;
+       u_int32_t h;
+
+       /* Multiply by the prime 16777619, then xor in the next byte. */
+       h = 0;
+       cur = key;
+       end = cur + len;
+       while (cur < end) {
+               h = (h * 16777619) ^ *cur;
+               ++cur;
+       }
+       return (h);
+}
diff --git a/db2/hash/hash_page.c b/db2/hash/hash_page.c
new file mode 100644 (file)
index 0000000..68c31b1
--- /dev/null
@@ -0,0 +1,1775 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_page.c  10.18 (Sleepycat) 8/21/97";
+#endif /* not lint */
+
+
+/*
+ * PACKAGE:  hashing
+ *
+ * DESCRIPTION:
+ *      Page manipulation for hashing package.
+ *
+ * ROUTINES:
+ *
+ * External
+ *      __get_page
+ *      __add_ovflpage
+ *      __overflow_page
+ * Internal
+ *      open_temp
+ */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "db_swap.h"
+#include "hash.h"
+
+static int __ham_lock_bucket __P((DB *, HASH_CURSOR *, db_lockmode_t));
+
+#ifdef DEBUG_SLOW
+static void     account_page(HTAB *, db_pgno_t, int);
+#endif
+
+/*
+ * __ham_item --
+ *     Validate the cursor's current position and make sure the page (and,
+ *     for off-page duplicates, the duplicate page) it refers to is held.
+ *     If the cursor has run off the end of a page, advance it to the next
+ *     page in the chain.
+ *
+ *     hashp:   hash table the cursor belongs to.
+ *     cursorp: cursor to position; its flags and page fields are updated.
+ *     mode:    lock mode handed to __ham_get_cpage.
+ *
+ *     Returns EINVAL if the cursor is flagged H_DELETED, DB_NOTFOUND (with
+ *     H_NOMORE set) if there are no more pairs in the chain, a non-zero
+ *     value from one of the page helpers on failure, and 0 (with H_OK set)
+ *     otherwise.
+ *
+ * PUBLIC: int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item(hashp, cursorp, mode)
+       HTAB *hashp;
+       HASH_CURSOR *cursorp;
+       db_lockmode_t mode;
+{
+       db_pgno_t next_pgno;
+       int ret;
+
+       if (F_ISSET(cursorp, H_DELETED))
+               return (EINVAL);
+       F_CLR(cursorp, H_OK | H_NOMORE);
+
+       /* Check if we need to get a page for this cursor. */
+       if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
+               return (ret);
+
+       /* Check if we are looking for space in which to insert an item. */
+       if (cursorp->seek_size && cursorp->seek_found_page == PGNO_INVALID
+           && cursorp->seek_size < P_FREESPACE(cursorp->pagep))
+               cursorp->seek_found_page = cursorp->pgno;
+
+       /* Check if we need to go on to the next page. */
+       if (F_ISSET(cursorp, H_ISDUP) && cursorp->dpgno == PGNO_INVALID) {
+               /*
+                * ISDUP is set, and offset is at the beginning of the datum.
+                * We need to grab the length of the datum, then set the datum
+                * pointer to be the beginning of the datum.
+                */
+               memcpy(&cursorp->dup_len,
+                   H_PAIRDATA(cursorp->pagep, cursorp->bndx)->data +
+                   cursorp->dup_off, sizeof(db_indx_t));
+       } else if (F_ISSET(cursorp, H_ISDUP)) {
+               /* Make sure we're not about to run off the page. */
+               if (cursorp->dpagep == NULL && (ret = __ham_get_page(hashp->dbp,
+                   cursorp->dpgno, &cursorp->dpagep)) != 0)
+                       return (ret);
+
+               if (cursorp->dndx >= NUM_ENT(cursorp->dpagep)) {
+                       if (NEXT_PGNO(cursorp->dpagep) == PGNO_INVALID) {
+                               /* End of the duplicate set: leave it. */
+                               if ((ret = __ham_put_page(hashp->dbp,
+                                   cursorp->dpagep, 0)) != 0)
+                                       return (ret);
+                               F_CLR(cursorp, H_ISDUP);
+                               cursorp->dpagep = NULL;
+                               cursorp->dpgno = PGNO_INVALID;
+                               cursorp->dndx = NDX_INVALID;
+                               cursorp->bndx++;
+                       } else if ((ret = __ham_next_cpage(hashp, cursorp,
+                           NEXT_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
+                               return (ret);
+               }
+       }
+
+       if (cursorp->bndx >= (db_indx_t)H_NUMPAIRS(cursorp->pagep)) {
+               /* Fetch next page. */
+               if (NEXT_PGNO(cursorp->pagep) == PGNO_INVALID) {
+                       F_SET(cursorp, H_NOMORE);
+                       if (cursorp->dpagep != NULL) {
+                               if ((ret = __ham_put_page(hashp->dbp,
+                                   cursorp->dpagep, 0)) != 0)
+                                       return (ret);
+                               /*
+                                * The page has been handed back; forget the
+                                * pointer so that a later __ham_item_done
+                                * does not put the same page a second time.
+                                */
+                               cursorp->dpagep = NULL;
+                       }
+                       cursorp->dpgno = PGNO_INVALID;
+                       return (DB_NOTFOUND);
+               }
+               next_pgno = NEXT_PGNO(cursorp->pagep);
+               cursorp->bndx = 0;
+               if ((ret = __ham_next_cpage(hashp,
+                   cursorp, next_pgno, 0, 0)) != 0)
+                       return (ret);
+       }
+
+       F_SET(cursorp, H_OK);
+       return (0);
+}
+
+/*
+ * __ham_item_reset --
+ *     Hand back the cursor's current page, if it holds one, and then
+ *     return every cursor field to its initial state.  The result is
+ *     whatever __ham_put_page reported, or 0 if no page was held.
+ *
+ * NOTE(review): a held duplicate page (dpagep) is not put here before
+ * __ham_item_init clears the pointer -- confirm callers release it first.
+ *
+ * PUBLIC: int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
+ */
+int
+__ham_item_reset(hashp, cursorp)
+       HTAB *hashp;
+       HASH_CURSOR *cursorp;
+{
+       int status;
+
+       status = 0;
+       if (cursorp->pagep != NULL)
+               status = __ham_put_page(hashp->dbp, cursorp->pagep, 0);
+
+       __ham_item_init(cursorp);
+       return (status);
+}
+
+/*
+ * __ham_item_init --
+ *     Put a cursor into its "not positioned" state: no pages held, no
+ *     lock, no flags, all indices and page numbers invalid, and no
+ *     pending search for insert space.  Nothing is released here; the
+ *     fields are simply overwritten.
+ *
+ * PUBLIC: void __ham_item_init __P((HASH_CURSOR *));
+ */
+void
+__ham_item_init(cursorp)
+       HASH_CURSOR *cursorp;
+{
+       /* Bucket position. */
+       cursorp->bucket = BUCKET_INVALID;
+       cursorp->pgno = PGNO_INVALID;
+       cursorp->bndx = NDX_INVALID;
+       cursorp->pagep = NULL;
+
+       /* Off-page duplicate position. */
+       cursorp->dpgno = PGNO_INVALID;
+       cursorp->dndx = NDX_INVALID;
+       cursorp->dpagep = NULL;
+
+       /* Insert-space search state. */
+       cursorp->seek_size = 0;
+       cursorp->seek_found_page = PGNO_INVALID;
+
+       /* Lock and status flags. */
+       cursorp->lock = 0;
+       cursorp->flags = 0;
+}
+
+/*
+ * __ham_item_done --
+ *     Release whatever the cursor is holding: its bucket page, its
+ *     duplicate page and, outside of a transaction, its lock.  The page
+ *     and duplicate-page pointers and the lock are cleared regardless of
+ *     whether the release calls succeed.
+ *
+ *     dirty: non-zero if the caller modified data.  It is passed on for
+ *            the duplicate page when one is held, otherwise for the
+ *            bucket page.
+ *
+ *     Returns the first non-zero result of the release calls, or 0.
+ *
+ * PUBLIC: int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
+ */
+int
+__ham_item_done(hashp, cursorp, dirty)
+       HTAB *hashp;
+       HASH_CURSOR *cursorp;
+       int dirty;
+{
+       int ret, t_ret;
+
+       ret = 0;
+
+       if (cursorp->pagep != NULL)
+               ret = __ham_put_page(hashp->dbp, cursorp->pagep,
+                   dirty && cursorp->dpagep == NULL);
+       cursorp->pagep = NULL;
+
+       if (cursorp->dpagep != NULL) {
+               t_ret = __ham_put_page(hashp->dbp, cursorp->dpagep, dirty);
+               if (ret == 0)
+                       ret = t_ret;
+       }
+       cursorp->dpagep = NULL;
+
+       /*
+        * If we are running with transactions, then we must
+        * not relinquish locks explicitly.
+        */
+       if (cursorp->lock != 0 && hashp->dbp->txn == NULL) {
+               t_ret = lock_put(hashp->dbp->dbenv->lk_info, cursorp->lock);
+               if (ret == 0)
+                       ret = t_ret;
+       }
+       cursorp->lock = 0;
+
+       /*
+        * We don't throw out the page number since we might want to
+        * continue getting on this page.
+        */
+       return (ret);
+}
+
+/*
+ * Returns the last item in a bucket.
+ *
+ * The cursor is reset, pointed at the table's highest bucket
+ * (hdr->max_bucket) and then stepped backwards with __ham_item_prev, whose
+ * result is returned.  A failure from the reset is returned directly.
+ *
+ * PUBLIC: int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item_last(hashp, cursorp, mode)
+       HTAB *hashp;
+       HASH_CURSOR *cursorp;
+       db_lockmode_t mode;
+{
+       int ret;
+
+       ret = __ham_item_reset(hashp, cursorp);
+       if (ret != 0)
+               return (ret);
+
+       F_SET(cursorp, H_OK);
+       cursorp->bucket = hashp->hdr->max_bucket;
+       return (__ham_item_prev(hashp, cursorp, mode));
+}
+/*
+ * __ham_item_first --
+ *     Reset the cursor, point it at bucket 0 and step forward with
+ *     __ham_item_next, whose result is returned.  A failure from the
+ *     reset is returned directly.
+ *
+ * PUBLIC: int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item_first(hashp, cursorp, mode)
+       HTAB *hashp;
+       HASH_CURSOR *cursorp;
+       db_lockmode_t mode;
+{
+       int ret;
+
+       ret = __ham_item_reset(hashp, cursorp);
+       if (ret != 0)
+               return (ret);
+
+       cursorp->bucket = 0;
+       F_SET(cursorp, H_OK);
+       return (__ham_item_next(hashp, cursorp, mode));
+}
+
+/*
+ * Returns a pointer to key/data pair on a page.  In the case of bigkeys,
+ * just returns the page number and index of the bigkey pointer pair.
+ *
+ * Concretely, the cursor is moved back by one position and the final
+ * positioning is delegated to __ham_item.  DB_NOTFOUND is returned, with
+ * H_NOMORE set on the cursor, when the start of the bucket chain is reached
+ * or the bucket turns out to be empty; non-zero results from the page
+ * helpers are returned unchanged.
+ *
+ * PUBLIC: int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item_prev(hashp, cursorp, mode)
+       HTAB *hashp;
+       HASH_CURSOR *cursorp;
+       db_lockmode_t mode;
+{
+       db_pgno_t next_pgno;
+       int ret;
+
+       /*
+        * There are five cases for backing up in a hash file.
+        * Case 1: In the middle of a page, no duplicates, just dec the index.
+        * Case 2: In the middle of a duplicate set, back up one.
+        * Case 3: At the beginning of a duplicate set, get out of set and
+        *      back up to next key.
+        * Case 4: At the beginning of a page; go to previous page.
+        * Case 5: At the beginning of a bucket; go to prev bucket.
+        */
+       F_CLR(cursorp, H_OK | H_NOMORE | H_DELETED);
+
+       /*
+        * First handle the duplicates.  Either you'll get the key here
+        * or you'll exit the duplicate set and drop into the code below
+        * to handle backing up through keys.
+        *
+        * Note that the "else" after the inner __ham_get_cpage call below
+        * pairs with that inner "if", not with the dup_off test: when
+        * dup_off is 0 nothing happens here and control falls through.
+        *
+        * NOTE(review): after moving to the previous off-page duplicate
+        * page, dndx is computed from NUM_ENT(cursorp->pagep) although the
+        * page being walked is the duplicate page -- confirm whether
+        * cursorp->dpagep was intended.
+        */
+       if (F_ISSET(cursorp, H_ISDUP)) {
+               if (cursorp->dpgno == PGNO_INVALID) {
+                       /* Duplicates are on-page. */
+                       if (cursorp->dup_off != 0)
+                               if ((ret = __ham_get_cpage(hashp,
+                                   cursorp, mode)) != 0)
+                                       return (ret);
+                               else {
+                                       HASH_CURSOR *h;
+                                       h = cursorp;
+                                       memcpy(&h->dup_len,
+                                           H_PAIRDATA(h->pagep, h->bndx)->data
+                                           + h->dup_off - sizeof(db_indx_t),
+                                           sizeof(db_indx_t));
+                                       cursorp->dup_off -=
+                                           DUP_SIZE(cursorp->dup_len);
+                                       cursorp->dndx--;
+                                       return (__ham_item(hashp,
+                                           cursorp, mode));
+                               }
+               } else if (cursorp->dndx > 0) { /* Duplicates are off-page. */
+                       cursorp->dndx--;
+                       return (__ham_item(hashp, cursorp, mode));
+               } else if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
+                       return (ret);
+               else if (PREV_PGNO(cursorp->dpagep) == PGNO_INVALID) {
+                       F_CLR(cursorp, H_ISDUP); /* End of dups */
+                       cursorp->dpgno = PGNO_INVALID;
+                       if (cursorp->dpagep != NULL)
+                               (void)__ham_put_page(hashp->dbp,
+                                   cursorp->dpagep, 0);
+                       cursorp->dpagep = NULL;
+               } else if ((ret = __ham_next_cpage(hashp, cursorp,
+                   PREV_PGNO(cursorp->dpagep), 0, H_ISDUP)) != 0)
+                       return (ret);
+               else {
+                       cursorp->dndx = NUM_ENT(cursorp->pagep) - 1;
+                       return (__ham_item(hashp, cursorp, mode));
+               }
+       }
+
+       /*
+        * If we get here, we are not in a duplicate set, and just need
+        * to back up the cursor.  There are still three cases:
+        * midpage, beginning of page, beginning of bucket.
+        */
+
+       if (cursorp->bndx == 0) {               /* Beginning of page. */
+               if ((ret = __ham_get_cpage(hashp, cursorp, mode)) != 0)
+                       return (ret);
+               cursorp->pgno = PREV_PGNO(cursorp->pagep);
+               if (cursorp->pgno == PGNO_INVALID) {
+                       /* Beginning of bucket. */
+                       F_SET(cursorp, H_NOMORE);
+                       return (DB_NOTFOUND);
+               } else if ((ret = __ham_next_cpage(hashp,
+                   cursorp, cursorp->pgno, 0, 0)) != 0)
+                       return (ret);
+               else
+                       cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
+       }
+
+       /*
+        * Either we've got the cursor set up to be decremented, or we
+        * have to find the end of a bucket.
+        *
+        * In the latter case (bndx is NDX_INVALID) the chain is walked
+        * forward until a page with no successor is found.  If the cursor
+        * already holds a page, the walk starts from that page by jumping
+        * to got_page inside the loop; otherwise it starts from the
+        * bucket's first page.
+        */
+       if (cursorp->bndx == NDX_INVALID) {
+               if (cursorp->pagep == NULL)
+                       next_pgno = BUCKET_TO_PAGE(hashp, cursorp->bucket);
+               else
+                       goto got_page;
+
+               do {
+                       if ((ret = __ham_next_cpage(hashp,
+                           cursorp, next_pgno, 0, 0)) != 0)
+                               return (ret);
+got_page:              next_pgno = NEXT_PGNO(cursorp->pagep);
+                       cursorp->bndx = H_NUMPAIRS(cursorp->pagep);
+               } while (next_pgno != PGNO_INVALID);
+
+               if (cursorp->bndx == 0) {
+                       /* Bucket was empty. */
+                       F_SET(cursorp, H_NOMORE);
+                       return (DB_NOTFOUND);
+               }
+       }
+
+       cursorp->bndx--;
+
+       return (__ham_item(hashp, cursorp, mode));
+}
+
+/*
+ * Sets the cursor to the next key/data pair on a page.
+ *
+ * Only the cursor's indices and flags are advanced here; fetching pages
+ * and crossing page boundaries is left to __ham_item, whose result is
+ * returned.
+ *
+ * PUBLIC: int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_item_next(hashp, cursorp, mode)
+       HTAB *hashp;
+       HASH_CURSOR *cursorp;
+       db_lockmode_t mode;
+{
+       if (F_ISSET(cursorp, H_DELETED)) {
+               /*
+                * Deleted on-page duplicates are a weird case. If we delete
+                * the last one, then our cursor is at the very end of a
+                * duplicate set and we actually need to go on to the next
+                * key.  In every other deleted case the cursor is left
+                * where it is.
+                */
+               if (cursorp->bndx != NDX_INVALID &&
+                   F_ISSET(cursorp, H_ISDUP) &&
+                   cursorp->dpgno == PGNO_INVALID &&
+                   cursorp->dup_tlen == cursorp->dup_off) {
+                       F_CLR(cursorp, H_ISDUP);
+                       cursorp->dpgno = PGNO_INVALID;
+                       cursorp->bndx++;
+               }
+               F_CLR(cursorp, H_DELETED);
+       } else if (cursorp->bndx == NDX_INVALID) {
+               /* No index yet: start at the first pair. */
+               F_CLR(cursorp, H_ISDUP);
+               cursorp->dpgno = PGNO_INVALID;
+               cursorp->bndx = 0;
+       } else if (!F_ISSET(cursorp, H_ISDUP)) {
+               /* Plain pair: step to the following one. */
+               cursorp->bndx++;
+       } else {
+               /* Inside a duplicate set, on-page or off-page. */
+               cursorp->dndx++;
+               if (cursorp->dpgno == PGNO_INVALID) {
+                       /* On-page set: skip over the current duplicate. */
+                       cursorp->dup_off += DUP_SIZE(cursorp->dup_len);
+                       if (cursorp->dup_off >= cursorp->dup_tlen) {
+                               /* Ran off the set; on to the next key. */
+                               F_CLR(cursorp, H_ISDUP);
+                               cursorp->dpgno = PGNO_INVALID;
+                               cursorp->bndx++;
+                       }
+               }
+       }
+
+       return (__ham_item(hashp, cursorp, mode));
+}
+
+/*
+ * __ham_putitem --
+ *     Append one item to page p: space is taken from just below the
+ *     page's current HOFFSET, the new offset is stored in the next free
+ *     inp[] slot, the item is written there and the entry count is
+ *     incremented.  No check for available space is made here.
+ *
+ * PUBLIC: void __ham_putitem __P((PAGE *p, const DBT *, int));
+ *
+ * This is a little bit sleazy in that we're overloading the meaning
+ * of the H_OFFPAGE type here.  When we recover deletes, we have the
+ * entire entry instead of having only the DBT, so we'll pass type
+ * H_OFFPAGE to mean, "copy the whole entry" as opposed to constructing
+ * an H_KEYDATA around it.
+ */
+void
+__ham_putitem(p, dbt, type)
+       PAGE *p;
+       const DBT *dbt;
+       int type;
+{
+       u_int16_t slot, newoff;
+
+       slot = NUM_ENT(p);
+
+       /* Work out where the item starts, then claim that space. */
+       if (type == H_OFFPAGE)
+               newoff = HOFFSET(p) - dbt->size;
+       else
+               newoff = HOFFSET(p) - HKEYDATA_SIZE(dbt->size);
+       p->inp[slot] = newoff;
+       HOFFSET(p) = p->inp[slot];
+
+       /* Put the item element on the page. */
+       if (type == H_OFFPAGE)
+               memcpy(P_ENTRY(p, slot), dbt->data, dbt->size);
+       else
+               PUT_HKEYDATA(GET_HKEYDATA(p, slot),
+                   dbt->data, dbt->size, type);
+
+       /* Adjust page info. */
+       NUM_ENT(p) += 1;
+}
+
+
+/*
+ * __ham_del_pair --
+ *     Delete the key/data pair at the cursor's current index (bndx) from
+ *     the cursor's page, fetching that page first if the cursor does not
+ *     already hold it.  Off-page keys, off-page data and off-page duplicate
+ *     sets are released through __db_doff/__db_ddup before the on-page
+ *     entry is removed.  If the page becomes empty it is unlinked from the
+ *     bucket chain (or, for the first page of a chain, overwritten with the
+ *     contents of its successor).
+ *
+ *     Returns 0 on success, otherwise a non-zero value from one of the
+ *     helpers called here.
+ *
+ * NOTE(review): two error paths below return while still holding pages
+ * obtained with __ham_get_page: n_pagep when __ham_dirty_page fails in the
+ * first-page case, and p_pagep/n_pagep when __ham_newpage_log fails in the
+ * overflow-page case (after the chain links have already been rewritten).
+ * Confirm whether these should be put back before returning.
+ *
+ * PUBLIC: int __ham_del_pair __P((HTAB *, HASH_CURSOR *));
+ * XXX TODO: if the item is an offdup, delete the other pages and
+ * then remove the pair. If the offpage page is 0, then you can
+ * just remove the pair.
+ */
+int
+__ham_del_pair(hashp, cursorp)
+       HTAB *hashp;
+       HASH_CURSOR *cursorp;
+{
+       DBT data_dbt, key_dbt;
+       DB_ENV *dbenv;
+       DB_LSN new_lsn, *n_lsn;
+       PAGE *p;
+       db_indx_t ndx;
+       db_pgno_t chg_pgno, pgno;
+       int ret, tret;
+
+       dbenv = hashp->dbp->dbenv;
+       ndx = cursorp->bndx;
+       if (cursorp->pagep == NULL && (ret =
+           __ham_get_page(hashp->dbp, cursorp->pgno, &cursorp->pagep)) != 0)
+               return (ret);
+
+       p = cursorp->pagep;
+
+       /*
+        * We optimize for the normal case which is when neither the key nor
+        * the data are large.  In this case, we write a single log record
+        * and do the delete.  If either is large, we'll call __big_delete
+        * to remove the big item and then update the page to remove the
+        * entry referring to the big item.
+        *
+        * The page number of an off-page item is copied out with memcpy
+        * rather than read through a structure pointer.
+        */
+       ret = 0;
+       if (H_PAIRKEY(p, ndx)->type == H_OFFPAGE) {
+               memcpy(&pgno, (u_int8_t *)GET_HOFFPAGE(p, H_KEYINDEX(ndx)) +
+                   SSZ(HOFFPAGE, pgno), sizeof(db_pgno_t));
+               ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
+       }
+
+       if (ret == 0)
+               switch (H_PAIRDATA(p, ndx)->type) {
+               case H_OFFPAGE:
+                       memcpy(&pgno,
+                           (u_int8_t *)GET_HOFFPAGE(p, H_DATAINDEX(ndx)) +
+                           SSZ(HOFFPAGE, pgno), sizeof(db_pgno_t));
+                       ret = __db_doff(hashp->dbp, pgno, __ham_del_page);
+                       break;
+               case H_OFFDUP:
+                       memcpy(&pgno,
+                           (u_int8_t *)GET_HOFFDUP(p, H_DATAINDEX(ndx)) +
+                           SSZ(HOFFDUP, pgno), sizeof(db_pgno_t));
+                       ret = __db_ddup(hashp->dbp, pgno, __ham_del_page);
+                       break;
+               }
+
+       if (ret)
+               return (ret);
+
+       /* Now log the delete off this page. */
+       if (DB_LOGGING(hashp->dbp)) {
+               key_dbt.data = P_ENTRY(p, H_KEYINDEX(ndx));
+               key_dbt.size =
+                   LEN_HITEM(p, hashp->hdr->pagesize, H_KEYINDEX(ndx));
+               data_dbt.data = P_ENTRY(p, H_DATAINDEX(ndx));
+               data_dbt.size =
+                   LEN_HITEM(p, hashp->hdr->pagesize, H_DATAINDEX(ndx));
+
+               if ((ret = __ham_insdel_log(dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPAIR,
+                   hashp->dbp->log_fileid, PGNO(p), (u_int32_t)ndx,
+                   &LSN(p), &key_dbt, &data_dbt)) != 0)
+                       return (ret);
+
+               /* Move lsn onto page. */
+               LSN(p) = new_lsn;
+       }
+
+       __ham_dpair(hashp->dbp, p, ndx);
+
+       /*
+        * If we are locking, we will not maintain this.
+        * XXXX perhaps we can retain incremental numbers and apply them
+        * later.
+        */
+       if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
+               --hashp->hdr->nelem;
+
+       /*
+        * Check if the page is empty.  There are two cases.  If it's
+        * empty and it's not the first chain in the bucket (i.e., the
+        * bucket page) then we can simply remove it. If it is the first
+        * chain in the bucket, then we need to copy the second page into
+        * it and remove the second page.
+        *
+        * A page that is not empty, or that is the only page of its
+        * chain, takes the final branch and stays where it is.
+        */
+       if (NUM_ENT(p) == 0 && PREV_PGNO(p) == PGNO_INVALID &&
+           NEXT_PGNO(p) != PGNO_INVALID) {
+               PAGE *n_pagep, *nn_pagep;
+               db_pgno_t tmp_pgno;
+
+               /*
+                * First page in chain is empty and we know that there
+                * are more pages in the chain.
+                * XXX Need to log this.
+                *
+                * The page after the successor (if any) is re-pointed at p,
+                * then the successor's contents are copied over p, with p's
+                * own page number and an invalid prev link restored.
+                */
+               if ((ret =
+                   __ham_get_page(hashp->dbp, NEXT_PGNO(p), &n_pagep)) != 0)
+                       return (ret);
+
+               if (NEXT_PGNO(n_pagep) != PGNO_INVALID) {
+                       if ((ret =
+                           __ham_get_page(hashp->dbp, NEXT_PGNO(n_pagep),
+                           &nn_pagep)) != 0) {
+                               (void) __ham_put_page(hashp->dbp, n_pagep, 0);
+                               return (ret);
+                       }
+                       PREV_PGNO(nn_pagep) = PGNO(p);
+                       (void)__ham_put_page(hashp->dbp, nn_pagep, 1);
+               }
+
+               tmp_pgno = PGNO(p);
+               memcpy(p, n_pagep, hashp->hdr->pagesize);
+               PGNO(p) = tmp_pgno;
+               PREV_PGNO(p) = PGNO_INVALID;
+
+               /*
+                * Cursor is advanced to the beginning of the next page.
+                */
+               cursorp->bndx = NDX_INVALID;
+               cursorp->pgno = PGNO(p);
+               chg_pgno = PGNO(p);
+               if ((ret = __ham_dirty_page(hashp, p)) != 0 ||
+                   (ret = __ham_del_page(hashp->dbp, n_pagep)) != 0)
+                       return (ret);
+       } else if (NUM_ENT(p) == 0 && PREV_PGNO(p) != PGNO_INVALID) {
+               PAGE *n_pagep, *p_pagep;
+
+               if ((ret =
+                   __ham_get_page(hashp->dbp, PREV_PGNO(p), &p_pagep)) != 0)
+                       return (ret);
+
+               if (NEXT_PGNO(p) != PGNO_INVALID) {
+                       if ((ret = __ham_get_page(hashp->dbp,
+                           NEXT_PGNO(p), &n_pagep)) != 0) {
+                               (void)__ham_put_page(hashp->dbp, p_pagep, 0);
+                               return (ret);
+                       }
+                       n_lsn = &LSN(n_pagep);
+               } else {
+                       n_pagep = NULL;
+                       n_lsn = NULL;
+               }
+
+               NEXT_PGNO(p_pagep) = NEXT_PGNO(p);
+               if (n_pagep != NULL)
+                       PREV_PGNO(n_pagep) = PGNO(p_pagep);
+
+               if (DB_LOGGING(hashp->dbp)) {
+                       if ((ret = __ham_newpage_log(dbenv->lg_info,
+                           (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELOVFL,
+                           hashp->dbp->log_fileid, PREV_PGNO(p), &LSN(p_pagep),
+                           PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0)
+                               return (ret);
+
+                       /* Move lsn onto page. */
+                       LSN(p_pagep) = new_lsn; /* Structure assignment. */
+                       if (n_pagep)
+                               LSN(n_pagep) = new_lsn;
+                       LSN(p) = new_lsn;
+               }
+               cursorp->pgno = NEXT_PGNO(p);
+               cursorp->bndx = 0;
+               /*
+                * Since we are about to delete the cursor page and we have
+                * just moved the cursor, we need to make sure that the
+                * old page pointer isn't left hanging around in the cursor.
+                */
+               cursorp->pagep = NULL;
+               chg_pgno = PGNO(p);
+               ret = __ham_del_page(hashp->dbp, p);
+               if ((tret = __ham_put_page(hashp->dbp, p_pagep, 1)) != 0 &&
+                   ret == 0)
+                       ret = tret;
+               if (n_pagep != NULL &&
+                   (tret = __ham_put_page(hashp->dbp, n_pagep, 1)) != 0 &&
+                   ret == 0)
+                       ret = tret;
+               if (ret != 0)
+                       return (ret);
+       } else {
+               /*
+                * Mark item deleted so that we don't try to return it, and
+                * so that we update the cursor correctly on the next call
+                * to next.
+                */
+               F_SET(cursorp, H_DELETED);
+               chg_pgno = cursorp->pgno;
+               ret = __ham_dirty_page(hashp, p);
+       }
+       __ham_c_update(hashp, cursorp, chg_pgno, 0, 0, 0);
+
+       F_CLR(cursorp, H_OK);
+       return (ret);
+}
+/*
+ * __ham_replpair --
+ *     Given the key data indicated by the cursor, replace part/all of it
+ *     according to the fields in the dbt.
+ *
+ *     hashp:    hash table being modified.
+ *     hcp:      cursor positioned on the pair; hcp->pagep must already be
+ *               held, since it is dereferenced without being fetched.
+ *     dbt:      new data; doff/dlen describe the byte range being replaced
+ *               and size/data the bytes that replace it.
+ *     make_dup: passed through to __ham_replace_log only.
+ *
+ *     Returns 0 on success, ENOMEM if the working buffer cannot be
+ *     obtained, or a non-zero value from one of the helpers called here.
+ *
+ * PUBLIC: int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
+ */
+int
+__ham_replpair(hashp, hcp, dbt, make_dup)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       DBT *dbt;
+       u_int32_t make_dup;
+{
+       DBT old_dbt, tmp;
+       DB_LSN  new_lsn;
+       HKEYDATA *hk;
+       u_int32_t len;
+       int32_t change;
+       int is_big, ret, type;
+       u_int8_t *beg, *dest, *end, *src;
+
+       /*
+        * Big item replacements are handled in generic code.
+        * Items that fit on the current page fall into 4 classes.
+        * 1. On-page element, same size
+        * 2. On-page element, new is bigger (fits)
+        * 3. On-page element, new is bigger (does not fit)
+        * 4. On-page element, old is bigger
+        * Numbers 1, 2, and 4 are essentially the same (and should
+        * be the common case).  We handle case 3 as a delete and
+        * add.
+        */
+
+       /*
+        * We need to compute the number of bytes that we are adding or
+        * removing from the entry.  Normally, we can simply subtract
+        * the number of bytes we are replacing (dbt->dlen) from the
+        * number of bytes we are inserting (dbt->size).  However, if
+        * we are doing a partial put off the end of a record, then this
+        * formula doesn't work, because we are essentially adding
+        * new bytes.
+        */
+       change = dbt->size - dbt->dlen;
+
+       hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
+       is_big = hk->type == H_OFFPAGE;
+
+       if (is_big)
+               memcpy(&len, (u_int8_t *)hk + SSZ(HOFFPAGE, tlen),
+                   sizeof(u_int32_t));
+       else
+               len = LEN_HKEYDATA(hcp->pagep,
+                   hashp->dbp->pgsize, H_DATAINDEX(hcp->bndx));
+
+       if (dbt->doff + dbt->dlen > len)
+               change += dbt->doff + dbt->dlen - len;
+
+
+       if (change > (int)P_FREESPACE(hcp->pagep) || is_big) {
+               /*
+                * Case 3 -- two subcases.
+                * A. This is not really a partial operation, but an overwrite.
+                *    Simple del and add works.
+                * B. This is a partial and we need to construct the data that
+                *    we are really inserting (yuck).
+                * In both cases, we need to grab the key off the page (in
+                * some cases we could do this outside of this routine; for
+                * cleanliness we do it here.  If you happen to be on a big
+                * key, this could be a performance hit).
+                */
+               tmp.flags = 0;
+               F_SET(&tmp, DB_DBT_MALLOC | DB_DBT_INTERNAL);
+               if ((ret =
+                   __db_ret(hashp->dbp, hcp->pagep, H_KEYINDEX(hcp->bndx),
+                   &tmp, &hcp->big_key, &hcp->big_keylen)) != 0)
+                       return (ret);
+
+               type = hk->type;
+               if (dbt->doff == 0 && dbt->dlen == len) {       /* Case A */
+                       ret = __ham_del_pair(hashp, hcp);
+                       if (ret == 0)
+                           ret = __ham_add_el(hashp, hcp, &tmp, dbt, type);
+               } else {                                        /* Case B */
+                       DBT tdata;
+                       void *grown;
+
+                       tdata.flags = 0;
+                       F_SET(&tdata, DB_DBT_MALLOC | DB_DBT_INTERNAL);
+
+                       if ((ret = __db_ret(hashp->dbp, hcp->pagep,
+                           H_DATAINDEX(hcp->bndx), &tdata, &hcp->big_data,
+                           &hcp->big_datalen)) != 0)
+                               goto err;
+
+                       /*
+                        * Grow the copy of the old data before the pair is
+                        * deleted, so that running out of memory leaves the
+                        * record on the page untouched.  The realloc result
+                        * is checked before it is used, and the old buffer
+                        * is released if the call fails.
+                        */
+                       if (change > 0) {
+                               grown = realloc(tdata.data,
+                                   tdata.size + change);
+                               if (grown == NULL) {
+                                       free(tdata.data);
+                                       ret = ENOMEM;
+                                       goto err;
+                               }
+                               tdata.data = grown;
+                               memset((u_int8_t *)tdata.data + tdata.size,
+                                   0, change);
+                       }
+                       if (tdata.data == NULL) {
+                               ret = ENOMEM;
+                               goto err;
+                       }
+
+                       /* Now we can delete the item. */
+                       if ((ret = __ham_del_pair(hashp, hcp)) != 0) {
+                               free(tdata.data);
+                               goto err;
+                       }
+
+                       /* Now shift old data around to make room for new. */
+                       end = (u_int8_t *)tdata.data + tdata.size;
+
+                       src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen;
+                       if (src < end && tdata.size > dbt->doff + dbt->dlen) {
+                               len = tdata.size - dbt->doff - dbt->dlen;
+                               dest = src + change;
+                               memmove(dest, src, len);
+                       }
+                       memcpy((u_int8_t *)tdata.data + dbt->doff,
+                           dbt->data, dbt->size);
+                       tdata.size += change;
+
+                       /* Now add the pair. */
+                       ret = __ham_add_el(hashp, hcp, &tmp, &tdata, type);
+                       free(tdata.data);
+               }
+err:           free(tmp.data);
+               return (ret);
+       }
+
+       /*
+        * Set up pointer into existing data. Do it before the log
+        * message so we can use it inside of the log setup.
+        */
+       beg = H_PAIRDATA(hcp->pagep, hcp->bndx)->data;
+       beg += dbt->doff;
+
+       /*
+        * If we are going to have to move bytes at all, figure out
+        * all the parameters here.  Then log the call before moving
+        * anything around.
+        */
+       if (DB_LOGGING(hashp->dbp)) {
+               old_dbt.data = beg;
+               old_dbt.size = dbt->dlen;
+               if ((ret = __ham_replace_log(hashp->dbp->dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+                   hashp->dbp->log_fileid, PGNO(hcp->pagep),
+                   (u_int32_t)H_DATAINDEX(hcp->bndx), &LSN(hcp->pagep),
+                   (u_int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0)
+                       return (ret);
+
+               LSN(hcp->pagep) = new_lsn;      /* Structure assignment. */
+       }
+
+       __ham_onpage_replace(hcp->pagep, hashp->dbp->pgsize,
+           (u_int32_t)H_DATAINDEX(hcp->bndx), (int32_t)dbt->doff, change, dbt);
+
+       return (0);
+}
+
+/*
+ * Replace data on a page with new data, possibly growing or shrinking what's
+ * there.  This is called on two different occasions. On one (from replpair)
+ * we are interested in changing only the data.  On the other (from recovery)
+ * we are replacing the entire data (header and all) with a new element.  In
+ * the latter case, the off argument is negative.
+ * pagep: the page that we're changing
+ * pgsize: the page size; only passed through to LEN_HKEYDATA.
+ * ndx: page index of the element that is growing/shrinking.
+ * off: Offset at which we are beginning the replacement.
+ * change: the number of bytes (+ or -) that the element is growing/shrinking.
+ * dbt: the new data that gets written at beg.
+ *
+ * When change is non-zero the bytes starting at HOFFSET(pagep) are shifted
+ * by -change with memmove.  The number of bytes shifted depends on off:
+ *   off < 0:             everything up to the start of entry ndx;
+ *   off >= item length:  everything up to the end of the item's data, and
+ *                        `change' bytes after the moved region are zeroed;
+ *   otherwise:           everything up to data + off.
+ * inp[ndx] and every later index, and HOFFSET, are then decremented by
+ * change.  Last, dbt->size bytes of dbt->data are copied to data + off
+ * (off >= 0) or to the start of the entry (off < 0).
+ *
+ * There is no return value and nothing here checks that the page has room
+ * for the change.
+ * NOTE(review): the memset length is the signed `change'; presumably the
+ * "off past the end of the item" case is only reached when growing, so
+ * change is positive there -- confirm against the callers.
+ *
+ * PUBLIC: void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
+ * PUBLIC:     int32_t,  DBT *));
+ */
+void
+__ham_onpage_replace(pagep, pgsize, ndx, off, change, dbt)
+       PAGE *pagep;
+       size_t pgsize;
+       u_int32_t ndx;
+       int32_t off;
+       int32_t change;
+       DBT *dbt;
+{
+       db_indx_t i;
+       int32_t len;
+       u_int8_t *src, *dest;
+       int zero_me;
+
+       if (change != 0) {
+               zero_me = 0;
+               src = (u_int8_t *)(pagep) + HOFFSET(pagep);
+               if (off < 0)
+                       len = pagep->inp[ndx] - HOFFSET(pagep);
+               else if ((u_int32_t)off >= LEN_HKEYDATA(pagep, pgsize, ndx)) {
+                       len = GET_HKEYDATA(pagep, ndx)->data +
+                           LEN_HKEYDATA(pagep, pgsize, ndx) - src;
+                       zero_me = 1;
+               } else
+                       len = (GET_HKEYDATA(pagep, ndx)->data + off) - src;
+               dest = src - change;
+               memmove(dest, src, len);
+               if (zero_me)
+                       memset(dest + len, 0, change);
+
+               /* Now update the indices. */
+               for (i = ndx; i < NUM_ENT(pagep); i++)
+                       pagep->inp[i] -= change;
+               HOFFSET(pagep) -= change;
+       }
+       if (off >= 0)
+               memcpy(GET_HKEYDATA(pagep, ndx)->data + off,
+                   dbt->data, dbt->size);
+       else
+               memcpy(P_ENTRY(pagep, ndx), dbt->data, dbt->size);
+}
+
+/*
+ * __ham_split_page --
+ *     Split bucket obucket: walk every page on its chain and put each
+ *     key/data pair either back on obucket or on the new bucket nbucket,
+ *     depending on what __ham_call_hash returns for the key.
+ *
+ * The primary page of obucket is copied into hashp->split_buf and then
+ * re-initialized, so it is the copy that gets scanned.  Overflow pages of
+ * the old chain are scanned in place and handed to __ham_del_page once
+ * their pairs have been copied.  When logging is enabled, a page image is
+ * logged with SPLITOLD before a source page is consumed and with SPLITNEW
+ * when a destination page is finished.
+ *
+ * Returns 0 on success, otherwise the error of the first helper that
+ * failed.  On failure every page this function still holds (old, new and
+ * the overflow page being scanned) is given back to the buffer pool and
+ * the scratch buffer used by __db_ret is freed.
+ *
+ * PUBLIC: int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
+ */
+int
+__ham_split_page(hashp, obucket, nbucket)
+       HTAB *hashp;
+       u_int32_t obucket, nbucket;
+{
+       DBT key, val, page_dbt;
+       DB_ENV *dbenv;
+       DB_LSN new_lsn;
+       PAGE **pp, *old_pagep, *temp_pagep, *new_pagep, *dead_pagep;
+       db_indx_t n;
+       db_pgno_t bucket_pgno, next_pgno;
+       u_int32_t big_len, len;
+       int ret, tret;
+       void *big_buf;
+
+       dbenv = hashp->dbp->dbenv;
+       temp_pagep = old_pagep = new_pagep = NULL;
+
+       /* Initialized before the first "goto err" so cleanup can rely on it. */
+       big_len = 0;
+       big_buf = NULL;
+
+       bucket_pgno = BUCKET_TO_PAGE(hashp, obucket);
+       if ((ret = __ham_get_page(hashp->dbp, bucket_pgno, &old_pagep)) != 0)
+               return (ret);
+       if ((ret = __ham_new_page(hashp, BUCKET_TO_PAGE(hashp, nbucket), P_HASH,
+           &new_pagep)) != 0)
+               goto err;
+
+       /*
+        * Scan a private copy of the primary page.  Its page number equals
+        * bucket_pgno, which is how the code below recognizes that it must
+        * be neither deleted nor put.
+        */
+       temp_pagep = hashp->split_buf;
+       memcpy(temp_pagep, old_pagep, hashp->hdr->pagesize);
+
+       if (DB_LOGGING(hashp->dbp)) {
+               page_dbt.size = hashp->hdr->pagesize;
+               page_dbt.data = old_pagep;
+               if ((ret = __ham_splitdata_log(dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+                   hashp->dbp->log_fileid, SPLITOLD, PGNO(old_pagep),
+                   &page_dbt, &LSN(old_pagep))) != 0)
+                       goto err;
+       }
+
+       P_INIT(old_pagep, hashp->hdr->pagesize, PGNO(old_pagep), PGNO_INVALID,
+           PGNO_INVALID, 0, P_HASH);
+
+       if (DB_LOGGING(hashp->dbp))
+               LSN(old_pagep) = new_lsn;       /* Structure assignment. */
+
+       val.flags = key.flags = 0;
+       while (temp_pagep != NULL) {
+               for (n = 0; n < (db_indx_t)H_NUMPAIRS(temp_pagep); n++) {
+                       if ((ret =
+                           __db_ret(hashp->dbp, temp_pagep, H_KEYINDEX(n),
+                           &key, &big_buf, &big_len)) != 0)
+                               goto err;
+
+                       if (__ham_call_hash(hashp, key.data, key.size)
+                           == obucket)
+                               pp = &old_pagep;
+                       else
+                               pp = &new_pagep;
+
+                       /*
+                        * Figure out how many bytes we need on the new
+                        * page to store the key/data pair.
+                        */
+
+                       len = LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
+                           H_DATAINDEX(n)) +
+                           LEN_HITEM(temp_pagep, hashp->hdr->pagesize,
+                           H_KEYINDEX(n)) +
+                           2 * sizeof(db_indx_t);
+
+                       if (P_FREESPACE(*pp) < len) {
+                               if (DB_LOGGING(hashp->dbp)) {
+                                       page_dbt.size = hashp->hdr->pagesize;
+                                       page_dbt.data = *pp;
+                                       if ((ret = __ham_splitdata_log(
+                                           dbenv->lg_info,
+                                           (DB_TXN *)hashp->dbp->txn,
+                                           &new_lsn, 0,
+                                           hashp->dbp->log_fileid, SPLITNEW,
+                                           PGNO(*pp), &page_dbt,
+                                           &LSN(*pp))) != 0)
+                                               goto err;
+                                       LSN(*pp) = new_lsn;
+                               }
+                               if ((ret = __ham_add_ovflpage(hashp,
+                                   *pp, 1, pp)) != 0)
+                                       goto err;
+                       }
+                       __ham_copy_item(hashp, temp_pagep, H_KEYINDEX(n), *pp);
+                       __ham_copy_item(hashp, temp_pagep, H_DATAINDEX(n), *pp);
+               }
+               next_pgno = NEXT_PGNO(temp_pagep);
+
+               /*
+                * Clear temp_page; if it's a link overflow page, free it.
+                * The pointer is forgotten before the call: on failure we
+                * cannot tell whether __ham_del_page already released the
+                * page, so the error path must not release it again.
+                */
+               dead_pagep = temp_pagep;
+               temp_pagep = NULL;
+               if (PGNO(dead_pagep) != bucket_pgno && (ret =
+                   __ham_del_page(hashp->dbp, dead_pagep)) != 0)
+                       goto err;
+
+               if (next_pgno != PGNO_INVALID && (ret =
+                   __ham_get_page(hashp->dbp, next_pgno, &temp_pagep)) != 0) {
+                       /* Nothing is held if the fetch failed. */
+                       temp_pagep = NULL;
+                       goto err;
+               }
+
+               if (temp_pagep != NULL && DB_LOGGING(hashp->dbp)) {
+                       page_dbt.size = hashp->hdr->pagesize;
+                       page_dbt.data = temp_pagep;
+                       if ((ret = __ham_splitdata_log(dbenv->lg_info,
+                           (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+                           hashp->dbp->log_fileid, SPLITOLD, PGNO(temp_pagep),
+                           &page_dbt, &LSN(temp_pagep))) != 0)
+                               goto err;
+                       LSN(temp_pagep) = new_lsn;
+               }
+       }
+
+       /*
+        * The loop only ends with temp_pagep == NULL, i.e. every page of
+        * the old chain has been dealt with; nothing is left to delete.
+        */
+
+       /*
+        * Write new buckets out.
+        */
+       if (DB_LOGGING(hashp->dbp)) {
+               page_dbt.size = hashp->hdr->pagesize;
+               page_dbt.data = old_pagep;
+               if ((ret = __ham_splitdata_log(dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+                   hashp->dbp->log_fileid, SPLITNEW, PGNO(old_pagep),
+                   &page_dbt, &LSN(old_pagep))) != 0)
+                       goto err;
+               LSN(old_pagep) = new_lsn;
+
+               page_dbt.data = new_pagep;
+               if ((ret = __ham_splitdata_log(dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0,
+                   hashp->dbp->log_fileid, SPLITNEW, PGNO(new_pagep),
+                   &page_dbt, &LSN(new_pagep))) != 0)
+                       goto err;
+               LSN(new_pagep) = new_lsn;
+       }
+       ret = __ham_put_page(hashp->dbp, old_pagep, 1);
+       if ((tret = __ham_put_page(hashp->dbp, new_pagep, 1)) != 0 &&
+           ret == 0)
+               ret = tret;
+
+       /*
+        * Error cleanup.  The label has to be inside the "if (0)" body:
+        * the success path skips the block, "goto err" jumps into it.
+        */
+       if (0) {
+err:           if (old_pagep != NULL)
+                       (void)__ham_put_page(hashp->dbp, old_pagep, 1);
+               if (new_pagep != NULL)
+                       (void)__ham_put_page(hashp->dbp, new_pagep, 1);
+               if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno)
+                       (void)__ham_put_page(hashp->dbp, temp_pagep, 1);
+       }
+
+       /* Shared by both paths so the scratch buffer is freed exactly once. */
+       if (big_buf != NULL)
+               free(big_buf);
+       return (ret);
+}
+
+/*
+ * Add the given pair to the page.  The page in question may already be
+ * held (i.e. it was already gotten).  If it is, then the page is passed
+ * in via the pagep parameter.  On return, pagep will contain the page
+ * to which we just added something.  This allows us to link overflow
+ * pages and return the new page having correctly put the last page.
+ *
+ * hashp: the hash table.
+ * hcp:   cursor; hcp->pagep is fetched here if it is NULL (from
+ *        seek_found_page when that is valid, else from pgno).  On
+ *        success pagep/pgno/bndx name the page and pair slot used and
+ *        H_DELETED is cleared.
+ * key, val: the pair to store.  An item for which ISBIG is true is
+ *        written off-page with __db_poff and replaced on the page by an
+ *        HOFFPAGE structure.
+ * type:  item type stored for the data item when it stays on the page.
+ *
+ * The chain is followed while the current page holds pairs and has a
+ * successor, stopping early at a page with enough free space.  If the
+ * page finally reached is too small an overflow page is linked in.
+ * When logging is on, a PUTPAIR record is written before the page is
+ * modified.  hdr->nelem is only incremented when DB_AM_LOCKING is not
+ * set.  H_EXPAND is set on the cursor if an overflow page was added or
+ * the page now holds more pairs than a non-zero ffactor.
+ *
+ * Returns 0 on success, otherwise the error of the failing helper; the
+ * error returns do not undo work already done (e.g. an off-page key).
+ * NOTE(review): only type, pgno and tlen of koff/doff are assigned before
+ * sizeof() bytes of them are logged and copied; if HOFFPAGE has any other
+ * bytes they are uninitialized stack contents -- confirm the layout.
+ *
+ * PUBLIC: int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
+ * PUBLIC:     int));
+ */
+int
+__ham_add_el(hashp, hcp, key, val, type)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       const DBT *key, *val;
+       int type;
+{
+       DBT *pkey, *pdata, key_dbt, data_dbt;
+       DB_LSN new_lsn;
+       HOFFPAGE doff, koff;
+       db_pgno_t next_pgno;
+       u_int32_t data_size, key_size, pairsize;
+       int do_expand, is_keybig, is_databig, rectype, ret;
+       int key_type, data_type;
+
+       do_expand = 0;
+
+       if (hcp->pagep == NULL && (ret = __ham_get_page(hashp->dbp,
+           hcp->seek_found_page != PGNO_INVALID ?  hcp->seek_found_page :
+           hcp->pgno, &hcp->pagep)) != 0)
+               return (ret);
+
+       key_size = HKEYDATA_PSIZE(key->size);
+       data_size = HKEYDATA_PSIZE(val->size);
+       is_keybig = ISBIG(hashp, key->size);
+       is_databig = ISBIG(hashp, val->size);
+       if (is_keybig)
+               key_size = HOFFPAGE_PSIZE;
+       if (is_databig)
+               data_size = HOFFPAGE_PSIZE;
+
+       pairsize = key_size + data_size;
+
+       /* Advance to first page in chain with room for item. */
+       while (H_NUMPAIRS(hcp->pagep) && NEXT_PGNO(hcp->pagep) !=
+           PGNO_INVALID) {
+               /*
+                * This may not be the end of the chain, but the pair may fit
+                * anyway.  Check if it's a bigpair that fits or a regular
+                * pair that fits.
+                */
+               if (P_FREESPACE(hcp->pagep) >= pairsize)
+                       break;
+               next_pgno = NEXT_PGNO(hcp->pagep);
+               if ((ret =
+                   __ham_next_cpage(hashp, hcp, next_pgno, 0, 0)) != 0)
+                       return (ret);
+       }
+
+       /*
+        * Check if we need to allocate a new page.
+        */
+       if (P_FREESPACE(hcp->pagep) < pairsize) {
+               do_expand = 1;
+               if ((ret = __ham_add_ovflpage(hashp,
+                   hcp->pagep, 1, &hcp->pagep)) !=  0)
+                       return (ret);
+               hcp->pgno = PGNO(hcp->pagep);
+       }
+
+       /*
+        * Update cursor.
+        */
+       hcp->bndx = H_NUMPAIRS(hcp->pagep);
+       F_CLR(hcp, H_DELETED);
+       if (is_keybig) {
+               if ((ret = __db_poff(hashp->dbp,
+                   key, &koff.pgno, __ham_overflow_page)) != 0)
+                       return (ret);
+               koff.type = H_OFFPAGE;
+               koff.tlen = key->size;
+               key_dbt.data = &koff;
+               key_dbt.size = sizeof(koff);
+               pkey = &key_dbt;
+               key_type = H_OFFPAGE;
+       } else {
+               pkey = (DBT *)key;
+               key_type = H_KEYDATA;
+       }
+
+       if (is_databig) {
+               if ((ret = __db_poff(hashp->dbp,
+                   val, &doff.pgno, __ham_overflow_page)) != 0)
+                       return (ret);
+               doff.type = H_OFFPAGE;
+               doff.tlen = val->size;
+               data_dbt.data = &doff;
+               data_dbt.size = sizeof(doff);
+               pdata = &data_dbt;
+               data_type = H_OFFPAGE;
+       } else {
+               pdata = (DBT *)val;
+               data_type = type;
+       }
+
+       if (DB_LOGGING(hashp->dbp)) {
+               rectype = PUTPAIR;
+               if (is_databig)
+                       rectype |= PAIR_DATAMASK;
+               if (is_keybig)
+                       rectype |= PAIR_KEYMASK;
+
+               if ((ret = __ham_insdel_log(hashp->dbp->dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, rectype,
+                   hashp->dbp->log_fileid, PGNO(hcp->pagep),
+                   (u_int32_t)H_NUMPAIRS(hcp->pagep),
+                   &LSN(hcp->pagep), pkey, pdata)) != 0)
+                       return (ret);
+
+               /* Move lsn onto page. */
+               LSN(hcp->pagep) = new_lsn;      /* Structure assignment. */
+       }
+
+       __ham_putitem(hcp->pagep, pkey, key_type);
+       __ham_putitem(hcp->pagep, pdata, data_type);
+
+       /*
+        * For splits, we are going to update item_info's page number
+        * field, so that we can easily return to the same page the
+        * next time we come in here.  For other operations, this shouldn't
+        * matter, since odds are this is the last thing that happens before
+        * we return to the user program.
+        */
+       hcp->pgno = PGNO(hcp->pagep);
+
+       /*
+        * XXX Maybe keep incremental numbers here
+        */
+       if (!F_ISSET(hashp->dbp, DB_AM_LOCKING))
+               hashp->hdr->nelem++;
+
+       if (do_expand || (hashp->hdr->ffactor != 0 &&
+           (u_int32_t)H_NUMPAIRS(hcp->pagep) > hashp->hdr->ffactor))
+               F_SET(hcp, H_EXPAND);
+       return (0);
+}
+
+
+/*
+ * __ham_copy_item --
+ *     Append entry src_ndx of src_page, byte for byte, as a new last
+ *     entry of dest_page.  Used while splitting; it handles every kind
+ *     of hash entry (H_OFFPAGE, H_KEYDATA, H_DUPLICATE, H_OFFDUP)
+ *     because the entry is never interpreted.  Splits are logged at a
+ *     higher level, so nothing is logged here.
+ *
+ * The caller must already have made sure dest_page has room: space is
+ * carved out by lowering HOFFSET(dest_page) without any check.
+ *
+ * PUBLIC: void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
+ */
+void
+__ham_copy_item(hashp, src_page, src_ndx, dest_page)
+       HTAB *hashp;
+       PAGE *src_page;
+       int src_ndx;
+       PAGE *dest_page;
+{
+       u_int32_t nbytes;
+
+       /* Size of the entry being copied. */
+       nbytes = LEN_HITEM(src_page, hashp->hdr->pagesize, src_ndx);
+
+       /* Reserve the bytes and point a new index slot at them. */
+       HOFFSET(dest_page) = HOFFSET(dest_page) - nbytes;
+       dest_page->inp[NUM_ENT(dest_page)] = HOFFSET(dest_page);
+
+       /* Copy the entry, then account for the new slot. */
+       memcpy(P_ENTRY(dest_page, NUM_ENT(dest_page)),
+           P_ENTRY(src_page, src_ndx), nbytes);
+       NUM_ENT(dest_page)++;
+}
+
+/*
+ * __ham_add_ovflpage --
+ *     Allocate an overflow page with __ham_overflow_page and link it
+ *     after pagep (NEXT_PGNO of pagep and PREV_PGNO of the new page are
+ *     set).  When logging is on a PUTOVFL record is written first and its
+ *     LSN is stored on both pages.
+ *
+ * pagep:   page the new page is chained to.
+ * release: if non-zero, pagep is put back (dirty) once it is linked.
+ * pp:      receives the new page; it is stored even when the put of
+ *          pagep fails.
+ *
+ * Returns:
+ *      0 on success
+ *      the error from __ham_overflow_page, __ham_newpage_log or
+ *      __ham_put_page on failure; *pp is left untouched by the first two.
+ *
+ * NOTE(review): when __ham_newpage_log fails the freshly allocated page
+ * is not put back before returning -- looks like it stays pinned.
+ *
+ * PUBLIC: int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
+ */
+int
+__ham_add_ovflpage(hashp, pagep, release, pp)
+       HTAB *hashp;
+       PAGE *pagep;
+       int release;
+       PAGE **pp;
+{
+       DB_ENV *dbenv;
+       DB_LSN new_lsn;
+       PAGE *new_pagep;
+       int ret;
+
+       dbenv = hashp->dbp->dbenv;
+
+       if ((ret = __ham_overflow_page(hashp->dbp, P_HASH, &new_pagep)) != 0)
+               return (ret);
+
+       if (DB_LOGGING(hashp->dbp)) {
+               if ((ret = __ham_newpage_log(dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, PUTOVFL,
+                   hashp->dbp->log_fileid, PGNO(pagep), &LSN(pagep),
+                   PGNO(new_pagep), &LSN(new_pagep), PGNO_INVALID, NULL)) != 0)
+                       return (ret);
+
+               /* Move lsn onto page. */
+               LSN(pagep) = LSN(new_pagep) = new_lsn;
+       }
+       NEXT_PGNO(pagep) = PGNO(new_pagep);
+       PREV_PGNO(new_pagep) = PGNO(pagep);
+
+       if (release)
+               ret = __ham_put_page(hashp->dbp, pagep, 1);
+
+       hashp->hash_overflows++;
+       *pp = new_pagep;
+       return (ret);
+}
+
+
+/*
+ * PUBLIC: int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
+ */
+int
+__ham_new_page(hashp, addr, type, pp)
+       HTAB *hashp;
+       u_int32_t addr, type;
+       PAGE **pp;
+{
+       PAGE *pagep;
+       int ret;
+
+       if ((ret = memp_fget(hashp->dbp->mpf,
+           &addr, DB_MPOOL_CREATE, &pagep)) != 0)
+               return (ret);
+
+#ifdef DEBUG_SLOW
+       account_page(hashp, addr, 1);
+#endif
+       /* This should not be necessary because page-in should do it. */
+       P_INIT(pagep,
+           hashp->hdr->pagesize, addr, PGNO_INVALID, PGNO_INVALID, 0, type);
+
+       *pp = pagep;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __ham_del_page __P((DB *, PAGE *));
+ */
+int
+__ham_del_page(dbp, pagep)
+       DB *dbp;
+       PAGE *pagep;
+{
+       DB_LSN new_lsn;
+       HTAB *hashp;
+       int ret;
+
+       hashp = (HTAB *)dbp->internal;
+       ret = 0;
+       DIRTY_META(hashp, ret);
+       if (ret != 0) {
+               if (ret != EAGAIN)
+                       __db_err(hashp->dbp->dbenv,
+                           "free_ovflpage: unable to lock meta data page %s\n",
+                           strerror(ret));
+               /*
+                * If we are going to return an error, then we should free
+                * the page, so it doesn't stay pinned forever.
+                */
+               (void)__ham_put_page(hashp->dbp, pagep, 0);
+               return (ret);
+       }
+
+       if (DB_LOGGING(hashp->dbp)) {
+               if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, DELPGNO,
+                   hashp->dbp->log_fileid, PGNO(pagep), hashp->hdr->last_freed,
+                   (u_int32_t)TYPE(pagep), NEXT_PGNO(pagep), P_INVALID,
+                   &LSN(pagep), &hashp->hdr->lsn)) != 0)
+                       return (ret);
+
+               hashp->hdr->lsn = new_lsn;
+               LSN(pagep) = new_lsn;
+       }
+
+#ifdef DEBUG
+       {
+               db_pgno_t __pgno;
+               DB_LSN __lsn;
+               __pgno = pagep->pgno;
+               __lsn = pagep->lsn;
+               memset(pagep, 0xff, dbp->pgsize);
+               pagep->pgno = __pgno;
+               pagep->lsn = __lsn;
+       }
+#endif
+       TYPE(pagep) = P_INVALID;
+       NEXT_PGNO(pagep) = hashp->hdr->last_freed;
+       hashp->hdr->last_freed = PGNO(pagep);
+
+       return (__ham_put_page(hashp->dbp, pagep, 1));
+}
+
+
+/*
+ * __ham_put_page --
+ *     Hand pagep back to the buffer pool; a non-zero is_dirty marks it
+ *     DB_MPOOL_DIRTY.  Returns whatever memp_fput returns.
+ *
+ * NOTE(review): the DEBUG_SLOW code reads dbp->cookie and a BKT header,
+ * while __ham_get_page uses dbp->internal -- looks stale, confirm that it
+ * still builds.
+ *
+ * PUBLIC: int __ham_put_page __P((DB *, PAGE *, int32_t));
+ */
+int
+__ham_put_page(dbp, pagep, is_dirty)
+       DB *dbp;
+       PAGE *pagep;
+       int32_t is_dirty;
+{
+#ifdef DEBUG_SLOW
+       account_page((HTAB *)dbp->cookie,
+           ((BKT *)((char *)pagep - sizeof(BKT)))->pgno, -1);
+#endif
+       if (is_dirty)
+               return (memp_fput(dbp->mpf, pagep, DB_MPOOL_DIRTY));
+       return (memp_fput(dbp->mpf, pagep, 0));
+}
+
+/*
+ * __ham_dirty_page --
+ *     Flag a page that is currently held as modified by setting
+ *     DB_MPOOL_DIRTY on it; the page stays held.  Returns the result of
+ *     memp_fset.
+ *
+ * PUBLIC: int __ham_dirty_page __P((HTAB *, PAGE *));
+ */
+int
+__ham_dirty_page(hashp, pagep)
+       HTAB *hashp;
+       PAGE *pagep;
+{
+       int ret;
+
+       ret = memp_fset(hashp->dbp->mpf, pagep, DB_MPOOL_DIRTY);
+       return (ret);
+}
+
+/*
+ * __ham_get_page --
+ *     Fetch page number addr from the buffer pool, creating it if it
+ *     does not exist yet (DB_MPOOL_CREATE), and store it in *pagep.
+ *     Returns whatever memp_fget returns; *pagep is only meaningful
+ *     when that is 0.
+ *
+ * PUBLIC: int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
+ */
+int
+__ham_get_page(dbp, addr, pagep)
+       DB *dbp;
+       db_pgno_t addr;
+       PAGE **pagep;
+{
+       int ret;
+
+       ret = memp_fget(dbp->mpf, &addr, DB_MPOOL_CREATE, pagep);
+#ifdef DEBUG_SLOW
+       /*
+        * Only look at *pagep after a successful fetch: callers pass
+        * uninitialized pointers, so on failure its value is garbage.
+        */
+       if (ret == 0 && *pagep != NULL)
+               account_page((HTAB *)dbp->internal, addr, 1);
+#endif
+       return (ret);
+}
+
+/*
+ * __ham_overflow_page --
+ *     Allocate a page of the given type and return it, held, in *pp.
+ *
+ * The head of the free list (hdr->last_freed) is reused if there is one.
+ * Otherwise a new page number is taken at the current overflow point by
+ * bumping hdr->spares[ovfl_point].  When logging is on, the allocation is
+ * logged (ALLOCPGNO) before either the header or the page is changed, and
+ * the record's LSN is stored in the header and on the page.
+ *
+ * Returns 0 on success.  Errors: whatever DIRTY_META, __ham_get_page,
+ * __ham_newpgno_log or __ham_new_page report, or ENOMEM when the new page
+ * number would exceed MAX_PAGES.  A failure up to and including the log
+ * call leaves the header as it was and no page held.
+ * NOTE(review): if __ham_new_page fails after the allocation was logged,
+ * spares[] stays bumped; left as is because the log already records it.
+ *
+ * PUBLIC: int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
+ */
+int
+__ham_overflow_page(dbp, type, pp)
+       DB *dbp;
+       u_int32_t type;
+       PAGE **pp;
+{
+       DB_LSN *lsnp, new_lsn;
+       HTAB *hashp;
+       PAGE *p;
+       db_pgno_t new_addr, next_free, newalloc_flag;
+       u_int32_t offset, splitnum;
+       int ret;
+
+       hashp = (HTAB *)dbp->internal;
+
+       ret = 0;
+       DIRTY_META(hashp, ret);
+       if (ret != 0)
+               return (ret);
+
+       /*
+        * This routine is split up into two parts.  First we have
+        * to figure out the address of the new page that we are
+        * allocating.  Then we have to log the allocation.  Only
+        * after the log do we get to complete allocation of the
+        * new page.
+        */
+       new_addr = hashp->hdr->last_freed;
+       if (new_addr != PGNO_INVALID) {
+               /* Reuse the first page on the free list. */
+               if ((ret = __ham_get_page(hashp->dbp, new_addr, &p)) != 0)
+                       return (ret);
+               next_free = NEXT_PGNO(p);
+               lsnp = &LSN(p);
+               newalloc_flag = 0;
+       } else {
+               /* Nothing free: take the next page at the overflow point. */
+               splitnum = hashp->hdr->ovfl_point;
+               hashp->hdr->spares[splitnum]++;
+               offset = hashp->hdr->spares[splitnum] -
+                   (splitnum ? hashp->hdr->spares[splitnum - 1] : 0);
+               new_addr = PGNO_OF(hashp, hashp->hdr->ovfl_point, offset);
+               if (new_addr > MAX_PAGES(hashp)) {
+                       __db_err(hashp->dbp->dbenv, "hash: out of file pages");
+                       hashp->hdr->spares[splitnum]--;
+                       return (ENOMEM);
+               }
+               next_free = PGNO_INVALID;
+               p = NULL;
+               lsnp = NULL;
+               newalloc_flag = 1;
+       }
+
+       if (DB_LOGGING(hashp->dbp)) {
+               if ((ret = __ham_newpgno_log(hashp->dbp->dbenv->lg_info,
+                   (DB_TXN *)hashp->dbp->txn, &new_lsn, 0, ALLOCPGNO,
+                   hashp->dbp->log_fileid, new_addr, next_free,
+                   0, newalloc_flag, type, lsnp, &hashp->hdr->lsn)) != 0) {
+                       /*
+                        * Nothing has been changed on disk yet: give the
+                        * untouched free-list page back, or take back the
+                        * page number reserved above.
+                        */
+                       if (p != NULL)
+                               (void)__ham_put_page(hashp->dbp, p, 0);
+                       else
+                               hashp->hdr->spares[splitnum]--;
+                       return (ret);
+               }
+
+               hashp->hdr->lsn = new_lsn;
+               if (lsnp != NULL)
+                       *lsnp = new_lsn;
+       }
+
+       if (p != NULL) {
+               /* We just took something off the free list, initialize it. */
+               hashp->hdr->last_freed = next_free;
+               P_INIT(p, hashp->hdr->pagesize, PGNO(p), PGNO_INVALID,
+                   PGNO_INVALID, 0, (u_int8_t)type);
+       } else {
+               /* Get the new page. */
+               if ((ret = __ham_new_page(hashp, new_addr, type, &p)) != 0)
+                       return (ret);
+       }
+       /* P_INIT above may have reset the page header; store the LSN again. */
+       if (DB_LOGGING(hashp->dbp))
+               LSN(p) = new_lsn;
+
+       *pp = p;
+       return (0);
+}
+
+#ifdef DEBUG
+/*
+ * bucket_to_page --
+ *     Debugging helper: map bucket number n to a page number.  Bucket 0
+ *     is page 1; any other bucket is n + 1 plus the spares[] entry at
+ *     index __db_log2(n + 1) - 1.
+ *
+ * PUBLIC: #ifdef DEBUG
+ * PUBLIC: int bucket_to_page __P((HTAB *, int));
+ * PUBLIC: #endif
+ */
+int
+bucket_to_page(hashp, n)
+       HTAB *hashp;
+       int n;
+{
+       if (n == 0)
+               return (1);
+       return (n + 1 + hashp->hdr->spares[__db_log2(n + 1) - 1]);
+}
+#endif
+
+
+/*
+ * Create a bunch of overflow pages at the current split point.
+ *
+ * ovfl_point + 1 pages are created, from the highest offset down to 1,
+ * each as a P_INVALID page whose NEXT_PGNO is the previous free-list
+ * head, so that afterwards hdr->last_freed names the lowest new page and
+ * the old free list follows the new pages.  spares[ovfl_point] is raised
+ * by the number of pages before any of them is created.  When logging is
+ * on, one record is written up front and its LSN is put in the header and
+ * on each page; otherwise the pages get a zero LSN.
+ *
+ * There is no return value.  If __ham_new_page fails the loop stops and
+ * the free list is left pointing at the pages created so far, while
+ * spares[] keeps the full count.
+ * NOTE(review): the result of __ham_ovfl_log is ignored, yet new_lsn is
+ * used afterwards -- presumably it is undefined if the log call failed.
+ *
+ * PUBLIC: void __ham_init_ovflpages __P((HTAB *));
+ */
+void
+__ham_init_ovflpages(hp)
+       HTAB *hp;
+{
+       DB_LSN new_lsn;
+       PAGE *p;
+       db_pgno_t last_pgno;
+       u_int32_t i, numpages;
+
+       numpages = hp->hdr->ovfl_point + 1;
+
+       last_pgno = hp->hdr->last_freed;
+       if (DB_LOGGING(hp->dbp)) {
+               (void)__ham_ovfl_log(hp->dbp->dbenv->lg_info,
+                   (DB_TXN *)hp->dbp->txn, &new_lsn, 0,
+                   hp->dbp->log_fileid, PGNO_OF(hp, hp->hdr->ovfl_point, 1),
+                   numpages, last_pgno, &hp->hdr->lsn);
+               hp->hdr->lsn = new_lsn;
+       } else
+               ZERO_LSN(new_lsn);
+
+       hp->hdr->spares[hp->hdr->ovfl_point] += numpages;
+       for (i = numpages; i > 0; i--) {
+               if (__ham_new_page(hp,
+                   PGNO_OF(hp, hp->hdr->ovfl_point, i), P_INVALID, &p) != 0)
+                       break;
+               LSN(p) = new_lsn;
+               NEXT_PGNO(p) = last_pgno;
+               last_pgno = PGNO(p);
+               (void)__ham_put_page(hp->dbp, p, 1);
+       }
+       hp->hdr->last_freed = last_pgno;
+}
+
+/*
+ * __ham_get_cpage --
+ *     Make sure the cursor holds what it needs: the bucket lock (only
+ *     when the DB uses locking and the cursor has none yet), its main
+ *     page and, if dpgno is set, its duplicate page.  A cursor without a
+ *     page number is positioned on the first page of its bucket with
+ *     bndx 0.
+ *
+ * Returns 0 on success or the error from __ham_lock_bucket or
+ * __ham_get_page.
+ *
+ * PUBLIC: int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+ */
+int
+__ham_get_cpage(hashp, hcp, mode)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       db_lockmode_t mode;
+{
+       int ret;
+
+       /* Lock the bucket first, if that is still to be done. */
+       if (hcp->lock == 0 && F_ISSET(hashp->dbp, DB_AM_LOCKING)) {
+               ret = __ham_lock_bucket(hashp->dbp, hcp, mode);
+               if (ret != 0)
+                       return (ret);
+       }
+
+       /* Main page. */
+       if (hcp->pagep == NULL) {
+               if (hcp->pgno == PGNO_INVALID) {
+                       hcp->pgno = BUCKET_TO_PAGE(hashp, hcp->bucket);
+                       hcp->bndx = 0;
+               }
+
+               ret = __ham_get_page(hashp->dbp, hcp->pgno, &hcp->pagep);
+               if (ret != 0)
+                       return (ret);
+       }
+
+       /* Duplicate page, only if the cursor refers to one. */
+       if (hcp->dpagep != NULL || hcp->dpgno == PGNO_INVALID)
+               return (0);
+       return (__ham_get_page(hashp->dbp, hcp->dpgno, &hcp->dpagep));
+}
+
+/*
+ * __ham_next_cpage --
+ *     Move the cursor to page pgno: put the page it currently holds (if
+ *     any, with the given dirty flag), fetch the new one and reset the
+ *     index to 0.  With H_ISDUP in flags all of this applies to the
+ *     duplicate page (dpagep/dpgno/dndx), otherwise to the main page
+ *     (pagep/pgno/bndx).
+ *
+ * Returns 0 on success or the error from __ham_put_page or
+ * __ham_get_page; in that case the cursor fields are not updated.
+ *
+ * PUBLIC: int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
+ * PUBLIC:     int, int));
+ */
+int
+__ham_next_cpage(hashp, hcp, pgno, dirty, flags)
+       HTAB *hashp;
+       HASH_CURSOR *hcp;
+       db_pgno_t pgno;
+       int dirty;
+       int flags;
+{
+       PAGE *held, *p;
+       int is_dup, ret;
+
+       is_dup = flags & H_ISDUP;
+
+       /* Let go of the page we are leaving. */
+       held = is_dup ? hcp->dpagep : hcp->pagep;
+       if (held != NULL &&
+           (ret = __ham_put_page(hashp->dbp, held, dirty)) != 0)
+               return (ret);
+
+       ret = __ham_get_page(hashp->dbp, pgno, &p);
+       if (ret != 0)
+               return (ret);
+
+       if (is_dup) {
+               hcp->dndx = 0;
+               hcp->dpgno = pgno;
+               hcp->dpagep = p;
+       } else {
+               hcp->bndx = 0;
+               hcp->pgno = pgno;
+               hcp->pagep = p;
+       }
+
+       return (0);
+}
+
+/*
+ * __ham_lock_bucket --
+ *     Get the lock on a particular bucket.
+ *
+ * The cursor's bucket number is stored in dbp->lock.pgno, then lock_get
+ * is called with dbp->lock_dbt on behalf of the transaction, or of
+ * dbp->locker when there is none; the lock is stored in hcp->lock.
+ * A negative lock_get result is reported as EAGAIN, anything else is
+ * returned unchanged.
+ */
+static int
+__ham_lock_bucket(dbp, hcp, mode)
+       DB *dbp;
+       HASH_CURSOR *hcp;
+       db_lockmode_t mode;
+{
+       int ret;
+
+       /*
+        * What a way to trounce on the memory system.  It might be
+        * worth copying the lk_info into the hashp.
+        */
+       dbp->lock.pgno = (db_pgno_t)(hcp->bucket);
+       ret = lock_get(dbp->dbenv->lk_info,
+           dbp->txn != NULL ? dbp->txn->txnid : dbp->locker, 0,
+           &dbp->lock_dbt, mode, &hcp->lock);
+
+       if (ret < 0)
+               return (EAGAIN);
+       return (ret);
+}
+
+/*
+ * __ham_dpair --
+ *     Delete a pair on a page, paying no attention to what the pair
+ *     represents.  The caller is responsible for freeing up duplicates
+ *     or offpage entries that might be referenced by this pair.
+ *
+ * dbp:  only dbp->pgsize is used (for H_PAIRSIZE).
+ * p:    the page; it is modified in place, nothing is logged or marked
+ *       dirty here.
+ * pndx: number of the pair (not the index of an item) to remove.
+ *
+ * On return the items of the later pairs have moved up by the size of the
+ * removed pair, their index entries have been shifted down one pair and
+ * adjusted, HOFFSET has grown by that size and NUM_ENT has dropped by 2.
+ *
+ * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t));
+ */
+void
+__ham_dpair(dbp, p, pndx)
+       DB *dbp;
+       PAGE *p;
+       u_int32_t pndx;
+{
+       db_indx_t delta, n;
+       u_int8_t *dest, *src;
+
+       /*
+        * Compute "delta", the amount we have to shift all of the
+        * offsets.  To find the delta, we just need to calculate
+        * the size of the pair of elements we are removing.
+        */
+       delta = H_PAIRSIZE(p, dbp->pgsize, pndx);
+
+       /*
+        * The hard case: we want to remove something other than
+        * the last item on the page.  We need to shift data and
+        * offsets down.
+        */
+       if ((db_indx_t)pndx != H_NUMPAIRS(p) - 1) {
+               /*
+                * Move the data: src is the first occupied byte on
+                * the page.  (The length moved is the distance from
+                * there to inp[H_DATAINDEX(pndx)], i.e. everything
+                * stored below the pair being removed, not delta.)
+                */
+               src = (u_int8_t *)p + HOFFSET(p);
+
+               /*
+                * Destination is delta bytes beyond src.  This might
+                * be an overlapping copy, so we have to use memmove.
+                */
+               dest = src + delta;
+               memmove(dest, src, p->inp[H_DATAINDEX(pndx)] - HOFFSET(p));
+       }
+
+       /* Adjust the offsets. */
+       for (n = (db_indx_t)pndx; n < (db_indx_t)(H_NUMPAIRS(p) - 1); n++) {
+               p->inp[H_KEYINDEX(n)] = p->inp[H_KEYINDEX(n+1)] + delta;
+               p->inp[H_DATAINDEX(n)] = p->inp[H_DATAINDEX(n+1)] + delta;
+       }
+
+       /* Adjust page metadata. */
+       HOFFSET(p) = HOFFSET(p) + delta;
+       NUM_ENT(p) = NUM_ENT(p) - 2;
+}
+
+#ifdef DEBUG_SLOW
+/*
+ * account_page --
+ *     Debugging aid: keep a small table of pages that are currently out
+ *     of the buffer pool and warn about pages that stay out too long.
+ *
+ * inout is 1 when a page is fetched and 0 (or -1, treated as 0) when it
+ * is put back.  A fetch records the page, or resets its counter if it is
+ * already recorded; a put removes it.  Every call then ages all recorded
+ * pages by one and warns about each non-bitmap page whose counter has
+ * passed 20.
+ *
+ * The table is of fixed size; once it is full, further pages are simply
+ * not tracked.
+ */
+static void
+account_page(hashp, pgno, inout)
+       HTAB *hashp;
+       db_pgno_t pgno;
+       int inout;
+{
+       static struct {
+               db_pgno_t pgno;
+               int times;
+       } list[100];
+       static int last;
+       int i, j;
+
+       if (inout == -1)                        /* XXX: Kluge */
+               inout = 0;
+
+       /* Find page in list. */
+       for (i = 0; i < last; i++)
+               if (list[i].pgno == pgno)
+                       break;
+
+       if (i < last) {
+               if (inout == 0) {
+                       /* Page came back: close the gap it leaves. */
+                       for (j = i; j < last - 1; j++)
+                               list[j] = list[j + 1];
+                       last--;
+               } else
+                       list[i].times = inout;
+       } else if (inout != 0 &&
+           last < (int)(sizeof(list) / sizeof(list[0]))) {
+               /* Not found: start tracking it, if there is room. */
+               list[last].times = inout;
+               list[last].pgno = pgno;
+               last++;
+       }
+
+       /* Report pages that have been out for a long time, then age them. */
+       for (i = 0; i < last; i++) {
+               if (list[i].times > 20 && !is_bitmap_pgno(hashp, list[i].pgno))
+                       (void)fprintf(stderr,
+                           "Warning: pg %lu has been out for %d times\n",
+                           (u_long)list[i].pgno, list[i].times);
+               list[i].times++;
+       }
+}
+#endif /* DEBUG_SLOW */
diff --git a/db2/hash/hash_rec.c b/db2/hash/hash_rec.c
new file mode 100644 (file)
index 0000000..81d9bb5
--- /dev/null
@@ -0,0 +1,810 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_rec.c   10.12 (Sleepycat) 8/22/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "hash.h"
+#include "btree.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "common_ext.h"
+
+/*
+ * __ham_insdel_recover --
+ *     Recovery for the hash "insdel" log record, which describes putting
+ *     a key/data pair on, or deleting one from, page argp->pgno.  The page
+ *     LSN is compared with the record's LSN (*lsnp) and with the logged
+ *     pre-change LSN (argp->pagelsn) to decide whether the pair has to be
+ *     re-added, removed, or left alone.  On success *lsnp is set to
+ *     argp->prev_lsn.  REC_INTRO/REC_CLOSE are macros defined elsewhere;
+ *     presumably they decode dbtp into argp, look up file_dbp/mpf and
+ *     return ret -- confirm against their definitions.
+ *
+ * PUBLIC: int __ham_insdel_recover
+ * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_insdel_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __ham_insdel_args *argp;
+       DB *mdbp, *file_dbp;
+       DB_MPOOLFILE *mpf;
+       HTAB *hashp;
+       PAGE *pagep;
+       u_int32_t op;
+       int cmp_n, cmp_p, getmeta, ret;
+
+       getmeta = 0;
+       hashp = NULL;                           /* XXX: shut the compiler up. */
+       REC_PRINT(__ham_insdel_print);
+       REC_INTRO(__ham_insdel_read);
+
+       ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+       if (ret != 0)
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        */
+                       *lsnp = argp->prev_lsn;
+                       ret = 0;
+                       goto out;
+               } else if ((ret = memp_fget(mpf, &argp->pgno,
+                   DB_MPOOL_CREATE, &pagep)) != 0)
+                       goto out;
+
+
+       hashp = (HTAB *)file_dbp->internal;
+       GET_META(file_dbp, hashp);
+       getmeta = 1;                            /* RELEASE_META at "out". */
+
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+       /*
+        * Two possible things going on:
+        * redo a delete/undo a put: delete the item from the page.
+        * redo a put/undo a delete: add the item to the page.
+        * If we are undoing a delete, then the information logged is the
+        * entire entry off the page, not just the data of a dbt.  In
+        * this case, we want to copy it back onto the page verbatim.
+        * We do this by calling __putitem with the type H_OFFPAGE instead
+        * of H_KEYDATA.
+        *
+        * cmp_n == 0 means the page carries this record's LSN (the change
+        * is on the page); cmp_p == 0 means the page still carries the
+        * LSN logged as argp->pagelsn (the change is not on the page).
+        */
+       op = OPCODE_OF(argp->opcode);
+
+       if ((op == DELPAIR && cmp_n == 0 && !redo) ||
+           (op == PUTPAIR && cmp_p == 0 && redo)) {
+               /* Need to redo a PUT or undo a delete. */
+               __ham_putitem(pagep, &argp->key,
+                   !redo || PAIR_ISKEYBIG(argp->opcode) ?
+                   H_OFFPAGE : H_KEYDATA);
+               __ham_putitem(pagep, &argp->data,
+                   !redo || PAIR_ISDATABIG(argp->opcode) ?
+                   H_OFFPAGE : H_KEYDATA);
+
+               LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+               if ((ret = __ham_put_page(file_dbp, pagep, 1)) != 0)
+                       goto out;
+
+       } else if ((op == DELPAIR && cmp_p == 0 && redo)
+           || (op == PUTPAIR && cmp_n == 0 && !redo)) {
+               /* Need to undo a put or redo a delete. */
+               __ham_dpair(file_dbp, pagep, argp->ndx);
+               LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+               if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
+                       goto out;
+       } else
+               if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
+                       goto out;
+
+       /* Return the previous LSN. */
+       *lsnp = argp->prev_lsn;
+
+out:   if (getmeta)
+               RELEASE_META(file_dbp, hashp);
+       REC_CLOSE;
+}
+
+/*
+ * __ham_newpage_recover --
+ *     This log message is used when we add/remove overflow pages.  This
+ *     message takes care of the pointer chains, not the data on the pages.
+ *
+ *     Up to three pages are visited in turn: the page that was added or
+ *     removed (new_pgno), then its predecessor (prev_pgno) and successor
+ *     (next_pgno) when those are not PGNO_INVALID.  Each is compared
+ *     against its own logged LSN (pagelsn, prevlsn, nextlsn).  On success
+ *     *lsnp is set to argp->prev_lsn.
+ *
+ * PUBLIC: int __ham_newpage_recover
+ * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_newpage_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __ham_newpage_args *argp;
+       DB *mdbp, *file_dbp;
+       DB_MPOOLFILE *mpf;
+       HTAB *hashp;
+       PAGE *pagep;
+       int cmp_n, cmp_p, change, getmeta, ret;
+
+       getmeta = 0;
+       hashp = NULL;                           /* XXX: shut the compiler up. */
+       REC_PRINT(__ham_newpage_print);
+       REC_INTRO(__ham_newpage_read);
+
+       ret = memp_fget(mpf, &argp->new_pgno, 0, &pagep);
+       if (ret != 0)
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        *
+                        * Note that this jump also skips GET_META, so
+                        * getmeta stays 0 for the rest of the function.
+                        */
+                       ret = 0;
+                       goto ppage;
+               } else if ((ret = memp_fget(mpf, &argp->new_pgno,
+                   DB_MPOOL_CREATE, &pagep)) != 0)
+                       goto out;
+
+       hashp = (HTAB *)file_dbp->internal;
+       GET_META(file_dbp, hashp);
+       getmeta = 1;
+
+       /*
+        * There are potentially three pages we need to check: the one
+        * that we created/deleted, the one before it and the one after
+        * it.
+        */
+
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+       change = 0;
+
+       if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
+           (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
+               /* Redo a create new page or undo a delete new page. */
+               P_INIT(pagep, file_dbp->pgsize, argp->new_pgno,
+                   argp->prev_pgno, argp->next_pgno, 0, P_HASH);
+               change = 1;
+       } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
+           (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
+               /*
+                * Redo a delete or undo a create new page.  All we
+                * really need to do is change the LSN.
+                */
+               change = 1;
+       }
+
+       if (!change) {
+               if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
+                       goto out;
+       } else {
+               LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+               if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
+                       goto out;
+       }
+
+       /* Now do the prev page. */
+ppage: if (argp->prev_pgno != PGNO_INVALID) {
+               ret = memp_fget(mpf, &argp->prev_pgno, 0, &pagep);
+
+               if (ret != 0)
+                       if (!redo) {
+                               /*
+                                * We are undoing and the page doesn't exist.
+                                * That is equivalent to having a pagelsn of 0,
+                                * so we would not have to undo anything.  In
+                                * this case, don't bother creating a page.
+                                */
+                               ret = 0;
+                               goto npage;
+                       } else if ((ret =
+                           memp_fget(mpf, &argp->prev_pgno,
+                           DB_MPOOL_CREATE, &pagep)) != 0)
+                               goto out;
+
+               cmp_n = log_compare(lsnp, &LSN(pagep));
+               cmp_p = log_compare(&LSN(pagep), &argp->prevlsn);
+               change = 0;
+
+               if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
+                   (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
+                       /* Redo a create new page or undo a delete new page. */
+                       pagep->next_pgno = argp->new_pgno;
+                       change = 1;
+               } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
+                   (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
+                       /* Redo a delete or undo a create new page. */
+                       pagep->next_pgno = argp->next_pgno;
+                       change = 1;
+               }
+
+               if (!change) {
+                       if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
+                               goto out;
+               } else {
+                       LSN(pagep) = redo ? *lsnp : argp->prevlsn;
+                       if ((ret = __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
+                               goto out;
+               }
+       }
+
+       /* Now time to do the next page */
+npage: if (argp->next_pgno != PGNO_INVALID) {
+               ret = memp_fget(mpf, &argp->next_pgno, 0, &pagep);
+
+               if (ret != 0)
+                       if (!redo) {
+                               /*
+                                * We are undoing and the page doesn't exist.
+                                * That is equivalent to having a pagelsn of 0,
+                                * so we would not have to undo anything.  In
+                                * this case, don't bother creating a page.
+                                */
+                               *lsnp = argp->prev_lsn;
+                               ret = 0;
+                               goto out;
+                       } else if ((ret =
+                           memp_fget(mpf, &argp->next_pgno,
+                           DB_MPOOL_CREATE, &pagep)) != 0)
+                               goto out;
+
+               cmp_n = log_compare(lsnp, &LSN(pagep));
+               cmp_p = log_compare(&LSN(pagep), &argp->nextlsn);
+               change = 0;
+
+               if ((cmp_p == 0 && redo && argp->opcode == PUTOVFL) ||
+                   (cmp_n == 0 && !redo && argp->opcode == DELOVFL)) {
+                       /* Redo a create new page or undo a delete new page. */
+                       pagep->prev_pgno = argp->new_pgno;
+                       change = 1;
+               } else if ((cmp_p == 0 && redo && argp->opcode == DELOVFL) ||
+                   (cmp_n == 0 && !redo && argp->opcode == PUTOVFL)) {
+                       /* Redo a delete or undo a create new page. */
+                       pagep->prev_pgno = argp->prev_pgno;
+                       change = 1;
+               }
+
+               if (!change) {
+                       if ((ret =
+                           __ham_put_page(file_dbp, (PAGE *)pagep, 0)) != 0)
+                               goto out;
+               } else {
+                       LSN(pagep) = redo ? *lsnp : argp->nextlsn;
+                       if ((ret =
+                           __ham_put_page(file_dbp, (PAGE *)pagep, 1)) != 0)
+                               goto out;
+               }
+       }
+       *lsnp = argp->prev_lsn;
+
+out:   if (getmeta)
+               RELEASE_META(file_dbp, hashp);
+       REC_CLOSE;
+}
+
+
+/*
+ * __ham_replace_recover --
+ *     This log message refers to partial puts that are local to a single
+ *     page.  You can think of them as special cases of the more general
+ *     insdel log message.
+ *
+ *     Redo writes argp->newitem over the item at index argp->ndx, undo
+ *     writes argp->olditem back; "grow" is the resulting size difference
+ *     passed to __ham_onpage_replace.  When argp->makedup is set the item
+ *     type is switched to H_DUPLICATE on redo and to H_KEYDATA on undo.
+ *     On success *lsnp is set to argp->prev_lsn.
+ *
+ * PUBLIC: int __ham_replace_recover
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_replace_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __ham_replace_args *argp;
+       DB *mdbp, *file_dbp;
+       DB_MPOOLFILE *mpf;
+       DBT dbt;
+       HKEYDATA *hk;
+       HTAB *hashp;
+       PAGE *pagep;
+       int32_t grow;
+       int change, cmp_n, cmp_p, getmeta, ret;
+
+       getmeta = 0;
+       hashp = NULL;                           /* XXX: shut the compiler up. */
+       REC_PRINT(__ham_replace_print);
+       REC_INTRO(__ham_replace_read);
+
+       ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+       if (ret != 0)
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        */
+                       *lsnp = argp->prev_lsn;
+                       ret = 0;
+                       goto out;
+               } else if ((ret = memp_fget(mpf, &argp->pgno,
+                   DB_MPOOL_CREATE, &pagep)) != 0)
+                       goto out;
+
+       hashp = (HTAB *)file_dbp->internal;
+       GET_META(file_dbp, hashp);
+       getmeta = 1;
+
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+       if (cmp_p == 0 && redo) {
+               change = 1;
+               /* Reapply the change as specified. */
+               dbt.data = argp->newitem.data;
+               dbt.size = argp->newitem.size;
+               grow = argp->newitem.size - argp->olditem.size;
+               LSN(pagep) = *lsnp;
+       } else if (cmp_n == 0 && !redo) {
+               change = 1;
+               /* Undo the already applied change. */
+               dbt.data = argp->olditem.data;
+               dbt.size = argp->olditem.size;
+               grow = argp->olditem.size - argp->newitem.size;
+               LSN(pagep) = argp->pagelsn;
+       } else {
+               change = 0;                     /* dbt stays unset and unused. */
+               grow = 0;
+       }
+
+       if (change) {
+               __ham_onpage_replace(pagep,
+                   file_dbp->pgsize, argp->ndx, argp->off, grow, &dbt);
+               if (argp->makedup) {
+                       hk = GET_HKEYDATA(pagep, argp->ndx);
+                       if (redo)
+                               hk->type = H_DUPLICATE;
+                       else
+                               hk->type = H_KEYDATA;
+               }
+       }
+
+       if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
+               goto out;
+
+       *lsnp = argp->prev_lsn;
+
+out:   if (getmeta)
+               RELEASE_META(file_dbp, hashp);
+       REC_CLOSE;
+}
+
+/*
+ * __ham_newpgno_recover --
+ *     This log message is used when allocating or deleting an overflow
+ *     page.  It takes care of modifying the meta data.
+ *
+ *     The meta-data (hdr->last_freed, hdr->spares[hdr->ovfl_point] and
+ *     hdr->lsn) is brought in line first, using argp->metalsn; the
+ *     allocated/freed page argp->pgno is then handled separately, using
+ *     argp->pagelsn.  On success *lsnp is set to argp->prev_lsn.
+ *
+ * PUBLIC: int __ham_newpgno_recover
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_newpgno_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+        void *info;
+{
+       __ham_newpgno_args *argp;
+       DB *mdbp, *file_dbp;
+       DB_MPOOLFILE *mpf;
+       HTAB *hashp;
+       PAGE *pagep;
+       int change, cmp_n, cmp_p, getmeta, ret;
+
+       getmeta = 0;
+       hashp = NULL;                           /* XXX: shut the compiler up. */
+       REC_PRINT(__ham_newpgno_print);
+       REC_INTRO(__ham_newpgno_read);
+
+       hashp = (HTAB *)file_dbp->internal;
+       GET_META(file_dbp, hashp);
+       getmeta = 1;
+
+       /*
+        * There are two phases to the recovery here.  First we need
+        * to update the meta data; then we need to update the page.
+        * We'll do the meta-data first.
+        */
+       cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
+       cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
+
+       change = 0;
+       if ((cmp_p == 0 && redo && argp->opcode == ALLOCPGNO) ||
+           (cmp_n == 0 && !redo && argp->opcode == DELPGNO)) {
+               /* Need to redo an allocation or undo a deletion. */
+               hashp->hdr->last_freed = argp->free_pgno;
+               if (redo && argp->old_pgno != 0) /* Must be ALLOCPGNO */
+                       hashp->hdr->spares[hashp->hdr->ovfl_point]++;
+               change = 1;
+       } else if (cmp_p == 0 && redo && argp->opcode == DELPGNO) {
+               /* Need to redo a deletion */
+               hashp->hdr->last_freed = argp->pgno;
+               change = 1;
+       } else if (cmp_n == 0 && !redo && argp->opcode == ALLOCPGNO) {
+               /* undo an allocation. */
+               if (argp->old_pgno == 0)
+                       hashp->hdr->last_freed = argp->pgno;
+               else {
+                       hashp->hdr->spares[hashp->hdr->ovfl_point]--;
+                       hashp->hdr->last_freed = 0;
+               }
+               change = 1;
+       }
+       if (change) {
+               hashp->hdr->lsn = redo ? *lsnp : argp->metalsn;
+               F_SET(file_dbp, DB_HS_DIRTYMETA);
+       }
+
+
+       /* Now check the newly allocated/freed page. */
+       ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+
+       if (ret != 0)
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        */
+                       *lsnp = argp->prev_lsn;
+                       ret = 0;
+                       goto out;
+               } else if ((ret = memp_fget(mpf, &argp->pgno,
+                   DB_MPOOL_CREATE, &pagep)) != 0)
+                       goto out;
+
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+       change = 0;
+       if (cmp_p == 0 && redo && argp->opcode == ALLOCPGNO) {
+               /* Need to redo an allocation. */
+               P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
+                   PGNO_INVALID, 0, argp->new_type);
+               change = 1;
+       } else if (cmp_n == 0 && !redo && argp->opcode == DELPGNO) {
+               /* Undoing a delete. */
+               P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID,
+                   argp->old_pgno, 0, argp->old_type);
+               change = 1;
+       } else if ((cmp_p == 0 && redo && argp->opcode == DELPGNO) ||
+           (cmp_n == 0 && !redo && argp->opcode == ALLOCPGNO)) {
+               /*
+                * Need to redo a deletion or undo an allocation: mark the
+                * page invalid and chain it to argp->free_pgno.
+                */
+               NEXT_PGNO(pagep) = argp->free_pgno;
+               TYPE(pagep) = P_INVALID;
+               change = 1;
+       }
+       if (change)
+               LSN(pagep) = redo ? *lsnp : argp->pagelsn;
+
+       if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
+               goto out;
+
+       *lsnp = argp->prev_lsn;
+
+out:   if (getmeta)
+               RELEASE_META(file_dbp, hashp);
+       REC_CLOSE;
+
+}
+
+/*
+ * __ham_splitmeta_recover --
+ *     This is the meta-data part of the split.  Records the new and old
+ *     bucket numbers and the new/old mask information.
+ *
+ *     Redo recomputes max_bucket, ovfl_point/spares and the high/low
+ *     masks from argp->bucket; undo restores max_bucket, ovfl_point and
+ *     the spares entry from the record and rederives the masks.  No data
+ *     page is fetched here.  *lsnp is set to argp->prev_lsn.
+ *
+ * PUBLIC: int __ham_splitmeta_recover
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_splitmeta_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+        void *info;
+{
+       __ham_splitmeta_args *argp;
+       DB *mdbp, *file_dbp;
+       DB_MPOOLFILE *mpf;
+       HTAB *hashp;
+       int change, cmp_n, cmp_p, getmeta, ret;
+       u_int32_t pow;
+
+       getmeta = 0;
+       hashp = NULL;                           /* XXX: shut the compiler up. */
+       REC_PRINT(__ham_splitmeta_print);
+       REC_INTRO(__ham_splitmeta_read);
+
+       hashp = (HTAB *)file_dbp->internal;
+       GET_META(file_dbp, hashp);
+       getmeta = 1;
+
+       /*
+        * Only the meta-data is updated by this record, so the LSN
+        * comparisons are made against the LSN stored in the hash
+        * header rather than against a data page.
+        */
+       cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
+       cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
+
+       change = 0;
+       if (cmp_p == 0 && redo) {
+               /* Need to redo the split information. */
+               hashp->hdr->max_bucket = argp->bucket + 1;
+               pow = __db_log2(hashp->hdr->max_bucket + 1);
+               if (pow > hashp->hdr->ovfl_point) {
+                       hashp->hdr->spares[pow] =
+                               hashp->hdr->spares[hashp->hdr->ovfl_point];
+                       hashp->hdr->ovfl_point = pow;
+               }
+               if (hashp->hdr->max_bucket > hashp->hdr->high_mask) {
+                       hashp->hdr->low_mask = hashp->hdr->high_mask;
+                       hashp->hdr->high_mask =
+                           hashp->hdr->max_bucket | hashp->hdr->low_mask;
+               }
+               change = 1;
+       } else if (cmp_n == 0 && !redo) {
+               /* Need to undo the split information. */
+               hashp->hdr->max_bucket = argp->bucket;
+               hashp->hdr->ovfl_point = argp->ovflpoint;
+               hashp->hdr->spares[hashp->hdr->ovfl_point] = argp->spares;
+               pow = 1 << __db_log2(hashp->hdr->max_bucket + 1);
+               hashp->hdr->high_mask = pow - 1;
+               hashp->hdr->low_mask = (pow >> 1) - 1;
+               change = 1;
+       }
+       if (change) {
+               hashp->hdr->lsn = redo ? *lsnp : argp->metalsn;
+               F_SET(file_dbp, DB_HS_DIRTYMETA);
+       }
+       *lsnp = argp->prev_lsn;
+
+out:   if (getmeta)
+               RELEASE_META(file_dbp, hashp);
+       REC_CLOSE;
+}
+
+/*
+ * __ham_splitdata_recover --
+ *     Recovery for the data-page part of a split.  The record carries a
+ *     page image (argp->pageimage) for page argp->pgno: redo copies the
+ *     image onto the page for SPLITNEW records, undo copies it back for
+ *     SPLITOLD records and otherwise re-initializes the page as an empty
+ *     hash page.  On success *lsnp is set to argp->prev_lsn.
+ *
+ * PUBLIC: int __ham_splitdata_recover
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_splitdata_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+        void *info;
+{
+       __ham_splitdata_args *argp;
+       DB *mdbp, *file_dbp;
+       DB_MPOOLFILE *mpf;
+       HTAB *hashp;
+       PAGE *pagep;
+       int change, cmp_n, cmp_p, getmeta, ret;
+
+       getmeta = 0;
+       hashp = NULL;                           /* XXX: shut the compiler up. */
+       REC_PRINT(__ham_splitdata_print);
+       REC_INTRO(__ham_splitdata_read);
+
+       ret = memp_fget(mpf, &argp->pgno, 0, &pagep);
+       if (ret != 0)
+               if (!redo) {
+                       /*
+                        * We are undoing and the page doesn't exist.  That
+                        * is equivalent to having a pagelsn of 0, so we
+                        * would not have to undo anything.  In this case,
+                        * don't bother creating a page.
+                        */
+                       *lsnp = argp->prev_lsn;
+                       ret = 0;
+                       goto out;
+               } else if ((ret = memp_fget(mpf, &argp->pgno,
+                   DB_MPOOL_CREATE, &pagep)) != 0)
+                       goto out;
+
+       hashp = (HTAB *)file_dbp->internal;
+       GET_META(file_dbp, hashp);
+       getmeta = 1;
+
+       cmp_n = log_compare(lsnp, &LSN(pagep));
+       cmp_p = log_compare(&LSN(pagep), &argp->pagelsn);
+
+       /*
+        * There are two types of log messages here, one for the old page
+        * and one for the new pages created.  The original image in the
+        * SPLITOLD record is used for undo.  The image in the SPLITNEW
+        * is used for redo.  We should never have a case where there is
+        * a redo operation and the SPLITOLD record is on disk, but not
+        * the SPLITNEW record.  Therefore, we only have work to do when
+        * redo NEW messages and undo OLD messages, but we have to update
+        * LSNs in both cases.
+        */
+       change = 0;
+       if (cmp_p == 0 && redo) {
+               if (argp->opcode == SPLITNEW)
+                       /* Need to redo the split described. */
+                       memcpy(pagep, argp->pageimage.data,
+                           argp->pageimage.size);
+               LSN(pagep) = *lsnp;             /* Set after the image copy. */
+               change = 1;
+       } else if (cmp_n == 0 && !redo) {
+               if (argp->opcode == SPLITOLD) {
+                       /* Put back the old image. */
+                       memcpy(pagep, argp->pageimage.data,
+                           argp->pageimage.size);
+               } else
+                       P_INIT(pagep, file_dbp->pgsize, argp->pgno,
+                           PGNO_INVALID, PGNO_INVALID, 0, P_HASH);
+               LSN(pagep) = argp->pagelsn;
+               change = 1;
+       }
+       if ((ret = __ham_put_page(file_dbp, pagep, change)) != 0)
+               goto out;
+
+       *lsnp = argp->prev_lsn;
+
+out:   if (getmeta)
+               RELEASE_META(file_dbp, hashp);
+       REC_CLOSE;
+}
+
+/*
+ * __ham_ovfl_recover --
+ *     This message is generated when we initialize a set of overflow pages.
+ *
+ *     The meta-data (last_freed, spares, LSN) is brought in line with the
+ *     record first.  Then pages start_pgno .. start_pgno + npages - 1 are
+ *     walked: redo re-initializes every page whose LSN compares lower than
+ *     this record's as an empty hash page chained to its successor (the
+ *     last one to free_pgno); undo zeroes the LSN of every page that
+ *     exists and skips pages that do not.  On success *lsnp is set to
+ *     argp->prev_lsn.
+ *
+ * PUBLIC: int __ham_ovfl_recover
+ * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__ham_ovfl_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+        void *info;
+{
+       __ham_ovfl_args *argp;
+       DB *mdbp, *file_dbp;
+       DB_MPOOLFILE *mpf;
+       HTAB *hashp;
+       PAGE *pagep;
+       db_pgno_t max_pgno, pgno;
+       int cmp_n, cmp_p, getmeta, ret;
+
+       getmeta = 0;
+       hashp = NULL;                           /* XXX: shut the compiler up. */
+       REC_PRINT(__ham_ovfl_print);
+       REC_INTRO(__ham_ovfl_read);
+
+       /*
+        * file_dbp must stay valid from here on: it is passed to F_SET,
+        * __ham_put_page and RELEASE_META and dereferenced for pgsize
+        * below, so it must not be cleared.
+        */
+       hashp = (HTAB *)file_dbp->internal;
+       GET_META(file_dbp, hashp);
+       getmeta = 1;
+
+       cmp_n = log_compare(lsnp, &hashp->hdr->lsn);
+       cmp_p = log_compare(&hashp->hdr->lsn, &argp->metalsn);
+
+       /*
+        * NOTE(review): spares[] is indexed by npages - 1 here, whereas
+        * the other recovery functions index it by an overflow point --
+        * confirm against the code that writes this log record.
+        */
+       if (cmp_p == 0 && redo) {
+               /* Redo the allocation. */
+               hashp->hdr->last_freed = argp->start_pgno;
+               hashp->hdr->spares[argp->npages  - 1] += argp->npages;
+               hashp->hdr->lsn = *lsnp;
+               F_SET(file_dbp, DB_HS_DIRTYMETA);
+       } else if (cmp_n == 0 && !redo) {
+               /* Undo the allocation. */
+               hashp->hdr->last_freed = argp->free_pgno;
+               hashp->hdr->spares[argp->npages  - 1] -= argp->npages;
+               hashp->hdr->lsn = argp->metalsn;
+               F_SET(file_dbp, DB_HS_DIRTYMETA);
+       }
+
+       max_pgno = argp->start_pgno + argp->npages - 1;
+       ret = 0;
+       for (pgno = argp->start_pgno; pgno <= max_pgno; pgno++) {
+               ret = memp_fget(mpf, &pgno, 0, &pagep);
+               if (ret != 0) {
+                       if (!redo) {
+                               /*
+                                * We are undoing and the page doesn't exist,
+                                * so there is nothing to undo and no page to
+                                * put back (pagep was never set).
+                                */
+                               ret = 0;
+                               continue;
+                       }
+                       if ((ret = memp_fget(mpf, &pgno,
+                           DB_MPOOL_CREATE, &pagep)) != 0)
+                               goto out;
+               }
+               if (redo && log_compare((const DB_LSN *)lsnp,
+                   (const DB_LSN *)&LSN(pagep)) > 0) {
+                       P_INIT(pagep, file_dbp->pgsize, pgno, PGNO_INVALID,
+                           pgno == max_pgno ? argp->free_pgno : pgno + 1,
+                           0, P_HASH);
+                       LSN(pagep) = *lsnp;
+                       ret = __ham_put_page(file_dbp, pagep, 1);
+               } else if (!redo) {
+                       ZERO_LSN(pagep->lsn);
+                       ret = __ham_put_page(file_dbp, pagep, 1);
+               } else
+                       ret = __ham_put_page(file_dbp, pagep, 0);
+               if (ret)
+                       goto out;
+       }
+
+       *lsnp = argp->prev_lsn;
+out:   if (getmeta)
+               RELEASE_META(file_dbp, hashp);
+       REC_CLOSE;
+}
diff --git a/db2/hash/hash_stat.c b/db2/hash/hash_stat.c
new file mode 100644 (file)
index 0000000..99c6078
--- /dev/null
@@ -0,0 +1,58 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)hash_stat.c  10.6 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "hash.h"
+#include "common_ext.h"
+
+/*
+ * __ham_stat --
+ *     Gather/print the hash statistics.
+ *
+ *     Writes the access, collision, expansion, overflow and big-page
+ *     counters kept in the HTAB to fp, then the element count, maximum
+ *     bucket and the NCACHED spares[] entries from the hash header.
+ *     Always returns 0.
+ *
+ *     NOTE(review): the first five counters are printed with %lu without
+ *     a cast, unlike the header fields below; this assumes they are
+ *     declared as unsigned long -- confirm against the HTAB definition.
+ *
+ * PUBLIC: int __ham_stat __P((DB *, FILE *));
+ */
+int
+__ham_stat(dbp, fp)
+       DB *dbp;
+       FILE *fp;
+{
+       HTAB *hashp;
+       int i;
+
+       hashp = (HTAB *)dbp->internal;
+
+       fprintf(fp, "hash: accesses %lu collisions %lu\n",
+           hashp->hash_accesses, hashp->hash_collisions);
+       fprintf(fp, "hash: expansions %lu\n", hashp->hash_expansions);
+       fprintf(fp, "hash: overflows %lu\n", hashp->hash_overflows);
+       fprintf(fp, "hash: big key/data pages %lu\n", hashp->hash_bigpages);
+
+       SET_LOCKER(dbp, NULL);
+       GET_META(dbp, hashp);                   /* hashp->hdr is read below. */
+       fprintf(fp, "keys %lu maxp %lu\n",
+           (u_long)hashp->hdr->nelem, (u_long)hashp->hdr->max_bucket);
+
+       for (i = 0; i < NCACHED; i++)
+               fprintf(fp,
+                   "spares[%d] = %lu\n", i, (u_long)hashp->hdr->spares[i]);
+
+       RELEASE_META(dbp, hashp);
+       return (0);
+}
diff --git a/db2/include/btree.h b/db2/include/btree.h
new file mode 100644 (file)
index 0000000..5cf4224
--- /dev/null
@@ -0,0 +1,312 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995, 1996
+ *     Keith Bostic.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994, 1995
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Olson.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)btree.h     10.16 (Sleepycat) 8/24/97
+ */
+
+/* Forward structure declarations. */
+struct __btree;                typedef struct __btree BTREE;
+struct __cursor;       typedef struct __cursor CURSOR;
+struct __epg;          typedef struct __epg EPG;
+struct __rcursor;      typedef struct __rcursor RCURSOR;
+struct __recno;                typedef struct __recno RECNO;
+
+#undef DEFMINKEYPAGE                   /* Minimum keys per page */
+#define        DEFMINKEYPAGE    (2)
+
+#undef ISINTERNAL                      /* If an internal page. */
+#define        ISINTERNAL(p)   (TYPE(p) == P_IBTREE || TYPE(p) ==  P_IRECNO)
+#undef ISLEAF                          /* If a leaf page. */
+#define        ISLEAF(p)       (TYPE(p) == P_LBTREE || TYPE(p) ==  P_LRECNO)
+
+/* Allocate (GETHANDLE, into *dbpp) and discard (PUTHANDLE) thread handles. */
+#define        GETHANDLE(dbp, set_txn, dbpp, ret) {                            \
+       if (F_ISSET(dbp, DB_AM_THREAD)) {                               \
+               if ((ret = __db_gethandle(dbp, __bam_bdup, dbpp)) != 0) \
+                       return (ret);                                   \
+       } else                                                          \
+               *(dbpp) = (dbp);                                        \
+       (*(dbpp))->txn = (set_txn);  /* not *dbpp->txn */              \
+}
+#define        PUTHANDLE(dbp) {                                                \
+       (dbp)->txn = NULL;      /* Clear the handle's txn. */           \
+       if (F_ISSET(dbp, DB_AM_THREAD))                                 \
+               __db_puthandle(dbp);                                    \
+}
+
+/*
+ * Data-item locks must be held until the enclosing transaction ends; locks
+ * taken only to walk the tree need not be.  __BT_LPUT releases whenever
+ * locking is on; __BT_TLPUT releases only if the handle also has no txn.
+ * Each macro evaluates to 0 when it does not call lock_put().
+ */
+#define        __BT_LPUT(dbp, lock)                                            \
+       ((F_ISSET((dbp), DB_AM_LOCKING)) ?                              \
+           lock_put((dbp)->dbenv->lk_info, (lock)) : 0)
+#define        __BT_TLPUT(dbp, lock)                                           \
+       ((F_ISSET((dbp), DB_AM_LOCKING) && (dbp)->txn == NULL) ?        \
+           lock_put((dbp)->dbenv->lk_info, (lock)) : 0)
+
+/*
+ * Flags to __bam_search() and __bam_rsearch().
+ *
+ * Note, internal page searches must find the largest record less than key in
+ * the tree so that descents work.  Leaf page searches must find the smallest
+ * record greater than key so that the returned index is the record's correct
+ * position for insertion.
+ *
+ * The flags parameter to the search routines describes three aspects of the
+ * search: the type of locking required (including if we're locking a pair of
+ * pages), the item to return in the presence of duplicates and whether or not
+ * to return deleted entries.  To simplify both the mnemonic representation
+ * and the code that checks for various cases, we construct a set of bitmasks.
+ */
+#define        S_READ          0x0001          /* Read locks. */
+#define        S_WRITE         0x0002          /* Write locks. */
+
+#define        S_APPEND        0x0040          /* Append to the tree. */
+#define        S_DELNO         0x0080          /* Don't return deleted items. */
+#define        S_DUPFIRST      0x0100          /* Return first duplicate. */
+#define        S_DUPLAST       0x0200          /* Return last duplicate. */
+#define        S_EXACT         0x0400          /* Exact items only. */
+#define        S_PARENT        0x0800          /* Lock page pair. */
+/* Predefined combinations of the bits above. */
+#define        S_DELETE        (S_WRITE | S_DUPFIRST | S_DELNO | S_EXACT)
+#define        S_FIND          (S_READ | S_DUPFIRST | S_DELNO)
+#define        S_INSERT        (S_WRITE | S_DUPLAST)
+#define        S_KEYFIRST      (S_WRITE | S_DUPFIRST)
+#define        S_KEYLAST       (S_WRITE | S_DUPLAST)
+#define        S_WRPAIR        (S_WRITE | S_DUPLAST | S_PARENT)
+
+/*
+ * Flags to __bam_iitem().
+ */
+#define        BI_NEWKEY       0x01            /* New key. */
+#define        BI_DELETED      0x02            /* Key/data pair is a placeholder. */
+
+/*
+ * Various routines pass around page references.  A page reference can be a
+ * pointer to the page or a page number; for either, an indx can designate
+ * an item on the page.
+ */
+struct __epg {                         /* Typedef'd as EPG above. */
+       PAGE     *page;                 /* The page. */
+       db_indx_t indx;                 /* The index on the page. */
+       DB_LOCK   lock;                 /* The page's lock. */
+};
+
+/*
+ * Btree cursor (struct __cursor, typedef'd as CURSOR).  It is preceded by
+ * ca_replace_arg, the enum type of the last argument of
+ * __bam_ca_replace() (see btree_ext.h).
+ */
+typedef enum {
+       REPLACE_SETUP,
+       REPLACE_SUCCESS,
+       REPLACE_FAILED
+} ca_replace_arg;
+struct __cursor {
+       DBC             *dbc;           /* Enclosing DBC. */
+
+       PAGE            *page;          /* Cursor page. */
+
+       db_pgno_t        pgno;          /* Page. */
+       db_indx_t        indx;          /* Page item ref'd by the cursor. */
+
+       db_pgno_t        dpgno;         /* Duplicate page. */
+       db_indx_t        dindx;         /* Presumably the item on dpgno. */
+
+       DB_LOCK          lock;          /* Cursor read lock. */
+       db_lockmode_t    mode;          /* Lock mode. */
+
+       /*
+        * If a cursor record is deleted, the key/data pair has to remain on
+        * the page so that subsequent inserts/deletes don't interrupt the
+        * cursor progression through the file.  This results in interesting
+        * cases when "standard" operations, e.g., dbp->put() are done in the
+        * context of "deleted" cursors.
+        *
+        * C_DELETED -- The item referenced by the cursor has been "deleted"
+        *              but not physically removed from the page.
+        * C_REPLACE -- The "deleted" item referenced by a cursor has been
+        *              replaced by a dbp->put(), so the cursor is no longer
+        *              responsible for physical removal from the page.
+        * C_REPLACE_SETUP --
+        *              We are about to overwrite a "deleted" item, flag any
+        *              cursors referencing it for transition to C_REPLACE
+        *              state.
+        */
+#define        C_DELETED       0x0001
+#define        C_REPLACE       0x0002
+#define        C_REPLACE_SETUP 0x0004
+       u_int32_t        flags;         /* C_* flags above. */
+};
+
+/*
+ * Recno cursor (struct __rcursor, typedef'd as RCURSOR).  It is preceded
+ * by ca_recno_arg, the enum type of the last argument of __ram_ca()
+ * (see btree_ext.h).
+ */
+typedef enum {
+       CA_DELETE,
+       CA_IAFTER,
+       CA_IBEFORE
+} ca_recno_arg;
+struct __rcursor {
+       DBC             *dbc;           /* Enclosing DBC. */
+
+       db_recno_t       recno;         /* Current record number. */
+
+       /*
+        * Cursors referencing "deleted" records are positioned between
+        * two records, and so must be specially adjusted until they are
+        * moved.
+        */
+#define        CR_DELETED      0x0001          /* Record deleted. */
+       u_int32_t        flags;         /* CR_* flags above. */
+};
+
+/*
+ * We maintain a stack of the pages that we're locking in the tree.  Btrees
+ * (currently) only save two levels of the tree at a time, so the default
+ * stack is always large enough.  Recno trees have to lock the entire tree to
+ * do inserts/deletes, however.  Grow the stack as necessary.
+ */
+#undef BT_STK_CLR                      /* Reset current entry to bt_sp. */
+#define        BT_STK_CLR(t)                                                   \
+       ((t)->bt_csp = (t)->bt_sp)
+
+#undef BT_STK_ENTER
+#define        BT_STK_ENTER(t, pagep, page_indx, lock, ret) do {               \
+       if ((ret =                                                      \
+           (t)->bt_csp == (t)->bt_esp ? __bam_stkgrow(t) : 0) == 0) {  \
+               (t)->bt_csp->page = pagep;                              \
+               (t)->bt_csp->indx = page_indx;                          \
+               (t)->bt_csp->lock = lock;                               \
+       }                                                               \
+} while (0)
+
+#undef BT_STK_PUSH
+#define        BT_STK_PUSH(t, pagep, page_indx, lock, ret) do {                \
+       BT_STK_ENTER(t, pagep, page_indx, lock, ret);                   \
+       ++(t)->bt_csp;                                                  \
+} while (0)
+
+#undef BT_STK_POP                      /* Step back; NULL at the stack base. */
+#define        BT_STK_POP(t)                                                   \
+       ((t)->bt_csp == (t)->bt_sp ? NULL : --(t)->bt_csp)
+
+/*
+ * The in-memory recno data structure.
+ *
+ * !!!
+ * These fields are ignored as far as multi-threading is concerned.  There
+ * are no transaction semantics associated with backing files, nor is there
+ * any thread protection.
+ */
+#undef RECNO_OOB
+#define        RECNO_OOB       0               /* Illegal record number. */
+
+struct __recno {
+       int              re_delim;      /* Variable-length delimiting byte. */
+       int              re_pad;        /* Fixed-length padding byte. */
+       u_int32_t        re_len;        /* Length for fixed-length records. */
+
+       char            *re_source;     /* Source file name. */
+       int              re_fd;         /* Source file descriptor. */
+       db_recno_t       re_last;       /* Last record number read. */
+       void            *re_cmap;       /* Current point in mapped space. */
+       void            *re_smap;       /* Start of mapped space. */
+       void            *re_emap;       /* End of mapped space. */
+       size_t           re_msize;      /* Size of mapped region. */
+                                       /* Recno input function. */
+       int (*re_irec) __P((DB *, db_recno_t));
+
+#define        RECNO_EOF       0x0001          /* EOF on backing source file. */
+#define        RECNO_MODIFIED  0x0002          /* Tree was modified. */
+       u_int32_t        flags;         /* RECNO_* flags above. */
+};
+
+/*
+ * The in-memory btree data structure.
+ */
+struct __btree {
+/*
+ * These fields are per-thread and are initialized when the BTREE structure
+ * is created.
+ */
+       db_pgno_t        bt_lpgno;      /* Last insert location. */
+
+       DBT              bt_rkey;       /* Returned key. */
+       DBT              bt_rdata;      /* Returned data. */
+
+       EPG             *bt_sp;         /* Stack pointer. */
+       EPG             *bt_csp;        /* Current stack entry. */
+       EPG             *bt_esp;        /* End stack pointer. */
+       EPG              bt_stack[5];   /* Default stack. */
+
+       RECNO           *bt_recno;      /* Private recno structure. */
+
+       DB_BTREE_LSTAT lstat;           /* Btree local statistics. */
+
+/*
+ * These fields are copied from the original BTREE structure and never
+ * change.
+ */
+       db_indx_t        bt_maxkey;     /* Maximum keys per page. */
+       db_indx_t        bt_minkey;     /* Minimum keys per page. */
+
+       int (*bt_compare)               /* Comparison function. */
+           __P((const DBT *, const DBT *));
+       size_t(*bt_prefix)              /* Prefix function. */
+           __P((const DBT *, const DBT *));
+
+       db_indx_t        bt_ovflsize;   /* Maximum key/data on-page size. */
+};
+
+#include "btree_auto.h"
+#include "btree_ext.h"
+#include "db_am.h"
+#include "common_ext.h"
diff --git a/db2/include/btree_auto.h b/db2/include/btree_auto.h
new file mode 100644 (file)
index 0000000..b422e1d
--- /dev/null
@@ -0,0 +1,108 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#ifndef bam_AUTO_H
+#define bam_AUTO_H
+
+#define        DB_bam_pg_alloc (DB_bam_BEGIN + 1)
+/* Record struct used with __bam_pg_alloc_read() (see btree_ext.h). */
+typedef struct _bam_pg_alloc_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       DB_LSN  meta_lsn;
+       DB_LSN  page_lsn;
+       db_pgno_t       pgno;
+       u_int32_t       ptype;
+       db_pgno_t       next;
+} __bam_pg_alloc_args;
+
+
+#define        DB_bam_pg_free  (DB_bam_BEGIN + 2)
+/* Record struct used with __bam_pg_free_read() (see btree_ext.h). */
+typedef struct _bam_pg_free_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DB_LSN  meta_lsn;
+       DBT     header;
+       db_pgno_t       next;
+} __bam_pg_free_args;
+
+
+#define        DB_bam_split    (DB_bam_BEGIN + 3)
+/* Record struct used with __bam_split_read() (see btree_ext.h). */
+typedef struct _bam_split_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       left;
+       DB_LSN  llsn;
+       db_pgno_t       right;
+       DB_LSN  rlsn;
+       u_int32_t       indx;
+       db_pgno_t       npgno;
+       DB_LSN  nlsn;
+       DBT     pg;
+} __bam_split_args;
+
+
+#define        DB_bam_rsplit   (DB_bam_BEGIN + 4)
+/* Record struct used with __bam_rsplit_read() (see btree_ext.h). */
+typedef struct _bam_rsplit_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DBT     pgdbt;
+       DBT     rootent;
+       DB_LSN  rootlsn;
+} __bam_rsplit_args;
+
+
+#define        DB_bam_adj      (DB_bam_BEGIN + 5)
+/* Record struct used with __bam_adj_read() (see btree_ext.h). */
+typedef struct _bam_adj_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DB_LSN  lsn;
+       u_int32_t       indx;
+       u_int32_t       indx_copy;
+       u_int32_t       is_insert;
+} __bam_adj_args;
+
+
+#define        DB_bam_cadjust  (DB_bam_BEGIN + 6)
+/* Record struct used with __bam_cadjust_read() (see btree_ext.h). */
+typedef struct _bam_cadjust_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DB_LSN  lsn;
+       u_int32_t       indx;
+       int32_t adjust;
+       int32_t total;
+} __bam_cadjust_args;
+
+
+#define        DB_bam_cdel     (DB_bam_BEGIN + 7)
+/* Record struct used with __bam_cdel_read() (see btree_ext.h). */
+typedef struct _bam_cdel_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DB_LSN  lsn;
+       u_int32_t       indx;
+} __bam_cdel_args;
+
+#endif
diff --git a/db2/include/btree_ext.h b/db2/include/btree_ext.h
new file mode 100644 (file)
index 0000000..dab0f5b
--- /dev/null
@@ -0,0 +1,121 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __bam_close __P((DB *));
+int __bam_sync __P((DB *, int));
+int __bam_cmp __P((DB *, const DBT *, EPG *));
+int __bam_defcmp __P((const DBT *, const DBT *));
+size_t __bam_defpfx __P((const DBT *, const DBT *));
+int __bam_pgin __P((db_pgno_t, void *, DBT *));
+int __bam_pgout __P((db_pgno_t, void *, DBT *));
+int __bam_mswap __P((PAGE *));
+int __bam_cursor __P((DB *, DB_TXN *, DBC **));
+int __bam_get __P((DB *, DB_TXN *, DBT *, DBT *, int));
+int __bam_ovfl_chk __P((DB *, CURSOR *, u_int32_t, int));
+int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, CURSOR *));
+void __bam_ca_di __P((DB *, db_pgno_t, u_int32_t, int));
+void __bam_ca_dup __P((DB *,
+   db_pgno_t, u_int32_t, u_int32_t, db_pgno_t, u_int32_t));
+void __bam_ca_move __P((DB *, BTREE *, db_pgno_t, db_pgno_t));
+void __bam_ca_replace
+   __P((DB *, db_pgno_t, u_int32_t, ca_replace_arg));
+void __bam_ca_split __P((DB *,
+   db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
+int __bam_delete __P((DB *, DB_TXN *, DBT *, int));
+int __ram_delete __P((DB *, DB_TXN *, DBT *, int));
+int __bam_ditem __P((DB *, PAGE *, u_int32_t));
+int __bam_adjindx __P((DB *, PAGE *, u_int32_t, u_int32_t, int));
+int __bam_dpage __P((DB *, const DBT *));
+int __bam_open __P((DB *, DBTYPE, DB_INFO *));
+int __bam_bdup __P((DB *, DB *));
+int __bam_new __P((DB *, u_int32_t, PAGE **));
+int __bam_free __P((DB *, PAGE *));
+int __bam_lget __P((DB *, int, db_pgno_t, db_lockmode_t, DB_LOCK *));
+int __bam_lput __P((DB *, DB_LOCK));
+int __bam_pget __P((DB *, PAGE **, db_pgno_t *, int));
+int __bam_put __P((DB *, DB_TXN *, DBT *, DBT *, int));
+int __bam_iitem __P((DB *,
+   PAGE **, db_indx_t *, DBT *, DBT *, int, int));
+int __bam_pg_alloc_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_pg_free_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_split_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_rsplit_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_adj_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_cadjust_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_cdel_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ram_open __P((DB *, DBTYPE, DB_INFO *));
+int __ram_cursor __P((DB *, DB_TXN *, DBC **));
+int __ram_close __P((DB *));
+void __ram_ca __P((DB *, db_recno_t, ca_recno_arg));
+int __ram_getno __P((DB *, const DBT *, db_recno_t *, int));
+int __ram_snapshot __P((DB *));
+int __bam_rsearch __P((DB *, db_recno_t *, u_int, int, int *));
+int __bam_adjust __P((DB *, BTREE *, int));
+int __bam_nrecs __P((DB *, db_recno_t *));
+db_recno_t __bam_total __P((PAGE *));
+int __bam_search __P((DB *,
+    const DBT *, u_int, int, db_recno_t *, int *));
+int __bam_stkrel __P((DB *));
+int __bam_stkgrow __P((BTREE *));
+int __bam_split __P((DB *, void *));
+int __bam_broot __P((DB *, PAGE *, PAGE *, PAGE *));
+int __ram_root __P((DB *, PAGE *, PAGE *, PAGE *));
+int __bam_copy __P((DB *, PAGE *, PAGE *, u_int32_t, u_int32_t));
+int __bam_stat __P((DB *, void *, void *(*)(size_t), int));
+void __bam_add_mstat __P((DB_BTREE_LSTAT *, DB_BTREE_LSTAT *));
+int __bam_pg_alloc_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, DB_LSN *, DB_LSN *, db_pgno_t,
+    u_int32_t, db_pgno_t));
+int __bam_pg_alloc_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_pg_alloc_read __P((void *, __bam_pg_alloc_args **));
+int __bam_pg_free_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, DBT *,
+    db_pgno_t));
+int __bam_pg_free_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_pg_free_read __P((void *, __bam_pg_free_args **));
+int __bam_split_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
+    DB_LSN *, u_int32_t, db_pgno_t, DB_LSN *,
+    DBT *));
+int __bam_split_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_split_read __P((void *, __bam_split_args **));
+int __bam_rsplit_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DBT *, DBT *,
+    DB_LSN *));
+int __bam_rsplit_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_rsplit_read __P((void *, __bam_rsplit_args **));
+int __bam_adj_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t));
+int __bam_adj_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_adj_read __P((void *, __bam_adj_args **));
+int __bam_cadjust_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, u_int32_t,
+    int32_t, int32_t));
+int __bam_cadjust_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_cadjust_read __P((void *, __bam_cadjust_args **));
+int __bam_cdel_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, u_int32_t));
+int __bam_cdel_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __bam_cdel_read __P((void *, __bam_cdel_args **));
+int __bam_init_print __P((DB_ENV *));
+int __bam_init_recover __P((DB_ENV *));
diff --git a/db2/include/clib_ext.h b/db2/include/clib_ext.h
new file mode 100644 (file)
index 0000000..8ccd2b5
--- /dev/null
@@ -0,0 +1,65 @@
+/* Do not edit: automatically built by dist/distrib. */
+#ifdef __STDC__
+void err __P((int eval, const char *, ...));
+#else
+void err();
+#endif
+#ifdef __STDC__
+void errx __P((int eval, const char *, ...));
+#else
+void errx();
+#endif
+#ifdef __STDC__
+void warn __P((const char *, ...));
+#else
+void warn();
+#endif
+#ifdef __STDC__
+void warnx __P((const char *, ...));
+#else
+void warnx();
+#endif
+#ifndef HAVE_GETCWD
+char *getcwd __P((char *, size_t));
+#endif
+void get_long __P((char *, long, long, long *));
+#ifndef HAVE_GETOPT
+int getopt __P((int, char * const *, const char *));
+#endif
+#ifndef HAVE_MEMCMP
+int memcmp __P((const void *, const void *, size_t));
+#endif
+#ifndef HAVE_MEMCPY
+void *memcpy __P((void *, const void *, size_t));
+#endif
+#ifndef HAVE_MEMMOVE
+void *memmove __P((void *, const void *, size_t));
+#endif
+#ifndef HAVE_MEMCPY
+void *memcpy __P((void *, const void *, size_t));
+#endif
+#ifndef HAVE_MEMMOVE
+void *memmove __P((void *, const void *, size_t));
+#endif
+#ifndef HAVE_RAISE
+int raise __P((int));
+#endif
+#ifndef HAVE_SNPRINTF
+#ifdef __STDC__
+int snprintf __P((char *, size_t, const char *, ...));
+#else
+int snprintf();
+#endif
+#endif
+#ifndef HAVE_STRDUP
+char *strdup __P((const char *));
+#endif
+#ifndef HAVE_STRERROR
+char *strerror __P((int));
+#endif
+#ifndef HAVE_STRSEP
+char *strsep __P((char **, const char *));
+#endif
+#ifndef HAVE_VSNPRINTF
+int vsnprintf();
+#endif
diff --git a/db2/include/common_ext.h b/db2/include/common_ext.h
new file mode 100644 (file)
index 0000000..9840162
--- /dev/null
@@ -0,0 +1,41 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __db_appname __P((DB_ENV *,
+   APPNAME, const char *, const char *, int *, char **));
+int __db_apprec __P((DB_ENV *, int));
+int __db_byteorder __P((DB_ENV *, int));
+#ifdef __STDC__
+void __db_err __P((const DB_ENV *dbenv, const char *fmt, ...));
+#else
+void __db_err();
+#endif
+int __db_panic __P((DB *));
+int __db_fchk __P((DB_ENV *, const char *, int, int));
+int __db_fcchk __P((DB_ENV *, const char *, int, int, int));
+int __db_cdelchk __P((const DB *, int, int, int));
+int __db_cgetchk __P((const DB *, DBT *, DBT *, int, int));
+int __db_cputchk __P((const DB *,
+   const DBT *, DBT *, int, int, int));
+int __db_delchk __P((const DB *, int, int));
+int __db_getchk __P((const DB *, const DBT *, DBT *, int));
+int __db_putchk __P((const DB *, DBT *, const DBT *, int, int, int));
+int __db_statchk __P((const DB *, int));
+int __db_syncchk __P((const DB *, int));
+int __db_ferr __P((const DB_ENV *, const char *, int));
+u_int32_t __db_log2 __P((u_int32_t));
+int __db_rcreate __P((DB_ENV *, APPNAME,
+   const char *, const char *, int, size_t, int *, void *));
+int __db_ropen __P((DB_ENV *,
+   APPNAME, const char *, const char *, int, int *, void *));
+int __db_rclose __P((DB_ENV *, int, void *));
+int __db_runlink __P((DB_ENV *,
+   APPNAME, const char *, const char *, int));
+int __db_rgrow __P((DB_ENV *, int, size_t));
+int __db_rremap __P((DB_ENV *, void *, size_t, size_t, int, void *));
+void __db_shalloc_init __P((void *, size_t));
+int __db_shalloc __P((void *, size_t, size_t, void *));
+void __db_shalloc_free __P((void *, void *));
+size_t __db_shalloc_count __P((void *));
+size_t __db_shsizeof __P((void *));
+void __db_shalloc_dump __P((FILE *, void *));
+int __db_tablesize __P((int));
+void __db_hashinit __P((void *, int));
diff --git a/db2/include/cxx_int.h b/db2/include/cxx_int.h
new file mode 100644 (file)
index 0000000..bf7a096
--- /dev/null
@@ -0,0 +1,118 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)cxx_int.h   10.4 (Sleepycat) 8/22/97
+ */
+
+#ifndef _CXX_INT_H_
+#define _CXX_INT_H_
+
+// private data structures known to the implementation only
+
+#include <assert.h>             // used by defines below
+
+//
+// Using FooImp classes will allow the implementation to change in the
+// future without any modification to user code or even to header files
+// that the user includes. FooImp * is just like void * except that it
+// provides a little extra protection, since you cannot randomly assign
+// any old pointer to a FooImp* as you can with void *.  Currently, a
+// pointer to such an opaque class is always just a pointer to the
+// appropriate underlying implementation struct.  These are converted
+// back and forth using the various overloaded wrap()/unwrap() methods.
+// This is essentially a use of the "Bridge" Design Pattern.
+//
+// WRAPPED_CLASS declares an empty _IMP_CLASS and defines the wrap(),
+// unwrap() and unwrapConst() inline functions for a wrapper class with an
+// underlying pointer representation; the unwrap variants map null to 0.
+#define WRAPPED_CLASS(_WRAPPER_CLASS, _IMP_CLASS, _WRAPPED_TYPE)           \
+                                                                           \
+        class _IMP_CLASS {};                                               \
+                                                                           \
+        inline _WRAPPED_TYPE unwrap(_WRAPPER_CLASS *val)                   \
+        {                                                                  \
+            if (!val) return 0;                                            \
+            return (_WRAPPED_TYPE)(val->imp());                            \
+        }                                                                  \
+                                                                           \
+        inline const _WRAPPED_TYPE unwrapConst(const _WRAPPER_CLASS *val)  \
+        {                                                                  \
+            if (!val) return 0;                                            \
+            return (const _WRAPPED_TYPE)(val->imp());                      \
+        }                                                                  \
+                                                                           \
+        inline _IMP_CLASS *wrap(_WRAPPED_TYPE val)                         \
+        {                                                                  \
+            return (_IMP_CLASS*)val;                                       \
+        }
+
+WRAPPED_CLASS(DbLockTab, DbLockTabImp, DB_LOCKTAB*)
+WRAPPED_CLASS(DbLog, DbLogImp, DB_LOG*)
+WRAPPED_CLASS(DbMpool, DbMpoolImp, DB_MPOOL*)
+WRAPPED_CLASS(DbMpoolFile, DbMpoolFileImp, DB_MPOOLFILE*)
+WRAPPED_CLASS(Db, DbImp, DB*)
+WRAPPED_CLASS(DbTxn, DbTxnImp, DB_TXN*)
+WRAPPED_CLASS(DbTxnMgr, DbTxnMgrImp, DB_TXNMGR*)
+
+// Macros that handle detected errors, in case we want to
+// change the default behavior.  runtime_error() throws an
+// exception by default.
+//
+// Since it's unusual to throw an exception in a destructor,
+// we have a separate macro.  For now, we silently ignore such
+// detected errors.
+// (DB_DESTRUCTOR_ERROR passes an extra third argument, 1.)
+#define DB_ERROR(caller, ecode) \
+    DbEnv::runtime_error(caller, ecode)
+
+#define DB_DESTRUCTOR_ERROR(caller, ecode) \
+    DbEnv::runtime_error(caller, ecode, 1)
+
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// These defines are for tedious flag or field set/get access methods.
+//
+
+// Define setName() and getName() methods that twiddle
+// the _flags field.
+//
+#define DB_FLAG_METHODS(_class, _flags, _cxx_name, _flag_name) \
+                                                               \
+void _class::set##_cxx_name(int onOrOff)                       \
+{                                                              \
+    if (onOrOff)                                               \
+        _flags |= _flag_name;                                  \
+    else                                                       \
+        _flags &= ~(_flag_name);                               \
+}                                                              \
+                                                               \
+int _class::get##_cxx_name() const                             \
+{                                                              \
+    return (_flags & _flag_name) ? 1 : 0;                      \
+}
+
+
+#define DB_RO_ACCESS(_class, _type, _cxx_name, _field)         \
+                                                               \
+_type _class::get_##_cxx_name() const                          \
+{                                                              \
+    return _field;                                             \
+}
+
+#define DB_WO_ACCESS(_class, _type, _cxx_name, _field)         \
+                                                               \
+void _class::set_##_cxx_name(_type value)                      \
+{                                                              \
+    _field = value;                                            \
+}                                                              \
+
+#define DB_RW_ACCESS(_class, _type, _cxx_name, _field)         \
+        DB_RO_ACCESS(_class, _type, _cxx_name, _field)         \
+        DB_WO_ACCESS(_class, _type, _cxx_name, _field)
+
+#endif /* !_CXX_INT_H_ */
diff --git a/db2/include/db.h.src b/db2/include/db.h.src
new file mode 100644 (file)
index 0000000..f9b29fa
--- /dev/null
@@ -0,0 +1,796 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)db.h.src    10.67 (Sleepycat) 8/25/97
+ */
+
+#ifndef _DB_H_
+#define        _DB_H_
+
+#ifndef __NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stdio.h>
+#endif
+
+/*
+ * XXX
+ * MacOS: ensure that Metrowerks C makes enumeration types int sized.
+ */
+#ifdef __MWERKS__
+#pragma enumsalwaysint on
+#endif
+
+/*
+ * XXX
+ * Handle function prototypes and the keyword "const".  This steps on name
+ * space that DB doesn't control, but all of the other solutions are worse.
+ */
+#undef __P
+#if defined(__STDC__) || defined(__cplusplus)
+#define        __P(protos)     protos          /* ANSI C prototypes */
+#else
+#define        const
+#define        __P(protos)     ()              /* K&R C preprocessor */
+#endif
+
+/*
+ * !!!
+ * DB needs basic information about specifically sized types.  If they're
+ * not provided by the system, typedef them here.
+ *
+ * We protect them against multiple inclusion using __BIT_TYPES_DEFINED__,
+ * as do BIND and Kerberos, since we don't know for sure what #include
+ * files the user is using.
+ *
+ * !!!
+ * We also provide the standard u_int, u_long etc., if they're not provided
+ * by the system.  This isn't completely necessary, but the example programs
+ * need them.
+ */
+#ifndef        __BIT_TYPES_DEFINED__
+#define        __BIT_TYPES_DEFINED__
+@u_int8_decl@
+@int16_decl@
+@u_int16_decl@
+@int32_decl@
+@u_int32_decl@
+#endif
+
+@u_char_decl@
+@u_short_decl@
+@u_int_decl@
+@u_long_decl@
+
+#define        DB_VERSION_MAJOR        2
+#define        DB_VERSION_MINOR        3
+#define        DB_VERSION_PATCH        4
+#define        DB_VERSION_STRING       "Sleepycat Software: DB 2.3.4: (8/20/97)"
+
+typedef        u_int32_t       db_pgno_t;      /* Page number type. */
+typedef        u_int16_t       db_indx_t;      /* Page offset type. */
+#define        DB_MAX_PAGES    0xffffffff      /* >= # of pages in a file */
+
+typedef        u_int32_t       db_recno_t;     /* Record number type. */
+typedef size_t         DB_LOCK;        /* Object returned by lock manager. */
+#define        DB_MAX_RECORDS  0xffffffff      /* >= # of records in a tree */
+
+#define        DB_FILE_ID_LEN          20      /* DB file ID length. */
+
+/* Forward structure declarations, so applications get type checking. */
+struct __db;           typedef struct __db DB;
+#ifdef DB_DBM_HSEARCH
+                       typedef struct __db DBM;
+#endif
+struct __db_bt_stat;   typedef struct __db_bt_stat DB_BTREE_STAT;
+struct __db_dbt;       typedef struct __db_dbt DBT;
+struct __db_env;       typedef struct __db_env DB_ENV;
+struct __db_info;      typedef struct __db_info DB_INFO;
+struct __db_lockregion;        typedef struct __db_lockregion DB_LOCKREGION;
+struct __db_lockreq;   typedef struct __db_lockreq DB_LOCKREQ;
+struct __db_locktab;   typedef struct __db_locktab DB_LOCKTAB;
+struct __db_log;       typedef struct __db_log DB_LOG;
+struct __db_lsn;       typedef struct __db_lsn DB_LSN;
+struct __db_mpool;     typedef struct __db_mpool DB_MPOOL;
+struct __db_mpool_fstat;typedef struct __db_mpool_fstat DB_MPOOL_FSTAT;
+struct __db_mpool_stat;        typedef struct __db_mpool_stat DB_MPOOL_STAT;
+struct __db_mpoolfile; typedef struct __db_mpoolfile DB_MPOOLFILE;
+struct __db_txn;       typedef struct __db_txn DB_TXN;
+struct __db_txn_active;        typedef struct __db_txn_active DB_TXN_ACTIVE;
+struct __db_txn_stat;  typedef struct __db_txn_stat DB_TXN_STAT;
+struct __db_txnmgr;    typedef struct __db_txnmgr DB_TXNMGR;
+struct __db_txnregion; typedef struct __db_txnregion DB_TXNREGION;
+struct __dbc;          typedef struct __dbc DBC;
+
+/* Key/data structure -- a Data-Base Thang. */
+struct __db_dbt {
+       void     *data;                 /* key/data */
+       u_int32_t size;                 /* key/data length */
+       u_int32_t ulen;                 /* RO: length of user buffer. */
+       u_int32_t dlen;                 /* RO: get/put record length. */
+       u_int32_t doff;                 /* RO: get/put record offset. */
+
+#define        DB_DBT_INTERNAL 0x01            /* Perform any mallocs using regular
+                                          malloc, not the user's malloc. */
+#define        DB_DBT_MALLOC   0x02            /* Return in allocated memory. */
+#define        DB_DBT_PARTIAL  0x04            /* Partial put/get. */
+#define        DB_DBT_USERMEM  0x08            /* Return in user's memory. */
+       u_int32_t flags;
+};
+
+/*
+ * Database configuration and initialization.
+ */
+ /*
+  * Flags understood by both db_open(3) and db_appinit(3).
+  */
+#define        DB_CREATE               0x00001 /* O_CREAT: create file as necessary. */
+#define        DB_NOMMAP               0x00002 /* Don't mmap underlying file. */
+#define        DB_THREAD               0x00004 /* Free-thread DB package handles. */
+
+/*
+ * Flags understood by db_appinit(3).
+ *
+ * DB_APP_INIT and DB_MUTEXDEBUG are internal only, and not documented.
+ */
+/*                             0x00007    COMMON MASK. */
+#define        DB_APP_INIT             0x00008 /* Appinit called, paths initialized. */
+#define        DB_INIT_LOCK            0x00010 /* Initialize locking. */
+#define        DB_INIT_LOG             0x00020 /* Initialize logging. */
+#define        DB_INIT_MPOOL           0x00040 /* Initialize mpool. */
+#define        DB_INIT_TXN             0x00080 /* Initialize transactions. */
+#define        DB_MPOOL_PRIVATE        0x00100 /* Mpool: private memory pool. */
+#define        DB_MUTEXDEBUG           0x00200 /* Do not get/set mutexes in regions. */
+#define        DB_RECOVER              0x00400 /* Run normal recovery. */
+#define        DB_RECOVER_FATAL        0x00800 /* Run catastrophic recovery. */
+#define        DB_TXN_NOSYNC           0x01000 /* Do not sync log on commit. */
+#define        DB_USE_ENVIRON          0x02000 /* Use the environment. */
+#define        DB_USE_ENVIRON_ROOT     0x04000 /* Use the environment if root. */
+
+/* CURRENTLY UNUSED LOCK FLAGS. */
+#define        DB_TXN_LOCK_2PL         0x00000 /* Two-phase locking. */
+#define        DB_TXN_LOCK_OPTIMISTIC  0x00000 /* Optimistic locking. */
+#define        DB_TXN_LOCK_MASK        0x00000 /* Lock flags mask. */
+
+/* CURRENTLY UNUSED LOG FLAGS. */
+#define        DB_TXN_LOG_REDO         0x00000 /* Redo-only logging. */
+#define        DB_TXN_LOG_UNDO         0x00000 /* Undo-only logging. */
+#define        DB_TXN_LOG_UNDOREDO     0x00000 /* Undo/redo write-ahead logging. */
+#define        DB_TXN_LOG_MASK         0x00000 /* Log flags mask. */
+
+/*
+ * Flags understood by db_open(3).
+ *
+ * DB_EXCL and DB_TEMPORARY are internal only, and not documented.
+ * DB_SEQUENTIAL is currently internal, but likely to be exported some day.
+ */
+/*                             0x00007    COMMON MASK. */
+/*                             0x07fff    ALREADY USED. */
+#define        DB_EXCL                 0x08000 /* O_EXCL: exclusive open. */
+#define        DB_RDONLY               0x10000 /* O_RDONLY: read-only. */
+#define        DB_SEQUENTIAL           0x20000 /* Indicate sequential access. */
+#define        DB_TEMPORARY            0x40000 /* Remove on last close. */
+#define        DB_TRUNCATE             0x80000 /* O_TRUNC: replace existing DB. */
+
+/*
+ * Deadlock detector modes; used in the DB_ENV structure to configure the
+ * locking subsystem.
+ */
+#define        DB_LOCK_NORUN           0x0
+#define        DB_LOCK_DEFAULT         0x1
+#define        DB_LOCK_OLDEST          0x2
+#define        DB_LOCK_RANDOM          0x3
+#define        DB_LOCK_YOUNGEST        0x4
+
+struct __db_env {
+       int              db_lorder;     /* Byte order. */
+
+                                       /* Error message callback. */
+       void (*db_errcall) __P((const char *, char *));
+       FILE            *db_errfile;    /* Error message file stream. */
+       const char      *db_errpfx;     /* Error message prefix. */
+       int              db_verbose;    /* Generate debugging messages. */
+
+       /* User paths. */
+       char            *db_home;       /* Database home. */
+       char            *db_log_dir;    /* Database log file directory. */
+       char            *db_tmp_dir;    /* Database tmp file directory. */
+
+       char           **db_data_dir;   /* Database data file directories. */
+       int              data_cnt;      /* Database data file slots. */
+       int              data_next;     /* Next Database data file slot. */
+
+       /* Locking. */
+       DB_LOCKTAB      *lk_info;       /* Return from lock_open(). */
+       u_int8_t        *lk_conflicts;  /* Two dimensional conflict matrix. */
+       int              lk_modes;      /* Number of lock modes in table. */
+       unsigned int     lk_max;        /* Maximum number of locks. */
+       u_int32_t        lk_detect;     /* Deadlock detect on every conflict. */
+       int (*db_yield) __P((void));    /* Yield function for threads. */
+
+       /* Logging. */
+       DB_LOG          *lg_info;       /* Return from log_open(). */
+       u_int32_t        lg_max;        /* Maximum file size. */
+
+       /* Memory pool. */
+       DB_MPOOL        *mp_info;       /* Return from memp_open(). */
+       size_t           mp_mmapsize;   /* Maximum file size for mmap. */
+       size_t           mp_size;       /* Bytes in the mpool cache. */
+
+       /* Transactions. */
+       DB_TXNMGR       *tx_info;       /* Return from txn_open(). */
+       unsigned int     tx_max;        /* Maximum number of transactions. */
+       int (*tx_recover)               /* Dispatch function for recovery. */
+           __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+
+       u_int32_t        flags;         /* Flags. */
+};
+
+/*******************************************************
+ * Access methods.
+ *******************************************************/
+typedef enum {
+       DB_BTREE=1,                     /* B+tree. */
+       DB_HASH,                        /* Extended Linear Hashing. */
+       DB_RECNO,                       /* Fixed and variable-length records. */
+       DB_UNKNOWN                      /* Figure it out on open. */
+} DBTYPE;
+
+#define        DB_BTREEVERSION 6               /* Current btree version. */
+#define        DB_BTREEOLDVER  6               /* Oldest btree version supported. */
+#define        DB_BTREEMAGIC   0x053162
+
+#define        DB_HASHVERSION  5               /* Current hash version. */
+#define        DB_HASHOLDVER   4               /* Oldest hash version supported. */
+#define        DB_HASHMAGIC    0x061561
+
+#define        DB_LOGVERSION   2               /* Current log version. */
+#define        DB_LOGOLDVER    2               /* Oldest log version supported. */
+#define        DB_LOGMAGIC     0x040988
+
+struct __db_info {
+       int              db_lorder;     /* Byte order. */
+       size_t           db_cachesize;  /* Underlying cache size. */
+       size_t           db_pagesize;   /* Underlying page size. */
+
+                                       /* Local heap allocation. */
+       void *(*db_malloc) __P((size_t));
+
+       /* Btree access method. */
+       int              bt_maxkey;     /* Maximum keys per page. */
+       int              bt_minkey;     /* Minimum keys per page. */
+       int (*bt_compare)               /* Comparison function. */
+           __P((const DBT *, const DBT *));
+       size_t (*bt_prefix)             /* Prefix function. */
+           __P((const DBT *, const DBT *));
+
+       /* Hash access method. */
+       unsigned int     h_ffactor;     /* Fill factor. */
+       unsigned int     h_nelem;       /* Number of elements. */
+       u_int32_t       (*h_hash)       /* Hash function. */
+           __P((const void *, u_int32_t));
+
+       /* Recno access method. */
+       int              re_pad;        /* Fixed-length padding byte. */
+       int              re_delim;      /* Variable-length delimiting byte. */
+       u_int32_t        re_len;        /* Length for fixed-length records. */
+       char            *re_source;     /* Source file name. */
+
+#define        DB_DELIMITER            0x0001  /* Recno: re_delim set. */
+#define        DB_DUP                  0x0002  /* Btree, Hash: duplicate keys. */
+#define        DB_FIXEDLEN             0x0004  /* Recno: fixed-length records. */
+#define        DB_PAD                  0x0008  /* Recno: re_pad set. */
+#define        DB_RECNUM               0x0010  /* Btree: record numbers. */
+#define        DB_RENUMBER             0x0020  /* Recno: renumber on insert/delete. */
+#define        DB_SNAPSHOT             0x0040  /* Recno: snapshot the input. */
+       u_int32_t        flags;
+};
+
+/*
+ * DB access method and cursor operation codes.  These are implemented as
+ * bit fields for future flexibility, but currently only a single one may
+ * be specified to any function.
+ */
+#define        DB_AFTER        0x000001        /* c_put() */
+#define        DB_APPEND       0x000002        /* put() */
+#define        DB_BEFORE       0x000004        /* c_put() */
+#define        DB_CHECKPOINT   0x000008        /* log_put(), log_get() */
+#define        DB_CURRENT      0x000010        /* c_get(), c_put(), log_get() */
+#define        DB_FIRST        0x000020        /* c_get(), log_get() */
+#define        DB_FLUSH        0x000040        /* log_put() */
+#define        DB_GET_RECNO    0x000080        /* c_get() */
+#define        DB_KEYFIRST     0x000100        /* c_put() */
+#define        DB_KEYLAST      0x000200        /* c_put() */
+#define        DB_LAST         0x000400        /* c_get(), log_get() */
+#define        DB_NEXT         0x000800        /* c_get(), log_get() */
+#define        DB_NOOVERWRITE  0x001000        /* put() */
+#define        DB_NOSYNC       0x002000        /* close() */
+#define        DB_PREV         0x004000        /* c_get(), log_get() */
+#define        DB_RECORDCOUNT  0x008000        /* stat() */
+#define        DB_SET          0x010000        /* c_get(), log_get() */
+#define        DB_SET_RANGE    0x020000        /* c_get() */
+#define        DB_SET_RECNO    0x040000        /* get(), c_get() */
+
+/* DB (user visible) error return codes. */
+#define        DB_INCOMPLETE           ( -1)   /* Sync didn't finish. */
+#define        DB_KEYEMPTY             ( -2)   /* The key/data pair was deleted or
+                                          was never created by the user. */
+#define        DB_KEYEXIST             ( -3)   /* The key/data pair already exists. */
+#define        DB_LOCK_DEADLOCK        ( -4)   /* Locker killed to resolve deadlock. */
+#define        DB_LOCK_NOTGRANTED      ( -5)   /* Lock unavailable, no-wait set. */
+#define        DB_LOCK_NOTHELD         ( -6)   /* Lock not held by locker. */
+#define        DB_NOTFOUND             ( -7)   /* Key/data pair not found (EOF). */
+
+/* DB (private) error return codes. */
+#define        DB_DELETED              ( -8)   /* Recovery file marked deleted. */
+#define        DB_NEEDSPLIT            ( -9)   /* Page needs to be split. */
+#define        DB_REGISTERED           (-10)   /* Entry was previously registered. */
+#define        DB_SWAPBYTES            (-11)   /* Database needs byte swapping. */
+
+struct __db_ilock {                    /* Internal DB access method lock. */
+       db_pgno_t       pgno;           /* Page being locked. */
+                                       /* File id. */
+       u_int8_t        fileid[DB_FILE_ID_LEN];
+};
+
+/* DB access method description structure. */
+struct __db {
+       void    *mutex;                 /* Synchronization for free threading */
+       DBTYPE   type;                  /* DB access method. */
+       DB_ENV  *dbenv;                 /* DB_ENV structure. */
+       DB_ENV  *mp_dbenv;              /* DB_ENV for local mpool creation. */
+
+       DB      *master;                /* Original DB created by db_open. */
+       void    *internal;              /* Access method private. */
+
+       DB_MPOOL        *mp;            /* The access method's mpool. */
+       DB_MPOOLFILE    *mpf;           /* The access method's mpool file. */
+
+       /*
+        * XXX
+        * Explicit representations of structures in queue.h.
+        *
+        * TAILQ_HEAD(curs_queue, __dbc);
+        */
+       struct {
+               struct __dbc *tqh_first;
+               struct __dbc **tqh_last;
+       } curs_queue;
+
+       /*
+        * XXX
+        * Explicit representations of structures in queue.h.
+        *
+        * LIST_HEAD(handleq, __db);
+        * LIST_ENTRY(__db);
+        */
+       struct {
+               struct __db *lh_first;
+       } handleq;                      /* List of handles for this DB. */
+       struct {
+               struct __db *le_next;
+               struct __db **le_prev;
+       } links;                        /* Links for the handle list. */
+
+       u_int32_t log_fileid;           /* Logging file id. */
+
+       DB_TXN   *txn;                  /* Current transaction. */
+       u_int32_t locker;               /* Default process' locker id. */
+       DBT       lock_dbt;             /* DBT referencing lock. */
+       struct __db_ilock lock;         /* Lock. */
+
+       size_t    pgsize;               /* Logical page size of file. */
+
+                                       /* Local heap allocation. */
+       void *(*db_malloc) __P((size_t));
+
+                                       /* Functions. */
+       int (*close)    __P((DB *, int));
+       int (*cursor)   __P((DB *, DB_TXN *, DBC **));
+       int (*del)      __P((DB *, DB_TXN *, DBT *, int));
+       int (*fd)       __P((DB *, int *));
+       int (*get)      __P((DB *, DB_TXN *, DBT *, DBT *, int));
+       int (*put)      __P((DB *, DB_TXN *, DBT *, DBT *, int));
+       int (*stat)     __P((DB *, void *, void *(*)(size_t), int));
+       int (*sync)     __P((DB *, int));
+
+#define        DB_AM_DUP       0x000001        /* DB_DUP (internal). */
+#define        DB_AM_INMEM     0x000002        /* In-memory; no sync on close. */
+#define        DB_AM_LOCKING   0x000004        /* Perform locking. */
+#define        DB_AM_LOGGING   0x000008        /* Perform logging. */
+#define        DB_AM_MLOCAL    0x000010        /* Database memory pool is local. */
+#define        DB_AM_PGDEF     0x000020        /* Page size was defaulted. */
+#define        DB_AM_RDONLY    0x000040        /* Database is readonly. */
+#define        DB_AM_RECOVER   0x000080        /* In recovery (do not log or lock). */
+#define        DB_AM_SWAP      0x000100        /* Pages need to be byte-swapped. */
+#define        DB_AM_THREAD    0x000200        /* DB is multi-threaded. */
+#define        DB_BT_RECNUM    0x000400        /* DB_RECNUM (internal) */
+#define        DB_HS_DIRTYMETA 0x000800        /* Hash: Metadata page modified. */
+#define        DB_RE_DELIMITER 0x001000        /* DB_DELIMITER (internal). */
+#define        DB_RE_FIXEDLEN  0x002000        /* DB_FIXEDLEN (internal). */
+#define        DB_RE_PAD       0x004000        /* DB_PAD (internal). */
+#define        DB_RE_RENUMBER  0x008000        /* DB_RENUMBER (internal). */
+#define        DB_RE_SNAPSHOT  0x010000        /* DB_SNAPSHOT (internal). */
+
+       u_int32_t flags;
+};
+
+/* Cursor description structure. */
+struct __dbc {
+       DB *dbp;                        /* Related DB access method. */
+       DB_TXN   *txn;                  /* Associated transaction. */
+
+       /*
+        * XXX
+        * Explicit representations of structures in queue.h.
+        *
+        * TAILQ_ENTRY(__dbc);
+        */
+       struct {
+               struct __dbc *tqe_next;
+               struct __dbc **tqe_prev;
+       } links;
+
+       void     *internal;             /* Access method private. */
+
+       int (*c_close)  __P((DBC *));
+       int (*c_del)    __P((DBC *, int));
+       int (*c_get)    __P((DBC *, DBT *, DBT *, int));
+       int (*c_put)    __P((DBC *, DBT *, DBT *, int));
+};
+
+/* Btree/recno statistics structure. */
+struct __db_bt_stat {
+       u_int32_t bt_flags;             /* Open flags. */
+       u_int32_t bt_maxkey;            /* Maxkey value. */
+       u_int32_t bt_minkey;            /* Minkey value. */
+       u_int32_t bt_re_len;            /* Fixed-length record length. */
+       u_int32_t bt_re_pad;            /* Fixed-length record pad. */
+       u_int32_t bt_pagesize;          /* Page size. */
+       u_int32_t bt_levels;            /* Tree levels. */
+       u_int32_t bt_nrecs;             /* Number of records. */
+       u_int32_t bt_int_pg;            /* Internal pages. */
+       u_int32_t bt_leaf_pg;           /* Leaf pages. */
+       u_int32_t bt_dup_pg;            /* Duplicate pages. */
+       u_int32_t bt_over_pg;           /* Overflow pages. */
+       u_int32_t bt_free;              /* Pages on the free list. */
+       u_int32_t bt_freed;             /* Pages freed for reuse. */
+       u_int32_t bt_int_pgfree;        /* Bytes free in internal pages. */
+       u_int32_t bt_leaf_pgfree;       /* Bytes free in leaf pages. */
+       u_int32_t bt_dup_pgfree;        /* Bytes free in duplicate pages. */
+       u_int32_t bt_over_pgfree;       /* Bytes free in overflow pages. */
+       u_int32_t bt_pfxsaved;          /* Bytes saved by prefix compression. */
+       u_int32_t bt_split;             /* Total number of splits. */
+       u_int32_t bt_rootsplit;         /* Root page splits. */
+       u_int32_t bt_fastsplit;         /* Fast splits. */
+       u_int32_t bt_added;             /* Items added. */
+       u_int32_t bt_deleted;           /* Items deleted. */
+       u_int32_t bt_get;               /* Items retrieved. */
+       u_int32_t bt_cache_hit;         /* Hits in fast-insert code. */
+       u_int32_t bt_cache_miss;        /* Misses in fast-insert code. */
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int   db_appinit __P((const char *, char * const *, DB_ENV *, int));
+int   db_appexit __P((DB_ENV *));
+int   db_open __P((const char *, DBTYPE, int, int, DB_ENV *, DB_INFO *, DB **));
+char *db_version __P((int *, int *, int *));
+#if defined(__cplusplus)
+}
+#endif
+
+/*******************************************************
+ * Locking
+ *******************************************************/
+#define        DB_LOCKVERSION  1
+#define        DB_LOCKMAGIC    0x090193
+
+/* Flag values for lock_vec(). */
+#define        DB_LOCK_NOWAIT          0x01    /* Don't wait on unavailable lock. */
+
+/* Flag values for lock_detect(). */
+#define        DB_LOCK_CONFLICT        0x01    /* Run on any conflict. */
+
+/* Request types. */
+typedef enum {
+       DB_LOCK_DUMP,                   /* Display held locks. */
+       DB_LOCK_GET,                    /* Get the lock. */
+       DB_LOCK_PUT,                    /* Release the lock. */
+       DB_LOCK_PUT_ALL,                /* Release locker's locks. */
+       DB_LOCK_PUT_OBJ                 /* Release locker's locks on obj. */
+} db_lockop_t;
+
+/* Simple R/W lock modes, plus modes for multi-granularity intention locking. */
+typedef enum {
+       DB_LOCK_NG=0,                   /* Not granted. */
+       DB_LOCK_READ,                   /* Shared/read. */
+       DB_LOCK_WRITE,                  /* Exclusive/write. */
+       DB_LOCK_IREAD,                  /* Intent to share/read. */
+       DB_LOCK_IWRITE,                 /* Intent exclusive/write. */
+       DB_LOCK_IWR                     /* Intent to read and write. */
+} db_lockmode_t;
+
+/* Lock request structure. */
+struct __db_lockreq {
+       db_lockop_t      op;            /* Operation. */
+       db_lockmode_t    mode;          /* Requested mode. */
+       u_int32_t        locker;        /* Locker identity. */
+       DBT             *obj;           /* Object being locked. */
+       DB_LOCK          lock;          /* Lock returned. */
+};
+
+/*
+ * Commonly used conflict matrices.
+ *
+ * Standard Read/Write (or exclusive/shared) locks.
+ */
+#define        DB_LOCK_RW_N    3
+extern const u_int8_t db_rw_conflicts[];
+
+/* Multi-granularity locking. */
+#define        DB_LOCK_RIW_N   6
+extern const u_int8_t db_riw_conflicts[];
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int      lock_close __P((DB_LOCKTAB *));
+int      lock_detect __P((DB_LOCKTAB *, int, u_int32_t));
+int      lock_get __P((DB_LOCKTAB *,
+           u_int32_t, int, const DBT *, db_lockmode_t, DB_LOCK *));
+int      lock_id __P((DB_LOCKTAB *, u_int32_t *));
+int      lock_open __P((const char *, int, int, DB_ENV *, DB_LOCKTAB **));
+int      lock_put __P((DB_LOCKTAB *, DB_LOCK));
+int      lock_unlink __P((const char *, int, DB_ENV *));
+int      lock_vec __P((DB_LOCKTAB *,
+           u_int32_t, int, DB_LOCKREQ *, int, DB_LOCKREQ **));
+#if defined(__cplusplus)
+}
+#endif
+
+/*******************************************************
+ * Logging.
+ *******************************************************/
+/* Flag values for log_archive(). */
+#define        DB_ARCH_ABS             0x001   /* Absolute pathnames. */
+#define        DB_ARCH_DATA            0x002   /* Data files. */
+#define        DB_ARCH_LOG             0x004   /* Log files. */
+
+/*
+ * A DB_LSN has two parts, a fileid which identifies a specific file, and an
+ * offset within that file.  The fileid is an unsigned 4-byte quantity that
+ * uniquely identifies a file within the log directory -- currently a simple
+ * counter inside the log.  The offset is also an unsigned 4-byte value.  The
+ * log manager guarantees the offset is never more than 4 bytes by switching
+ * to a new log file before the maximum length imposed by an unsigned 4-byte
+ * offset is reached.
+ */
+struct __db_lsn {
+       u_int32_t       file;           /* File ID. */
+       u_int32_t       offset;         /* File offset. */
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int     log_archive __P((DB_LOG *, char **[], int, void *(*)(size_t)));
+int     log_close __P((DB_LOG *));
+int     log_compare __P((const DB_LSN *, const DB_LSN *));
+int     log_file __P((DB_LOG *, const DB_LSN *, char *, size_t));
+int     log_flush __P((DB_LOG *, const DB_LSN *));
+int     log_get __P((DB_LOG *, DB_LSN *, DBT *, int));
+int     log_open __P((const char *, int, int, DB_ENV *, DB_LOG **));
+int     log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+int     log_register __P((DB_LOG *, DB *, const char *, DBTYPE, u_int32_t *));
+int     log_unlink __P((const char *, int, DB_ENV *));
+int     log_unregister __P((DB_LOG *, u_int32_t));
+#if defined(__cplusplus)
+}
+#endif
+
+/*******************************************************
+ * Mpool
+ *******************************************************/
+/* Flag values for memp_fget(). */
+#define        DB_MPOOL_CREATE         0x001   /* Create a page. */
+#define        DB_MPOOL_LAST           0x002   /* Return the last page. */
+#define        DB_MPOOL_NEW            0x004   /* Create a new page. */
+
+/* Flag values for memp_fput(), memp_fset(). */
+#define        DB_MPOOL_CLEAN          0x001   /* Clear modified bit. */
+#define        DB_MPOOL_DIRTY          0x002   /* Page is modified. */
+#define        DB_MPOOL_DISCARD        0x004   /* Don't cache the page. */
+
+/* Mpool statistics structure. */
+struct __db_mpool_stat {
+       size_t st_cachesize;            /* Cache size. */
+       unsigned long st_cache_hit;     /* Pages found in the cache. */
+       unsigned long st_cache_miss;    /* Pages not found in the cache. */
+       unsigned long st_map;           /* Pages from mapped files. */
+       unsigned long st_page_create;   /* Pages created in the cache. */
+       unsigned long st_page_in;       /* Pages read in. */
+       unsigned long st_page_out;      /* Pages written out. */
+       unsigned long st_ro_evict;      /* Read-only pages evicted. */
+       unsigned long st_rw_evict;      /* Read-write pages evicted. */
+       unsigned long st_hash_buckets;  /* Number of hash buckets. */
+       unsigned long st_hash_searches; /* Total hash chain searches. */
+       unsigned long st_hash_longest;  /* Longest hash chain searched. */
+       unsigned long st_hash_examined; /* Total hash entries searched. */
+};
+
+/* Mpool file statistics structure. */
+struct __db_mpool_fstat {
+       char *file_name;                /* File name. */
+       size_t st_pagesize;             /* Page size. */
+       unsigned long st_cache_hit;     /* Pages found in the cache. */
+       unsigned long st_cache_miss;    /* Pages not found in the cache. */
+       unsigned long st_map;           /* Pages from mapped files. */
+       unsigned long st_page_create;   /* Pages created in the cache. */
+       unsigned long st_page_in;       /* Pages read in. */
+       unsigned long st_page_out;      /* Pages written out. */
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int    memp_close __P((DB_MPOOL *));
+int    memp_fclose __P((DB_MPOOLFILE *));
+int    memp_fget __P((DB_MPOOLFILE *, db_pgno_t *, unsigned long, void *));
+int    memp_fopen __P((DB_MPOOL *, const char *,
+           int, int, int, size_t, int, DBT *, u_int8_t *, DB_MPOOLFILE **));
+int    memp_fput __P((DB_MPOOLFILE *, void *, unsigned long));
+int    memp_fset __P((DB_MPOOLFILE *, void *, unsigned long));
+int    memp_fsync __P((DB_MPOOLFILE *));
+int    memp_open __P((const char *, int, int, DB_ENV *, DB_MPOOL **));
+int    memp_register __P((DB_MPOOL *, int,
+           int (*)(db_pgno_t, void *, DBT *),
+           int (*)(db_pgno_t, void *, DBT *)));
+int    memp_stat __P((DB_MPOOL *,
+           DB_MPOOL_STAT **, DB_MPOOL_FSTAT ***, void *(*)(size_t)));
+int    memp_sync __P((DB_MPOOL *, DB_LSN *));
+int    memp_unlink __P((const char *, int, DB_ENV *));
+#if defined(__cplusplus)
+}
+#endif
+
+/*******************************************************
+ * Transactions.
+ *******************************************************/
+#define        DB_TXNVERSION   1
+#define        DB_TXNMAGIC     0x041593
+
+/* Operation values passed to the tx_recover() function. */
+#define        DB_TXN_BACKWARD_ROLL    1       /* Read the log backwards. */
+#define        DB_TXN_FORWARD_ROLL     2       /* Read the log forwards. */
+#define        DB_TXN_OPENFILES        3       /* Read for open files. */
+#define        DB_TXN_REDO             4       /* Redo the operation. */
+#define        DB_TXN_UNDO             5       /* Undo the operation. */
+
+/* Internal transaction status values. */
+
+/* Transaction statistics structures. */
+struct __db_txn_active {
+       u_int32_t       txnid;          /* Transaction ID */
+       DB_LSN          lsn;            /* Lsn of the begin record */
+};
+
+struct __db_txn_stat {
+       DB_LSN          st_last_ckp;    /* lsn of the last checkpoint */
+       DB_LSN          st_pending_ckp; /* last checkpoint did not finish */
+       time_t          st_time_ckp;    /* time of last checkpoint */
+       u_int32_t       st_last_txnid;  /* last transaction id given out */
+       u_int32_t       st_maxtxns;     /* maximum number of active txns */
+       u_int32_t       st_naborts;     /* number of aborted transactions */
+       u_int32_t       st_nbegins;     /* number of begun transactions */
+       u_int32_t       st_ncommits;    /* number of committed transactions */
+       u_int32_t       st_nactive;     /* number of active transactions */
+       DB_TXN_ACTIVE   *st_txnarray;   /* array of active transactions */
+};
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int      txn_abort __P((DB_TXN *));
+int      txn_begin __P((DB_TXNMGR *, DB_TXN *, DB_TXN **));
+int      txn_checkpoint __P((const DB_TXNMGR *, long, long));
+int      txn_commit __P((DB_TXN *));
+int      txn_close __P((DB_TXNMGR *));
+u_int32_t txn_id __P((DB_TXN *));
+int      txn_open __P((const char *, int, int, DB_ENV *, DB_TXNMGR **));
+int      txn_prepare __P((DB_TXN *));
+int      txn_stat __P((DB_TXNMGR *, DB_TXN_STAT **, void *(*)(size_t)));
+int      txn_unlink __P((const char *, int, DB_ENV *));
+#if defined(__cplusplus)
+}
+#endif
+
+#ifdef DB_DBM_HSEARCH
+/*******************************************************
+ * Dbm/Ndbm historic interfaces.
+ *******************************************************/
+#define        DBM_INSERT      0               /* Flags to dbm_store(). */
+#define        DBM_REPLACE     1
+
+/*
+ * The db(3) support for ndbm(3) always appends this suffix to the
+ * file name to avoid overwriting the user's original database.
+ */
+#define        DBM_SUFFIX      ".db"
+
+typedef struct {
+       char *dptr;
+       int dsize;
+} datum;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int     dbminit __P((char *));
+#if !defined(__cplusplus)
+int     delete __P((datum));
+#endif
+datum   fetch __P((datum));
+datum   firstkey __P((void));
+datum   nextkey __P((datum));
+int     store __P((datum, datum));
+
+/*
+ * !!!
+ * Don't prototype:
+ *
+ *      dbm_clearerr(DBM *db);
+ *      dbm_dirfno(DBM *db);
+ *      dbm_error(DBM *db);
+ *      dbm_pagfno(DBM *db);
+ *      dbm_rdonly(DBM *db);
+ *
+ * they weren't documented and were historically implemented as #define's.
+ */
+void    dbm_close __P((DBM *));
+int     dbm_delete __P((DBM *, datum));
+datum   dbm_fetch __P((DBM *, datum));
+datum   dbm_firstkey __P((DBM *));
+long    dbm_forder __P((DBM *, datum));
+datum   dbm_nextkey __P((DBM *));
+DBM    *dbm_open __P((const char *, int, int));
+int     dbm_store __P((DBM *, datum, datum, int));
+#if defined(__cplusplus)
+}
+#endif
+
+/*******************************************************
+ * Hsearch historic interface.
+ *******************************************************/
+typedef enum {
+       FIND, ENTER
+} ACTION;
+
+typedef struct entry {
+       char *key;
+       void *data;
+} ENTRY;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+int     hcreate __P((unsigned int));
+void    hdestroy __P((void));
+ENTRY  *hsearch __P((ENTRY, ACTION));
+#if defined(__cplusplus)
+}
+#endif
+#endif /* DB_DBM_HSEARCH */
+
+/*
+ * XXX
+ * MacOS: Reset Metrowerks C enum sizes.
+ */
+#ifdef __MWERKS__
+#pragma enumsalwaysint reset
+#endif
+#endif /* !_DB_H_ */
diff --git a/db2/include/db_185.h.src b/db2/include/db_185.h.src
new file mode 100644 (file)
index 0000000..52fb3a0
--- /dev/null
@@ -0,0 +1,170 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)db_185.h.src        8.3 (Sleepycat) 7/27/97
+ */
+
+#ifndef _DB_185_H_
+#define        _DB_185_H_
+
+#include <sys/types.h>
+
+#include <limits.h>
+
+/*
+ * XXX
+ * Handle function prototypes and the keyword "const".  This steps on name
+ * space that DB doesn't control, but all of the other solutions are worse.
+ */
+#undef __P
+#if defined(__STDC__) || defined(__cplusplus)
+#define        __P(protos)     protos          /* ANSI C prototypes */
+#else
+#define        const
+#define        __P(protos)     ()              /* K&R C preprocessor */
+#endif
+
+#define        RET_ERROR       -1              /* Return values. */
+#define        RET_SUCCESS      0
+#define        RET_SPECIAL      1
+
+#ifndef        __BIT_TYPES_DEFINED__
+#define        __BIT_TYPES_DEFINED__
+@u_int8_decl@
+@int16_decl@
+@u_int16_decl@
+@int32_decl@
+@u_int32_decl@
+#endif
+
+#define        MAX_PAGE_NUMBER 0xffffffff      /* >= # of pages in a file */
+typedef u_int32_t      pgno_t;
+#define        MAX_PAGE_OFFSET 65535           /* >= # of bytes in a page */
+typedef u_int16_t      indx_t;
+#define        MAX_REC_NUMBER  0xffffffff      /* >= # of records in a tree */
+typedef u_int32_t      recno_t;
+
+/* Key/data structure -- a Data-Base Thang. */
+typedef struct {
+       void    *data;                  /* data */
+       size_t   size;                  /* data length */
+} DBT;
+
+/* Routine flags. */
+#define        R_CURSOR        1               /* del, put, seq */
+#define        __R_UNUSED      2               /* UNUSED */
+#define        R_FIRST         3               /* seq */
+#define        R_IAFTER        4               /* put (RECNO) */
+#define        R_IBEFORE       5               /* put (RECNO) */
+#define        R_LAST          6               /* seq (BTREE, RECNO) */
+#define        R_NEXT          7               /* seq */
+#define        R_NOOVERWRITE   8               /* put */
+#define        R_PREV          9               /* seq (BTREE, RECNO) */
+#define        R_SETCURSOR     10              /* put (RECNO) */
+#define        R_RECNOSYNC     11              /* sync (RECNO) */
+
+typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE;
+
+/* Access method description structure. */
+typedef struct __db {
+       DBTYPE type;                    /* Underlying db type. */
+       int (*close)    __P((struct __db *));
+       int (*del)      __P((const struct __db *, const DBT *, u_int));
+       int (*get)      __P((const struct __db *, const DBT *, DBT *, u_int));
+       int (*put)      __P((const struct __db *, DBT *, const DBT *, u_int));
+       int (*seq)      __P((const struct __db *, DBT *, DBT *, u_int));
+       int (*sync)     __P((const struct __db *, u_int));
+       void *internal;                 /* Access method private. */
+       int (*fd)       __P((const struct __db *));
+} DB;
+
+#define        BTREEMAGIC      0x053162
+#define        BTREEVERSION    3
+
+/* Structure used to pass parameters to the btree routines. */
+typedef struct {
+#define        R_DUP           0x01    /* duplicate keys */
+       u_long  flags;
+       u_int   cachesize;      /* bytes to cache */
+       int     maxkeypage;     /* maximum keys per page */
+       int     minkeypage;     /* minimum keys per page */
+       u_int   psize;          /* page size */
+       int     (*compare)      /* comparison function */
+           __P((const DBT *, const DBT *));
+       size_t  (*prefix)       /* prefix function */
+           __P((const DBT *, const DBT *));
+       int     lorder;         /* byte order */
+} BTREEINFO;
+
+#define        HASHMAGIC       0x061561
+#define        HASHVERSION     2
+
+/* Structure used to pass parameters to the hashing routines. */
+typedef struct {
+       u_int   bsize;          /* bucket size */
+       u_int   ffactor;        /* fill factor */
+       u_int   nelem;          /* number of elements */
+       u_int   cachesize;      /* bytes to cache */
+       u_int32_t               /* hash function */
+               (*hash) __P((const void *, size_t));
+       int     lorder;         /* byte order */
+} HASHINFO;
+
+/* Structure used to pass parameters to the record routines. */
+typedef struct {
+#define        R_FIXEDLEN      0x01    /* fixed-length records */
+#define        R_NOKEY         0x02    /* key not required */
+#define        R_SNAPSHOT      0x04    /* snapshot the input */
+       u_long  flags;
+       u_int   cachesize;      /* bytes to cache */
+       u_int   psize;          /* page size */
+       int     lorder;         /* byte order */
+       size_t  reclen;         /* record length (fixed-length records) */
+       u_char  bval;           /* delimiting byte (variable-length records) */
+       char    *bfname;        /* btree file name */
+} RECNOINFO;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+DB *dbopen __P((const char *, int, int, DBTYPE, const void *));
+
+#if defined(__cplusplus)
+}
+#endif
+#endif /* !_DB_185_H_ */
diff --git a/db2/include/db_am.h b/db2/include/db_am.h
new file mode 100644 (file)
index 0000000..3289eec
--- /dev/null
@@ -0,0 +1,87 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)db_am.h     10.5 (Sleepycat) 8/22/97
+ */
+#ifndef _DB_AM_H
+#define _DB_AM_H
+
+#define DB_ISBIG       0x01
+#define        DB_ADD_DUP      0x10
+#define        DB_REM_DUP      0x20
+#define        DB_ADD_BIG      0x30
+#define        DB_REM_BIG      0x40
+#define        DB_SPLITOLD     0x50
+#define        DB_SPLITNEW     0x60
+
+/*
+ * Standard initialization and shutdown macros for all recovery functions.
+ *
+ * Requires the following local variables, plus logp, dbtp and argp in
+ * scope and an "out" label that every failure path jumps to:
+ *     DB *file_dbp, *mdbp;
+ *     DB_MPOOLFILE *mpf;
+ *     int ret;
+ */
+#define        REC_INTRO(func) {                                               \
+       file_dbp = mdbp = NULL;                                         \
+       if ((ret = func(dbtp->data, &argp)) != 0)                       \
+               goto out;                                               \
+       if ((ret = __db_fileid_to_db(logp, &mdbp, argp->fileid)) != 0) { \
+               if (ret == DB_DELETED)  /* Not reported as an error. */ \
+                       ret = 0;                                        \
+               goto out;                                               \
+       }                                                               \
+       if (mdbp == NULL)                                               \
+               goto out;                                               \
+       if (F_ISSET(mdbp, DB_AM_THREAD)) {                              \
+               if ((ret = __db_gethandle(mdbp,                         \
+                   mdbp->type == DB_HASH ? __ham_hdup : __bam_bdup,    \
+                   &file_dbp)) != 0)                                   \
+                       goto out;                                       \
+       } else                                                          \
+               file_dbp = mdbp;                                        \
+       F_SET(file_dbp, DB_AM_RECOVER);                                 \
+       mpf = file_dbp->mpf;                                            \
+}
+#define        REC_CLOSE {                                                     \
+       if (argp != NULL)                                               \
+               free (argp);                                            \
+       if (file_dbp != NULL) {                                         \
+               F_CLR(file_dbp, DB_AM_RECOVER);                         \
+               if (F_ISSET(file_dbp, DB_AM_THREAD))                    \
+                       __db_puthandle(file_dbp);                       \
+       }                                                               \
+       return (ret);           /* Leaves the calling function. */      \
+}
+
+/*
+ * No-op versions of the same macros: they only call func and free argp.
+ */
+#define        REC_NOOP_INTRO(func) {                                          \
+       if ((ret = func(dbtp->data, &argp)) != 0)                       \
+               return (ret);                                           \
+}
+#define        REC_NOOP_CLOSE {                                                \
+       if (argp != NULL)                                               \
+               free (argp);                                            \
+       return (ret);                                                   \
+}
+
+/*
+ * Standard debugging macro for all recovery functions.
+ */
+#ifdef DEBUG_RECOVER
+#define        REC_PRINT(func)                                                 \
+       (void)func(logp, dbtp, lsnp, redo, info);
+#else
+#define        REC_PRINT(func)                                                 \
+       info = info;                    /* XXX: Shut the compiler up. */
+#endif
+
+#include "db_auto.h"
+#include "db_ext.h"
+#endif
diff --git a/db2/include/db_auto.h b/db2/include/db_auto.h
new file mode 100644 (file)
index 0000000..7478173
--- /dev/null
@@ -0,0 +1,118 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#ifndef db_AUTO_H
+#define db_AUTO_H
+
+#define        DB_db_addrem    (DB_db_BEGIN + 1)
+
+typedef struct _db_addrem_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       opcode;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       u_int32_t       indx;
+       size_t  nbytes;
+       DBT     hdr;
+       DBT     dbt;
+       DB_LSN  pagelsn;
+} __db_addrem_args;
+
+
+#define        DB_db_split     (DB_db_BEGIN + 2)
+
+typedef struct _db_split_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       opcode;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DBT     pageimage;
+       DB_LSN  pagelsn;
+} __db_split_args;
+
+
+#define        DB_db_big       (DB_db_BEGIN + 3)
+
+typedef struct _db_big_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       opcode;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       db_pgno_t       prev_pgno;
+       db_pgno_t       next_pgno;
+       DBT     dbt;
+       DB_LSN  pagelsn;
+       DB_LSN  prevlsn;
+       DB_LSN  nextlsn;
+} __db_big_args;
+
+
+#define        DB_db_ovref     (DB_db_BEGIN + 4)
+
+typedef struct _db_ovref_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DB_LSN  lsn;
+} __db_ovref_args;
+
+
+#define        DB_db_relink    (DB_db_BEGIN + 5)
+
+typedef struct _db_relink_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DB_LSN  lsn;
+       db_pgno_t       prev;
+       DB_LSN  lsn_prev;
+       db_pgno_t       next;
+       DB_LSN  lsn_next;
+} __db_relink_args;
+
+
+#define        DB_db_addpage   (DB_db_BEGIN + 6)
+
+typedef struct _db_addpage_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       DB_LSN  lsn;
+       db_pgno_t       nextpgno;
+       DB_LSN  nextlsn;
+} __db_addpage_args;
+
+
+#define        DB_db_debug     (DB_db_BEGIN + 7)
+
+typedef struct _db_debug_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       DBT     op;
+       u_int32_t       fileid;
+       DBT     key;
+       DBT     data;
+       u_int32_t       arg_flags;
+} __db_debug_args;
+
+
+#define        DB_db_noop      (DB_db_BEGIN + 8)
+
+typedef struct _db_noop_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+} __db_noop_args;
+
+#endif
diff --git a/db2/include/db_cxx.h b/db2/include/db_cxx.h
new file mode 100644 (file)
index 0000000..506aed8
--- /dev/null
@@ -0,0 +1,888 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)db_cxx.h    10.7 (Sleepycat) 8/22/97
+ */
+
+#ifndef _DB_CXX_H_
+#define _DB_CXX_H_
+
+//
+// C++ assumptions:
+//
+// To ensure portability to many platforms, both new and old, we make
+// few assumptions about the C++ compiler and library.  For example,
+// we do not expect STL, templates or namespaces to be available.  The
+// "newest" C++ feature used is exceptions, which are used liberally
+// to transmit error information.  Even the use of exceptions can be
+// disabled at runtime, see setErrorModel().
+//
+// C++ naming conventions:
+//
+//  - All top level class names start with Db.
+//  - All class members start with lower case letter.
+//  - All private data members are suffixed with underscore.
+//  - Use underscores to divide names into multiple words.
+//  - Simple data accessors are named with get_ or set_ prefix.
+//  - All method names are taken from names of functions in the C
+//    layer of db (usually by dropping a prefix like "db_").
+//    These methods have the same argument types and order,
+//    other than dropping the explicit arg that acts as "this".
+//
+// As a rule, each DbFoo object has exactly one underlying DB_FOO struct
+// (defined in db.h) associated with it.  In many cases, we inherit directly
+// from the DB_FOO structure to make this relationship explicit.  Often,
+// the underlying C layer allocates and deallocates these structures, so
+// there is no easy way to add any data to the DbFoo class.  When you see
+// a comment about whether data is permitted to be added, this is what
+// is going on.  Of course, if we need to add data to such C++ classes
+// in the future, we will arrange to have an indirect pointer to the
+// DB_FOO struct (as some of the classes already have).
+//
+
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Forward declarations
+//
+
+#include "db.h"
+
+class Db;                                        // forward
+class Dbc;                                       // forward
+class DbEnv;                                     // forward
+class DbException;                               // forward
+class DbInfo;                                    // forward
+class DbLock;                                    // forward
+class DbLockTab;                                 // forward
+class DbLog;                                     // forward
+class DbLsn;                                     // forward
+class DbMpool;                                   // forward
+class DbMpoolFile;                               // forward
+class Dbt;                                       // forward
+class DbTxn;                                     // forward
+class DbTxnMgr;                                  // forward
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Mechanisms for declaring classes
+//
+
+//
+// Every class defined in this file has an _exported next to the class name.
+// This is needed for WinTel machines so that the class methods can
+// be exported or imported in a DLL as appropriate.  Users of the DLL
+// use the define DB_USE_DLL.  When the DLL is built, DB_CREATE_DLL
+// must be defined.
+//
+#if defined(_MSC_VER)
+
+#  if defined(DB_CREATE_DLL)
+#    define _exported __declspec(dllexport)      // creator of dll
+#  elif defined(DB_USE_DLL)
+#    define _exported __declspec(dllimport)      // user of dll
+#  else
+#    define _exported                            // static lib creator or user
+#  endif
+
+#else
+
+#  define _exported
+
+#endif
+
+// DEFINE_DB_CLASS defines an imp_ data member and imp() accessor.
+// The underlying type is a pointer to an opaque *Imp class, that
+// gets converted to the correct implementation class by the implementation.
+//
+// Since these defines use "private/public" labels, and leave the access
+// being "private", we always use these by convention before any data
+// members in the private section of a class.  Keeping them in the
+// private section also emphasizes that they are off limits to user code.
+//
+#define DEFINE_DB_CLASS(name) \
+    public: class name##Imp* imp() { return imp_; } \
+    public: const class name##Imp* imp() const { return imp_; } \
+    private: class name##Imp* imp_
+
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Turn off inappropriate compiler warnings
+//
+
+#ifdef _MSC_VER
+
+// These are level 4 warnings that are explicitly disabled.
+// With Visual C++, by default you do not see above level 3 unless
+// you use /W4.  But we like to compile with the highest level
+// warnings to catch other errors.
+//
+// 4201: nameless struct/union
+//       triggered by standard include file <winnt.h>
+//
+// 4514: unreferenced inline function has been removed
+//       certain include files in MSVC define methods that are not called
+//
+#pragma warning(disable: 4201 4514)
+
+#endif
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Exception classes
+//
+
+// Almost any error in the DB library throws a DbException.
+// Every exception should be considered an abnormality
+// (e.g. bug, misuse of DB, file system error).
+//
+// NOTE: We would like to inherit from class exception and
+//       let it handle what(), but there are
+//       MSVC++ problems when <exception> is included.
+//
+class _exported DbException
+{
+public:
+    virtual ~DbException();
+    DbException(int err);
+    DbException(const char *description);
+    DbException(const char *prefix, int err);
+    DbException(const char *prefix1, const char *prefix2, int err);
+    const int get_errno();      // NOTE(review): not a const member, unlike what()
+    virtual const char *what() const;
+
+    DbException(const DbException &);
+    DbException &operator = (const DbException &);
+
+private:
+    char *what_;
+    int err_;                   // errno
+};
+
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Lock classes
+//
+
+class _exported DbLock
+{
+    friend class DbLockTab;     // class-key required before C++11
+
+public:
+    DbLock(unsigned int);
+    DbLock();
+
+    unsigned int get_lock_id();
+    void set_lock_id(unsigned int);
+
+    int put(DbLockTab *locktab);
+
+    DbLock(const DbLock &);
+    DbLock &operator = (const DbLock &);
+
+protected:
+    // We can add data to this class if needed
+    // since its contained class is not allocated by db.
+    // (see comment at top)
+
+    DB_LOCK lock_;
+};
+
+class _exported DbLockTab
+{
+friend class DbEnv;             // class-key required before C++11
+public:
+    int close();
+    int detect(int atype, u_int32_t flags);
+    int get(u_int32_t locker, int flags, const Dbt *obj,
+            db_lockmode_t lock_mode, DbLock *lock);
+    int id(u_int32_t *idp);
+    int vec(u_int32_t locker, int flags, DB_LOCKREQ list[],
+           int nlist, DB_LOCKREQ **elistp);
+
+    // Create or remove new locktab files
+    //
+    static int open(const char *dir, int flags, int mode,
+                    DbEnv* dbenv, DbLockTab **regionp);
+    static int unlink(const char *dir, int force, DbEnv* dbenv);
+
+private:
+    // We can add data to this class if needed
+    // since it is implemented via a pointer.
+    // (see comment at top)
+
+    // copying not allowed
+    //
+    DbLockTab(const DbLockTab &);
+    DbLockTab &operator = (const DbLockTab &);
+
+    // Note: use DbLockTab::open() or DbEnv::get_lk_info()
+    // to get pointers to a DbLockTab,
+    // and call DbLockTab::close() rather than delete to release them.
+    //
+    DbLockTab();
+    ~DbLockTab();
+
+    DEFINE_DB_CLASS(DbLockTab);
+};
+
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Log classes
+//
+
+class _exported DbLsn : protected DB_LSN
+{
+    friend class DbLog;         // friendship needed to cast to base class
+    friend class DbMpool;
+};
+
+class _exported DbLog
+{
+friend class DbEnv;             // class-key required before C++11
+public:
+    int archive(char **list[], int flags, void *(*db_malloc)(size_t));
+    int close();
+    static int compare(const DbLsn *lsn0, const DbLsn *lsn1);
+    int file(DbLsn *lsn, char *namep, int len);
+    int flush(const DbLsn *lsn);
+    int get(DbLsn *lsn, Dbt *data, int flags);
+    int put(DbLsn *lsn, const Dbt *data, int flags);
+
+    // Normally these would be called register and unregister to
+    // parallel the C interface, but "register" is a reserved word.
+    //
+    int db_register(Db *dbp, const char *name, u_int32_t *fidp);
+    int db_unregister(u_int32_t fid);
+
+    // Create or remove new log files
+    //
+    static int open(const char *dir, int flags, int mode,
+                    DbEnv* dbenv, DbLog **regionp);
+    static int unlink(const char *dir, int force, DbEnv* dbenv);
+
+private:
+    // We can add data to this class if needed
+    // since it is implemented via a pointer.
+    // (see comment at top)
+
+    // Note: use DbLog::open() or DbEnv::get_lg_info()
+    // to get pointers to a DbLog,
+    // and call DbLog::close() rather than delete to release them.
+    //
+    DbLog();
+    ~DbLog();
+
+    // no copying (explicit return type: C++ has no implicit int)
+    DbLog(const DbLog &);
+    DbLog &operator = (const DbLog &);
+
+    DEFINE_DB_CLASS(DbLog);
+};
+
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Memory pool classes
+//
+
+class _exported DbMpoolFile
+{
+public:
+    int close();
+    int get(db_pgno_t *pgnoaddr, unsigned long flags, void *pagep);
+    int put(void *pgaddr, unsigned long flags);
+    int set(void *pgaddr, unsigned long flags);
+    int sync();
+
+    static int open(DbMpool *mp, const char *file,
+                    int ftype, int flags, int mode,
+                    size_t pagesize, int lsn_offset,
+                    Dbt *pgcookie, u_int8_t *uid, DbMpoolFile **mpf);
+
+private:
+    // We can add data to this class if needed
+    // since it is implemented via a pointer.
+    // (see comment at top)
+
+    // Note: use DbMpoolFile::open()
+    // to get pointers to a DbMpoolFile,
+    // and call DbMpoolFile::close() rather than delete to release them.
+    //
+    DbMpoolFile();
+
+    // Shut g++ up.
+protected:
+    ~DbMpoolFile();
+
+private:
+    // no copying (explicit return type: C++ has no implicit int)
+    DbMpoolFile(const DbMpoolFile &);
+    DbMpoolFile &operator = (const DbMpoolFile &);
+
+    DEFINE_DB_CLASS(DbMpoolFile);
+};
+
+class _exported DbMpool
+{
+friend class DbEnv;             // class-key required before C++11
+public:
+    int close();
+
+    // access to low level interface
+    // Normally this would be called register to parallel
+    // the C interface, but "register" is a reserved word.
+    //
+    int db_register(int ftype,
+                    int (*pgin)(db_pgno_t pgno, void *pgaddr, DBT *pgcookie),
+                    int (*pgout)(db_pgno_t pgno, void *pgaddr, DBT *pgcookie));
+
+    int stat(DB_MPOOL_STAT **gsp, DB_MPOOL_FSTAT ***fsp,
+             void *(*db_malloc)(size_t));
+    int sync(DbLsn *lsn);
+
+    // Create or remove new mpool files
+    //
+    static int open(const char *dir, int flags, int mode,
+                    DbEnv* dbenv, DbMpool **regionp);
+    static int unlink(const char *dir, int force, DbEnv* dbenv);
+
+private:
+    // We can add data to this class if needed
+    // since it is implemented via a pointer.
+    // (see comment at top)
+
+    // Note: use DbMpool::open() or DbEnv::get_mp_info()
+    // to get pointers to a DbMpool,
+    // and call DbMpool::close() rather than delete to release them.
+    //
+    DbMpool();
+    ~DbMpool();
+
+    // no copying
+    DbMpool(const DbMpool &);
+    DbMpool &operator = (const DbMpool &);
+
+    DEFINE_DB_CLASS(DbMpool);
+};
+
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Transaction classes
+//
+
+class _exported DbTxnMgr
+{
+friend class DbEnv;             // class-key required before C++11
+public:
+    int begin(DbTxn *pid, DbTxn **tid);
+    int checkpoint(long kbyte, long min) const;
+    int close();
+    int stat(DB_TXN_STAT **statp, void *(*db_malloc)(size_t));
+
+    // Create or remove new txnmgr files
+    //
+    static int open(const char *dir, int flags, int mode,
+                    DbEnv* dbenv, DbTxnMgr **regionp);
+    static int unlink(const char *dir, int force, DbEnv* dbenv);
+
+private:
+    // We can add data to this class if needed
+    // since it is implemented via a pointer.
+    // (see comment at top)
+
+    // Note: use DbTxnMgr::open() or DbEnv::get_tx_info()
+    // to get pointers to a DbTxnMgr,
+    // and call DbTxnMgr::close() rather than delete to release them.
+    //
+    DbTxnMgr();
+    ~DbTxnMgr();
+
+    // no copying (explicit return type: C++ has no implicit int)
+    DbTxnMgr(const DbTxnMgr &);
+    DbTxnMgr &operator = (const DbTxnMgr &);
+
+    DEFINE_DB_CLASS(DbTxnMgr);
+};
+
+class _exported DbTxn
+{
+friend class DbTxnMgr;          // class-key required before C++11
+public:
+    int abort();
+    int commit();
+    u_int32_t id();
+    int prepare();
+
+private:
+    // We can add data to this class if needed
+    // since it is implemented via a pointer.
+    // (see comment at top)
+
+    // Note: use DbTxnMgr::begin() to get pointers to a DbTxn,
+    // and call DbTxn::abort() or DbTxn::commit() rather than
+    // delete to release them.
+    //
+    DbTxn();
+    ~DbTxn();
+
+    // no copying (explicit return type: C++ has no implicit int)
+    DbTxn(const DbTxn &);
+    DbTxn &operator = (const DbTxn &);
+
+    DEFINE_DB_CLASS(DbTxn);
+};
+
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Application classes
+//
+
+//
+// A set of application options - define how this application uses
+// the db library.
+//
+class _exported DbInfo : protected DB_INFO
+{
+    friend class DbEnv;         // class-key required before C++11
+    friend class Db;
+
+public:
+    DbInfo();
+    ~DbInfo();
+
+    // Byte order.
+    int        get_lorder() const;
+    void set_lorder(int);
+
+    // Underlying cache size.
+    size_t get_cachesize() const;
+    void set_cachesize(size_t);
+
+    // Underlying page size.
+    size_t get_pagesize() const;
+    void set_pagesize(size_t);
+
+    // Local heap allocation.
+    typedef void *(*db_malloc_fcn)(size_t);
+    db_malloc_fcn get_malloc() const;
+    void set_malloc(db_malloc_fcn);
+
+    ////////////////////////////////////////////////////////////////
+    // Btree access method.
+
+    // Maximum keys per page.
+    int        get_bt_maxkey() const;
+    void set_bt_maxkey(int);
+
+    // Minimum keys per page.
+    int        get_bt_minkey() const;
+    void set_bt_minkey(int);
+
+    // Comparison function.
+    typedef int (*bt_compare_fcn)(const DBT *, const DBT *);
+    bt_compare_fcn get_bt_compare() const;
+    void set_bt_compare(bt_compare_fcn);
+
+    // Prefix function.
+    typedef size_t (*bt_prefix_fcn)(const DBT *, const DBT *);
+    bt_prefix_fcn get_bt_prefix() const;
+    void set_bt_prefix(bt_prefix_fcn);
+
+    ////////////////////////////////////////////////////////////////
+    // Hash access method.
+
+    // Fill factor.
+    unsigned int get_h_ffactor() const;
+    void set_h_ffactor(unsigned int);
+
+    // Number of elements.
+    unsigned int get_h_nelem() const;
+    void set_h_nelem(unsigned int);
+
+    // Hash function.
+    typedef u_int32_t (*h_hash_fcn)(const void *, u_int32_t);
+    h_hash_fcn get_h_hash() const;
+    void set_h_hash(h_hash_fcn);
+
+    ////////////////////////////////////////////////////////////////
+    // Recno access method.
+
+    // Fixed-length padding byte.
+    int        get_re_pad() const;
+    void set_re_pad(int);
+
+    // Variable-length delimiting byte.
+    int        get_re_delim() const;
+    void set_re_delim(int);
+
+    // Length for fixed-length records.
+    u_int32_t get_re_len() const;
+    void set_re_len(u_int32_t);
+
+    // Source file name.
+    char *get_re_source() const;
+    void set_re_source(char *);
+
+    // Note: some flags are set as side effects of calling
+    // above "set" methods.
+    //
+    u_int32_t get_flags() const;
+    void set_flags(u_int32_t);
+
+
+    // (deep) copying of this object is allowed.
+    //
+    DbInfo(const DbInfo &);
+    DbInfo &operator = (const DbInfo &);
+
+private:
+    // We can add data to this class if needed
+    // since parent class is not allocated by db.
+    // (see comment at top)
+};
+
+//
+// Base application class.  Provides functions for opening a database.
+// User of this library can use this class as a starting point for
+// developing a DB application - derive their application class from
+// this one, add application control logic.
+//
+// Note that if you use the default constructor, you must explicitly
+// call appinit() before any other db activity (e.g. opening files)
+//
+class _exported DbEnv : protected DB_ENV
+{
+friend DbTxnMgr;
+friend DbLog;
+friend DbLockTab;
+friend DbMpool;
+friend Db;
+
+public:
+
+    ~DbEnv();
+
+    // This constructor can be used to immediately initialize the
+    // application with these arguments.  Do not use it if you
+    // need to set other parameters via the access methods.
+    //
+    DbEnv(const char *homeDir, char *const *db_config, int flags);
+
+    // Use this constructor if you wish to *delay* the initialization
+    // of the db library.  This is useful if you need to set
+    // any particular parameters via the access methods below.
+    // Then call appinit() to complete the initialization.
+    //
+    DbEnv();
+
+    // Used in conjunction with the default constructor to
+    // complete the initialization of the db library.
+    //
+    int appinit(const char *homeDir, char *const *db_config, int flags);
+
+    ////////////////////////////////////////////////////////////////
+    // simple get/set access methods
+    //
+    // If you are calling set_ methods, you need to
+    // use the default constructor along with appinit().
+
+    // Byte order.
+    int        get_lorder() const;
+    void set_lorder(int);
+
+    // Error message callback.
+    typedef void (*db_errcall_fcn)(const char *, char *);
+    db_errcall_fcn get_errcall() const;
+    void set_errcall(db_errcall_fcn);
+
+    // Error message file stream.
+    FILE *get_errfile() const;
+    void set_errfile(FILE *);
+
+    // Error message prefix.
+    const char *get_errpfx() const;
+    void set_errpfx(const char *);
+
+    // Generate debugging messages.
+    int get_verbose() const;
+    void set_verbose(int);
+
+    ////////////////////////////////////////////////////////////////
+    // User paths.
+
+    // Database home.
+    char *get_home() const;
+    void set_home(char *);
+
+    // Database log file directory.
+    char *get_log_dir() const;
+    void set_log_dir(char *);
+
+    // Database tmp file directory.
+    char *get_tmp_dir() const;
+    void set_tmp_dir(char *);
+
+    // Database data file directories.
+    char **get_data_dir() const;
+    void set_data_dir(char **);
+
+    // Database data file slots.
+    int get_data_cnt() const;
+    void set_data_cnt(int);
+
+    // Next Database data file slot.
+    int get_data_next() const;
+    void set_data_next(int);
+
+
+    ////////////////////////////////////////////////////////////////
+    // Locking.
+
+    // Return from lock_open().
+    DbLockTab *get_lk_info() const;
+
+    // Two dimensional conflict matrix.
+    u_int8_t *get_lk_conflicts() const;
+    void set_lk_conflicts(u_int8_t *);
+
+    // Number of lock modes in table.
+    int get_lk_modes() const;
+    void set_lk_modes(int);
+
+    // Maximum number of locks.
+    unsigned int get_lk_max() const;
+    void set_lk_max(unsigned int);
+
+    // Deadlock detect on every conflict.
+    u_int32_t get_lk_detect() const;
+    void set_lk_detect(u_int32_t);
+
+    // Yield function for threads.
+    typedef int (*db_yield_fcn) (void);
+    db_yield_fcn get_yield() const;
+    void set_yield(db_yield_fcn);
+
+
+    ////////////////////////////////////////////////////////////////
+    // Logging.
+
+    // Return from log_open().
+    DbLog *get_lg_info() const;
+
+    // Maximum file size.
+    u_int32_t get_lg_max() const;
+    void set_lg_max(u_int32_t);
+
+
+    ////////////////////////////////////////////////////////////////
+    // Memory pool.
+
+    // Return from memp_open().
+    DbMpool *get_mp_info() const;
+
+    // Maximum file size for mmap.
+    size_t get_mp_mmapsize() const;
+    void set_mp_mmapsize(size_t);
+
+    // Bytes in the mpool cache.
+    size_t get_mp_size() const;
+    void set_mp_size(size_t);
+
+
+    ////////////////////////////////////////////////////////////////
+    // Transactions.
+
+    // Return from txn_open().
+    DbTxnMgr *get_tx_info() const;
+
+    // Maximum number of transactions.
+    unsigned int get_tx_max() const;
+    void set_tx_max(unsigned int);
+
+    // Dispatch function for recovery.
+    typedef int (*tx_recover_fcn)(DB_LOG *, DBT *, DB_LSN *, int, void *);
+    tx_recover_fcn get_tx_recover() const;
+    void set_tx_recover(tx_recover_fcn);
+
+    // Flags.
+    u_int32_t get_flags() const;
+    void set_flags(u_int32_t);
+
+    ////////////////////////////////////////////////////////////////
+    // The default error model is to throw an exception whenever
+    // an error occurs.  This generally allows for cleaner logic
+    // for transaction processing, as a try block can surround a
+    // single transaction.  Alternatively, since almost every method
+    // returns an error code (errno), the error model can be set to
+    // not throw exceptions, and instead return the appropriate code.
+    //
+    enum ErrorModel { Exception, ErrorReturn };
+    void set_error_model(ErrorModel);
+    ErrorModel get_error_model() const;
+
+    // If an error is detected and the error call function
+    // or stream is set, a message is dispatched or printed.
+    // If a prefix is set, each message is prefixed.
+    //
+    // You can use set_errcall() or set_errfile() above to control
+    // error functionality using a C model.  Alternatively, you can
+    // call set_error_stream() to force all errors to a C++ stream.
+    // It is unwise to mix these approaches.
+    //
+    class ostream* get_error_stream() const;
+    void set_error_stream(class ostream*);
+
+    // used internally
+    static int runtime_error(const char *caller, int err, int in_destructor = 0);
+
+private:
+    // We can add data to this class if needed
+    // since parent class is not allocated by db.
+    // (see comment at top)
+
+    // no copying: copy constructor and assignment are private
+    DbEnv(const DbEnv &);
+    DbEnv &operator = (const DbEnv &);
+
+    ErrorModel error_model_;
+    static void stream_error_function(const char *, char *);
+    static ostream *error_stream_;
+};
+
+////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////
+//
+// Table access classes
+//
+
+//
+// Represents a database table = a set of keys with associated values.
+//
+class _exported Db
+{
+    friend DbEnv;
+
+public:
+    int close(int flags);
+    int cursor(DbTxn *txnid, Dbc **cursorp);
+    int del(Dbt *key, DbTxn *txnid);
+    int fd(int *fdp);
+    int get(DbTxn *txnid, Dbt *key, Dbt *data, int flags);
+    int put(DbTxn *txnid, Dbt *key, Dbt *data, int flags);
+    int stat(void *sp, void *(*db_malloc)(size_t), int flags);
+    int sync(int flags);
+
+    DBTYPE get_type() const;
+
+    static int open(const char *fname, DBTYPE type, int flags,
+                    int mode, DbEnv *dbenv, DbInfo *info, Db **dbpp);
+
+private:
+    // We can add data to this class if needed
+    // since it is implemented via a pointer.
+    // (see comment at top)
+
+    // Note: use Db::open() to get initialized pointers to a Db,
+    // and call Db::close() rather than delete to release them.
+    Db();
+    ~Db();
+
+    // no copying
+    Db(const Db &);
+    Db &operator = (const Db &);
+
+    DEFINE_DB_CLASS(Db);
+};
+
+//
+// A chunk of data, e.g. a key or a value; privately derived from the C DBT.
+//
+class _exported Dbt : private DBT
+{
+    friend Dbc;
+    friend Db;
+    friend DbLog;
+    friend DbMpoolFile;
+    friend DbLockTab;
+
+public:
+
+    // key/data
+    void *get_data() const;
+    void set_data(void *);
+
+    // key/data length
+    u_int32_t get_size() const;
+    void set_size(u_int32_t);
+
+    // RO: length of user buffer.  NOTE(review): "RO" is not defined here; confirm.
+    u_int32_t get_ulen() const;
+    void set_ulen(u_int32_t);
+
+    // RO: get/put record length.
+    u_int32_t get_dlen() const;
+    void set_dlen(u_int32_t);
+
+    // RO: get/put record offset.
+    u_int32_t get_doff() const;
+    void set_doff(u_int32_t);
+
+    // flags
+    u_int32_t get_flags() const;
+    void set_flags(u_int32_t);
+
+    Dbt(void *data, size_t size);
+    Dbt();
+    ~Dbt();
+    Dbt(const Dbt &);
+    Dbt &operator = (const Dbt &);
+
+private:
+    // We can add data to this class if needed
+    // since parent class is not allocated by db.
+    // (see comment at top)
+};
+
+class _exported Dbc : protected DBC
+{
+    friend Db;
+
+public:
+    int close();
+    int del(int flags);
+    int get(Dbt* key, Dbt *data, int flags);
+    int put(Dbt* key, Dbt *data, int flags);
+
+private:
+    // No data is permitted in this class (see comment at top)
+
+    // Note: use Db::cursor() to get pointers to a Dbc,
+    // and call Dbc::close() rather than delete to release them.
+    //
+    Dbc();
+    ~Dbc();
+
+    // no copying: copy constructor and assignment are private
+    Dbc(const Dbc &);
+    Dbc &operator = (const Dbc &);
+};
+
+#endif /* !_DB_CXX_H_ */
diff --git a/db2/include/db_dispatch.h b/db2/include/db_dispatch.h
new file mode 100644 (file)
index 0000000..b93ec39
--- /dev/null
@@ -0,0 +1,73 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)db_dispatch.h       10.1 (Sleepycat) 4/12/97
+ */
+
+#ifndef _DB_DISPATCH_H
+#define _DB_DISPATCH_H
+
+/*
+ * Declarations and typedefs for the list of transaction IDs used during
+ * recovery.
+ */
+
+typedef struct __db_txnhead {
+       LIST_HEAD(__db_headlink, _db_txnlist) head; /* List of _db_txnlist. */
+       u_int32_t maxid; /* Presumably the largest txnid seen; confirm. */
+} __db_txnhead;
+
+typedef struct _db_txnlist {
+       LIST_ENTRY(_db_txnlist) links; /* List linkage. */
+       u_int32_t       txnid; /* Transaction ID. */
+} __db_txnlist;
+
+#define        DB_log_BEGIN              0
+#define        DB_txn_BEGIN              5
+#define        DB_ham_BEGIN             20
+#define        DB_db_BEGIN              40
+#define        DB_bam_BEGIN             50
+#define        DB_ram_BEGIN            100
+#define        DB_user_BEGIN           150
+
+#define        TXN_UNDO                 0
+#define        TXN_REDO                 1
+#define        TXN_BACKWARD_ROLL       (-1)    /* Negatives are parenthesized. */
+#define        TXN_FORWARD_ROLL        (-2)
+#define        TXN_OPENFILES           (-3)
+#endif
diff --git a/db2/include/db_ext.h b/db2/include/db_ext.h
new file mode 100644 (file)
index 0000000..1cccb47
--- /dev/null
@@ -0,0 +1,114 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __db_pgerr __P((DB *, db_pgno_t));
+int __db_pgfmt __P((DB *, db_pgno_t));
+int __db_addrem_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
+    size_t, DBT *, DBT *, DB_LSN *));
+int __db_addrem_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_addrem_read __P((void *, __db_addrem_args **));
+int __db_split_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t, db_pgno_t, DBT *,
+    DB_LSN *));
+int __db_split_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_split_read __P((void *, __db_split_args **));
+int __db_big_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t, db_pgno_t, db_pgno_t,
+    db_pgno_t, DBT *, DB_LSN *, DB_LSN *,
+    DB_LSN *));
+int __db_big_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_big_read __P((void *, __db_big_args **));
+int __db_ovref_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *));
+int __db_ovref_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_ovref_read __P((void *, __db_ovref_args **));
+int __db_relink_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
+    DB_LSN *, db_pgno_t, DB_LSN *));
+int __db_relink_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_relink_read __P((void *, __db_relink_args **));
+int __db_addpage_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t,
+    DB_LSN *));
+int __db_addpage_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_addpage_read __P((void *, __db_addpage_args **));
+int __db_debug_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    DBT *, u_int32_t, DBT *, DBT *,
+    u_int32_t));
+int __db_debug_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_debug_read __P((void *, __db_debug_args **));
+int __db_noop_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t));
+int __db_noop_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_noop_read __P((void *, __db_noop_args **));
+int __db_init_print __P((DB_ENV *));
+int __db_init_recover __P((DB_ENV *));
+int __db_pgin __P((db_pgno_t, void *));
+int __db_pgout __P((db_pgno_t, void *));
+int __db_dispatch __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_add_recovery __P((DB_ENV *,
+   int (*)(DB_LOG *, DBT *, DB_LSN *, int, void *), u_int32_t));
+int __db_txnlist_init __P((void *));
+int __db_txnlist_add __P((void *, u_int32_t));
+int __db_txnlist_find __P((void *, u_int32_t));
+int __db_dput __P((DB *,
+   DBT *, PAGE **, db_indx_t *, int (*)(DB *, u_int32_t, PAGE **)));
+int __db_drem __P((DB *,
+   PAGE **, u_int32_t, int (*)(DB *, PAGE *)));
+int __db_dend __P((DB *, db_pgno_t, PAGE **));
+ int __db_ditem __P((DB *, PAGE *, int, u_int32_t));
+int __db_pitem
+    __P((DB *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *));
+int __db_relink __P((DB *, PAGE *, PAGE **, int));
+int __db_ddup __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
+int __db_goff __P((DB *, DBT *,
+    u_int32_t, db_pgno_t, void **, u_int32_t *));
+int __db_poff __P((DB *, const DBT *, db_pgno_t *,
+    int (*)(DB *, u_int32_t, PAGE **)));
+int __db_ioff __P((DB *, db_pgno_t));
+int __db_doff __P((DB *, db_pgno_t, int (*)(DB *, PAGE *)));
+int __db_moff __P((DB *, const DBT *, db_pgno_t));
+void __db_loadme __P((void));
+FILE *__db_prinit __P((FILE *));
+int __db_dump __P((DB *, char *, int));
+int __db_prdb __P((DB *));
+int __db_prbtree __P((DB *));
+int __db_prhash __P((DB *));
+int __db_prtree __P((DB_MPOOLFILE *, int));
+int __db_prnpage __P((DB_MPOOLFILE *, db_pgno_t));
+int __db_prpage __P((PAGE *, int));
+int __db_isbad __P((PAGE *, int));
+void __db_pr __P((u_int8_t *, u_int32_t));
+void __db_prflags __P((u_int32_t, const FN *));
+int __db_addrem_recover
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_split_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_big_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_ovref_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_relink_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_addpage_recover
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_debug_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_noop_recover
+  __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __db_ret __P((DB *,
+   PAGE *, u_int32_t, DBT *, void **, u_int32_t *));
+int __db_retcopy __P((DBT *,
+   void *, u_int32_t, void **, u_int32_t *, void *(*)(size_t)));
+int __db_gethandle __P((DB *, int (*)(DB *, DB *), DB **));
+int __db_puthandle __P((DB *));
diff --git a/db2/include/db_int.h.src b/db2/include/db_int.h.src
new file mode 100644 (file)
index 0000000..b60e500
--- /dev/null
@@ -0,0 +1,332 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)db_int.h.src        10.28 (Sleepycat) 8/20/97
+ */
+
+#ifndef _DB_INTERNAL_H_
+#define        _DB_INTERNAL_H_
+
+#include "db.h"                                /* Standard DB include file. */
+#include "queue.h"
+#include "os_ext.h"
+
+/*******************************************************
+ * General purpose constants and macros.
+ *******************************************************/
+#define        UINT32_T_MAX    0xffffffff      /* Maximum 32 bit unsigned. */
+#define        UINT16_T_MAX        0xffff      /* Maximum 16 bit unsigned. */
+
+#define        DB_MIN_PGSIZE   0x000200        /* Minimum page size. */
+#define        DB_MAX_PGSIZE   0x010000        /* Maximum page size. */
+
+#define        DB_MINCACHE     10              /* Minimum cached pages */
+
+/*
+ * Aligning items to particular sizes or in pages or memory.  ALIGNP is a
+ * separate macro, as we've had to cast the pointer to different integral
+ * types on different architectures.
+ *
+ * We cast pointers into unsigned longs when manipulating them because C89
+ * guarantees that u_long is the largest available integral type and further,
+ * to never generate overflows.  However, neither C89 nor C9X requires that
+ * any integer type be large enough to hold a pointer, although C9X created
+ * the intptr_t type, which is guaranteed to hold a pointer but may or may
+ * not exist.  At some point in the future, we should test for intptr_t and
+ * use it where available.
+ */
+#undef ALIGNTYPE
+#define        ALIGNTYPE               u_long
+#undef ALIGNP
+#define        ALIGNP(value, bound)    ALIGN((ALIGNTYPE)(value), bound)
+#undef ALIGN
+#define        ALIGN(value, bound)     (((value) + (bound) - 1) & ~((bound) - 1))
+
+/*
+ * There are several on-page structures that are declared to have a number of
+ * fields followed by a variable length array of items.  The structure size
+ * without including the variable length array or the address of the first of
+ * those elements can be found using SSZ.
+ *
+ * This macro can also be used to find the offset of a structure element in a
+ * structure.  This is used in various places to copy structure elements from
+ * unaligned memory references, e.g., pointers into a packed page.
+ *
+ * There are two versions because compilers object if you take the address of
+ * an array.
+ */
+#undef SSZ
+#define SSZ(name, field)       ((int)&(((name *)0)->field)) /* Cast to int. */
+
+#undef SSZA
+#define SSZA(name, field)      ((int)&(((name *)0)->field[0])) /* Array form. */
+
+/* Free/free-string macros (bare { } blocks); DEBUG fills with 0xff first. */
+#ifdef DEBUG
+#undef FREE
+#define        FREE(p, len) {                                                  \
+       memset(p, 0xff, len);                                           \
+       free(p);                                                        \
+}
+#undef FREES
+#define        FREES(p) {                                                      \
+       FREE(p, strlen(p));                                             \
+}
+#else
+#undef FREE
+#define        FREE(p, len) {                                                  \
+       free(p);                                                        \
+}
+#undef FREES
+#define        FREES(p) {                                                      \
+       free(p);                                                        \
+}
+#endif
+
+/* Structure used to print flag values. */
+typedef struct __fn {
+       u_int32_t mask;                 /* Flag value. */
+       char     *name;                 /* Flag name. */
+} FN;
+
+/* Set, clear, test flags: F_* use (p)->flags, LF_* a variable named flags. */
+#define        F_SET(p, f)     ((p)->flags |= (f))
+#define        F_CLR(p, f)     ((p)->flags &= ~(f))
+#define        F_ISSET(p, f)   ((p)->flags & (f))
+#define        LF_SET(f)       (flags |= (f))
+#define        LF_CLR(f)       (flags &= ~(f))
+#define        LF_ISSET(f)     (flags & (f))
+
+/* Display separator string. */
+#undef DB_LINE
+#define        DB_LINE "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+/*******************************************************
+ * Files.
+ *******************************************************/
+#ifndef MAXPATHLEN             /* Maximum path length. */
+#ifdef PATH_MAX
+#define        MAXPATHLEN      PATH_MAX
+#else
+#define        MAXPATHLEN      1024
+#endif
+#endif
+
+#define        PATH_DOT        "."     /* Current working directory. */
+#define        PATH_SEPARATOR  "/"     /* Path separator (a string, not a char). */
+
+#ifndef S_IRUSR                        /* UNIX specific file permissions. */
+#define        S_IRUSR 0000400         /* R for owner */
+#define        S_IWUSR 0000200         /* W for owner */
+#define        S_IRGRP 0000040         /* R for group */
+#define        S_IWGRP 0000020         /* W for group */
+#define        S_IROTH 0000004         /* R for other */
+#define        S_IWOTH 0000002         /* W for other */
+#endif
+
+#ifndef S_ISDIR                        /* UNIX specific: directory test. */
+#define        S_ISDIR(m)      (((m) & 0170000) == 0040000)
+#endif
+
+/*******************************************************
+ * Mutex support.
+ *******************************************************/
+@spin_line1@
+@spin_line2@
+@spin_line3@
+
+/*
+ * !!!
+ * Various systems require different alignments for mutexes (the worst we've
+ * seen so far is 16-bytes on some HP architectures).  The mutex (tsl_t) must
+ * be first in the db_mutex_t structure, which must itself be first in the
+ * region.  This ensures the alignment is as returned by mmap(2), which should
+ * be sufficient.  All other mutex users must ensure proper alignment locally.
+ */
+#define        MUTEX_ALIGNMENT @mutex_align@
+
+/*
+ * The offset of a mutex in memory: the byte distance from a to b, as an off_t.
+ */
+#define        MUTEX_LOCK_OFFSET(a, b) ((off_t)((u_int8_t *)(b) - (u_int8_t *)(a)))
+
+typedef struct _db_mutex_t {
+#ifdef HAVE_SPINLOCKS
+       tsl_t   tsl_resource;           /* Resource test and set. */
+#ifdef DEBUG
+       u_long  pid;                    /* Lock holder: 0 or process pid. */
+#endif
+#else
+       off_t   off;                    /* Backing file offset. */
+       u_long  pid;                    /* Lock holder: 0 or process pid. */
+#endif
+#ifdef MUTEX_STATISTICS
+       u_long  mutex_set_wait;         /* Blocking mutex: required waiting. */
+       u_long  mutex_set_nowait;       /* Blocking mutex: without waiting. */
+#endif
+} db_mutex_t;
+
+#include "mutex_ext.h"
+
+/*******************************************************
+ * Access methods.
+ *******************************************************/
+/* Lock/unlock a DB thread. */
+#define        DB_THREAD_LOCK(dbp)                                             \
+       (F_ISSET(dbp, DB_AM_THREAD) ?                                   \
+           __db_mutex_lock((db_mutex_t *)(dbp)->mutex,  -1,            \
+               (dbp)->dbenv == NULL ? NULL : (dbp)->dbenv->db_yield) : 0)
+#define        DB_THREAD_UNLOCK(dbp)                                           \
+       (F_ISSET(dbp, DB_AM_THREAD) ?                                   \
+           __db_mutex_unlock((db_mutex_t *)(dbp)->mutex,  -1) : 0)
+
+/* Btree/recno local statistics structure. */
+struct __db_bt_lstat;  typedef struct __db_bt_lstat DB_BTREE_LSTAT;
+struct __db_bt_lstat {
+       u_int32_t bt_freed;             /* Pages freed for reuse. */
+       u_int32_t bt_pfxsaved;          /* Bytes saved by prefix compression. */
+       u_int32_t bt_split;             /* Total number of splits. */
+       u_int32_t bt_rootsplit;         /* Root page splits. */
+       u_int32_t bt_fastsplit;         /* Fast splits. */
+       u_int32_t bt_added;             /* Items added. */
+       u_int32_t bt_deleted;           /* Items deleted. */
+       u_int32_t bt_get;               /* Items retrieved. */
+       u_int32_t bt_cache_hit;         /* Hits in fast-insert code. */
+       u_int32_t bt_cache_miss;        /* Misses in fast-insert code. */
+};
+
+/*******************************************************
+ * Environment.
+ *******************************************************/
+/* Type passed to __db_appname(). */
+typedef enum {
+       DB_APP_NONE=0,                  /* No type (region). */
+       DB_APP_DATA,                    /* Data file. */
+       DB_APP_LOG,                     /* Log file. */
+       DB_APP_TMP                      /* Temporary file. */
+} APPNAME;
+
+/*******************************************************
+ * Regions.
+ *******************************************************/
+/*
+ * The shared memory regions share an initial structure so that the general
+ * region code can handle races between the region being deleted and other
+ * processes waiting on the region mutex.
+ *
+ * !!!
+ * Note, the mutex must be the first entry in the region; see comment above.
+ */
+typedef struct _rlayout {
+       db_mutex_t lock;                /* Region mutex. */
+       u_int32_t  refcnt;              /* Region reference count. */
+       size_t     size;                /* Region length. */
+       int        majver;              /* Major version number. */
+       int        minver;              /* Minor version number. */
+       int        patch;               /* Patch version number. */
+
+#define        DB_R_DELETED    0x01            /* Region was deleted. */
+       u_int32_t  flags;
+} RLAYOUT;
+
+/*******************************************************
+ * Mpool.
+ *******************************************************/
+/*
+ * File types for DB access methods.  Negative numbers are reserved to DB.
+ */
+#define        DB_FTYPE_BTREE          -1      /* Btree. */
+#define        DB_FTYPE_HASH           -2      /* Hash. */
+
+/* Structure used as the DB pgin/pgout pgcookie. */
+typedef struct __dbpginfo {
+       size_t  db_pagesize;            /* Underlying page size. */
+       int     needswap;               /* If swapping required. */
+} DB_PGINFO;
+
+/*******************************************************
+ * Log.
+ *******************************************************/
+/* Initialize an LSN to 'zero'. */
+#define        ZERO_LSN(LSN) {                                                 \
+       (LSN).file = 0;                                                 \
+       (LSN).offset = 0;                                               \
+}
+
+/* Return 1 if LSN is a 'zero' lsn (only the file field is tested), else 0. */
+#define        IS_ZERO_LSN(LSN)        ((LSN).file == 0)
+
+/* Test if we need to log a change. */
+#define        DB_LOGGING(dbp) \
+       (F_ISSET(dbp, DB_AM_LOGGING) && !F_ISSET(dbp, DB_AM_RECOVER))
+
+#ifdef DEBUG
+/*
+ * Debugging macro to log operations.
+ *     If DEBUG_WOP is defined, log operations that modify the database.
+ *     If DEBUG_ROP is defined, log operations that read the database.
+ *
+ * D dbp
+ * T txn
+ * O operation (string)
+ * K key
+ * A data
+ * F flags
+ */
+#define        LOG_OP(D, T, O, K, A, F) {                                      \
+       DB_LSN _lsn;                                                    \
+       DBT _op;                                                        \
+       if (DB_LOGGING((D))) {                                          \
+               memset(&_op, 0, sizeof(_op));                           \
+               _op.data = O;                                           \
+               _op.size = strlen(O) + 1;                               \
+               (void)__db_debug_log((D)->dbenv->lg_info,               \
+                   T, &_lsn, 0, &_op, (D)->log_fileid, K, A, F);       \
+       }                                                               \
+}
+#ifdef DEBUG_ROP
+#define        DEBUG_LREAD(D, T, O, K, A, F)   LOG_OP(D, T, O, K, A, F)
+#else
+#define        DEBUG_LREAD(D, T, O, K, A, F)
+#endif
+#ifdef DEBUG_WOP
+#define        DEBUG_LWRITE(D, T, O, K, A, F)  LOG_OP(D, T, O, K, A, F)
+#else
+#define        DEBUG_LWRITE(D, T, O, K, A, F)
+#endif
+#else
+#define        DEBUG_LREAD(D, T, O, K, A, F)
+#define        DEBUG_LWRITE(D, T, O, K, A, F)
+#endif /* DEBUG */
+
+/*******************************************************
+ * Transactions and recovery.
+ *******************************************************/
+/*
+ * The locker id space is divided between the transaction manager and the lock
+ * manager.  Lockid's start at 0 and go to MAX_LOCKER_ID.  Txn Id's start at
+ * MAX_LOCKER_ID + 1 and go up to MAX_TXNID.
+ */
+#define        MAX_LOCKER_ID   0x0fffffff
+#define        MAX_TXNID       0xffffffff
+
+/*
+ * Out of band value for a lock.  The locks are returned to callers as offsets
+ * into the lock regions.  Since the RLAYOUT structure begins all regions, an
+ * offset of 0 is guaranteed not to be a valid lock.
+ */
+#define        LOCK_INVALID    0
+
+/* The structure allocated for every transaction. */
+struct __db_txn {
+       DB_TXNMGR       *mgrp;          /* Pointer to transaction manager. */
+       DB_TXN          *parent;        /* Pointer to transaction's parent. */
+       DB_LSN          last_lsn;       /* Lsn of last log write. */
+       u_int32_t       txnid;          /* Unique transaction id. */
+       size_t          off;            /* Detail structure within region. */
+       TAILQ_ENTRY(__db_txn) links;
+};
+#endif /* !_DB_INTERNAL_H_ */
diff --git a/db2/include/db_page.h b/db2/include/db_page.h
new file mode 100644 (file)
index 0000000..9e78682
--- /dev/null
@@ -0,0 +1,535 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)db_page.h   10.10 (Sleepycat) 8/18/97
+ */
+
+#ifndef _DB_PAGE_H_
+#define        _DB_PAGE_H_
+
+/*
+ * DB page formats.
+ *
+ * This implementation requires that values within the following structures
+ * NOT be padded -- note, ANSI C permits random padding within structures.
+ * If your compiler pads randomly you can just forget ever making DB run on
+ * your system.  In addition, no data type can require larger alignment than
+ * its own size, e.g., a 4-byte data element may not require 8-byte alignment.
+ *
+ * Note that key/data lengths are often stored in db_indx_t's -- this is
+ * not accidental, nor does it limit the key/data size.  If the key/data
+ * item fits on a page, it's guaranteed to be small enough to fit into a
+ * db_indx_t, and storing it in one saves space.
+ */
+
+#define        PGNO_METADATA   0       /* Metadata page number. */
+#define        PGNO_INVALID    0       /* Metadata page number, therefore illegal. */
+#define        PGNO_ROOT       1       /* Root is page #1. */
+
+/************************************************************************
+ BTREE METADATA PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Btree metadata page layout:
+ *
+ *     +-----------------------------------+
+ *     |    lsn    |   pgno    |   magic   |
+ *     +-----------------------------------+
+ *     |   version |  pagesize |   free    |
+ *     +-----------------------------------+
+ *     |    flags  |  unused ...           |
+ *     +-----------------------------------+
+ */
+typedef struct _btmeta {
+       DB_LSN    lsn;          /* 00-07: LSN. */
+       db_pgno_t pgno;         /* 08-11: Current page number. */
+       u_int32_t magic;        /* 12-15: Magic number. */
+       u_int32_t version;      /* 16-19: Version. */
+       u_int32_t pagesize;     /* 20-23: Pagesize. */
+       u_int32_t maxkey;       /* 24-27: Btree: Maxkey. */
+       u_int32_t minkey;       /* 28-31: Btree: Minkey. */
+       u_int32_t free;         /* 32-35: Free list page number. */
+#define        BTM_DUP         0x001   /*        Duplicates. */
+#define        BTM_RECNO       0x002   /*        Recno tree. */
+#define        BTM_RECNUM      0x004   /*        Btree: maintain record count. */
+#define        BTM_FIXEDLEN    0x008   /*        Recno: fixed length records. */
+#define        BTM_RENUMBER    0x010   /*        Recno: renumber on insert/delete. */
+#define        BTM_MASK        0x01f
+       u_int32_t flags;        /* 36-39: Flags. */
+       u_int32_t re_len;       /* 40-43: Recno: fixed-length record length. */
+       u_int32_t re_pad;       /* 44-47: Recno: fixed-length record pad. */
+                               /* 48-67: Unique file ID. */
+       u_int8_t  uid[DB_FILE_ID_LEN];
+
+       u_int32_t spare[13];    /* 68-123: Save some room for growth. */
+
+       DB_BTREE_LSTAT stat;    /* 124-163: Statistics. */
+} BTMETA;
+
+/************************************************************************
+ HASH METADATA PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Hash metadata page layout:
+ *
+ *     +-----------------------------------+
+ *     |    lsn    |   magic   |  version  |
+ *     +-----------------------------------+
+ *     |  pagesize | ovfl_point| last_freed|
+ *     +-----------------------------------+
+ *     | max_bucket| high_mask | low_mask  |
+ *     +-----------------------------------+
+ *     | ffactor   |   nelem   | charkey   |
+ *     +-----------------------------------+
+ *     | spares[32]|   flags   | unused    |
+ *     +-----------------------------------+
+ */
+/* Hash Table Information */
+typedef struct hashhdr {       /* Disk resident portion */
+       DB_LSN  lsn;            /* 00-07: LSN of the header page */
+       db_pgno_t pgno;         /* 08-11: Page number (btree compatibility). */
+       u_int32_t magic;        /* 12-15: Magic NO for hash tables */
+       u_int32_t version;      /* 16-19: Version ID */
+       u_int32_t pagesize;     /* 20-23: Bucket/Page Size */
+       u_int32_t ovfl_point;   /* 24-27: Overflow page allocation location */
+       u_int32_t last_freed;   /* 28-31: Last freed overflow page pgno */
+       u_int32_t max_bucket;   /* 32-35: ID of Maximum bucket in use */
+       u_int32_t high_mask;    /* 36-39: Modulo mask into table */
+       u_int32_t low_mask;     /* 40-43: Modulo mask into table lower half */
+       u_int32_t ffactor;      /* 44-47: Fill factor */
+       u_int32_t nelem;        /* 48-51: Number of keys in hash table */
+       u_int32_t h_charkey;    /* 52-55: Value of hash(CHARKEY) */
+#define        DB_HASH_DUP     0x01
+       u_int32_t flags;        /* 56-59: Allow duplicates. */
+#define NCACHED        32              /* number of spare points */
+                               /* 60-187: Spare pages for overflow */
+       u_int32_t spares[NCACHED];
+                               /* 188-207: Unique file ID. */
+       u_int8_t  uid[DB_FILE_ID_LEN];
+
+       /*
+        * Minimum page size is 256.
+        */
+} HASHHDR;
+
+/************************************************************************
+ MAIN PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ *     +-----------------------------------+
+ *     |    lsn    |   pgno    | prev pgno |
+ *     +-----------------------------------+
+ *     | next pgno |  entries  | hf offset |
+ *     +-----------------------------------+
+ *     |   level   |   type    |   index   |
+ *     +-----------------------------------+
+ *     |   index   | free -->              |
+ *     +-----------+-----------------------+
+ *     |        F R E E A R E A            |
+ *     +-----------------------------------+
+ *     |              <-- free |   item    |
+ *     +-----------------------------------+
+ *     |   item    |   item    |   item    |
+ *     +-----------------------------------+
+ *
+ * The page header (everything before inp) is 26 bytes, and the following
+ * indices are guaranteed to be two-byte aligned.
+ *
+ * For hash and btree leaf pages, index items are paired, e.g., inp[0] is the
+ * key for inp[1]'s data.  All other types of pages only contain single items.
+ */
+typedef struct _db_page {
+       DB_LSN    lsn;          /* 00-07: Log sequence number. */
+       db_pgno_t pgno;         /* 08-11: Current page number. */
+       db_pgno_t prev_pgno;    /* 12-15: Previous page number. */
+       db_pgno_t next_pgno;    /* 16-19: Next page number. */
+       db_indx_t entries;      /* 20-21: Number of item pairs on the page. */
+       db_indx_t hf_offset;    /* 22-23: High free byte page offset. */
+
+       /*
+        * The btree levels are numbered from the leaf to the root, starting
+        * with 1, so the leaf is level 1, its parent is level 2, and so on.
+        * We maintain this level on all btree pages, but the only place that
+        * we actually need it is on the root page.  It would not be difficult
+        * to hide the byte on the root page once it becomes an internal page,
+        * so we could get this byte back if we needed it for something else.
+        */
+#define        LEAFLEVEL         1
+#define        MAXBTREELEVEL   255
+       u_int8_t  level;        /*    24: Btree tree level. */
+
+#define        P_INVALID       0       /*        Invalid page type. */
+#define        P_DUPLICATE     1       /*        Duplicate. */
+#define        P_HASH          2       /*        Hash. */
+#define        P_IBTREE        3       /*        Btree internal. */
+#define        P_IRECNO        4       /*        Recno internal. */
+#define        P_LBTREE        5       /*        Btree leaf. */
+#define        P_LRECNO        6       /*        Recno leaf. */
+#define        P_OVERFLOW      7       /*        Overflow. */
+       u_int8_t  type;         /*    25: Page type. */
+       db_indx_t inp[1];       /* Variable length index of items. */
+} PAGE;
+
+/* Element macros. */
+#define        LSN(p)          (((PAGE *)p)->lsn)
+#define        PGNO(p)         (((PAGE *)p)->pgno)
+#define        PREV_PGNO(p)    (((PAGE *)p)->prev_pgno)
+#define        NEXT_PGNO(p)    (((PAGE *)p)->next_pgno)
+#define        NUM_ENT(p)      (((PAGE *)p)->entries)
+#define        HOFFSET(p)      (((PAGE *)p)->hf_offset)
+#define        LEVEL(p)        (((PAGE *)p)->level)
+#define        TYPE(p)         (((PAGE *)p)->type)
+
+/*
+ * !!!
+ * The next_pgno and prev_pgno fields are not maintained for btree and recno
+ * internal pages.  It's a minor performance improvement, and more, it's
+ * hard to do when deleting internal pages, and it decreases the chance of
+ * deadlock during deletes and splits.
+ *
+ * !!!
+ * The btree/recno access method needs db_recno_t bytes of space on the root
+ * page to specify how many records are stored in the tree.  (The alternative
+ * is to store the number of records in the meta-data page, which will create
+ * a second hot spot in trees being actively modified, or recalculate it from
+ * the BINTERNAL fields on each access.)  Overload the prev_pgno field.
+ */
+#define        RE_NREC(p)                                                      \
+       (TYPE(p) == P_LBTREE ? NUM_ENT(p) / 2 :                         \
+           TYPE(p) == P_LRECNO ? NUM_ENT(p) : PREV_PGNO(p))
+#define        RE_NREC_ADJ(p, adj)                                             \
+       PREV_PGNO(p) += adj;
+#define        RE_NREC_SET(p, num)                                             \
+       PREV_PGNO(p) = num;
+
+/*
+ * Initialize a page.
+ *
+ * !!!
+ * Don't modify the page's LSN, code depends on it being unchanged after a
+ * P_INIT call.
+ */
+#define        P_INIT(pg, pg_size, n, pg_prev, pg_next, btl, pg_type) do {     \
+       PGNO(pg) = n;                                                   \
+       PREV_PGNO(pg) = pg_prev;                                        \
+       NEXT_PGNO(pg) = pg_next;                                        \
+       NUM_ENT(pg) = 0;                                                \
+       HOFFSET(pg) = pg_size;                                          \
+       LEVEL(pg) = btl;                                                \
+       TYPE(pg) = pg_type;                                             \
+} while (0)
+
+/* Page header length (offset to first index). */
+#define P_OVERHEAD             (SSZA(PAGE, inp))
+
+/* First free byte. */
+#define        LOFFSET(pg)             (P_OVERHEAD + NUM_ENT(pg) * sizeof(db_indx_t))
+
+/* Free space on the page. */
+#define        P_FREESPACE(pg)         (HOFFSET(pg) - LOFFSET(pg))
+
+/* Get a pointer to the bytes at a specific index. */
+#define        P_ENTRY(pg, indx)       ((u_int8_t *)pg + ((PAGE *)pg)->inp[indx])
+
+/************************************************************************
+ OVERFLOW PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Overflow items are referenced by HOFFPAGE and BOVERFLOW structures, which
+ * store a page number (the first page of the overflow item) and a length
+ * (the total length of the overflow item).  The overflow item consists of
+ * some number of overflow pages, linked by the next_pgno field of the page.
+ * A next_pgno field of PGNO_INVALID flags the end of the overflow item.
+ *
+ * Overflow page overloads:
+ *     The amount of overflow data stored on each page is stored in the
+ *     hf_offset field.
+ *
+ *     The implementation reference counts overflow items as it's possible
+ *     for them to be promoted onto btree internal pages.  The reference
+ *     count is stored in the entries field.
+ */
+#define        OV_LEN(p)       (((PAGE *)p)->hf_offset)
+#define        OV_REF(p)       (((PAGE *)p)->entries)
+
+/* Maximum number of bytes that you can put on an overflow page. */
+#define        P_MAXSPACE(psize)       ((psize) - P_OVERHEAD)
+
+/************************************************************************
+ HASH PAGE LAYOUT
+ ************************************************************************/
+
+/* Each index references a group of bytes on the page. */
+#define        H_KEYDATA       1       /* Key/data item. */
+#define        H_DUPLICATE     2       /* Duplicate key/data item. */
+#define        H_OFFPAGE       3       /* Overflow key/data item. */
+#define        H_OFFDUP        4       /* Overflow page of duplicates. */
+
+/*
+ * The first and second types are H_KEYDATA and H_DUPLICATE, represented
+ * by the HKEYDATA structure:
+ *
+ *     +-----------------------------------+
+ *     |    type   | key/data ...          |
+ *     +-----------------------------------+
+ *
+ * For duplicates, the data field encodes the duplicate elements, each
+ * preceded and followed by its length:
+ *
+ *     +---------------------------------------------------------------+
+ *     |    type   | len1 | element1 | len1 | len2 | element2 | len2   |
+ *     +---------------------------------------------------------------+
+ *
+ * Thus, by keeping track of the offset in the element, we can do both
+ * backward and forward traversal.
+ */
+typedef struct _hkeydata {
+       u_int8_t  type;         /*    00: Page type. */
+       u_int8_t  data[1];      /* Variable length key/data item. */
+} HKEYDATA;
+
+/* Get a HKEYDATA item for a specific index. */
+#define        GET_HKEYDATA(pg, indx)                                          \
+       ((HKEYDATA *)P_ENTRY(pg, indx))
+
+/*
+ * The length of any HKEYDATA item. Note that indx is an element index,
+ * not a PAIR index.
+ */
+#define        LEN_HITEM(pg, pgsize, indx)                                     \
+       (((indx) == 0 ? pgsize : pg->inp[indx - 1]) - pg->inp[indx])
+
+#define        LEN_HKEYDATA(pg, psize, indx)                                   \
+       (((indx) == 0 ? psize : pg->inp[indx - 1]) -                    \
+       pg->inp[indx] - HKEYDATA_SIZE(0))
+
+/*
+ * Page space required to add a new HKEYDATA item to the page, with and
+ * without the index value.
+ */
+#define        HKEYDATA_SIZE(len)                                              \
+       ((len) + SSZA(HKEYDATA, data))
+#define        HKEYDATA_PSIZE(len)                                             \
+       (HKEYDATA_SIZE(len) + sizeof(db_indx_t))
+
+/* Put a HKEYDATA item at the location referenced by a page entry. */
+#define        PUT_HKEYDATA(pe, kd, len, type) {                               \
+       ((HKEYDATA *)pe)->type = type;                                  \
+       memcpy((u_int8_t *)pe + sizeof(u_int8_t), kd, len);             \
+}
+
+/*
+ * Macros that describe the page layout in terms of key-data pairs.
+ * The use of "pindex" indicates that the argument is the index
+ * expressed in pairs instead of individual elements.
+ */
+#define H_NUMPAIRS(pg)                 (NUM_ENT(pg) / 2)
+#define        H_KEYINDEX(pindx)               (2 * (pindx))
+#define        H_DATAINDEX(pindx)              ((2 * (pindx)) + 1)
+#define        H_PAIRKEY(pg, pindx)            GET_HKEYDATA(pg, H_KEYINDEX(pindx))
+#define        H_PAIRDATA(pg, pindx)           GET_HKEYDATA(pg, H_DATAINDEX(pindx))
+#define H_PAIRSIZE(pg, psize, pindx)                                   \
+       (LEN_HITEM(pg, psize, H_KEYINDEX(pindx)) +                      \
+       LEN_HITEM(pg, psize, H_DATAINDEX(pindx)))
+#define LEN_HDATA(p, psize, pindx) LEN_HKEYDATA(p, psize, H_DATAINDEX(pindx))
+#define LEN_HKEY(p, psize, pindx) LEN_HKEYDATA(p, psize, H_KEYINDEX(pindx))
+
+/*
+ * The third type is the H_OFFPAGE, represented by the HOFFPAGE structure:
+ *
+ *     +-----------------------------------+
+ *     |   type    |  pgno_t   | total len |
+ *     +-----------------------------------+
+ */
+typedef struct _hoffpage {
+       u_int8_t  type;         /*    00: Page type and delete flag. */
+       u_int8_t  unused[3];    /* 01-03: Padding, unused. */
+       db_pgno_t pgno;         /* 04-07: Offpage page number. */
+       u_int32_t tlen;         /* 08-11: Total length of item. */
+} HOFFPAGE;
+
+/* Get a HOFFPAGE item for a specific index. */
+#define        GET_HOFFPAGE(pg, indx)                                          \
+       ((HOFFPAGE *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new HOFFPAGE item to the page, with and
+ * without the index value.
+ */
+#define        HOFFPAGE_SIZE           (sizeof(HOFFPAGE))
+#define        HOFFPAGE_PSIZE          (HOFFPAGE_SIZE + sizeof(db_indx_t))
+
+/*
+ * The fourth type is H_OFFDUP represented by the HOFFDUP structure:
+ *
+ *     +-----------------------+
+ *     |   type    |  pgno_t   |
+ *     +-----------------------+
+ */
+typedef struct _hoffdup {
+       u_int8_t  type;         /*    00: Page type and delete flag. */
+       u_int8_t  unused[3];    /* 01-03: Padding, unused. */
+       db_pgno_t pgno;         /* 04-07: Offpage page number. */
+} HOFFDUP;
+
+/* Get a HOFFDUP item for a specific index. */
+#define        GET_HOFFDUP(pg, indx)                                           \
+       ((HOFFDUP *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new HOFFDUP item to the page, with and
+ * without the index value.
+ */
+#define        HOFFDUP_SIZE            (sizeof(HOFFDUP))
+#define        HOFFDUP_PSIZE           (HOFFDUP_SIZE + sizeof(db_indx_t))
+
+/************************************************************************
+ BTREE PAGE LAYOUT
+ ************************************************************************/
+
+/* Each index references a group of bytes on the page. */
+#define        B_KEYDATA       1       /* Key/data item. */
+#define        B_DUPLICATE     2       /* Duplicate key/data item. */
+#define        B_OVERFLOW      3       /* Overflow key/data item. */
+
+/*
+ * The first type is B_KEYDATA, represented by the BKEYDATA structure:
+ *
+ *     +-----------------------------------+
+ *     |   length  |    type   | key/data  |
+ *     +-----------------------------------+
+ */
+typedef struct _bkeydata {
+       db_indx_t len;          /* 00-01: Key/data item length. */
+       u_int     deleted :1;   /*    02: Page type and delete flag. */
+       u_int     type    :7;
+       u_int8_t  data[1];      /* Variable length key/data item. */
+} BKEYDATA;
+
+/* Get a BKEYDATA item for a specific index. */
+#define        GET_BKEYDATA(pg, indx)                                          \
+       ((BKEYDATA *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new BKEYDATA item to the page, with and
+ * without the index value.
+ */
+#define        BKEYDATA_SIZE(len)                                              \
+       ALIGN((len) + SSZA(BKEYDATA, data), 4)
+#define        BKEYDATA_PSIZE(len)                                             \
+       (BKEYDATA_SIZE(len) + sizeof(db_indx_t))
+
+/*
+ * The second and third types are B_DUPLICATE and B_OVERFLOW, represented
+ * by the BOVERFLOW structure:
+ *
+ *     +-----------------------------------+
+ *     | total len |    type   |   unused  |
+ *     +-----------------------------------+
+ *     | nxt: page |  nxt: off | nxt: len  |
+ *     +-----------------------------------+
+ */
+typedef struct _boverflow {
+       db_indx_t unused1;      /* 00-01: Padding, unused. */
+       u_int     deleted :1;   /*    02: Page type and delete flag. */
+       u_int     type    :7;
+       u_int8_t  unused2;      /*    03: Padding, unused. */
+       db_pgno_t pgno;         /* 04-07: Next page number. */
+       u_int32_t tlen;         /* 08-11: Total length of item. */
+} BOVERFLOW;
+
+/* Get a BOVERFLOW item for a specific index. */
+#define        GET_BOVERFLOW(pg, indx)                                         \
+       ((BOVERFLOW *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new BOVERFLOW item to the page, with and
+ * without the index value.
+ */
+#define        BOVERFLOW_SIZE                                                  \
+       ALIGN(sizeof(BOVERFLOW), 4)
+#define        BOVERFLOW_PSIZE                                                 \
+       (BOVERFLOW_SIZE + sizeof(db_indx_t))
+
+/*
+ * Btree leaf and hash page layouts group indices in sets of two, one
+ * for the key and one for the data.  Everything else does it in sets
+ * of one to save space.  I use the following macros so that it's real
+ * obvious what's going on...
+ */
+#define        O_INDX  1
+#define        P_INDX  2
+
+/************************************************************************
+ BTREE INTERNAL PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * Btree internal entry.
+ *
+ *     +-----------------------------------+
+ *     | leaf pgno |   type    | data ...  |
+ *     +-----------------------------------+
+ */
+typedef struct _binternal {
+       db_indx_t len;          /* 00-01: Key/data item length. */
+       u_int      deleted :1;  /*    02: Page type and delete flag. */
+       u_int      type   :7;
+       u_int8_t   unused;      /*    03: Padding, unused. */
+       db_pgno_t  pgno;        /* 04-07: Page number of referenced page. */
+       db_recno_t nrecs;       /* 08-11: Subtree record count. */
+       u_int8_t   data[1];     /* Variable length key item. */
+} BINTERNAL;
+
+/* Get a BINTERNAL item for a specific index. */
+#define        GET_BINTERNAL(pg, indx)                                         \
+       ((BINTERNAL *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new BINTERNAL item to the page, with and
+ * without the index value.
+ */
+#define        BINTERNAL_SIZE(len)                                             \
+       ALIGN((len) + SSZA(BINTERNAL, data), 4)
+#define        BINTERNAL_PSIZE(len)                                            \
+       (BINTERNAL_SIZE(len) + sizeof(db_indx_t))
+
+/************************************************************************
+ RECNO INTERNAL PAGE LAYOUT
+ ************************************************************************/
+
+/*
+ * The recno internal entry.
+ *
+ *     +-----------------------+
+ *     | leaf pgno | # of recs |
+ *     +-----------------------+
+ *
+ * XXX
+ * Why not fold this into the db_indx_t structure, it's fixed length.
+ */
+typedef struct _rinternal {
+       db_pgno_t  pgno;        /* 00-03: Page number of referenced page. */
+       db_recno_t nrecs;       /* 04-07: Subtree record count. */
+} RINTERNAL;
+
+/* Get a RINTERNAL item for a specific index. */
+#define        GET_RINTERNAL(pg, indx)                                         \
+       ((RINTERNAL *)P_ENTRY(pg, indx))
+
+/*
+ * Page space required to add a new RINTERNAL item to the page, with and
+ * without the index value.
+ */
+#define        RINTERNAL_SIZE                                                  \
+       ALIGN(sizeof(RINTERNAL), 4)
+#define        RINTERNAL_PSIZE                                                 \
+       (RINTERNAL_SIZE + sizeof(db_indx_t))
+#endif /* _DB_PAGE_H_ */
diff --git a/db2/include/db_shash.h b/db2/include/db_shash.h
new file mode 100644 (file)
index 0000000..f695a2b
--- /dev/null
@@ -0,0 +1,106 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)db_shash.h  10.1 (Sleepycat) 4/12/97
+ */
+
+/* Hash Headers */
+typedef        SH_TAILQ_HEAD(hash_head) DB_HASHTAB;
+
+/*
+ * __db_hashlookup --
+ *
+ * Look up something in a shared memory hash table.  The "elt" argument
+ * should be a key, and cmp must know how to compare a key to whatever
+ * structure it is that appears in the hash table.  The comparison function
+ * is called as cmp(elt, table_elt); a non-zero result ends the search.
+ * begin: address of the beginning of the hash table.
+ * type: the structure type of the elements that are linked in each bucket.
+ * field: the name of the field by which the "type" structures are linked.
+ * elt: the item for which we are searching in the hash table.
+ * r: the variable into which we'll store the element if we find it.
+ * n: the number of buckets in the hash table.
+ * hash: the hash function that operates on elements of the type of elt
+ * cmp: compare elements of the type of elt with those in the table (of
+ *     type "type").
+ *
+ * If the element is not in the hash table, this macro exits with r
+ * set to NULL.
+ */
+#define        __db_hashlookup(begin, type, field, elt, r, n, hash, cmp) do {  \
+       DB_HASHTAB *__bucket;                                           \
+       u_int32_t __ndx;                                                \
+                                                                       \
+       __ndx = hash(elt) % (n);                                        \
+       __bucket = &begin[__ndx];                                       \
+       for (r = SH_TAILQ_FIRST(__bucket, type);                        \
+           r != NULL; r = SH_TAILQ_NEXT(r, field, type))               \
+               if (cmp(elt, r))                                        \
+                       break;                                          \
+} while(0)
+
+/*
+ * __db_hashinsert --
+ *
+ * Insert a new entry into the hash table.  This assumes that lookup has
+ * failed; don't call it if you haven't already called __db_hashlookup.
+ * begin: the beginning address of the hash table.
+ * type: the structure type of the elements that are linked in each bucket.
+ * field: the name of the field by which the "type" structures are linked.
+ * elt: the item to be inserted.
+ * n: the number of buckets in the hash table.
+ * hash: the hash function that operates on elements of the type of elt
+ */
+#define        __db_hashinsert(begin, type, field, elt, n, hash) do {          \
+       u_int32_t __ndx;                                                \
+       DB_HASHTAB *__bucket;                                           \
+                                                                       \
+       __ndx = hash(elt) % (n);                                        \
+       __bucket = &begin[__ndx];                                       \
+       SH_TAILQ_INSERT_HEAD(__bucket, elt, field, type);               \
+} while(0)
+
+/*
+ * __db_hashremove --
+ *     Remove the entry with a key == elt.
+ * begin: address of the beginning of the hash table.
+ * type: the structure type of the elements that are linked in each bucket.
+ * field: the name of the field by which the "type" structures are linked.
+ * elt: the item to be deleted.
+ * n: the number of buckets in the hash table.
+ * hash: the hash function that operates on elements of the type of elt
+ * cmp: compare elements of the type of elt with those in the table (of
+ *     type "type").
+ */
+#define        __db_hashremove(begin, type, field, elt, n, hash, cmp) {        \
+       u_int32_t __ndx;                                                \
+       DB_HASHTAB *__bucket;                                           \
+       SH_TAILQ_ENTRY *__entp;                                         \
+                                                                       \
+       __ndx = hash(elt) % (n);                                        \
+       __bucket = &begin[__ndx];                                       \
+       __db_hashlookup(begin, type, field, elt, __entp, n, hash, cmp); \
+       SH_TAILQ_REMOVE(__bucket, __entp, field, type);                 \
+}
+
+/*
+ * __db_hashremove_el --
+ *     Given the object "obj" in the table, remove it.
+ * begin: address of the beginning of the hash table.
+ * type: the structure type of the elements that are linked in each bucket.
+ * field: the name of the field by which the "type" structures are linked.
+ * obj: the object in the table that we wish to delete.
+ * n: the number of buckets in the hash table.
+ * hash: the hash function that operates on elements of the type of obj
+ */
+#define        __db_hashremove_el(begin, type, field, obj, n, hash) {          \
+       u_int32_t __ndx;                                                \
+       DB_HASHTAB *__bucket;                                           \
+                                                                       \
+       __ndx = hash(obj) % (n);                                        \
+       __bucket = &begin[__ndx];                                       \
+       SH_TAILQ_REMOVE(__bucket, obj, field, type);                    \
+}
diff --git a/db2/include/db_swap.h b/db2/include/db_swap.h
new file mode 100644 (file)
index 0000000..278282f
--- /dev/null
@@ -0,0 +1,105 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)db_swap.h   10.3 (Sleepycat) 6/10/97
+ */
+
+#ifndef _DB_SWAP_H_
+#define        _DB_SWAP_H_
+
+/*
+ * Little endian <==> big endian 32-bit swap macros.
+ *     M_32_SWAP       swap a memory location
+ *     P_32_COPY       copy potentially unaligned 4 byte quantities
+ *     P_32_SWAP       swap a referenced memory location
+ */
+#define        M_32_SWAP(a) {                                                  \
+       u_int32_t _tmp;                                                 \
+       _tmp = a;                                                       \
+       ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[3];                   \
+       ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[2];                   \
+       ((u_int8_t *)&a)[2] = ((u_int8_t *)&_tmp)[1];                   \
+       ((u_int8_t *)&a)[3] = ((u_int8_t *)&_tmp)[0];                   \
+}
+#define        P_32_COPY(a, b) {                                               \
+       ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0];                        \
+       ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1];                        \
+       ((u_int8_t *)b)[2] = ((u_int8_t *)a)[2];                        \
+       ((u_int8_t *)b)[3] = ((u_int8_t *)a)[3];                        \
+}
+#define        P_32_SWAP(a) {                                                  \
+       u_int32_t _tmp;                                                 \
+       P_32_COPY(a, &_tmp);                                            \
+       ((u_int8_t *)a)[0] = ((u_int8_t *)&_tmp)[3];                    \
+       ((u_int8_t *)a)[1] = ((u_int8_t *)&_tmp)[2];                    \
+       ((u_int8_t *)a)[2] = ((u_int8_t *)&_tmp)[1];                    \
+       ((u_int8_t *)a)[3] = ((u_int8_t *)&_tmp)[0];                    \
+}
+
+/*
+ * Little endian <==> big endian 16-bit swap macros.
+ *     M_16_SWAP       swap a memory location
+ *     P_16_COPY       copy potentially unaligned 2 byte quantities
+ *     P_16_SWAP       swap a referenced memory location
+ */
+#define        M_16_SWAP(a) {                                                  \
+       u_int16_t _tmp;                                                 \
+       _tmp = (u_int16_t)a;                                            \
+       ((u_int8_t *)&a)[0] = ((u_int8_t *)&_tmp)[1];                   \
+       ((u_int8_t *)&a)[1] = ((u_int8_t *)&_tmp)[0];                   \
+}
+#define        P_16_COPY(a, b) {                                               \
+       ((u_int8_t *)b)[0] = ((u_int8_t *)a)[0];                        \
+       ((u_int8_t *)b)[1] = ((u_int8_t *)a)[1];                        \
+}
+#define        P_16_SWAP(a) {                                                  \
+       u_int16_t _tmp;                                                 \
+       P_16_COPY(a, &_tmp);                                            \
+       ((u_int8_t *)a)[0] = ((u_int8_t *)&_tmp)[1];                    \
+       ((u_int8_t *)a)[1] = ((u_int8_t *)&_tmp)[0];                    \
+}
+
+#define        SWAP32(p) {                                                     \
+       P_32_SWAP(p);                                                   \
+       (p) += sizeof(u_int32_t);                                       \
+}
+#define        SWAP16(p) {                                                     \
+       P_16_SWAP(p);                                                   \
+       (p) += sizeof(u_int16_t);                                       \
+}
+#endif /* !_DB_SWAP_H_ */
diff --git a/db2/include/hash.h b/db2/include/hash.h
new file mode 100644 (file)
index 0000000..cb8ea35
--- /dev/null
@@ -0,0 +1,211 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     Margo Seltzer.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1990, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)hash.h      10.6 (Sleepycat) 8/18/97
+ */
+
+/* Cursor structure definitions. */
+typedef struct cursor_t {
+       DBC             *db_cursor;
+       db_pgno_t       bucket;         /* Bucket we are traversing. */
+       DB_LOCK         lock;           /* Lock held on the current bucket. */
+       PAGE            *pagep;         /* The current page. */
+       db_pgno_t       pgno;           /* Current page number. */
+       db_indx_t       bndx;           /* Index within the current page. */
+       PAGE            *dpagep;        /* Duplicate page pointer. */
+       db_pgno_t       dpgno;          /* Duplicate page number. */
+       db_indx_t       dndx;           /* Index within a duplicate set. */
+       db_indx_t       dup_off;        /* Offset within a duplicate set. */
+       db_indx_t       dup_len;        /* Length of current duplicate. */
+       db_indx_t       dup_tlen;       /* Total length of duplicate entry. */
+       u_int32_t       seek_size;      /* Number of bytes we need for add. */
+       db_pgno_t       seek_found_page;/* Page on which we can insert. */
+       u_int32_t       big_keylen;     /* Length of big_key buffer. */
+       void            *big_key;       /* Temporary buffer for big keys. */
+       u_int32_t       big_datalen;    /* Length of big_data buffer. */
+       void            *big_data;      /* Temporary buffer for big data. */
+#define        H_OK            0x0001
+#define        H_NOMORE        0x0002
+#define        H_DELETED       0x0004
+#define        H_ISDUP         0x0008
+#define        H_EXPAND        0x0020
+       u_int32_t       flags;          /* Is cursor inside a dup set. */
+} HASH_CURSOR;
+
+#define        IS_VALID(C) ((C)->bucket != BUCKET_INVALID)
+
+
+typedef struct htab {          /* Memory resident data structure. */
+       DB *dbp;                /* Pointer to parent db structure. */
+       DB_LOCK hlock;          /* Metadata page lock. */
+       HASHHDR *hdr;           /* Pointer to meta-data page. */
+       u_int32_t (*hash) __P((const void *, u_int32_t)); /* Hash Function */
+       PAGE *split_buf;        /* Temporary buffer for splits. */
+       int local_errno;        /* Error Number -- for DBM compatibility */
+       u_long hash_accesses;   /* Number of accesses to this table. */
+       u_long hash_collisions; /* Number of collisions on search. */
+       u_long hash_expansions; /* Number of times we added a bucket. */
+       u_long hash_overflows;  /* Number of overflow pages. */
+       u_long hash_bigpages;   /* Number of big key/data pages. */
+} HTAB;
+
+/*
+ * Macro used for interface functions to set the txnid in the DBP.
+ */
+#define        SET_LOCKER(D, T) ((D)->txn = (T))
+
+/*
+ * More interface macros used to get/release the meta data page.
+ */
+#define        GET_META(D, H) {                                                \
+       int _r;                                                         \
+       if (F_ISSET(D, DB_AM_LOCKING) && !F_ISSET(D, DB_AM_RECOVER)) {  \
+               (D)->lock.pgno = BUCKET_INVALID;                        \
+               if ((_r = lock_get((D)->dbenv->lk_info,                 \
+                   (D)->txn == NULL ? (D)->locker : (D)->txn->txnid,   \
+                   0, &(D)->lock_dbt, DB_LOCK_READ,                    \
+                   &(H)->hlock)) != 0)                                 \
+                       return (_r < 0 ? EAGAIN : _r);                  \
+       }                                                               \
+       if ((_r = __ham_get_page(D, 0, (PAGE **)&((H)->hdr))) != 0) {   \
+               if ((H)->hlock) {                                       \
+                       (void)lock_put((D)->dbenv->lk_info, (H)->hlock);\
+                       (H)->hlock = 0;                                 \
+               }                                                       \
+               return (_r);                                            \
+       }                                                               \
+}
+
+#define        RELEASE_META(D, H) {                                            \
+       if (!F_ISSET(D, DB_AM_RECOVER) &&                               \
+           (D)->txn == NULL && (H)->hlock)                             \
+               (void)lock_put((H)->dbp->dbenv->lk_info, (H)->hlock);   \
+       (H)->hlock = 0;                                                 \
+       if ((H)->hdr)                                                   \
+               (void)__ham_put_page(D, (PAGE *)(H)->hdr,               \
+                   F_ISSET(D, DB_HS_DIRTYMETA) ? 1 : 0);               \
+       (H)->hdr = NULL;                                                \
+       F_CLR(D, DB_HS_DIRTYMETA);                                      \
+}
+
+#define        DIRTY_META(H, R) { /* Take a write lock on the meta data, flag it dirty. */ \
+       if (F_ISSET((H)->dbp, DB_AM_LOCKING) &&                         \
+           !F_ISSET((H)->dbp, DB_AM_RECOVER)) {                        \
+               DB_LOCK _tmp;           /* Receives the new write lock. */ \
+               (H)->dbp->lock.pgno = BUCKET_INVALID;                   \
+               if (((R) = lock_get((H)->dbp->dbenv->lk_info,           \
+                   (H)->dbp->txn ? (H)->dbp->txn->txnid :              \
+                   (H)->dbp->locker, 0, &(H)->dbp->lock_dbt,           \
+                   DB_LOCK_WRITE, &_tmp)) == 0) {                      \
+                       (R) = lock_put((H)->dbp->dbenv->lk_info,        \
+                           (H)->hlock); /* Drop the lock held so far. */ \
+                       (H)->hlock = _tmp; /* Only set when granted; else keep old. */ \
+               } else if ((R) < 0)     /* Negative results become EAGAIN. */ \
+                       (R) = EAGAIN;                                   \
+       }                                                               \
+       F_SET((H)->dbp, DB_HS_DIRTYMETA);                               \
+}
+
+/* Allocate and discard thread structures. */
+#define        H_GETHANDLE(dbp, dbpp, ret)                                     \
+       if (F_ISSET(dbp, DB_AM_THREAD))                                 \
+               ret = __db_gethandle(dbp, __ham_hdup, dbpp);            \
+       else {                                                          \
+               ret = 0;                                                \
+               *dbpp = dbp;                                            \
+       }
+
+#define        H_PUTHANDLE(dbp) {                                              \
+       if (F_ISSET(dbp, DB_AM_THREAD))                                 \
+               __db_puthandle(dbp);                                    \
+}
+
+/* Test string. */
+#define        CHARKEY                 "%$sniglet^&"
+
+/* Overflow management */
+/*
+ * Overflow page numbers are allocated per split point.  At each doubling of
+ * the table, we can allocate extra pages.  We keep track of how many pages
+ * we've allocated at each point to calculate bucket to page number mapping.
+ */
+#define        BUCKET_TO_PAGE(H, B) \
+       ((B) + 1 + ((B) ? (H)->hdr->spares[__db_log2((B)+1)-1] : 0))
+
+#define        PGNO_OF(H, S, O) (BUCKET_TO_PAGE((H), (1 << (S)) - 1) + (O))
+
+/* Constraints about number of pages and how much data goes on a page. */
+
+#define        MAX_PAGES(H)    UINT32_T_MAX
+#define        MINFILL         0.25
+#define        ISBIG(H, N)     (((N) > ((H)->hdr->pagesize * MINFILL)) ? 1 : 0)
+
+/* Shorthands for accessing structure */
+#define        NDX_INVALID     0xFFFF
+#define        BUCKET_INVALID  0xFFFFFFFF
+
+/* On page duplicates are stored as a string of size-data-size triples. */
+#define        DUP_SIZE(len)   ((len) + 2 * sizeof(db_indx_t))
+
+/* Log message types (subtypes within a record type): two low flag bits (PAIR_*MASK) plus an opcode. */
+#define        PAIR_KEYMASK            0x1
+#define        PAIR_DATAMASK           0x2
+#define        PAIR_ISKEYBIG(N)        ((N) & PAIR_KEYMASK)
+#define        PAIR_ISDATABIG(N)       ((N) & PAIR_DATAMASK)
+#define        OPCODE_OF(N)            ((N) & ~(PAIR_KEYMASK | PAIR_DATAMASK))
+
+#define        PUTPAIR         0x20
+#define        DELPAIR         0x30
+#define        PUTOVFL         0x40
+#define        DELOVFL         0x50
+#define        ALLOCPGNO       0x60
+#define        DELPGNO         0x70
+#define        SPLITOLD        0x80
+#define        SPLITNEW        0x90
+
+#include "hash_auto.h"
+#include "hash_ext.h"
+#include "db_am.h"
+#include "common_ext.h"
diff --git a/db2/include/hash_auto.h b/db2/include/hash_auto.h
new file mode 100644 (file)
index 0000000..5ff1229
--- /dev/null
@@ -0,0 +1,114 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#ifndef ham_AUTO_H
+#define ham_AUTO_H
+
+#define        DB_ham_insdel   (DB_ham_BEGIN + 1)
+
+typedef struct _ham_insdel_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       opcode;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       u_int32_t       ndx;
+       DB_LSN  pagelsn;
+       DBT     key;
+       DBT     data;
+} __ham_insdel_args;
+
+
+#define        DB_ham_newpage  (DB_ham_BEGIN + 2)
+
+typedef struct _ham_newpage_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       opcode;
+       u_int32_t       fileid;
+       db_pgno_t       prev_pgno;
+       DB_LSN  prevlsn;
+       db_pgno_t       new_pgno;
+       DB_LSN  pagelsn;
+       db_pgno_t       next_pgno;
+       DB_LSN  nextlsn;
+} __ham_newpage_args;
+
+
+#define        DB_ham_splitmeta        (DB_ham_BEGIN + 3)
+
+typedef struct _ham_splitmeta_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       u_int32_t       bucket;
+       u_int32_t       ovflpoint;
+       u_int32_t       spares;
+       DB_LSN  metalsn;
+} __ham_splitmeta_args;
+
+
+#define        DB_ham_splitdata        (DB_ham_BEGIN + 4)
+
+typedef struct _ham_splitdata_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       u_int32_t       opcode;
+       db_pgno_t       pgno;
+       DBT     pageimage;
+       DB_LSN  pagelsn;
+} __ham_splitdata_args;
+
+
+#define        DB_ham_replace  (DB_ham_BEGIN + 5)
+
+typedef struct _ham_replace_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       u_int32_t       ndx;
+       DB_LSN  pagelsn;
+       int32_t off;
+       DBT     olditem;
+       DBT     newitem;
+       u_int32_t       makedup;
+} __ham_replace_args;
+
+
+#define        DB_ham_newpgno  (DB_ham_BEGIN + 6)
+
+typedef struct _ham_newpgno_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       opcode;
+       u_int32_t       fileid;
+       db_pgno_t       pgno;
+       db_pgno_t       free_pgno;
+       u_int32_t       old_type;
+       db_pgno_t       old_pgno;
+       u_int32_t       new_type;
+       DB_LSN  pagelsn;
+       DB_LSN  metalsn;
+} __ham_newpgno_args;
+
+
+#define        DB_ham_ovfl     (DB_ham_BEGIN + 7)
+
+typedef struct _ham_ovfl_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       fileid;
+       db_pgno_t       start_pgno;
+       u_int32_t       npages;
+       db_pgno_t       free_pgno;
+       DB_LSN  metalsn;
+} __ham_ovfl_args;
+
+#endif
diff --git a/db2/include/hash_ext.h b/db2/include/hash_ext.h
new file mode 100644 (file)
index 0000000..5ae63dc
--- /dev/null
@@ -0,0 +1,120 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __ham_open __P((DB *, DB_INFO *));
+int  __ham_close __P((DB *));
+int __ham_expand_table __P((HTAB *));
+u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
+int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
+void __ham_c_update __P((HTAB *,
+   HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
+int  __ham_hdup __P((DB *, DB *));
+int __ham_insdel_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t, db_pgno_t, u_int32_t,
+    DB_LSN *, DBT *, DBT *));
+int __ham_insdel_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_insdel_read __P((void *, __ham_insdel_args **));
+int __ham_newpage_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t, db_pgno_t, DB_LSN *,
+    db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *));
+int __ham_newpage_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_newpage_read __P((void *, __ham_newpage_args **));
+int __ham_splitmeta_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t, u_int32_t, u_int32_t,
+    DB_LSN *));
+int __ham_splitmeta_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_splitmeta_read __P((void *, __ham_splitmeta_args **));
+int __ham_splitdata_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t, db_pgno_t, DBT *,
+    DB_LSN *));
+int __ham_splitdata_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_splitdata_read __P((void *, __ham_splitdata_args **));
+int __ham_replace_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, u_int32_t, DB_LSN *,
+    int32_t, DBT *, DBT *, u_int32_t));
+int __ham_replace_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_replace_read __P((void *, __ham_replace_args **));
+int __ham_newpgno_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, u_int32_t, db_pgno_t, db_pgno_t,
+    u_int32_t, db_pgno_t, u_int32_t, DB_LSN *,
+    DB_LSN *));
+int __ham_newpgno_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_newpgno_read __P((void *, __ham_newpgno_args **));
+int __ham_ovfl_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t, db_pgno_t, u_int32_t, db_pgno_t,
+    DB_LSN *));
+int __ham_ovfl_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_ovfl_read __P((void *, __ham_ovfl_args **));
+int __ham_init_print __P((DB_ENV *));
+int __ham_init_recover __P((DB_ENV *));
+int __ham_pgin __P((db_pgno_t, void *, DBT *));
+int __ham_pgout __P((db_pgno_t, void *, DBT *));
+int __ham_mswap __P((void *));
+#ifdef DEBUG
+void __ham_dump_bucket __P((HTAB *, u_int32_t));
+#endif
+int __ham_add_dup __P((HTAB *, HASH_CURSOR *, DBT *, int));
+void __ham_move_offpage __P((HTAB *, PAGE *, u_int32_t, db_pgno_t));
+u_int32_t __ham_func2 __P((const void *, u_int32_t));
+u_int32_t __ham_func3 __P((const void *, u_int32_t));
+u_int32_t __ham_func4 __P((const void *, u_int32_t));
+u_int32_t __ham_func5 __P((const void *, u_int32_t));
+int __ham_item __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+int __ham_item_reset __P((HTAB *, HASH_CURSOR *));
+void __ham_item_init __P((HASH_CURSOR *));
+int __ham_item_done __P((HTAB *, HASH_CURSOR *, int));
+int __ham_item_last __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+int __ham_item_first __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+int __ham_item_prev __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+int __ham_item_next __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+void __ham_putitem __P((PAGE *p, const DBT *, int));
+int __ham_del_pair __P((HTAB *, HASH_CURSOR *));
+int __ham_replpair __P((HTAB *, HASH_CURSOR *, DBT *, u_int32_t));
+void __ham_onpage_replace __P((PAGE *, size_t, u_int32_t, int32_t,
+    int32_t,  DBT *));
+int __ham_split_page __P((HTAB *, u_int32_t, u_int32_t));
+int __ham_add_el __P((HTAB *, HASH_CURSOR *, const DBT *, const DBT *,
+    int));
+void __ham_copy_item __P((HTAB *, PAGE *, int, PAGE *));
+int __ham_add_ovflpage __P((HTAB *, PAGE *, int, PAGE **));
+int __ham_new_page __P((HTAB *, u_int32_t, u_int32_t, PAGE **));
+int __ham_del_page __P((DB *, PAGE *));
+int __ham_put_page __P((DB *, PAGE *, int32_t));
+int __ham_dirty_page __P((HTAB *, PAGE *));
+int __ham_get_page __P((DB *, db_pgno_t, PAGE **));
+int __ham_overflow_page __P((DB *, u_int32_t, PAGE **));
+#ifdef DEBUG
+int bucket_to_page __P((HTAB *, int));
+#endif
+void __ham_init_ovflpages __P((HTAB *));
+int __ham_get_cpage __P((HTAB *, HASH_CURSOR *, db_lockmode_t));
+int __ham_next_cpage __P((HTAB *, HASH_CURSOR *, db_pgno_t,
+    int, int));
+void __ham_dpair __P((DB *, PAGE *, u_int32_t));
+int __ham_insdel_recover
+    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_newpage_recover
+    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_replace_recover
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_newpgno_recover
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_splitmeta_recover
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_splitdata_recover
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_ovfl_recover
+    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __ham_stat __P((DB *, FILE *));
diff --git a/db2/include/lock.h b/db2/include/lock.h
new file mode 100644 (file)
index 0000000..18d29e8
--- /dev/null
@@ -0,0 +1,194 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)lock.h      10.7 (Sleepycat) 7/29/97
+ */
+
+typedef struct __db_lockobj    DB_LOCKOBJ;
+
+#define DB_DEFAULT_LOCK_FILE   "__db_lock.share"
+#define DB_LOCK_DEFAULT_N      5000
+#define DB_LOCK_MAXID          0x7fffffff
+
+/*
+ * The lock region consists of:
+ *     The DB_LOCKREGION structure (sizeof(DB_LOCKREGION)).
+ *     The conflict matrix of nmodes * nmodes bytes (nmodes * nmodes).
+ *     The hash table for object lookup (hashsize * sizeof(DB_HASHTAB)).
+ *     The locks themselves (maxlocks * sizeof(struct __db_lock)).
+ *     The objects being locked (maxlocks * sizeof(DB_LOCKOBJ)).
+ *     String space to represent the DBTs that are the objects being locked.
+ */
+struct __db_lockregion {
+       RLAYOUT         hdr;            /* Shared region header. */
+       u_int32_t       magic;          /* lock magic number */
+       u_int32_t       version;        /* version number */
+       u_int32_t       id;             /* unique id generator */
+       u_int32_t       need_dd;        /* flag for deadlock detector */
+       u_int32_t       detect;         /* run dd on every conflict */
+       SH_TAILQ_HEAD(lock_header) free_locks;  /* free lock header */
+       SH_TAILQ_HEAD(obj_header) free_objs;    /* free obj header */
+       u_int32_t       maxlocks;       /* maximum number of locks in table */
+       u_int32_t       table_size;     /* size of hash table */
+       u_int32_t       nmodes;         /* number of lock modes */
+       u_int32_t       numobjs;        /* number of objects */
+       u_int32_t       nlockers;       /* number of lockers */
+       size_t          increment;      /* how much to grow region */
+       size_t          hash_off;       /* offset of hash table */
+       size_t          mem_off;        /* offset of memory region */
+       size_t          mem_bytes;      /* number of bytes in memory region */
+       u_int32_t       nconflicts;     /* number of lock conflicts */
+       u_int32_t       nrequests;      /* number of lock gets */
+       u_int32_t       nreleases;      /* number of lock puts */
+       u_int32_t       ndeadlocks;     /* number of deadlocks */
+};
+
+/* Macros to lock/unlock the region. */
+#define        LOCK_LOCKREGION(lt)                                             \
+       (void)__db_mutex_lock(&(lt)->region->hdr.lock,(lt)->fd,         \
+           (lt)->dbenv == NULL ? NULL : (lt)->dbenv->db_yield)
+#define        UNLOCK_LOCKREGION(lt)                                           \
+       (void)__db_mutex_unlock(&(lt)->region->hdr.lock, (lt)->fd)
+
+/*
+ * Since we will be keeping DBTs in shared memory, we need the equivalent
+ * of a DBT that will work in shared memory.
+ */
+typedef struct __sh_dbt {
+       u_int32_t size;
+       ssize_t off;
+} SH_DBT;
+
+#define SH_DBT_PTR(p)  ((void *)(((u_int8_t *)(p)) + (p)->off))
+
+/*
+ * The lock table is the per-process cookie returned from a lock_open call.
+ */
+struct __db_lockobj {
+       SH_DBT  lockobj;                /* Identifies object locked. */
+       SH_TAILQ_ENTRY links;           /* Links for free list. */
+       union {
+               SH_TAILQ_HEAD(_wait) _waiters;  /* List of waiting locks. */
+               u_int32_t       _dd_id;         /* Deadlock detector id. */
+       } wlinks;
+       union {
+               SH_LIST_HEAD(_held) _heldby;    /* Locks held by this locker. */
+               SH_TAILQ_HEAD(_hold) _holders;  /* List of held locks. */
+       } dlinks;
+#define        DB_LOCK_OBJTYPE         1
+#define        DB_LOCK_LOCKER          2
+       u_int8_t type;                  /* Real object or locker id. */
+};
+
+
+#define dd_id  wlinks._dd_id
+#define        waiters wlinks._waiters
+#define        holders dlinks._holders
+#define        heldby  dlinks._heldby
+
+struct __db_locktab {
+       DB_ENV          *dbenv;         /* Environment. */
+       int              fd;            /* mapped file descriptor */
+       DB_LOCKREGION   *region;        /* address of shared memory region */
+       DB_HASHTAB      *hashtab;       /* Beginning of hash table. */
+       size_t          reg_size;       /* last known size of lock region */
+       void            *mem;           /* Beginning of string space. */
+       u_int8_t        *conflicts;     /* Pointer to conflict matrix. */
+};
+
+/* Test for conflicts: entry [HELD][WANTED] of T's conflict matrix, nmodes entries per row. */
+#define CONFLICTS(T, HELD, WANTED) \
+       (T)->conflicts[(HELD) * (T)->region->nmodes + (WANTED)]
+
+/*
+ * Status of a lock.
+ */
+typedef enum {
+       DB_LSTAT_ABORTED,               /* Lock belongs to an aborted txn. */
+       DB_LSTAT_ERR,                   /* Lock is bad. */
+       DB_LSTAT_FREE,                  /* Lock is unallocated. */
+       DB_LSTAT_HELD,                  /* Lock is currently held. */
+       DB_LSTAT_NOGRANT,               /* Lock was not granted. */
+       DB_LSTAT_PENDING,               /* Lock was waiting and has been
+                                        * promoted; waiting for the owner
+                                        * to run and upgrade it to held. */
+       DB_LSTAT_WAITING                /* Lock is on the wait queue. */
+} db_status_t;
+
+/*
+ * Resources in the lock region.  Used to indicate which resource
+ * is running low when we need to grow the region.
+ */
+typedef enum {
+       DB_LOCK_MEM, DB_LOCK_OBJ, DB_LOCK_LOCK
+} db_resource_t;
+
+struct __db_lock {
+       /*
+        * Wait on mutex to wait on lock.  You reference your own mutex with
+        * ID 0 and others reference your mutex with ID 1.
+        */
+       db_mutex_t      mutex;
+
+       u_int32_t       holder;         /* Who holds this lock. */
+       SH_TAILQ_ENTRY  links;          /* Free or holder/waiter list. */
+       SH_LIST_ENTRY   locker_links;   /* List of locks held by a locker. */
+       u_int32_t       refcount;       /* Reference count on the lock. */
+       db_lockmode_t   mode;           /* What sort of lock. */
+       ssize_t         obj;            /* Relative offset of object struct. */
+       db_status_t     status;         /* Status of this lock. */
+};
+
+/*
+ * We cannot return pointers to the user (else we cannot easily grow regions),
+ * so we return offsets in the region.  These must be converted to and from
+ * regular pointers (byte offsets from (lt)->region).  Always use the macros below.
+ */
+#define OFFSET_TO_LOCK(lt, off)        \
+       ((struct __db_lock *)((u_int8_t *)((lt)->region) + (off)))
+#define LOCK_TO_OFFSET(lt, lock) \
+       ((size_t)((u_int8_t *)(lock) - (u_int8_t *)((lt)->region)))
+#define OFFSET_TO_OBJ(lt, off) \
+       ((DB_LOCKOBJ *)((u_int8_t *)((lt)->region) + (off)))
+#define OBJ_TO_OFFSET(lt, obj) \
+       ((size_t)((u_int8_t *)(obj) - (u_int8_t *)((lt)->region)))
+
+/*
+ * The lock header contains the region structure and the conflict matrix.
+ * Aligned to a large boundary because we don't know what the underlying
+ * type of the hash table elements are.  M * M bytes are reserved for the matrix.
+ */
+#define LOCK_HASH_ALIGN        8
+#define LOCK_HEADER_SIZE(M)    \
+       ((size_t)(sizeof(DB_LOCKREGION) + ALIGN((M) * (M), LOCK_HASH_ALIGN)))
+
+/*
+ * For the full region, we need to add the locks, the objects, the hash table
+ * and the string space (which is 16 bytes per lock): N locks/objects, H hash entries.
+ */
+#define STRING_SIZE(N) (16 * (N))
+
+#define LOCK_REGION_SIZE(M, N, H)                                      \
+       (ALIGN(LOCK_HEADER_SIZE(M) +                                    \
+       (H) * sizeof(DB_HASHTAB), MUTEX_ALIGNMENT) +                    \
+       (N) * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) +        \
+       ALIGN((N) * sizeof(DB_LOCKOBJ), sizeof(size_t)) +               \
+       ALIGN(STRING_SIZE(N), sizeof(size_t)))
+
+#ifdef DEBUG
+#define        LOCK_DEBUG_LOCKERS      0x0001
+#define        LOCK_DEBUG_LOCK         0x0002
+#define        LOCK_DEBUG_OBJ          0x0004
+#define        LOCK_DEBUG_CONF         0x0008
+#define        LOCK_DEBUG_MEM          0x0010
+#define        LOCK_DEBUG_BUCKET       0x0020
+#define LOCK_DEBUG_OBJECTS     0x0040
+#define        LOCK_DEBUG_ALL          0xFFFF
+
+#define        LOCK_DEBUG_NOMUTEX      0x0100
+#endif
+
+#include "lock_ext.h"
diff --git a/db2/include/lock_ext.h b/db2/include/lock_ext.h
new file mode 100644 (file)
index 0000000..59d5072
--- /dev/null
@@ -0,0 +1,8 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __lock_getobj  __P((DB_LOCKTAB *,
+    u_int32_t, DBT *, u_int32_t type, DB_LOCKOBJ **));
+int __lock_cmp __P((DBT *, DB_LOCKOBJ *));
+int __lock_locker_cmp __P((u_int32_t, DB_LOCKOBJ *));
+int __lock_ohash __P((DBT *));
+u_int32_t __lock_locker_hash __P((u_int32_t));
+u_int32_t __lock_lhash __P((DB_LOCKOBJ *));
diff --git a/db2/include/log.h b/db2/include/log.h
new file mode 100644 (file)
index 0000000..970dfd1
--- /dev/null
@@ -0,0 +1,157 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)log.h       10.8 (Sleepycat) 8/18/97
+ */
+
+#ifndef _LOG_H_
+#define        _LOG_H_
+
+struct __fname;                typedef struct __fname FNAME;
+struct __hdr;          typedef struct __hdr HDR;
+struct __log;          typedef struct __log LOG;
+struct __log_persist;  typedef struct __log_persist LOGP;
+
+#define        MAXLFNAME       99999           /* Maximum log file name. */
+#define        LFNAME          "log.%05d"      /* Log file name template. */
+
+                                       /* Default log name. */
+#define DB_DEFAULT_LOG_FILE    "__db_log.share"
+
+#define        DEFAULT_MAX     (10 * 1048576)  /* 10 Mb. */
+
+/* Macros to return per-process address, offsets (in bytes relative to (base)->addr). */
+#define        ADDR(base, offset)      ((void *)((u_int8_t *)((base)->addr) + (offset)))
+#define        OFFSET(base, p)         ((u_int8_t *)(p) - (u_int8_t *)((base)->addr))
+
+/* Macros to lock/unlock the region and threads. */
+#define        LOCK_LOGTHREAD(dblp)                                            \
+       if (F_ISSET(dblp, DB_AM_THREAD))                                \
+               (void)__db_mutex_lock(&(dblp)->mutex, -1,               \
+                   (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield)
+#define        UNLOCK_LOGTHREAD(dblp)                                          \
+       if (F_ISSET(dblp, DB_AM_THREAD))                                \
+               (void)__db_mutex_unlock(&(dblp)->mutex, -1);
+#define        LOCK_LOGREGION(dblp)                                            \
+       (void)__db_mutex_lock(&((RLAYOUT *)(dblp)->lp)->lock,           \
+           (dblp)->fd, (dblp)->dbenv == NULL ? NULL : (dblp)->dbenv->db_yield)
+#define        UNLOCK_LOGREGION(dblp)                                          \
+       (void)__db_mutex_unlock(&((RLAYOUT *)(dblp)->lp)->lock, (dblp)->fd)
+
+/*
+ * The per-process table that maps log file-id's to DB structures.
+ */
+typedef        struct __db_entry {
+       DB      *dbp;                   /* Associated DB structure. */
+       int     refcount;               /* Reference counted. */
+       int     deleted;                /* File was not found during open. */
+} DB_ENTRY;
+
+/*
+ * DB_LOG
+ *     Per-process log structure.
+ */
+struct __db_log {
+/* These fields need to be protected for multi-threaded support. */
+       db_mutex_t      mutex;          /* Mutex for thread protection. */
+
+       DB_ENTRY *dbentry;              /* Recovery file-id mapping. */
+#define        DB_GROW_SIZE    64
+       u_int32_t dbentry_cnt;          /* Entries.  Grows by DB_GROW_SIZE. */
+
+/*
+ * These fields are always accessed while the region lock is held, so they do
+ * not have to be protected by the thread lock as well OR, they are only used
+ * when threads are not being used, i.e. most cursor operations are disallowed
+ * on threaded logs.
+ */
+       u_int32_t lfname;               /* Log file "name". */
+       int       lfd;                  /* Log file descriptor. */
+
+       DB_LSN    c_lsn;                /* Cursor: current LSN. */
+       DBT       c_dbt;                /* Cursor: return DBT structure. */
+       int       c_fd;                 /* Cursor: file descriptor. */
+       u_int32_t c_off;                /* Cursor: previous record offset. */
+       u_int32_t c_len;                /* Cursor: current record length. */
+
+/* These fields are not protected. */
+       LOG      *lp;                   /* Address of the shared LOG. */
+
+       DB_ENV   *dbenv;                /* Reference to error information. */
+
+       void     *maddr;                /* Address of mmap'd region. */
+       void     *addr;                 /* Address of shalloc() region. */
+       int       fd;                   /* Region file descriptor. */
+
+       u_int32_t flags;                /* Support the DB_AM_XXX flags. */
+};
+
+/*
+ * HDR --
+ *     Log record header.
+ */
+struct __hdr {
+       u_int32_t prev;                 /* Previous offset. */
+       u_int32_t cksum;                /* Current checksum. */
+       u_int32_t len;                  /* Current length. */
+};
+
+struct __log_persist {
+       u_int32_t magic;                /* DB_LOGMAGIC */
+       u_int32_t version;              /* DB_LOGVERSION */
+
+       u_int32_t lg_max;               /* Maximum file size. */
+       int       mode;                 /* Log file mode. */
+};
+
+/*
+ * LOG --
+ *     Shared log region.  One of these is allocated in shared memory,
+ *     and describes the log.
+ */
+struct __log {
+       RLAYOUT   rlayout;              /* General region information. */
+
+       LOGP      persist;              /* Persistent information. */
+
+       SH_TAILQ_HEAD(__fq) fq;         /* List of file names. */
+
+       DB_LSN    lsn;                  /* LSN at current file offset. */
+       DB_LSN    c_lsn;                /* LSN of the last checkpoint. */
+       DB_LSN    s_lsn;                /* LSN of the last sync. */
+       DB_LSN    span_lsn;             /* LSN spanning buffer write. */
+
+       u_int32_t len;                  /* Length of the last record. */
+
+       size_t    b_off;                /* Current offset in the buffer. */
+       u_int32_t w_off;                /* Current write offset in the file. */
+
+       time_t    chkpt;                /* Time of the last checkpoint. */
+       u_int32_t written;              /* Bytes written since checkpoint. */
+
+       u_int8_t buf[4 * 1024];         /* Log buffer. */
+};
+
+/*
+ * FNAME --
+ *     File name and id.
+ */
+struct __fname {
+       SH_TAILQ_ENTRY q;               /* File name queue. */
+
+       u_int16_t ref;                  /* Reference count. */
+
+       u_int32_t id;                   /* Logging file id. */
+       DBTYPE    s_type;               /* Saved DB type. */
+
+       u_int32_t fileid_off;           /* Unique file id offset. */
+
+       size_t    name_off;             /* Name offset. */
+};
+
+#include "log_auto.h"
+#include "log_ext.h"
+#endif /* _LOG_H_ */
diff --git a/db2/include/log_auto.h b/db2/include/log_auto.h
new file mode 100644 (file)
index 0000000..820aac6
--- /dev/null
@@ -0,0 +1,27 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#ifndef log_AUTO_H
+#define log_AUTO_H
+
+#define        DB_log_register (DB_log_BEGIN + 1)
+
+typedef struct _log_register_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       DBT     name;
+       DBT     uid;
+       u_int32_t       id;
+       DBTYPE  ftype;
+} __log_register_args;
+
+
+#define        DB_log_unregister       (DB_log_BEGIN + 2)
+
+typedef struct _log_unregister_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       id;
+} __log_unregister_args;
+
+#endif
diff --git a/db2/include/log_ext.h b/db2/include/log_ext.h
new file mode 100644 (file)
index 0000000..d5c9dd6
--- /dev/null
@@ -0,0 +1,29 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __log_find __P((DB_ENV *, LOG *, int *));
+int __log_valid __P((DB_ENV *, LOG *, int));
+int __log_register_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    DBT *, DBT *, u_int32_t, DBTYPE));
+int __log_register_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __log_register_read __P((void *, __log_register_args **));
+int __log_unregister_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t));
+int __log_unregister_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __log_unregister_read __P((void *, __log_unregister_args **));
+int __log_init_print __P((DB_ENV *));
+int __log_init_recover __P((DB_ENV *));
+int __log_findckp __P((DB_LOG *, DB_LSN *));
+int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int));
+int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+int __log_name __P((DB_ENV *, int, char **));
+int __log_register_recover
+    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __log_unregister_recover
+    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __log_add_logid __P((DB_LOG *, DB *, u_int32_t));
+int __db_fileid_to_db __P((DB_LOG *, DB **, u_int32_t));
+void __log_close_files __P((DB_LOG *));
+void __log_rem_logid __P((DB_LOG *, u_int32_t));
diff --git a/db2/include/mp.h b/db2/include/mp.h
new file mode 100644 (file)
index 0000000..4872596
--- /dev/null
@@ -0,0 +1,266 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)mp.h        10.14 (Sleepycat) 8/18/97
+ */
+
+struct __bh;           typedef struct __bh BH;
+struct __db_mpreg;     typedef struct __db_mpreg DB_MPREG;
+struct __mpool;                typedef struct __mpool MPOOL;
+struct __mpoolfile;    typedef struct __mpoolfile MPOOLFILE;
+
+                                       /* Default mpool name. */
+#define        DB_DEFAULT_MPOOL_FILE   "__db_mpool.share"
+
+/*
+ *  We default to 128K (16 8K pages) if the user doesn't specify, and
+ * require a minimum of 20K.
+ */
+#define        DB_CACHESIZE_DEF        (128 * 1024)
+#define        DB_CACHESIZE_MIN        ( 20 * 1024)
+
+/* Macro to return per-process address, offsets. */
+#define        ADDR(base, offset)      ((void *)((u_int8_t *)((base)->addr) + offset))
+#define        OFFSET(base, p)         ((u_int8_t *)(p) - (u_int8_t *)(base)->addr)
+
+#define        INVALID         0               /* Invalid shared memory offset. */
+#define        TEMPORARY       "<tmp>"         /* Temporary file name. */
+
+/*
+ * There are two kinds of locks in the mpool code.  The first is the region
+ * lock, used to serialize modifications to all data structures.  The second
+ * is a per-buffer header lock.  The locking order is as follows:
+ *
+ * Process searching for a buffer:
+ *     Acquire the region lock.
+ *     Find the buffer header.
+ *     Increment the reference count (guarantee the buffer stays).
+ *     If the BH_LOCKED flag is set:
+ *             Release the region lock.
+ *             Acquire the buffer lock.
+ *             Release the buffer lock.
+ *             Acquire the region lock.
+ *     Return the buffer.
+ *
+ * Process reading/writing a buffer:
+ *     Acquire the region lock.
+ *     Find/create the buffer header.
+ *     If reading, increment the reference count (guarantee the buffer stays).
+ *     Set the BH_LOCKED flag.
+ *     Acquire the buffer lock (guaranteed not to block).
+ *     Release the region lock.
+ *     Do the I/O and/or initialize buffer contents.
+ *     Acquire the region lock.
+ *     Clear the BH_LOCKED flag.
+ *     Release the region lock.
+ *     Release the buffer lock.
+ *     If reading, return the buffer.
+ *
+ * Pointers to DB_MPOOL, MPOOL, DB_MPOOLFILE and MPOOLFILE structures are not
+ * reacquired when a region lock is reacquired because they couldn't have been
+ * closed/discarded and because they never move in memory.
+ */
+#define        LOCKINIT(dbmp, mutexp)                                          \
+       if (F_ISSET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION))               \
+               (void)__db_mutex_init(mutexp, (dbmp)->fd)
+
+#define        LOCKHANDLE(dbmp, mutexp)                                        \
+       if (F_ISSET(dbmp, MP_LOCKHANDLE))                               \
+               (void)__db_mutex_lock(mutexp, (dbmp)->fd,               \
+               (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
+#define        UNLOCKHANDLE(dbmp, mutexp)                                      \
+       if (F_ISSET(dbmp, MP_LOCKHANDLE))                               \
+               (void)__db_mutex_unlock(mutexp, (dbmp)->fd)
+
+#define        LOCKREGION(dbmp)                                                \
+       if (F_ISSET(dbmp, MP_LOCKREGION))                               \
+               (void)__db_mutex_lock(&((RLAYOUT *)(dbmp)->mp)->lock,   \
+                   (dbmp)->fd,                                         \
+                   (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
+#define        UNLOCKREGION(dbmp)                                              \
+       if (F_ISSET(dbmp, MP_LOCKREGION))                               \
+               (void)__db_mutex_unlock(&((RLAYOUT *)(dbmp)->mp)->lock, \
+               (dbmp)->fd)
+
+#define        LOCKBUFFER(dbmp, bhp)                                           \
+       if (F_ISSET(dbmp, MP_LOCKREGION))                               \
+               (void)__db_mutex_lock(&(bhp)->mutex, (dbmp)->fd,        \
+                   (dbmp)->dbenv == NULL ? NULL : (dbmp)->dbenv->db_yield)
+#define        UNLOCKBUFFER(dbmp, bhp)                                         \
+       if (F_ISSET(dbmp, MP_LOCKREGION))                               \
+               (void)__db_mutex_unlock(&(bhp)->mutex, (dbmp)->fd)
+
+/*
+ * DB_MPOOL --
+ *     Per-process memory pool structure.
+ */
+struct __db_mpool {
+/* These fields need to be protected for multi-threaded support. */
+       db_mutex_t      mutex;          /* Structure lock. */
+
+                                       /* List of pgin/pgout routines. */
+       LIST_HEAD(__db_mpregh, __db_mpreg) dbregq;
+
+                                       /* List of DB_MPOOLFILE's. */
+       TAILQ_HEAD(__db_mpoolfileh, __db_mpoolfile) dbmfq;
+
+/* These fields are not protected. */
+       DB_ENV     *dbenv;              /* Reference to error information. */
+
+       MPOOL      *mp;                 /* Address of the shared MPOOL. */
+
+       void       *maddr;              /* Address of mmap'd region. */
+       void       *addr;               /* Address of shalloc() region. */
+
+       DB_HASHTAB *htab;               /* Hash table of bucket headers. */
+
+       int         fd;                 /* Underlying mmap'd fd. */
+
+
+#define        MP_ISPRIVATE    0x01            /* Private, so local memory. */
+#define        MP_LOCKHANDLE   0x02            /* Threaded, lock handles and region. */
+#define        MP_LOCKREGION   0x04            /* Concurrent access, lock region. */
+       u_int32_t  flags;
+};
+
+/*
+ * DB_MPREG --
+ *     DB_MPOOL registry of pgin/pgout functions.
+ */
+struct __db_mpreg {
+       LIST_ENTRY(__db_mpreg) q;       /* Linkage for a list such as dbregq. */
+
+       int ftype;                      /* File type. */
+                                       /* Pgin, pgout routines; each returns int. */
+       int (*pgin) __P((db_pgno_t, void *, DBT *));
+       int (*pgout) __P((db_pgno_t, void *, DBT *));
+};
+
+/*
+ * DB_MPOOLFILE --
+ *     Per-process DB_MPOOLFILE information.
+ */
+struct __db_mpoolfile {
+/* These fields need to be protected for multi-threaded support. */
+       db_mutex_t      mutex;          /* Structure lock. */
+
+       int        fd;                  /* Underlying file descriptor. */
+
+       u_int32_t pinref;               /* Pinned block reference count. */
+
+/* These fields are not protected. */
+       TAILQ_ENTRY(__db_mpoolfile) q;  /* Linked list of DB_MPOOLFILE's. */
+
+       char      *path;                /* Initial file path. */
+       DB_MPOOL  *dbmp;                /* Overlying DB_MPOOL. */
+       MPOOLFILE *mfp;                 /* Underlying MPOOLFILE. */
+
+       void      *addr;                /* Address of mmap'd region. */
+       size_t     len;                 /* Length of mmap'd region, see addr. */
+
+#define        MP_PATH_ALLOC   0x01            /* Path is allocated memory. */
+#define        MP_PATH_TEMP    0x02            /* Backing file is a temporary. */
+#define        MP_READONLY     0x04            /* File is readonly. */
+       u_int32_t  flags;               /* Presumably an OR of the values above. */
+};
+
+/*
+ * MPOOL --
+ *     Shared memory pool region.  One of these is allocated in shared
+ *     memory, and describes the pool.
+ */
+struct __mpool {
+       RLAYOUT     rlayout;            /* General region information. */
+
+       SH_TAILQ_HEAD(__bhq) bhq;       /* LRU list of buckets. */
+       SH_TAILQ_HEAD(__bhfq) bhfq;     /* Free buckets. */
+       SH_TAILQ_HEAD(__mpfq) mpfq;     /* List of MPOOLFILEs. */
+
+       /*
+        * We make the assumption that the early pages of the file are far
+        * more likely to be retrieved than the later pages, which means
+        * that the top bits are more interesting for hashing since they're
+        * less likely to collide.  On the other hand, since 512 4K pages
+        * represents a 2MB file, only the bottom 9 bits of the page number
+        * are likely to be set.  We XOR in the offset in the MPOOL of the
+        * MPOOLFILE that backs this particular page, since that should also
+        * be unique for the page.
+        */
+#define        BUCKET(mp, mf_offset, pgno)                                     \
+       (((pgno) ^ ((mf_offset) << 9)) % (mp)->htab_buckets)
+
+       size_t      htab;               /* Hash table offset. */
+       size_t      htab_buckets;       /* Number of hash table entries. */
+
+       DB_LSN      lsn;                /* Maximum checkpoint LSN. */
+       int         lsn_cnt;            /* Checkpoint buffers left to write. */
+
+       DB_MPOOL_STAT stat;             /* Global mpool statistics. */
+
+#define        MP_LSN_RETRY    0x01            /* Retry all BH_WRITE buffers. */
+       u_int32_t  flags;
+};
+
+/*
+ * MPOOLFILE --
+ *     Shared DB_MPOOLFILE information.
+ */
+struct __mpoolfile {
+       SH_TAILQ_ENTRY  q;              /* List of MPOOLFILEs */
+
+       u_int32_t ref;                  /* Reference count. */
+
+       int       ftype;                /* File type. */
+       int       can_mmap;             /* If the file can be mmap'd. */
+
+       int       lsn_off;              /* Page's LSN offset. */
+
+       size_t    path_off;             /* File name location. */
+
+       size_t    fileid_off;           /* File identification location. */
+
+       size_t    pgcookie_len;         /* Pgin/pgout cookie length. */
+       size_t    pgcookie_off;         /* Pgin/pgout cookie location. */
+
+       int       lsn_cnt;              /* Checkpoint buffers left to write. */
+
+       DB_MPOOL_FSTAT stat;            /* Per-file mpool statistics. */
+};
+
+/*
+ * BH --
+ *     Buffer header.
+ */
+struct __bh {
+       db_mutex_t      mutex;          /* Structure lock (see LOCKBUFFER). */
+
+       u_int16_t       ref;            /* Reference count. */
+
+#define        BH_CALLPGIN     0x001           /* Page needs to be reworked... */
+#define        BH_DIRTY        0x002           /* Page was modified. */
+#define        BH_DISCARD      0x004           /* Page is useless. */
+#define        BH_LOCKED       0x008           /* Page is locked (I/O in progress). */
+#define        BH_TRASH        0x010           /* Page is garbage. */
+#define        BH_WRITE        0x020           /* Page scheduled for writing. */
+       u_int16_t  flags;               /* Presumably an OR of the BH_* values. */
+
+       SH_TAILQ_ENTRY  q;              /* LRU list of bucket headers. */
+       SH_TAILQ_ENTRY  mq;             /* MPOOLFILE list of bucket headers. */
+
+       db_pgno_t pgno;                 /* Underlying MPOOLFILE page number. */
+       size_t    mf_offset;            /* Associated MPOOLFILE offset. */
+
+       /*
+        * !!!
+        * This array must be size_t aligned -- the DB access methods put PAGE
+        * and other structures into it, and expect to be able to access them
+        * directly.  (We guarantee size_t alignment in the db_mpool(3) manual
+        * page as well.)
+        */
+       u_int8_t   buf[1];              /* Variable length data; [1] is a stub. */
+};
+
+#include "mp_ext.h"
diff --git a/db2/include/mp_ext.h b/db2/include/mp_ext.h
new file mode 100644 (file)
index 0000000..3934c13
--- /dev/null
@@ -0,0 +1,14 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __memp_bhwrite
+    __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *));
+int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
+int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *));
+int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
+void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
+int __memp_fopen __P((DB_MPOOL *, const char *, int, int,
+   int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+void __memp_debug __P((DB_MPOOL *, FILE *, int));
+int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
+int __memp_ropen
+   __P((DB_MPOOL *, const char *, size_t, int, int));
+int __memp_rclose __P((DB_MPOOL *));
diff --git a/db2/include/mutex_ext.h b/db2/include/mutex_ext.h
new file mode 100644 (file)
index 0000000..ff46b6a
--- /dev/null
@@ -0,0 +1,4 @@
+/* Do not edit: automatically built by dist/distrib. */
+void __db_mutex_init __P((db_mutex_t *, off_t));
+int __db_mutex_lock __P((db_mutex_t *, int, int (*)(void)));
+int __db_mutex_unlock __P((db_mutex_t *, int));
diff --git a/db2/include/os_ext.h b/db2/include/os_ext.h
new file mode 100644 (file)
index 0000000..59d72ac
--- /dev/null
@@ -0,0 +1,19 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __db_abspath __P((const char *));
+char *__db_rpath __P((const char *));
+int __db_dir __P((DB_ENV *, const char *, char ***, int *));
+void __db_dirf __P((DB_ENV *, char **, int));
+int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
+int __db_lseek __P((int, size_t, db_pgno_t, u_long, int));
+int __db_mmap __P((int, size_t, int, int, void *));
+int __db_munmap __P((void *, size_t));
+int __db_oflags __P((int));
+int __db_fdopen __P((const char *, int, int, int, int *));
+int __db_fsync __P((int));
+int __db_close __P((int));
+int __db_read __P((int, void *, size_t, ssize_t *));
+int __db_write __P((int, void *, size_t, ssize_t *));
+int __db_sleep __P((u_long, u_long));
+int __db_exists __P((const char *, int *));
+int __db_stat __P((DB_ENV *, const char *, int, off_t *, off_t *));
+int __db_unlink __P((const char *));
diff --git a/db2/include/queue.h b/db2/include/queue.h
new file mode 100644 (file)
index 0000000..0909c86
--- /dev/null
@@ -0,0 +1,275 @@
+/*     BSDI $Id$       */
+
+/* 
+ * Copyright (c) 1991, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)queue.h     8.5 (Berkeley) 8/20/94
+ */
+
+#ifndef        _SYS_QUEUE_H_
+#define        _SYS_QUEUE_H_
+
+/*
+ * This file defines three types of data structures: lists, tail queues,
+ * and circular queues.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may only be traversed in the forward direction.
+ *
+ * A circle queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the list.
+ * A circle queue may be traversed in either direction, but has a more
+ * complex end of list detection.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * List definitions.
+ */
+#define LIST_HEAD(name, type)                                          \
+struct name {                                                          \
+       struct type *lh_first;  /* first element, NULL when empty */    \
+}
+
+#define LIST_ENTRY(type)                                               \
+struct {                                                               \
+       struct type *le_next;   /* next element, NULL at the end */     \
+       struct type **le_prev;  /* &le_next of previous, or &lh_first */\
+}
+
+#define        LIST_FIRST(head)                ((head)->lh_first)
+#define        LIST_NEXT(elm, field)           ((elm)->field.le_next)
+#define        LIST_END(head)                  NULL    /* head is not used */
+
+/*
+ * List functions.
+ */
+#define        LIST_INIT(head) {                                               \
+       (head)->lh_first = NULL;                                        \
+}
+
+#define LIST_INSERT_AFTER(listelm, elm, field) do {                    \
+       if (((elm)->field.le_next = (listelm)->field.le_next) != NULL)  \
+               (listelm)->field.le_next->field.le_prev =               \
+                   &(elm)->field.le_next;                              \
+       (listelm)->field.le_next = (elm);                               \
+       (elm)->field.le_prev = &(listelm)->field.le_next;               \
+} while (0)
+
+#define        LIST_INSERT_BEFORE(listelm, elm, field) do {                    \
+       (elm)->field.le_prev = (listelm)->field.le_prev;                \
+       (elm)->field.le_next = (listelm);                               \
+       *(listelm)->field.le_prev = (elm);                              \
+       (listelm)->field.le_prev = &(elm)->field.le_next;               \
+} while (0)
+
+#define LIST_INSERT_HEAD(head, elm, field) do {                                \
+       if (((elm)->field.le_next = (head)->lh_first) != NULL)          \
+               (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+       (head)->lh_first = (elm);                                       \
+       (elm)->field.le_prev = &(head)->lh_first;                       \
+} while (0)
+
+#define LIST_REMOVE(elm, field) do {                                   \
+       if ((elm)->field.le_next != NULL)                               \
+               (elm)->field.le_next->field.le_prev =                   \
+                   (elm)->field.le_prev;                               \
+       *(elm)->field.le_prev = (elm)->field.le_next;                   \
+} while (0)
+
+/*
+ * Tail queue definitions.
+ */
+#define TAILQ_HEAD(name, type)                                         \
+struct name {                                                          \
+       struct type *tqh_first; /* first element, NULL when empty */    \
+       struct type **tqh_last; /* &tqe_next of last, or &tqh_first */  \
+}
+
+#define TAILQ_ENTRY(type)                                              \
+struct {                                                               \
+       struct type *tqe_next;  /* next element, NULL at the tail */    \
+       struct type **tqe_prev; /* &tqe_next of previous, or &tqh_first */\
+}
+
+#define        TAILQ_FIRST(head)               ((head)->tqh_first)
+#define        TAILQ_NEXT(elm, field)          ((elm)->field.tqe_next)
+#define        TAILQ_END(head)                 NULL    /* head is not used */
+
+/*
+ * Tail queue functions.  head/elm are pointers; `field' is the TAILQ_ENTRY.
+ */
+#define        TAILQ_INIT(head) do {           /* Make the queue empty. */     \
+       (head)->tqh_first = NULL;                                       \
+       (head)->tqh_last = &(head)->tqh_first;                          \
+} while (0)
+
+#define TAILQ_INSERT_HEAD(head, elm, field) do { /* elm becomes first */ \
+       if (((elm)->field.tqe_next = (head)->tqh_first) != NULL)        \
+               (head)->tqh_first->field.tqe_prev =                     \
+                   &(elm)->field.tqe_next;                             \
+       else    /* queue was empty, so elm is also the last element */  \
+               (head)->tqh_last = &(elm)->field.tqe_next;              \
+       (head)->tqh_first = (elm);                                      \
+       (elm)->field.tqe_prev = &(head)->tqh_first;                     \
+} while (0)
+
+#define TAILQ_INSERT_TAIL(head, elm, field) do {                       \
+       (elm)->field.tqe_next = NULL;                                   \
+       (elm)->field.tqe_prev = (head)->tqh_last;                       \
+       *(head)->tqh_last = (elm);                                      \
+       (head)->tqh_last = &(elm)->field.tqe_next;                      \
+} while (0)
+
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do {             \
+       if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+               (elm)->field.tqe_next->field.tqe_prev =                 \
+                   &(elm)->field.tqe_next;                             \
+       else                                                            \
+               (head)->tqh_last = &(elm)->field.tqe_next;              \
+       (listelm)->field.tqe_next = (elm);                              \
+       (elm)->field.tqe_prev = &(listelm)->field.tqe_next;             \
+} while (0)
+
+#define        TAILQ_INSERT_BEFORE(listelm, elm, field) do {                   \
+       (elm)->field.tqe_prev = (listelm)->field.tqe_prev;              \
+       (elm)->field.tqe_next = (listelm);                              \
+       *(listelm)->field.tqe_prev = (elm);                             \
+       (listelm)->field.tqe_prev = &(elm)->field.tqe_next;             \
+} while (0)
+
+#define TAILQ_REMOVE(head, elm, field) do {                            \
+       if (((elm)->field.tqe_next) != NULL)                            \
+               (elm)->field.tqe_next->field.tqe_prev =                 \
+                   (elm)->field.tqe_prev;                              \
+       else                                                            \
+               (head)->tqh_last = (elm)->field.tqe_prev;               \
+       *(elm)->field.tqe_prev = (elm)->field.tqe_next;                 \
+} while (0)
+
+/*
+ * Circular queue definitions.
+ */
+#define CIRCLEQ_HEAD(name, type)                                       \
+struct name {                                                          \
+       struct type *cqh_first;         /* first element, or the head */\
+       struct type *cqh_last;          /* last element, or the head */ \
+}
+
+#define CIRCLEQ_ENTRY(type)                                            \
+struct {                                                               \
+       struct type *cqe_next;          /* next element, or the head */ \
+       struct type *cqe_prev;          /* previous element, or head */ \
+}
+
+#define        CIRCLEQ_FIRST(head)             ((head)->cqh_first)
+#define        CIRCLEQ_LAST(head)              ((head)->cqh_last)
+#define        CIRCLEQ_END(head)               ((void *)(head)) /* end marker */
+#define        CIRCLEQ_NEXT(elm, field)        ((elm)->field.cqe_next)
+#define        CIRCLEQ_PREV(elm, field)        ((elm)->field.cqe_prev)
+
+/*
+ * Circular queue functions.
+ */
+#define        CIRCLEQ_INIT(head) do {                                         \
+       (head)->cqh_first = (void *)(head);                             \
+       (head)->cqh_last = (void *)(head);                              \
+} while (0)
+
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do {           \
+       (elm)->field.cqe_next = (listelm)->field.cqe_next;              \
+       (elm)->field.cqe_prev = (listelm);                              \
+       if ((listelm)->field.cqe_next == (void *)(head))                \
+               (head)->cqh_last = (elm);                               \
+       else                                                            \
+               (listelm)->field.cqe_next->field.cqe_prev = (elm);      \
+       (listelm)->field.cqe_next = (elm);                              \
+} while (0)
+
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do {          \
+       (elm)->field.cqe_next = (listelm);                              \
+       (elm)->field.cqe_prev = (listelm)->field.cqe_prev;              \
+       if ((listelm)->field.cqe_prev == (void *)(head))                \
+               (head)->cqh_first = (elm);                              \
+       else                                                            \
+               (listelm)->field.cqe_prev->field.cqe_next = (elm);      \
+       (listelm)->field.cqe_prev = (elm);                              \
+} while (0)
+
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) do {                     \
+       (elm)->field.cqe_next = (head)->cqh_first;                      \
+       (elm)->field.cqe_prev = (void *)(head);                         \
+       if ((head)->cqh_last == (void *)(head))                         \
+               (head)->cqh_last = (elm);                               \
+       else                                                            \
+               (head)->cqh_first->field.cqe_prev = (elm);              \
+       (head)->cqh_first = (elm);                                      \
+} while (0)
+
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) do {                     \
+       (elm)->field.cqe_next = (void *)(head);                         \
+       (elm)->field.cqe_prev = (head)->cqh_last;                       \
+       if ((head)->cqh_first == (void *)(head))                        \
+               (head)->cqh_first = (elm);                              \
+       else                                                            \
+               (head)->cqh_last->field.cqe_next = (elm);               \
+       (head)->cqh_last = (elm);                                       \
+} while (0)
+
+#define        CIRCLEQ_REMOVE(head, elm, field) do {                           \
+       if ((elm)->field.cqe_next == (void *)(head))                    \
+               (head)->cqh_last = (elm)->field.cqe_prev;               \
+       else                                                            \
+               (elm)->field.cqe_next->field.cqe_prev =                 \
+                   (elm)->field.cqe_prev;                              \
+       if ((elm)->field.cqe_prev == (void *)(head))                    \
+               (head)->cqh_first = (elm)->field.cqe_next;              \
+       else                                                            \
+               (elm)->field.cqe_prev->field.cqe_next =                 \
+                   (elm)->field.cqe_next;                              \
+} while (0)
+#endif /* !_SYS_QUEUE_H_ */
diff --git a/db2/include/shqueue.h b/db2/include/shqueue.h
new file mode 100644 (file)
index 0000000..c3e2f4a
--- /dev/null
@@ -0,0 +1,361 @@
+/*
+ * Copyright (c) 1991, 1993
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *     @(#)shqueue.h   8.11 (Sleepycat) 7/27/97
+ */
+
+#ifndef        _SYS_SHQUEUE_H_
+#define        _SYS_SHQUEUE_H_
+
+/*
+ * This file defines three types of data structures: lists, tail queues, and
+ * circular queues, similarly to the include file <sys/queue.h>.
+ *
+ * The difference is that this set of macros can be used for structures that
+ * reside in shared memory that may be mapped at different addresses in each
+ * process.  In most cases, the macros for shared structures exactly mirror
+ * the normal macros, although the macro calls require an additional type
+ * parameter that the standard macros need only for their HEAD and ENTRY forms.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * Shared list definitions.
+ */
+#define SH_LIST_HEAD(name)                                             \
+struct name {                                                          \
+       ssize_t slh_first;      /* offset of first element, or -1 */    \
+}
+
+#define SH_LIST_ENTRY                                                  \
+struct {                                                               \
+       ssize_t sle_next;       /* relative offset next element */      \
+       ssize_t sle_prev;       /* relative offset of prev element */   \
+}
+
+/*
+ * Shared list functions.  Since we use relative offsets for pointers,
+ * 0 is a valid offset.  Therefore, we use -1 to indicate end of list.
+ * The macros ending in "P" return pointers without checking for end
+ * of list; the others check for end of list and evaluate to either a
+ * pointer or NULL.
+ */
+
+#define SH_LIST_FIRSTP(head, type)                                     \
+       ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first))
+
+#define SH_LIST_FIRST(head, type)                                      \
+       ((head)->slh_first == -1 ? NULL :                               \
+       ((struct type *)(((u_int8_t *)(head)) + (head)->slh_first)))
+
+#define SH_LIST_NEXTP(elm, field, type)                                        \
+       ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next))
+
+#define SH_LIST_NEXT(elm, field, type)                                 \
+       ((elm)->field.sle_next == -1 ? NULL :                           \
+       ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.sle_next)))
+
+#define SH_LIST_PREV(elm, field)                                       \
+       ((ssize_t *)(((u_int8_t *)(elm)) + (elm)->field.sle_prev))
+
+#define SH_PTR_TO_OFF(src, dest)                                       \
+       ((ssize_t)(((u_int8_t *)(dest)) - ((u_int8_t *)(src))))
+
+#define        SH_LIST_END(head)                       NULL
+
+/*
+ * Take the element's next pointer and calculate what the corresponding
+ * Prev pointer should be -- basically it is the negation plus the offset
+ * of the next field in the structure.
+ */
+#define SH_LIST_NEXT_TO_PREV(elm, field)                               \
+       (-(elm)->field.sle_next + SH_PTR_TO_OFF(elm, &(elm)->field.sle_next))
+
+#define        SH_LIST_INIT(head) ((head)->slh_first = -1)     /* mark empty */
+
+#define SH_LIST_INSERT_AFTER(listelm, elm, field, type) do {           \
+       if ((listelm)->field.sle_next != -1) {                          \
+               (elm)->field.sle_next = SH_PTR_TO_OFF(elm,              \
+                   SH_LIST_NEXTP(listelm, field, type));               \
+               SH_LIST_NEXTP(listelm, field, type)->field.sle_prev =   \
+                       SH_LIST_NEXT_TO_PREV(elm, field);               \
+       } else                                                          \
+               (elm)->field.sle_next = -1;                             \
+       (listelm)->field.sle_next = SH_PTR_TO_OFF(listelm, elm);        \
+       (elm)->field.sle_prev = SH_LIST_NEXT_TO_PREV(listelm, field);   \
+} while (0)
+
+#define SH_LIST_INSERT_HEAD(head, elm, field, type) do {               \
+       if ((head)->slh_first != -1) {                                  \
+               (elm)->field.sle_next =                                 \
+                   (head)->slh_first - SH_PTR_TO_OFF(head, elm);       \
+               SH_LIST_FIRSTP(head, type)->field.sle_prev =            \
+                       SH_LIST_NEXT_TO_PREV(elm, field);               \
+       } else                                                          \
+               (elm)->field.sle_next = -1;                             \
+       (head)->slh_first = SH_PTR_TO_OFF(head, elm);                   \
+       (elm)->field.sle_prev = SH_PTR_TO_OFF(elm, &(head)->slh_first); \
+} while (0)
+
+#define SH_LIST_REMOVE(elm, field, type) do {                          \
+       if ((elm)->field.sle_next != -1) {                              \
+               SH_LIST_NEXTP(elm, field, type)->field.sle_prev =       \
+                       (elm)->field.sle_prev - (elm)->field.sle_next;  \
+               *SH_LIST_PREV(elm, field) += (elm)->field.sle_next;     \
+       } else                                                          \
+               *SH_LIST_PREV(elm, field) = -1;                         \
+} while (0)
+
+/*
+ * Shared tail queue definitions.
+ */
+#define SH_TAILQ_HEAD(name)                                            \
+struct name {                                                          \
+       ssize_t stqh_first;     /* relative offset of first element */  \
+       ssize_t stqh_last;      /* relative offset of last's next */    \
+}
+
+#define SH_TAILQ_ENTRY                                                 \
+struct {                                                               \
+       ssize_t stqe_next;      /* relative offset of next element */   \
+       ssize_t stqe_prev;      /* relative offset of prev's next */    \
+}
+
+/*
+ * Shared tail queue functions.
+ */
+#define SH_TAILQ_FIRSTP(head, type)                                    \
+       ((struct type *)((u_int8_t *)(head) + (head)->stqh_first))
+
+#define SH_TAILQ_FIRST(head, type)                                     \
+       ((head)->stqh_first == -1 ? NULL : SH_TAILQ_FIRSTP(head, type))
+
+#define SH_TAILQ_NEXTP(elm, field, type)                               \
+       ((struct type *)((u_int8_t *)(elm) + (elm)->field.stqe_next))
+
+#define SH_TAILQ_NEXT(elm, field, type)                                        \
+       ((elm)->field.stqe_next == -1 ? NULL : SH_TAILQ_NEXTP(elm, field, type))
+
+#define SH_TAILQ_PREVP(elm, field)                                     \
+       ((ssize_t *)((u_int8_t *)(elm) + (elm)->field.stqe_prev))
+
+#define SH_TAILQ_LAST(head)                                            \
+       ((ssize_t *)(((u_int8_t *)(head)) + (head)->stqh_last))
+
+#define SH_TAILQ_NEXT_TO_PREV(elm, field)                              \
+       (-(elm)->field.stqe_next + SH_PTR_TO_OFF(elm, &(elm)->field.stqe_next))
+
+#define SH_TAILQ_END(head)             NULL
+
+#define        SH_TAILQ_INIT(head) do {                                        \
+       (head)->stqh_first = -1;        /* -1 marks an empty queue */   \
+       (head)->stqh_last = SH_PTR_TO_OFF(head, &(head)->stqh_first);   \
+} while (0)
+
+#define SH_TAILQ_INSERT_HEAD(head, elm, field, type) do {              \
+       if ((head)->stqh_first != -1) {                                 \
+               (elm)->field.stqe_next =                                \
+                   (head)->stqh_first - SH_PTR_TO_OFF(head, elm);      \
+               SH_TAILQ_FIRSTP(head, type)->field.stqe_prev =          \
+                       SH_TAILQ_NEXT_TO_PREV(elm, field);              \
+       } else {                                                        \
+               (elm)->field.stqe_next = -1;                            \
+               (head)->stqh_last =                                     \
+                   SH_PTR_TO_OFF(head, &(elm)->field.stqe_next);       \
+       }                                                               \
+       (head)->stqh_first = SH_PTR_TO_OFF(head, elm);                  \
+       (elm)->field.stqe_prev =                                        \
+           SH_PTR_TO_OFF(elm, &(head)->stqh_first);                    \
+} while (0)
+
+#define SH_TAILQ_INSERT_TAIL(head, elm, field) do {                    \
+       (elm)->field.stqe_next = -1;                                    \
+       (elm)->field.stqe_prev =                                        \
+           -SH_PTR_TO_OFF(head, elm) + (head)->stqh_last;              \
+       if ((head)->stqh_last ==                                        \
+           SH_PTR_TO_OFF((head), &(head)->stqh_first))                 \
+               (head)->stqh_first = SH_PTR_TO_OFF(head, elm);          \
+       else                                                            \
+               *SH_TAILQ_LAST(head) = -(head)->stqh_last +             \
+                   SH_PTR_TO_OFF((elm), &(elm)->field.stqe_next) +     \
+                   SH_PTR_TO_OFF(head, elm);                           \
+       (head)->stqh_last =                                             \
+           SH_PTR_TO_OFF(head, &((elm)->field.stqe_next));             \
+} while (0)
+
+#define SH_TAILQ_INSERT_AFTER(head, listelm, elm, field, type) do {    \
+       if ((listelm)->field.stqe_next != -1) {                         \
+               (elm)->field.stqe_next = (listelm)->field.stqe_next -   \
+                   SH_PTR_TO_OFF(listelm, elm);                        \
+               SH_TAILQ_NEXTP(listelm, field, type)->field.stqe_prev = \
+                   SH_TAILQ_NEXT_TO_PREV(elm, field);                  \
+       } else {        /* listelm was the tail: elm becomes last */    \
+               (elm)->field.stqe_next = -1;                            \
+               (head)->stqh_last =                                     \
+                   SH_PTR_TO_OFF(head, &(elm)->field.stqe_next);       \
+       }                                                               \
+       (listelm)->field.stqe_next = SH_PTR_TO_OFF(listelm, elm);       \
+       (elm)->field.stqe_prev = SH_TAILQ_NEXT_TO_PREV(listelm, field); \
+} while (0)
+
+#define SH_TAILQ_REMOVE(head, elm, field, type) do {                   \
+       if ((elm)->field.stqe_next != -1) {                             \
+               SH_TAILQ_NEXTP(elm, field, type)->field.stqe_prev =     \
+                   (elm)->field.stqe_prev +                            \
+                   SH_PTR_TO_OFF(SH_TAILQ_NEXTP(elm,                   \
+                   field, type), elm);                                 \
+               *SH_TAILQ_PREVP(elm, field) += (elm)->field.stqe_next;  \
+       } else {        /* elm was the tail: predecessor is last */     \
+               (head)->stqh_last = (elm)->field.stqe_prev +            \
+                       SH_PTR_TO_OFF(head, elm);                       \
+               *SH_TAILQ_PREVP(elm, field) = -1;                       \
+       }                                                               \
+} while (0)
+
+/*
+ * Shared circular queue definitions.
+ */
+#define SH_CIRCLEQ_HEAD(name)                                          \
+struct name {                                                          \
+       ssize_t scqh_first;             /* offset of first element */   \
+       ssize_t scqh_last;              /* offset of last element */    \
+}
+
+#define SH_CIRCLEQ_ENTRY                                               \
+struct {                                                               \
+       ssize_t scqe_next;              /* next element */              \
+       ssize_t scqe_prev;              /* previous element */          \
+}
+
+/*
+ * Shared circular queue functions.
+ */
+#define SH_CIRCLEQ_FIRSTP(head, type)                                  \
+       ((struct type *)(((u_int8_t *)(head)) + (head)->scqh_first))
+
+#define SH_CIRCLEQ_FIRST(head, type)                                   \
+       ((head)->scqh_first == -1 ?                                     \
+       (void *)(head) : SH_CIRCLEQ_FIRSTP(head, type))
+
+#define SH_CIRCLEQ_LASTP(head, type)                                   \
+       ((struct type *)(((u_int8_t *)(head)) + (head)->scqh_last))
+
+#define SH_CIRCLEQ_LAST(head, type)                                    \
+       ((head)->scqh_last == -1 ? (void *)(head) : SH_CIRCLEQ_LASTP(head, type))
+
+#define SH_CIRCLEQ_NEXTP(elm, field, type)                             \
+       ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.scqe_next))
+
+#define SH_CIRCLEQ_NEXT(head, elm, field, type)                                \
+       ((elm)->field.scqe_next == SH_PTR_TO_OFF(elm, head) ?           \
+           (void *)(head) : SH_CIRCLEQ_NEXTP(elm, field, type))
+
+#define SH_CIRCLEQ_PREVP(elm, field, type)                             \
+       ((struct type *)(((u_int8_t *)(elm)) + (elm)->field.scqe_prev))
+
+#define SH_CIRCLEQ_PREV(head, elm, field, type)                                \
+       ((elm)->field.scqe_prev == SH_PTR_TO_OFF(elm, head) ?           \
+           (void *)(head) : SH_CIRCLEQ_PREVP(elm, field, type))
+
+#define        SH_CIRCLEQ_END(head)            ((void *)(head))
+
+#define        SH_CIRCLEQ_INIT(head) do {                                      \
+       (head)->scqh_first = 0;  /* 0: offset to head itself */         \
+       (head)->scqh_last = 0;                                          \
+} while (0)
+
+#define SH_CIRCLEQ_INSERT_AFTER(head, listelm, elm, field, type) do {  \
+       (elm)->field.scqe_prev = SH_PTR_TO_OFF(elm, listelm);           \
+       (elm)->field.scqe_next = (listelm)->field.scqe_next +           \
+           (elm)->field.scqe_prev;                                     \
+       if (SH_CIRCLEQ_NEXTP(listelm, field, type) == (void *)(head))   \
+               (head)->scqh_last = SH_PTR_TO_OFF(head, elm);           \
+       else                                                            \
+               SH_CIRCLEQ_NEXTP(listelm,                               \
+                   field, type)->field.scqe_prev =                     \
+                   SH_PTR_TO_OFF(SH_CIRCLEQ_NEXTP(listelm,             \
+                   field, type), elm);                                 \
+       (listelm)->field.scqe_next = -(elm)->field.scqe_prev;           \
+} while (0)
+
+#define SH_CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field, type) do { \
+       (elm)->field.scqe_next = SH_PTR_TO_OFF(elm, listelm);           \
+       (elm)->field.scqe_prev = (listelm)->field.scqe_prev +           \
+           (elm)->field.scqe_next;     /* offset elm -> old prev */    \
+       if (SH_CIRCLEQ_PREVP(listelm, field, type) == (void *)(head))   \
+               (head)->scqh_first = SH_PTR_TO_OFF(head, elm);          \
+       else                                                            \
+               SH_CIRCLEQ_PREVP(listelm,                               \
+                   field, type)->field.scqe_next =                     \
+                   SH_PTR_TO_OFF(SH_CIRCLEQ_PREVP(listelm,             \
+                   field, type), elm);                                 \
+       (listelm)->field.scqe_prev = -(elm)->field.scqe_next;           \
+} while (0)
+
+#define SH_CIRCLEQ_INSERT_HEAD(head, elm, field, type) do {            \
+       (elm)->field.scqe_prev = SH_PTR_TO_OFF(elm, head);              \
+       (elm)->field.scqe_next = (head)->scqh_first +                   \
+               (elm)->field.scqe_prev;                                 \
+       if ((head)->scqh_last == 0)                                     \
+               (head)->scqh_last = -(elm)->field.scqe_prev;            \
+       else                                                            \
+               SH_CIRCLEQ_FIRSTP(head, type)->field.scqe_prev =        \
+                   SH_PTR_TO_OFF(SH_CIRCLEQ_FIRSTP(head, type), elm);  \
+       (head)->scqh_first = -(elm)->field.scqe_prev;                   \
+} while (0)
+
+#define SH_CIRCLEQ_INSERT_TAIL(head, elm, field, type) do {            \
+       (elm)->field.scqe_next = SH_PTR_TO_OFF(elm, head);              \
+       (elm)->field.scqe_prev = (head)->scqh_last +                    \
+           (elm)->field.scqe_next;                                     \
+       if ((head)->scqh_first == 0)                                    \
+               (head)->scqh_first = -(elm)->field.scqe_next;           \
+       else                                                            \
+               SH_CIRCLEQ_LASTP(head, type)->field.scqe_next =         \
+                   SH_PTR_TO_OFF(SH_CIRCLEQ_LASTP(head, type), elm);   \
+       (head)->scqh_last = -(elm)->field.scqe_next;                    \
+} while (0)
+
+#define        SH_CIRCLEQ_REMOVE(head, elm, field, type) do {                  \
+       if (SH_CIRCLEQ_NEXTP(elm, field, type) == (void *)(head))       \
+               (head)->scqh_last += (elm)->field.scqe_prev;            \
+       else                                                            \
+               SH_CIRCLEQ_NEXTP(elm, field, type)->field.scqe_prev +=  \
+                   (elm)->field.scqe_prev;                             \
+       if (SH_CIRCLEQ_PREVP(elm, field, type) == (void *)(head))       \
+               (head)->scqh_first += (elm)->field.scqe_next;           \
+       else                                                            \
+               SH_CIRCLEQ_PREVP(elm, field, type)->field.scqe_next +=  \
+                   (elm)->field.scqe_next;                             \
+} while (0)
+#endif /* !_SYS_SHQUEUE_H_ */
diff --git a/db2/include/txn.h b/db2/include/txn.h
new file mode 100644 (file)
index 0000000..f4e0999
--- /dev/null
@@ -0,0 +1,112 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)txn.h       10.6 (Sleepycat) 7/29/97
+ */
+#ifndef        _TXN_H_
+#define        _TXN_H_
+
+/*
+ * The name of the transaction shared memory region is DEFAULT_TXN_FILE and
+ * the region is always created group RW of the group owning the directory.
+ */
+#define        DEFAULT_TXN_FILE        "__db_txn.share"
+#define        TXN_INVALID             0xffffffff /* Maximum number of txn ids. */
+#define TXN_MINIMUM            0x80000000 /* First transaction id */
+
+/*
+ * Transaction type declarations.
+ */
+
+/*
+ * Internal data maintained in shared memory for each transaction.
+ */
+typedef struct __txn_detail {
+       u_int32_t txnid;                /* current transaction id
+                                          used to link free list also */
+       DB_LSN  last_lsn;               /* last lsn written for this txn */
+       DB_LSN  begin_lsn;              /* lsn of begin record */
+       size_t  last_lock;              /* offset in lock region of last lock
+                                          for this transaction. */
+#define        TXN_UNALLOC     0
+#define        TXN_RUNNING     1
+#define        TXN_ABORTED     2
+#define        TXN_PREPARED    3
+       u_int32_t status;               /* status of the transaction */
+} TXN_DETAIL;
+
+/*
+ * The transaction manager encapsulates the transaction system.  It contains
+ * references to the log and lock managers as well as the state that keeps
+ * track of the shared memory region.
+ */
+struct __db_txnmgr {
+/* These fields need to be protected for multi-threaded support. */
+       db_mutex_t      mutex;          /* Synchronization. */
+                                       /* list of active transactions */
+       TAILQ_HEAD(_chain, __db_txn)    txn_chain;
+
+/* These fields are not protected. */
+       DB_ENV          *dbenv;         /* Environment. */
+       int (*recover)                  /* Recovery dispatch routine */
+           __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+       int              fd;            /* mapped file descriptor */
+       u_int            flags;         /* DB_TXN_NOSYNC, DB_THREAD */
+       size_t           reg_size;      /* how large we think the region is */
+       DB_TXNREGION    *region;        /* address of shared memory region */
+};
+
+/*
+ * Layout of the shared memory region.
+ *
+ */
+struct __db_txnregion {
+       RLAYOUT         hdr;            /* Shared memory region header. */
+       u_int32_t       magic;          /* transaction magic number */
+       u_int32_t       version;        /* version number */
+       u_int32_t       maxtxns;        /* maximum number of active txns */
+       u_int32_t       last_txnid;     /* last transaction id given out */
+       u_int32_t       free_txn;       /* head of transaction free list */
+       DB_LSN          pending_ckp;    /* last checkpoint did not finish */
+       DB_LSN          last_ckp;       /* lsn of the last checkpoint */
+       time_t          time_ckp;       /* time of last checkpoint */
+       u_int32_t       logtype;        /* type of logging */
+       u_int32_t       locktype;       /* lock type */
+       u_int32_t       naborts;        /* number of aborted transactions */
+       u_int32_t       ncommits;       /* number of committed transactions */
+       u_int32_t       nbegins;        /* number of begun transactions */
+       TXN_DETAIL      table[1];       /* array of TXN structures */
+};
+
+#define        TXN_REGION_SIZE(N)      /* review: DB_TXN or TXN_DETAIL? */     \
+                       (sizeof(DB_TXNREGION) + (N) * sizeof(DB_TXN))
+
+/* Macros to lock/unlock the region and threads. */
+#define        LOCK_TXNTHREAD(tmgrp)                                           \
+       if (F_ISSET(tmgrp, DB_THREAD))                                  \
+               (void)__db_mutex_lock(&(tmgrp)->mutex, -1,              \
+                   (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield)
+#define        UNLOCK_TXNTHREAD(tmgrp)                                         \
+       if (F_ISSET(tmgrp, DB_THREAD))                                  \
+               (void)__db_mutex_unlock(&(tmgrp)->mutex, -1)
+
+#define        LOCK_TXNREGION(tmgrp)                                           \
+       (void)__db_mutex_lock(&(tmgrp)->region->hdr.lock,(tmgrp)->fd,   \
+           (tmgrp)->dbenv == NULL ? NULL : (tmgrp)->dbenv->db_yield)
+#define        UNLOCK_TXNREGION(tmgrp)                                         \
+       (void)__db_mutex_unlock(&(tmgrp)->region->hdr.lock, (tmgrp)->fd)
+
+/*
+ * Log record types.
+ */
+#define        TXN_BEGIN       1
+#define        TXN_COMMIT      2
+#define        TXN_PREPARE     3
+#define        TXN_CHECKPOINT  4
+
+#include "txn_auto.h"
+#include "txn_ext.h"
+#endif /* !_TXN_H_ */
diff --git a/db2/include/txn_auto.h b/db2/include/txn_auto.h
new file mode 100644 (file)
index 0000000..fd5a456
--- /dev/null
@@ -0,0 +1,25 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#ifndef txn_AUTO_H
+#define txn_AUTO_H
+
+#define        DB_txn_regop    (DB_txn_BEGIN + 1)
+
+typedef struct _txn_regop_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       u_int32_t       opcode;
+} __txn_regop_args;
+
+
+#define        DB_txn_ckp      (DB_txn_BEGIN + 2)
+
+typedef struct _txn_ckp_args {
+       u_int32_t type;
+       DB_TXN *txnid;
+       DB_LSN prev_lsn;
+       DB_LSN  ckp_lsn;
+       DB_LSN  last_ckp;
+} __txn_ckp_args;
+
+#endif
diff --git a/db2/include/txn_ext.h b/db2/include/txn_ext.h
new file mode 100644 (file)
index 0000000..8ba0b0c
--- /dev/null
@@ -0,0 +1,18 @@
+/* Do not edit: automatically built by dist/distrib. */
+int __txn_regop_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    u_int32_t));
+int __txn_regop_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __txn_regop_read __P((void *, __txn_regop_args **));
+int __txn_ckp_log
+    __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+    DB_LSN *, DB_LSN *));
+int __txn_ckp_print
+   __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __txn_ckp_read __P((void *, __txn_ckp_args **));
+int __txn_init_print __P((DB_ENV *));
+int __txn_init_recover __P((DB_ENV *));
+int __txn_regop_recover
+    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+int __txn_ckp_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
diff --git a/db2/lock/lock.c b/db2/lock/lock.c
new file mode 100644 (file)
index 0000000..8fc9133
--- /dev/null
@@ -0,0 +1,1362 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)lock.c       10.31 (Sleepycat) 8/17/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "lock.h"
+#include "common_ext.h"
+#include "db_am.h"
+
+static void __lock_checklocker __P((DB_LOCKTAB *, struct __db_lock *, int));
+static int  __lock_count_locks __P((DB_LOCKREGION *));
+static int  __lock_count_objs __P((DB_LOCKREGION *));
+static int  __lock_create __P((const char *, int, DB_ENV *));
+static void __lock_freeobj __P((DB_LOCKTAB *, DB_LOCKOBJ *));
+static int  __lock_get_internal __P((DB_LOCKTAB *, u_int32_t, int, const DBT *,
+    db_lockmode_t, struct __db_lock **));
+static int  __lock_grow_region __P((DB_LOCKTAB *, int, size_t));
+static int  __lock_put_internal __P((DB_LOCKTAB *, struct __db_lock *, int));
+static void __lock_remove_waiter
+    __P((DB_LOCKTAB *, DB_LOCKOBJ *, struct __db_lock *, db_status_t));
+static void __lock_reset_region __P((DB_LOCKTAB *));
+static int  __lock_validate_region __P((DB_LOCKTAB *));
+#ifdef DEBUG
+static void __lock_dump_locker __P((DB_LOCKTAB *, DB_LOCKOBJ *));
+static void __lock_dump_object __P((DB_LOCKTAB *, DB_LOCKOBJ *));
+static void __lock_printlock __P((DB_LOCKTAB *, struct __db_lock *, int));
+#endif
+
+/*
+ * Create and initialize a lock region in shared memory.
+ */
+
+/*
+ * __lock_create --
+ *     Create the lock region.  Returns an errno.  In most cases,
+ * the errno should be that returned by __db_rcreate, in which case
+ * an EAGAIN means that we should retry, and an EEXIST means that
+ * the region exists and we didn't need to create it.  Any other
+ * sort of errno should be treated as a system error, leading to a
+ * failure of the original interface call.
+ *
+ * path:  handed to __db_rcreate, and to lock_unlink if closing fails.
+ * mode:  handed to __db_rcreate.
+ * dbenv: may be NULL.  When it is NULL, or the corresponding field is
+ *        zero/NULL, DB_LOCK_DEFAULT_N, DB_LOCK_RW_N and db_rw_conflicts
+ *        supply the lock count, mode count and conflict matrix.
+ *
+ * The region is laid out, in order, as: DB_LOCKREGION header, conflict
+ * matrix, hash table, locks, objects, string space.  On success the
+ * header mutex has been released and the region closed again with
+ * __db_rclose, so the caller still has to open it.
+ */
+static int
+__lock_create(path, mode, dbenv)
+       const char *path;
+       int mode;
+       DB_ENV *dbenv;
+{
+       struct __db_lock *lp;
+       struct lock_header *tq_head;
+       struct obj_header *obj_head;
+       DB_LOCKOBJ *op;
+       DB_LOCKREGION *lrp;
+       u_int maxlocks;
+       u_int32_t i;
+       int fd, lock_modes, nelements, ret;
+       u_int8_t *conflicts, *curaddr;
+
+       maxlocks = dbenv == NULL || dbenv->lk_max == 0 ?
+           DB_LOCK_DEFAULT_N : dbenv->lk_max;
+       lock_modes = dbenv == NULL || dbenv->lk_modes == 0 ?
+           DB_LOCK_RW_N : dbenv->lk_modes;
+       conflicts = dbenv == NULL || dbenv->lk_conflicts == NULL ?
+           (u_int8_t *)db_rw_conflicts : dbenv->lk_conflicts;
+
+       if ((ret =
+           __db_rcreate(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, mode,
+           LOCK_REGION_SIZE(lock_modes, maxlocks, __db_tablesize(maxlocks)),
+           &fd, &lrp)) != 0)
+               return (ret);
+
+       /* Region exists; now initialize it. */
+       lrp->table_size = __db_tablesize(maxlocks);
+       lrp->magic = DB_LOCKMAGIC;
+       lrp->version = DB_LOCKVERSION;
+       lrp->id = 0;
+       lrp->maxlocks = maxlocks;
+       lrp->need_dd = 0;
+       lrp->detect = DB_LOCK_NORUN;
+       lrp->numobjs = maxlocks;        /* One object slot per lock slot. */
+       lrp->nlockers = 0;
+       lrp->mem_bytes = ALIGN(STRING_SIZE(maxlocks), sizeof(size_t));
+       lrp->increment = lrp->hdr.size / 2;     /* Size of the first growth step. */
+       lrp->nmodes = lock_modes;
+       lrp->nconflicts = 0;
+       lrp->nrequests = 0;
+       lrp->nreleases = 0;
+       lrp->ndeadlocks = 0;
+
+       /*
+        * As we write the region, we've got to maintain the alignment
+        * for the structures that follow each chunk.  This information
+        * ends up being encapsulated both in here as well as in the
+        * lock.h file for the XXX_SIZE macros.
+        */
+       /* Initialize conflict matrix: lock_modes x lock_modes bytes. */
+       curaddr = (u_int8_t *)lrp + sizeof(DB_LOCKREGION);
+       memcpy(curaddr, conflicts, lock_modes * lock_modes);
+       curaddr += lock_modes * lock_modes;
+
+       /*
+        * Initialize hash table.  Its position is recorded as an offset
+        * from the start of the region (hash_off), not as a pointer.
+        */
+       curaddr = (u_int8_t *)ALIGNP(curaddr, LOCK_HASH_ALIGN);
+       lrp->hash_off = curaddr - (u_int8_t *)lrp;
+       nelements = lrp->table_size;
+       __db_hashinit(curaddr, nelements);
+       curaddr += nelements * sizeof(DB_HASHTAB);
+
+       /*
+        * Initialize locks onto a free list. Since locks contain mutexes,
+        * we need to make sure that each lock is aligned on a MUTEX_ALIGNMENT
+        * boundary.
+        */
+       curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
+       tq_head = &lrp->free_locks;
+       SH_TAILQ_INIT(tq_head);
+
+       for (i = 0; i++ < maxlocks;     /* Runs exactly maxlocks times. */
+           curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
+               lp = (struct __db_lock *)curaddr;
+               lp->status = DB_LSTAT_FREE;
+               SH_TAILQ_INSERT_HEAD(tq_head, lp, links, __db_lock);
+       }
+
+       /* Initialize objects onto a free list; they follow the last lock. */
+       obj_head = &lrp->free_objs;
+       SH_TAILQ_INIT(obj_head);
+
+       for (i = 0; i++ < maxlocks; curaddr += sizeof(DB_LOCKOBJ)) {
+               op = (DB_LOCKOBJ *)curaddr;
+               SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
+       }
+
+       /*
+        * Initialize the string space; as for all shared memory allocation
+        * regions, this requires size_t alignment, since we store the
+        * lengths of malloc'd areas in the area.
+        */
+       curaddr = (u_int8_t *)ALIGNP(curaddr, sizeof(size_t));
+       lrp->mem_off = curaddr - (u_int8_t *)lrp;
+       __db_shalloc_init(curaddr, lrp->mem_bytes);
+
+       /* Release the lock. */
+       (void)__db_mutex_unlock(&lrp->hdr.lock, fd);
+
+       /* Now unmap the region; if that fails, remove what we created. */
+       if ((ret = __db_rclose(dbenv, fd, lrp)) != 0) {
+               (void)lock_unlink(path, 1 /* force */, dbenv);
+               return (ret);
+       }
+
+       return (0);
+}
+
+/*
+ * lock_open --
+ *     Open the lock region named by path, creating it first when
+ * DB_CREATE is set, and return a newly allocated lock table through ltp.
+ *
+ * path:  region location, handed to __lock_create and __db_ropen.
+ * flags: DB_CREATE to create the region if it does not exist yet;
+ *        DB_THREAD is accepted only when HAVE_SPINLOCKS is defined.
+ * mode:  handed to __lock_create.
+ * dbenv: environment; may be NULL, in which case no deadlock detector
+ *        mode is requested and the region's current mode is kept.
+ * ltp:   receives the handle on success; left untouched on failure.
+ *
+ * Returns 0 on success, otherwise an errno value.  Creation and opening
+ * are each retried (after a one second sleep) while they report EAGAIN,
+ * for at most three attempts.  On failure the handle is freed and, if
+ * DB_CREATE is still set (i.e. __lock_create did not report EEXIST), the
+ * region is unlinked.
+ */
+int
+lock_open(path, flags, mode, dbenv, ltp)
+       const char *path;
+       int flags, mode;
+       DB_ENV *dbenv;
+       DB_LOCKTAB **ltp;
+{
+       DB_LOCKTAB *lt;
+       int ret, retry_cnt;
+
+       /* Validate arguments. */
+#ifdef HAVE_SPINLOCKS
+#define        OKFLAGS (DB_CREATE | DB_THREAD)
+#else
+#define        OKFLAGS (DB_CREATE)
+#endif
+       if ((ret = __db_fchk(dbenv, "lock_open", flags, OKFLAGS)) != 0)
+               return (ret);
+
+       /*
+        * Create the lock table structure.  calloc leaves lt->region NULL,
+        * which the error path below relies on.
+        */
+       if ((lt = (DB_LOCKTAB *)calloc(1, sizeof(DB_LOCKTAB))) == NULL) {
+               __db_err(dbenv, "%s", strerror(errno));
+               return (ENOMEM);
+       }
+       lt->dbenv = dbenv;
+
+       /*
+        * Now, create the lock region if it doesn't already exist.
+        */
+       retry_cnt = 0;
+retry: if (LF_ISSET(DB_CREATE) &&
+           (ret = __lock_create(path, mode, dbenv)) != 0) {
+               if (ret == EAGAIN && ++retry_cnt < 3) {
+                       (void)__db_sleep(1, 0);
+                       goto retry;
+               }
+               if (ret != EEXIST)
+                       goto out;
+
+               /* The region already exists; we did not create it. */
+               LF_CLR(DB_CREATE);
+       }
+
+       /*
+        * Finally, open the region, map it in, and increment the
+        * reference count.
+        */
+       retry_cnt = 0;
+retry1:        if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE,
+           LF_ISSET(~(DB_CREATE | DB_THREAD)), &lt->fd, &lt->region)) != 0) {
+               if (ret == EAGAIN && ++retry_cnt < 3) {
+                       (void)__db_sleep(1, 0);
+                       goto retry1;
+               }
+               goto out;
+       }
+
+       if (lt->region->magic != DB_LOCKMAGIC) {
+               __db_err(dbenv, "lock_open: Bad magic number");
+               ret = EINVAL;
+               goto out;
+       }
+
+       /*
+        * Check for automatic deadlock detection.  A NULL environment
+        * requests no detector mode, so there is nothing to reconcile.
+        */
+       if (dbenv != NULL && dbenv->lk_detect != DB_LOCK_NORUN) {
+               if (lt->region->detect != DB_LOCK_NORUN &&
+                   dbenv->lk_detect != DB_LOCK_DEFAULT &&
+                   lt->region->detect != dbenv->lk_detect) {
+                       __db_err(dbenv,
+                           "lock_open: incompatible deadlock detector mode");
+                       ret = EINVAL;
+                       goto out;
+               }
+               if (lt->region->detect == DB_LOCK_NORUN)
+                       lt->region->detect = dbenv->lk_detect;
+       }
+
+       /* Set up remaining pointers into region. */
+       lt->conflicts = (u_int8_t *)lt->region + sizeof(DB_LOCKREGION);
+       lt->hashtab =
+           (DB_HASHTAB *)((u_int8_t *)lt->region + lt->region->hash_off);
+       lt->mem = (void *)((u_int8_t *)lt->region + lt->region->mem_off);
+       lt->reg_size = lt->region->hdr.size;
+
+       *ltp = lt;
+       return (0);
+
+/* Error handling. */
+out:   if (lt->region != NULL)
+               (void)__db_rclose(lt->dbenv, lt->fd, lt->region);
+       if (LF_ISSET(DB_CREATE))
+               (void)lock_unlink(path, 1, lt->dbenv);
+       free(lt);
+       return (ret);
+}
+
+/*
+ * lock_id --
+ *     Hand out the next locker id from the region-wide counter.  The
+ * counter restarts from zero once it has reached DB_LOCK_MAXID, so the
+ * ids returned are always at least 1.  Always returns 0.
+ */
+int
+lock_id(lt, idp)
+       DB_LOCKTAB *lt;
+       u_int32_t *idp;
+{
+       u_int32_t next;
+
+       /* The counter lives in the shared region; update it under the lock. */
+       LOCK_LOCKREGION(lt);
+       lt->region->id =
+           lt->region->id >= DB_LOCK_MAXID ? 1 : lt->region->id + 1;
+       next = lt->region->id;
+       UNLOCK_LOCKREGION(lt);
+
+       *idp = next;
+       return (0);
+}
+
+int
+lock_vec(lt, locker, flags, list, nlist, elistp)
+       DB_LOCKTAB *lt;
+       u_int32_t locker;
+       int flags, nlist;
+       DB_LOCKREQ *list, **elistp;
+{
+       struct __db_lock *lp;
+       DB_LOCKOBJ *sh_obj, *sh_locker;
+       int i, ret, run_dd;
+
+       /*
+        * lock_vec --
+        *      Carry out the nlist requests in list, in order, on behalf of
+        * locker, all under a single hold of the region lock.  Processing
+        * stops at the first request that fails; its error is returned and,
+        * if elistp is non-NULL, *elistp is pointed at that request.
+        * Returns 0 when every request succeeded.
+        *
+        * Validate arguments: DB_LOCK_NOWAIT is the only flag accepted.
+        */
+       if ((ret =
+           __db_fchk(lt->dbenv, "lock_vec", flags, DB_LOCK_NOWAIT)) != 0)
+               return (ret);
+
+       LOCK_LOCKREGION(lt);
+
+       if ((ret = __lock_validate_region(lt)) != 0) {
+               UNLOCK_LOCKREGION(lt);
+               return (ret);
+       }
+
+       ret = 0;
+       for (i = 0; i < nlist && ret == 0; i++) {
+               switch (list[i].op) {
+               case DB_LOCK_GET:
+                       ret = __lock_get_internal(lt, locker, flags,
+                           list[i].obj, list[i].mode, &lp);
+                       if (ret == 0)
+                               list[i].lock = LOCK_TO_OFFSET(lt, lp);
+                       break;
+               case DB_LOCK_PUT:
+                       lp = OFFSET_TO_LOCK(lt, list[i].lock);
+                       if (lp->holder != locker) {
+                               ret = DB_LOCK_NOTHELD;
+                               break;
+                       }
+                       list[i].mode = lp->mode;        /* Report the mode back. */
+
+                       /* XXX Need to copy the object. ??? */
+                       ret = __lock_put_internal(lt, lp, 0);
+                       break;
+               case DB_LOCK_PUT_ALL:
+                       /* Find the locker. */
+                       if ((ret = __lock_getobj(lt, locker,
+                           NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
+                               break;
+
+                       for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
+                           lp != NULL;
+                           lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock)) {
+                               if ((ret = __lock_put_internal(lt, lp, 0)) != 0)
+                                       break;  /* NOTE(review): locker is still freed below -- confirm intended. */
+                       }
+                       __lock_freeobj(lt, sh_locker);
+                       lt->region->nlockers--;
+                       break;
+               case DB_LOCK_PUT_OBJ:
+
+                       /* Look up the object in the hash table. */
+                       __db_hashlookup(lt->hashtab, __db_lockobj, links,
+                           list[i].obj, sh_obj, lt->region->table_size,
+                           __lock_ohash, __lock_cmp);
+                       if (sh_obj == NULL) {
+                               ret = EINVAL;
+                               break;
+                       }
+                       /*
+                        * Release waiters first, because they won't cause
+                        * anyone else to be awakened.  If we release the
+                        * lockers first, all the waiters get awakened
+                        * needlessly.
+                        */
+                       for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
+                           lp != NULL;
+                           lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock)) {
+                               lt->region->nreleases += lp->refcount;
+                               __lock_remove_waiter(lt, sh_obj, lp,
+                                   DB_LSTAT_NOGRANT);
+                               __lock_checklocker(lt, lp, 1);
+                       }
+
+                       for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
+                           lp != NULL;
+                           lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock)) {
+
+                               lt->region->nreleases += lp->refcount;
+                               SH_LIST_REMOVE(lp, locker_links, __db_lock);
+                               SH_TAILQ_REMOVE(&sh_obj->holders, lp, links,
+                                   __db_lock);
+                               lp->status = DB_LSTAT_FREE;
+                               SH_TAILQ_INSERT_HEAD(&lt->region->free_locks,
+                                   lp, links, __db_lock);
+                       }
+
+                       /* Now free the object. */
+                       __lock_freeobj(lt, sh_obj);
+                       break;
+#ifdef DEBUG
+               case DB_LOCK_DUMP:
+                       /* Find the locker. */
+                       if ((ret = __lock_getobj(lt, locker,
+                           NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
+                               break;
+
+                       for (lp = SH_LIST_FIRST(&sh_locker->heldby, __db_lock);
+                           lp != NULL;
+                           lp = SH_LIST_NEXT(lp, locker_links, __db_lock)) {
+                               __lock_printlock(lt, lp, 1);
+                               ret = EINVAL;   /* Any lock still held makes the dump fail. */
+                       }
+                       if (ret == 0) {
+                               __lock_freeobj(lt, sh_locker);
+                               lt->region->nlockers--;
+                       }
+                       break;
+#endif
+               default:
+                       ret = EINVAL;
+                       break;
+               }
+       }
+
+       if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
+               run_dd = 1;
+               lt->region->need_dd = 0;        /* Consumed while still locked. */
+       } else
+               run_dd = 0;
+
+       UNLOCK_LOCKREGION(lt);
+
+       if (ret == 0 && run_dd)
+               lock_detect(lt, 0, lt->region->detect);
+
+       if (elistp && ret != 0)
+               *elistp = &list[i - 1]; /* The loop already stepped i past the failure. */
+       return (ret);
+}
+
+/*
+ * lock_get --
+ *     Acquire a single lock on obj in lock_mode for locker.  On
+ * success the lock is returned through lock as a region offset and the
+ * region's request counter is bumped.  DB_LOCK_NOWAIT is the only flag
+ * accepted.  Returns 0 or the error from validation/acquisition.
+ */
+int
+lock_get(lt, locker, flags, obj, lock_mode, lock)
+       DB_LOCKTAB *lt;
+       u_int32_t locker;
+       int flags;
+       const DBT *obj;
+       db_lockmode_t lock_mode;
+       DB_LOCK *lock;
+{
+       struct __db_lock *granted;
+       int ret;
+
+       /* Reject unknown flags before touching the region. */
+       ret = __db_fchk(lt->dbenv, "lock_get", flags, DB_LOCK_NOWAIT);
+       if (ret != 0)
+               return (ret);
+
+       LOCK_LOCKREGION(lt);
+
+       /* Only attempt the acquisition once the mapping is known good. */
+       if ((ret = __lock_validate_region(lt)) == 0)
+               ret = __lock_get_internal(lt,
+                   locker, flags, obj, lock_mode, &granted);
+
+       if (ret == 0) {
+               *lock = LOCK_TO_OFFSET(lt, granted);
+               lt->region->nrequests++;
+       }
+
+       UNLOCK_LOCKREGION(lt);
+       return (ret);
+}
+
+/*
+ * lock_put --
+ *     Release one reference to the lock identified by the region
+ * offset lock, then run the deadlock detector if the release left the
+ * region flagged as needing it and a detector mode is configured.
+ *
+ * Returns 0 on success, the error from __lock_validate_region if the
+ * region could not be remapped, or the error from __lock_put_internal.
+ * The region lock is released on every return path.
+ */
+int
+lock_put(lt, lock)
+       DB_LOCKTAB *lt;
+       DB_LOCK lock;
+{
+       struct __db_lock *lockp;
+       int ret, run_dd;
+
+       LOCK_LOCKREGION(lt);
+
+       /* Don't leave the region locked if validation fails. */
+       if ((ret = __lock_validate_region(lt)) != 0) {
+               UNLOCK_LOCKREGION(lt);
+               return (ret);
+       }
+
+       lockp = OFFSET_TO_LOCK(lt, lock);
+       ret = __lock_put_internal(lt, lockp, 0);
+
+       __lock_checklocker(lt, lockp, 0);
+
+       /* Decide under the lock; the detector itself runs unlocked. */
+       if (lt->region->need_dd && lt->region->detect != DB_LOCK_NORUN) {
+               run_dd = 1;
+               lt->region->need_dd = 0;
+       } else
+               run_dd = 0;
+
+       UNLOCK_LOCKREGION(lt);
+
+       if (ret == 0 && run_dd)
+               lock_detect(lt, 0, lt->region->detect);
+
+       return (ret);
+}
+
+/*
+ * lock_close --
+ *     Close the region behind lt and free the lock table.  If closing
+ * the region fails, its error is returned and lt is left allocated.
+ */
+int
+lock_close(lt)
+       DB_LOCKTAB *lt;
+{
+       int ret;
+
+       ret = __db_rclose(lt->dbenv, lt->fd, lt->region);
+       if (ret == 0)
+               free(lt);       /* The table is released only on success. */
+       return (ret);
+}
+
+/*
+ * lock_unlink --
+ *     Remove the lock region file; a thin wrapper that forwards path,
+ * force and dbenv to __db_runlink and returns its result.
+ */
+int
+lock_unlink(path, force, dbenv)
+       const char *path;
+       int force;
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       ret = __db_runlink(dbenv,
+           DB_APP_NONE, path, DB_DEFAULT_LOCK_FILE, force);
+       return (ret);
+}
+
+/*
+ * __lock_put_internal --
+ *     Release lockp: drop one reference (or, when do_all is non-zero,
+ * all of them), and once no reference remains unlink the lock, promote
+ * any waiters that no longer conflict, reclaim the object if nothing
+ * holds it, and return the lock to the region's free list.
+ *
+ * Returns EINVAL for a lock that has no references, is neither held nor
+ * waiting, or has no object; otherwise 0.  Sets region->need_dd when
+ * waiters remain and none could be promoted.  The callers in this file
+ * hold the region lock around this call.
+ *
+ * XXX This looks like it could be void, but I'm leaving it returning
+ * an int because I think it will have to when we go through and add
+ * the appropriate error checking for the EINTR on mutexes.
+ */
+static int
+__lock_put_internal(lt, lockp, do_all)
+       DB_LOCKTAB *lt;
+       struct __db_lock *lockp;
+       int do_all;
+{
+       struct __db_lock *lp_w, *lp_h, *next_waiter;
+       DB_LOCKOBJ *sh_obj;
+       int state_changed;
+
+       if (lockp->refcount == 0 || (lockp->status != DB_LSTAT_HELD &&
+           lockp->status != DB_LSTAT_WAITING) || lockp->obj == 0) {
+               __db_err(lt->dbenv, "lock_put: invalid lock %lu",
+                   (u_long)((u_int8_t *)lockp - (u_int8_t *)lt->region));
+               return (EINVAL);
+       }
+
+       if (do_all)
+               lt->region->nreleases += lockp->refcount;
+       else
+               lt->region->nreleases++;
+       if (do_all == 0 && lockp->refcount > 1) {
+               lockp->refcount--;      /* Other references remain; keep the lock. */
+               return (0);
+       }
+
+       /* Get the object associated with this lock; obj is a self-relative offset. */
+       sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
+
+       /* Remove lock from locker list. */
+       SH_LIST_REMOVE(lockp, locker_links, __db_lock);
+
+       /* Remove this lock from its holders/waitlist. */
+       if (lockp->status != DB_LSTAT_HELD)
+               __lock_remove_waiter(lt, sh_obj, lockp, DB_LSTAT_FREE);
+       else
+               SH_TAILQ_REMOVE(&sh_obj->holders, lockp, links, __db_lock);
+
+       /*
+        * We need to do lock promotion.  We also need to determine if
+        * we're going to need to run the deadlock detector again.  If
+        * we release locks, and there are waiters, but no one gets promoted,
+        * then we haven't fundamentally changed the lockmgr state, so
+        * we may still have a deadlock and we have to run again.  However,
+        * if there were no waiters, or we actually promoted someone, then
+        * we are OK and we don't have to run it immediately.
+        *
+        * Waiters are examined in queue order and the scan stops at the
+        * first one that still conflicts with a holder owned by a
+        * different locker.
+        */
+       for (lp_w = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock),
+           state_changed = lp_w == NULL;
+           lp_w != NULL;
+           lp_w = next_waiter) {
+               next_waiter = SH_TAILQ_NEXT(lp_w, links, __db_lock);    /* Saved before lp_w is unlinked. */
+               for (lp_h = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
+                   lp_h != NULL;
+                   lp_h = SH_TAILQ_NEXT(lp_h, links, __db_lock)) {
+                       if (CONFLICTS(lt, lp_h->mode, lp_w->mode) &&
+                           lp_h->holder != lp_w->holder)
+                               break;
+               }
+               if (lp_h != NULL)       /* Found a conflict. */
+                       break;
+
+               /* No conflict, promote the waiting lock. */
+               SH_TAILQ_REMOVE(&sh_obj->waiters, lp_w, links, __db_lock);
+               lp_w->status = DB_LSTAT_PENDING;
+               SH_TAILQ_INSERT_TAIL(&sh_obj->holders, lp_w, links);
+
+               /* Wake up waiter by releasing the mutex it is blocked on. */
+               (void)__db_mutex_unlock(&lp_w->mutex, lt->fd);
+               state_changed = 1;
+       }
+
+       /* Check if object should be reclaimed: nothing holds it any more. */
+       if (SH_TAILQ_FIRST(&sh_obj->holders, __db_lock) == NULL) {
+               __db_hashremove_el(lt->hashtab, __db_lockobj, links, sh_obj,
+                   lt->region->table_size, __lock_lhash);
+               __db_shalloc_free(lt->mem, SH_DBT_PTR(&sh_obj->lockobj));
+               SH_TAILQ_INSERT_HEAD(&lt->region->free_objs, sh_obj, links,
+                   __db_lockobj);
+               state_changed = 1;
+       }
+
+       /* Free lock. */
+       lockp->status = DB_LSTAT_FREE;
+       SH_TAILQ_INSERT_HEAD(&lt->region->free_locks, lockp, links, __db_lock);
+
+       /*
+        * If we did not promote anyone, we need to run the deadlock
+        * detector again.
+        */
+       if (state_changed == 0)
+               lt->region->need_dd = 1;
+
+       return (0);
+}
+
+static int
+__lock_get_internal(lt, locker, flags, obj, lock_mode, lockp)
+       DB_LOCKTAB *lt;
+       u_int32_t locker;
+       int flags;
+       const DBT *obj;
+       db_lockmode_t lock_mode;
+       struct __db_lock **lockp;
+{
+       struct __db_lock *newl, *lp;
+       DB_LOCKOBJ *sh_obj, *sh_locker;
+       DB_LOCKREGION *lrp;
+       size_t newl_off;
+       int ret;
+
+       ret = 0;
+       /*
+        * __lock_get_internal --
+        *      Acquire a lock on obj in lock_mode for locker and return it
+        * through lockp.  If locker already holds the object in the same
+        * mode, that lock's reference count is bumped instead.  If the
+        * request conflicts, the call either fails with DB_LOCK_NOTGRANTED
+        * (DB_LOCK_NOWAIT set) or drops the region lock, blocks on the new
+        * lock's mutex, and re-takes the region lock on wakeup.  The callers
+        * in this file hold the region lock around this call.
+        *
+        * Returns 0, EINVAL, DB_LOCK_NOTGRANTED, DB_LOCK_DEADLOCK, or an
+        * error from __lock_grow_region/__lock_getobj.
+        *
+        * Check that lock mode is valid.
+        */
+
+       lrp = lt->region;
+       if ((u_int32_t)lock_mode >= lrp->nmodes) {
+               __db_err(lt->dbenv,
+                   "lock_get: invalid lock mode %lu\n", (u_long)lock_mode);
+               return (EINVAL);
+       }
+
+       /* Allocate a new lock.  Optimize for the common case of a grant. */
+       if ((newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock)) == NULL) {
+               if ((ret = __lock_grow_region(lt, DB_LOCK_LOCK, 0)) != 0)
+                       return (ret);
+               lrp = lt->region;       /* Growing remaps the region. */
+               newl = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
+       }
+       newl_off = LOCK_TO_OFFSET(lt, newl);    /* Offset survives a remap; the pointer may not. */
+
+       /* Optimize for common case of granting a lock. */
+       SH_TAILQ_REMOVE(&lrp->free_locks, newl, links, __db_lock);
+
+       newl->mode = lock_mode;
+       newl->status = DB_LSTAT_HELD;
+       newl->holder = locker;
+       newl->refcount = 1;
+
+       if ((ret =
+           __lock_getobj(lt, 0, (DBT *)obj, DB_LOCK_OBJTYPE, &sh_obj)) != 0)
+               return (ret);   /* NOTE(review): newl is not put back on free_locks here -- confirm. */
+
+       lrp = lt->region;                       /* getobj might have grown */
+       newl = OFFSET_TO_LOCK(lt, newl_off);
+
+       /* Now make new lock point to object */
+       newl->obj = SH_PTR_TO_OFF(newl, sh_obj);
+
+       /*
+        * Now we have a lock and an object and we need to see if we should
+        * grant the lock.  We use a FIFO ordering so we can only grant a
+        * new lock if it does not conflict with anyone on the holders list
+        * OR anyone on the waiters list.  In case of conflict, we put the
+        * new lock on the end of the waiters list.
+        */
+       for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
+           lp != NULL;
+           lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+               if (CONFLICTS(lt, lp->mode, lock_mode) &&
+                   locker != lp->holder)
+                       break;
+               else if (lp->holder == locker && lp->mode == lock_mode &&
+                   lp->status == DB_LSTAT_HELD) {
+                       /*
+                        * Lock is already held, just inc the ref count.
+                        * NOTE(review): newl goes back on the free list with
+                        * its status still DB_LSTAT_HELD -- confirm harmless.
+                        */
+                       lp->refcount++;
+                       SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
+                           __db_lock);
+                       *lockp = lp;
+                       return (0);
+               }
+       }
+
+       if (lp == NULL)
+               for (lp = SH_TAILQ_FIRST(&sh_obj->waiters, __db_lock);
+                   lp != NULL;
+                   lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+                       if (CONFLICTS(lt, lp->mode, lock_mode) &&
+                           locker != lp->holder)
+                               break;
+               }
+       if (lp == NULL)         /* From here on, lp != NULL means "must wait". */
+               SH_TAILQ_INSERT_TAIL(&sh_obj->holders, newl, links);
+       else if (!(flags & DB_LOCK_NOWAIT))
+               SH_TAILQ_INSERT_TAIL(&sh_obj->waiters, newl, links);
+       else {
+               /* Free the lock and return an error. */
+               newl->status = DB_LSTAT_FREE;
+               SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links, __db_lock);
+               return (DB_LOCK_NOTGRANTED);
+       }
+
+       /*
+        * This is really a blocker for the process, so initialize it
+        * set.  That way the current process will block when it tries
+        * to get it and the waking process will release it.
+        */
+       (void)__db_mutex_init(&newl->mutex,
+           MUTEX_LOCK_OFFSET(lt->region, &newl->mutex));
+       (void)__db_mutex_lock(&newl->mutex, lt->fd,
+           lt->dbenv == NULL ? NULL : lt->dbenv->db_yield);
+
+       /*
+        * Now, insert the lock onto its locker's list.
+        * NOTE(review): if this lookup fails, newl stays queued on the
+        * object with its mutex held -- confirm callers tolerate that.
+        */
+       if ((ret =
+           __lock_getobj(lt, locker, NULL, DB_LOCK_LOCKER, &sh_locker)) != 0)
+               return (ret);
+
+       lrp = lt->region;
+       SH_LIST_INSERT_HEAD(&sh_locker->heldby, newl, locker_links, __db_lock);
+
+       if (lp != NULL) {
+               newl->status = DB_LSTAT_WAITING;
+               lrp->nconflicts++;
+               /*
+                * We are about to wait; must release the region mutex.
+                * Then, when we wakeup, we need to reacquire the region
+                * mutex before continuing.
+                */
+               if (lrp->detect == DB_LOCK_NORUN)
+                       lt->region->need_dd = 1;
+               UNLOCK_LOCKREGION(lt);
+
+               /*
+                * We are about to wait; before waiting, see if the deadlock
+                * detector should be run.
+                */
+               if (lrp->detect != DB_LOCK_NORUN)
+                       ret = lock_detect(lt, 0, lrp->detect);  /* NOTE(review): a non-zero ret survives a later grant -- confirm. */
+
+               (void)__db_mutex_lock(&newl->mutex,
+                   lt->fd, lt->dbenv == NULL ? NULL : lt->dbenv->db_yield);
+
+               LOCK_LOCKREGION(lt);
+               if (newl->status != DB_LSTAT_PENDING) {
+                       /* Return to free list; the status says why we were woken. */
+                       __lock_checklocker(lt, newl, 0);
+                       SH_TAILQ_INSERT_HEAD(&lrp->free_locks, newl, links,
+                           __db_lock);
+                       switch (newl->status) {
+                               case DB_LSTAT_ABORTED:
+                                       ret = DB_LOCK_DEADLOCK;
+                                       break;
+                               case DB_LSTAT_NOGRANT:
+                                       ret = DB_LOCK_NOTGRANTED;
+                                       break;
+                               default:
+                                       ret = EINVAL;
+                                       break;
+                       }
+                       newl->status = DB_LSTAT_FREE;
+                       newl = NULL;
+               } else
+                       newl->status = DB_LSTAT_HELD;
+       }
+
+       *lockp = newl;
+       return (ret);
+}
+
+/*
+ * __lock_validate_region --
+ *     Called at every interface to notice that the region's recorded
+ * size no longer matches the size this process has mapped.  When they
+ * differ, the region is remapped and the process-local pointers into
+ * it are reset.  Returns 0, or the error from __db_rremap.
+ */
+static int
+__lock_validate_region(lt)
+       DB_LOCKTAB *lt;
+{
+       int ret;
+
+       ret = 0;
+       if (lt->reg_size != lt->region->hdr.size) {
+               /* The region changed size; map it in at its new size. */
+               ret = __db_rremap(lt->dbenv, lt->region,
+                   lt->reg_size, lt->region->hdr.size, lt->fd, &lt->region);
+               if (ret == 0)
+                       __lock_reset_region(lt);
+       }
+
+       return (ret);
+}
+
+/*
+ * __lock_grow_region --
+ *     We have run out of space; time to grow the region.
+ *
+ * which:   the resource that ran out: DB_LOCK_LOCK, DB_LOCK_OBJ or
+ *          DB_LOCK_MEM.  At least 10 locks/objects, or twice howmuch
+ *          bytes of string space, are guaranteed for that resource.
+ * howmuch: consulted only for DB_LOCK_MEM.
+ *
+ * The region grows by region->increment bytes (plus whatever the
+ * guarantee above and alignment padding require).  The new space is
+ * split among locks, objects and string space in proportion to how much
+ * of each is currently in use.  The increment is then doubled for the
+ * next call.
+ *
+ * Returns 0, or the error from __db_rgrow/__db_rremap.  lt->region may
+ * change, so callers must reload any pointers into the region.
+ */
+static int
+__lock_grow_region(lt, which, howmuch)
+       DB_LOCKTAB *lt;
+       int which;
+       size_t howmuch;
+{
+       struct __db_lock *newl;
+       struct lock_header *lock_head;
+       struct obj_header *obj_head;
+       DB_LOCKOBJ *op;
+       DB_LOCKREGION *lrp;
+       float lock_ratio, obj_ratio;
+       size_t incr, oldsize, used;
+       u_int32_t i, newlocks, newmem, newobjs;
+       int ret, usedlocks, usedmem, usedobjs;
+       u_int8_t *curaddr;
+
+       lrp = lt->region;
+       oldsize = lrp->hdr.size;
+       incr = lrp->increment;
+
+       /* Figure out how much of each sort of space we have. */
+       usedmem = lrp->mem_bytes - __db_shalloc_count(lt->mem);
+       usedobjs = lrp->numobjs - __lock_count_objs(lrp);
+       usedlocks = lrp->maxlocks - __lock_count_locks(lrp);
+
+       /*
+        * Figure out what fraction of the used space belongs to each
+        * different type of "thing" in the region.  Then partition the
+        * new space up according to this ratio.
+        */
+       used = usedmem +
+           usedlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) +
+           usedobjs * sizeof(DB_LOCKOBJ);
+
+       lock_ratio = usedlocks *
+           ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT) / (float)used;
+       obj_ratio = usedobjs * sizeof(DB_LOCKOBJ) / (float)used;
+
+       newlocks = (u_int32_t)(lock_ratio *
+           incr / ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
+       newobjs = (u_int32_t)(obj_ratio * incr / sizeof(DB_LOCKOBJ));
+       newmem = incr -
+           (newobjs * sizeof(DB_LOCKOBJ) +
+           newlocks * ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT));
+
+       /*
+        * Make sure we allocate enough memory for the object being
+        * requested.
+        */
+       switch (which) {
+               case DB_LOCK_LOCK:
+                       if (newlocks == 0) {
+                               /*
+                                * Each lock occupies an aligned slot when it
+                                * is carved out below, so grow by the aligned
+                                * size; otherwise the string space that
+                                * follows would run past the new region end.
+                                */
+                               newlocks = 10;
+                               incr += newlocks * ALIGN(
+                                   sizeof(struct __db_lock), MUTEX_ALIGNMENT);
+                       }
+                       break;
+               case DB_LOCK_OBJ:
+                       if (newobjs == 0) {
+                               newobjs = 10;
+                               incr += newobjs * sizeof(DB_LOCKOBJ);
+                       }
+                       break;
+               case DB_LOCK_MEM:
+                       if (newmem < howmuch * 2) {
+                               incr += howmuch * 2 - newmem;
+                               newmem = howmuch * 2;
+                       }
+                       break;
+       }
+
+       /* Round the increment up to size_t; the slack becomes string space. */
+       newmem += ALIGN(incr, sizeof(size_t)) - incr;
+       incr = ALIGN(incr, sizeof(size_t));
+
+       /*
+        * Since we are going to be allocating locks at the beginning of the
+        * new chunk, we need to make sure that the chunk is MUTEX_ALIGNMENT
+        * aligned.  We did not guarantee this when we created the region, so
+        * we may need to pad the old region by extra bytes to ensure this
+        * alignment.
+        */
+       incr += ALIGN(oldsize, MUTEX_ALIGNMENT) - oldsize;
+
+       __db_err(lt->dbenv,
+           "Growing lock region: %lu locks %lu objs %lu bytes",
+           (u_long)newlocks, (u_long)newobjs, (u_long)newmem);
+
+       if ((ret = __db_rgrow(lt->dbenv, lt->fd, incr)) != 0)
+               return (ret);
+       if ((ret = __db_rremap(lt->dbenv,
+           lt->region, oldsize, oldsize + incr, lt->fd, &lt->region)) != 0)
+               return (ret);
+       __lock_reset_region(lt);
+
+       /* Update region parameters. */
+       lrp = lt->region;
+       lrp->increment = incr << 1;
+       lrp->maxlocks += newlocks;
+       lrp->numobjs += newobjs;
+       lrp->mem_bytes += newmem;
+
+       /* The new space starts at the old end, rounded up for mutexes. */
+       curaddr = (u_int8_t *)lrp + oldsize;
+       curaddr = (u_int8_t *)ALIGNP(curaddr, MUTEX_ALIGNMENT);
+
+       /* Put new locks onto the free list. */
+       lock_head = &lrp->free_locks;
+       for (i = 0; i++ < newlocks;
+           curaddr += ALIGN(sizeof(struct __db_lock), MUTEX_ALIGNMENT)) {
+               newl = (struct __db_lock *)curaddr;
+               SH_TAILQ_INSERT_HEAD(lock_head, newl, links, __db_lock);
+       }
+
+       /* Put new objects onto the free list.  */
+       obj_head = &lrp->free_objs;
+       for (i = 0; i++ < newobjs; curaddr += sizeof(DB_LOCKOBJ)) {
+               op = (DB_LOCKOBJ *)curaddr;
+               SH_TAILQ_INSERT_HEAD(obj_head, op, links, __db_lockobj);
+       }
+
+       /*
+        * Hand the remainder to the string allocator: write a length
+        * header in front of it and free it like any other allocation.
+        */
+       *((size_t *)curaddr) = newmem - sizeof(size_t);
+       curaddr += sizeof(size_t);
+       __db_shalloc_free(lt->mem, curaddr);
+
+       return (0);
+}
+
+#ifdef DEBUG
+/*
+ * __lock_dump_region --
+ *     Debugging aid: print the contents of the lock region on stdout.
+ *
+ * lt:    lock table whose region is dumped.
+ * flags: bitwise OR of LOCK_DEBUG_* values selecting the optional sections
+ *        (conflict matrix, hash buckets, lockers, objects, free lists and
+ *        the shared memory free list).  The region parameters and the
+ *        statistics are always printed.
+ *
+ * Addresses and offsets are printed through u_long rather than u_int so
+ * that they are not truncated where a pointer is wider than an int.
+ */
+void
+__lock_dump_region(lt, flags)
+       DB_LOCKTAB *lt;
+       unsigned long flags;
+{
+       struct __db_lock *lp;
+       DB_LOCKOBJ *op;
+       DB_LOCKREGION *lrp;
+       u_int32_t i, j;
+
+       lrp = lt->region;
+
+       /* Region header and sizing parameters. */
+       printf("Lock region parameters\n");
+       printf("%s:0x%x\t%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\t\n",
+           "magic      ", lrp->magic,
+           "version    ", (u_long)lrp->version,
+           "processes  ", (u_long)lrp->hdr.refcnt,
+           "maxlocks   ", (u_long)lrp->maxlocks,
+           "table size ", (u_long)lrp->table_size,
+           "nmodes     ", (u_long)lrp->nmodes,
+           "numobjs    ", (u_long)lrp->numobjs);
+       printf("%s:%lu\t%s:%lu\t%s:%lu\n%s:%lu\t%s:%lu\t%s:%lu\n",
+           "size       ", (u_long)lrp->hdr.size,
+           "nlockers   ", (u_long)lrp->nlockers,
+           "hash_off   ", (u_long)lrp->hash_off,
+           "increment  ", (u_long)lrp->increment,
+           "mem_off    ", (u_long)lrp->mem_off,
+           "mem_bytes  ", (u_long)lrp->mem_bytes);
+#ifndef HAVE_SPINLOCKS
+       printf("Mutex: off %lu", (u_long)lrp->hdr.lock.off);
+#endif
+#ifdef MUTEX_STATISTICS
+       printf(" waits %lu nowaits %lu",
+           (u_long)lrp->hdr.lock.mutex_set_wait,
+           (u_long)lrp->hdr.lock.mutex_set_nowait);
+#endif
+       /* Region statistics. */
+       printf("\n%s:%lu\t%s:%lu\t%s:%lu\t%s:%lu\n",
+           "nconflicts ", (u_long)lrp->nconflicts,
+           "nrequests  ", (u_long)lrp->nrequests,
+           "nreleases  ", (u_long)lrp->nreleases,
+           "ndeadlocks ", (u_long)lrp->ndeadlocks);
+       printf("need_dd    %lu\n", (u_long)lrp->need_dd);
+       if (flags & LOCK_DEBUG_CONF) {
+               printf("\nConflict matrix\n");
+
+               /* The matrix is nmodes x nmodes, stored row by row. */
+               for (i = 0; i < lrp->nmodes; i++) {
+                       for (j = 0; j < lrp->nmodes; j++)
+                               printf("%lu\t",
+                                   (u_long)lt->conflicts[i * lrp->nmodes + j]);
+                       printf("\n");
+               }
+       }
+
+       /* Walk every hash bucket, dumping the lockers and/or objects in it. */
+       for (i = 0; i < lrp->table_size; i++) {
+               op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
+               if (op != NULL && flags & LOCK_DEBUG_BUCKET)
+                       printf("Bucket %lu:\n", (unsigned long)i);
+               while (op != NULL) {
+                       if (op->type == DB_LOCK_LOCKER &&
+                           flags & LOCK_DEBUG_LOCKERS)
+                               __lock_dump_locker(lt, op);
+                       else if (flags & LOCK_DEBUG_OBJECTS &&
+                           op->type == DB_LOCK_OBJTYPE)
+                               __lock_dump_object(lt, op);
+                       op = SH_TAILQ_NEXT(op, links, __db_lockobj);
+               }
+       }
+
+       if (flags & LOCK_DEBUG_LOCK) {
+               printf("\nLock Free List\n");
+               for (lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
+                   lp != NULL;
+                   lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+                       printf("0x%lx: %lu\t%lu\t%lu\t0x%lx\n", (u_long)lp,
+                           (u_long)lp->holder, (u_long)lp->mode,
+                           (u_long)lp->status, (u_long)lp->obj);
+               }
+       }
+
+       /*
+        * NOTE(review): the object free list is gated by LOCK_DEBUG_LOCK,
+        * the same bit as the lock free list above -- confirm that this is
+        * intended rather than a separate object flag.
+        */
+       if (flags & LOCK_DEBUG_LOCK) {
+               printf("\nObject Free List\n");
+               for (op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
+                   op != NULL;
+                   op = SH_TAILQ_NEXT(op, links, __db_lockobj))
+                       printf("0x%lx\n", (u_long)op);
+       }
+
+       if (flags & LOCK_DEBUG_MEM) {
+               printf("\nMemory Free List\n");
+               __db_shalloc_dump(stdout, lt->mem);
+       }
+}
+
+/*
+ * __lock_dump_locker --
+ *     Print "L <locker id>" followed by every lock on the locker's heldby
+ *     list; a locker with an empty list is terminated with a bare newline.
+ */
+static void
+__lock_dump_locker(lt, op)
+       DB_LOCKTAB *lt;
+       DB_LOCKOBJ *op;
+{
+       struct __db_lock *held;
+       u_int32_t id;
+
+       /* The locker id is read from the bytes the object's DBT refers to. */
+       memcpy(&id, SH_DBT_PTR(&op->lockobj), sizeof(u_int32_t));
+       printf("L %lu", (u_long)id);
+
+       held = SH_LIST_FIRST(&op->heldby, __db_lock);
+       if (held == NULL) {
+               printf("\n");
+               return;
+       }
+
+       do {
+               __lock_printlock(lt, held, 0);
+               held = SH_LIST_NEXT(held, locker_links, __db_lock);
+       } while (held != NULL);
+}
+
+/*
+ * __lock_dump_object --
+ *     Print the object's key bytes as characters on one line, then an "H:"
+ *     line listing its holders and, if there are any, a "W:" line listing
+ *     its waiters.
+ */
+static void
+__lock_dump_object(lt, op)
+       DB_LOCKTAB *lt;
+       DB_LOCKOBJ *op;
+{
+       struct __db_lock *lock;
+       u_int32_t n;
+       char *cp;
+
+       /* Raw key bytes, one character each. */
+       cp = SH_DBT_PTR(&op->lockobj);
+       n = 0;
+       while (n < op->lockobj.size) {
+               printf("%c", (int)*cp);
+               cp++;
+               n++;
+       }
+       printf("\n");
+
+       /* Holders. */
+       printf("H:");
+       lock = SH_TAILQ_FIRST(&op->holders, __db_lock);
+       while (lock != NULL) {
+               __lock_printlock(lt, lock, 0);
+               lock = SH_TAILQ_NEXT(lock, links, __db_lock);
+       }
+
+       /* Waiters; the heading is only printed when there is at least one. */
+       lock = SH_TAILQ_FIRST(&op->waiters, __db_lock);
+       if (lock == NULL)
+               return;
+       printf("\nW:");
+       while (lock != NULL) {
+               __lock_printlock(lt, lock, 0);
+               lock = SH_TAILQ_NEXT(lock, links, __db_lock);
+       }
+}
+
+/*
+ * __lock_is_locked --
+ *     Report whether a locker holds a lock of a given mode on an object.
+ *
+ * lt:     lock table.
+ * locker: locker id to look for.
+ * dbt:    key of the object to examine.
+ * mode:   lock mode that must match exactly.
+ *
+ * Returns 1 if a lock with that holder and mode is on the object's holders
+ * list, 0 otherwise (including when the object is not in the hash table).
+ */
+int
+__lock_is_locked(lt, locker, dbt, mode)
+       DB_LOCKTAB *lt;
+       u_int32_t locker;
+       DBT *dbt;
+       db_lockmode_t mode;
+{
+       struct __db_lock *lp;
+       DB_LOCKOBJ *sh_obj;
+       DB_LOCKREGION *lrp;
+
+       lrp = lt->region;
+
+       /* Look up the object in the hash table. */
+       __db_hashlookup(lt->hashtab, __db_lockobj, links,
+           dbt, sh_obj, lrp->table_size, __lock_ohash, __lock_cmp);
+       if (sh_obj == NULL)
+               return (0);
+
+       /*
+        * Walk the holders list.  The step must use SH_TAILQ_NEXT: stepping
+        * with SH_TAILQ_FIRST would revisit the head forever whenever the
+        * first holder does not match.
+        */
+       for (lp = SH_TAILQ_FIRST(&sh_obj->holders, __db_lock);
+           lp != NULL;
+           lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+               if (lp->holder == locker && lp->mode == mode)
+                       return (1);
+       }
+
+       return (0);
+}
+
+/*
+ * __lock_printlock --
+ *     Print one lock on stdout: holder id, mode name, reference count and
+ *     status name, followed by either the page number read from the locked
+ *     object's key (ispgno != 0) or the object's offset in the region and
+ *     its raw key bytes.
+ */
+static void
+__lock_printlock(lt, lp, ispgno)
+       DB_LOCKTAB *lt;
+       struct __db_lock *lp;
+       int ispgno;
+{
+       DB_LOCKOBJ *objp;
+       db_pgno_t page;
+       size_t region_off;
+       u_int8_t *key;
+       const char *mode_name, *status_name;
+
+       /* Translate the lock mode into a printable name. */
+       mode_name = "UNKNOWN";
+       switch (lp->mode) {
+       case DB_LOCK_IREAD:
+               mode_name = "IREAD";
+               break;
+       case DB_LOCK_IWR:
+               mode_name = "IWR";
+               break;
+       case DB_LOCK_IWRITE:
+               mode_name = "IWRITE";
+               break;
+       case DB_LOCK_NG:
+               mode_name = "NG";
+               break;
+       case DB_LOCK_READ:
+               mode_name = "READ";
+               break;
+       case DB_LOCK_WRITE:
+               mode_name = "WRITE";
+               break;
+       default:
+               break;
+       }
+
+       /* Translate the lock status into a printable name. */
+       status_name = "UNKNOWN";
+       switch (lp->status) {
+       case DB_LSTAT_ABORTED:
+               status_name = "ABORT";
+               break;
+       case DB_LSTAT_ERR:
+               status_name = "ERROR";
+               break;
+       case DB_LSTAT_FREE:
+               status_name = "FREE";
+               break;
+       case DB_LSTAT_HELD:
+               status_name = "HELD";
+               break;
+       case DB_LSTAT_NOGRANT:
+               status_name = "NONE";
+               break;
+       case DB_LSTAT_WAITING:
+               status_name = "WAIT";
+               break;
+       case DB_LSTAT_PENDING:
+               status_name = "PENDING";
+               break;
+       default:
+               break;
+       }
+
+       printf("\t%lu\t%s\t%lu\t%s\t", (u_long)lp->holder,
+           mode_name, (u_long)lp->refcount, status_name);
+
+       /* lp->obj is the offset from the lock to the object it refers to. */
+       objp = (DB_LOCKOBJ *)((u_int8_t *)lp + lp->obj);
+       key = SH_DBT_PTR(&objp->lockobj);
+
+       if (ispgno) {
+               /* Assume this is a DBT lock. */
+               memcpy(&page, key, sizeof(db_pgno_t));
+               printf("page %lu\n", (u_long)page);
+               return;
+       }
+
+       /* Otherwise identify the object by its offset within the region. */
+       region_off = (u_int8_t *)objp - (u_int8_t *)lt->region;
+       printf("0x%lx ", (u_long)region_off);
+       __db_pr(key, objp->lockobj.size);
+       printf("\n");
+}
+
+#endif
+
+/*
+ * __lock_count_locks --
+ *     Return the number of entries on the region's free lock list.
+ */
+static int
+__lock_count_locks(lrp)
+       DB_LOCKREGION *lrp;
+{
+       struct __db_lock *lp;
+       int nfree;
+
+       nfree = 0;
+       lp = SH_TAILQ_FIRST(&lrp->free_locks, __db_lock);
+       while (lp != NULL) {
+               ++nfree;
+               lp = SH_TAILQ_NEXT(lp, links, __db_lock);
+       }
+
+       return (nfree);
+}
+
+/*
+ * __lock_count_objs --
+ *     Return the number of entries on the region's free object list.
+ */
+static int
+__lock_count_objs(lrp)
+       DB_LOCKREGION *lrp;
+{
+       DB_LOCKOBJ *op;
+       int nfree;
+
+       nfree = 0;
+       op = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
+       while (op != NULL) {
+               ++nfree;
+               op = SH_TAILQ_NEXT(op, links, __db_lockobj);
+       }
+
+       return (nfree);
+}
+
+/*
+ * __lock_getobj --
+ *     Find the lock object for a key in the hash table, creating it from
+ *     the region's free object list if it is not there yet.
+ *
+ * lt:     lock table.
+ * locker: locker id; this is the key when type is not DB_LOCK_OBJTYPE.
+ * dbt:    object key; only dereferenced when type is DB_LOCK_OBJTYPE.
+ * type:   DB_LOCK_OBJTYPE or DB_LOCK_LOCKER.
+ * objp:   on success, set to the object that was found or created.
+ *
+ * Returns 0 on success, otherwise the error from growing the region or
+ * from allocating storage for the key.  Growing the region remaps it, so
+ * lt->region is re-read after every grow.
+ *
+ * PUBLIC: int __lock_getobj  __P((DB_LOCKTAB *,
+ * PUBLIC:     u_int32_t, DBT *, u_int32_t type, DB_LOCKOBJ **));
+ */
+int
+__lock_getobj(lt, locker, dbt, type, objp)
+       DB_LOCKTAB *lt;
+       u_int32_t locker, type;
+       DBT *dbt;
+       DB_LOCKOBJ **objp;
+{
+       DB_LOCKREGION *lrp;
+       DB_LOCKOBJ *sh_obj;
+       u_int32_t obj_size;
+       int ret;
+       void *p, *src;
+
+       lrp = lt->region;
+
+       /* Look up the object in the hash table. */
+       if (type == DB_LOCK_OBJTYPE) {
+               __db_hashlookup(lt->hashtab, __db_lockobj, links, dbt, sh_obj,
+                   lrp->table_size, __lock_ohash, __lock_cmp);
+               obj_size = dbt->size;
+       } else {
+               __db_hashlookup(lt->hashtab, __db_lockobj, links, locker,
+                   sh_obj, lrp->table_size, __lock_locker_hash,
+                   __lock_locker_cmp);
+               obj_size = sizeof(locker);
+       }
+
+       /*
+        * If we found the object, then we can just return it.  If
+        * we didn't find the object, then we need to create it.
+        */
+       if (sh_obj == NULL) {
+               /* Create new object and then insert it into hash table. */
+               if ((sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj))
+                   == NULL) {
+                       if ((ret = __lock_grow_region(lt, DB_LOCK_OBJ, 0)) != 0)
+                               return (ret);
+                       lrp = lt->region;
+                       sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
+               }
+               if ((ret = __db_shalloc(lt->mem, obj_size, 0, &p)) != 0) {
+                       if ((ret = __lock_grow_region(lt,
+                           DB_LOCK_MEM, obj_size)) != 0)
+                               return (ret);
+                       lrp = lt->region;
+                       /* Reacquire the head of the list. */
+                       sh_obj = SH_TAILQ_FIRST(&lrp->free_objs, __db_lockobj);
+                       /*
+                        * Do not ignore a second failure: p would be left
+                        * unset and the memcpy below would write through it.
+                        */
+                       if ((ret =
+                           __db_shalloc(lt->mem, obj_size, 0, &p)) != 0)
+                               return (ret);
+               }
+               sh_obj->type = type;
+               src = type == DB_LOCK_OBJTYPE ? dbt->data : (void *)&locker;
+               memcpy(p, src, obj_size);
+               SH_TAILQ_REMOVE(&lrp->free_objs, sh_obj, links, __db_lockobj);
+
+               SH_TAILQ_INIT(&sh_obj->waiters);
+               if (type == DB_LOCK_LOCKER)
+                       SH_LIST_INIT(&sh_obj->heldby);
+               else
+                       SH_TAILQ_INIT(&sh_obj->holders);
+               /* The key is recorded as an offset relative to the DBT. */
+               sh_obj->lockobj.size = obj_size;
+               sh_obj->lockobj.off = SH_PTR_TO_OFF(&sh_obj->lockobj, p);
+
+               __db_hashinsert(lt->hashtab, __db_lockobj, links, sh_obj,
+                   lrp->table_size, __lock_lhash);
+
+               if (type == DB_LOCK_LOCKER)
+                       lrp->nlockers++;
+       }
+
+       *objp = sh_obj;
+       return (0);
+}
+
+/*
+ * __lock_remove_waiter --
+ *     Take a lock off an object's waiters list and wake its waiter.
+ *
+ * Any lock on the waitlist has a process waiting for it.  Therefore, we
+ * can't return the lock to the freelist immediately.  Instead, we can
+ * remove the lock from the list of waiters, set the status field of the
+ * lock, and then let the process waking up return the lock to the
+ * free list.
+ *
+ * lt:     lock table; supplies the fd passed to the mutex unlock.
+ * sh_obj: object whose waiters list currently contains lockp.
+ * lockp:  the waiting lock to remove.
+ * status: status stored in the lock before its mutex is released.
+ */
+static void
+__lock_remove_waiter(lt, sh_obj, lockp, status)
+       DB_LOCKTAB *lt;
+       DB_LOCKOBJ *sh_obj;
+       struct __db_lock *lockp;
+       db_status_t status;
+{
+       SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
+       lockp->status = status;
+
+       /* Wake whoever is waiting on this lock. */
+       (void)__db_mutex_unlock(&lockp->mutex, lt->fd);
+}
+
+/*
+ * __lock_freeobj --
+ *     Unhash an object, release the shared memory holding its key and put
+ *     the object back at the head of the region's free object list.
+ */
+static void
+__lock_freeobj(lt, obj)
+       DB_LOCKTAB *lt;
+       DB_LOCKOBJ *obj;
+{
+       DB_LOCKREGION *lrp;
+
+       lrp = lt->region;
+
+       __db_hashremove_el(lt->hashtab,
+           __db_lockobj, links, obj, lrp->table_size, __lock_lhash);
+       __db_shalloc_free(lt->mem, SH_DBT_PTR(&obj->lockobj));
+       SH_TAILQ_INSERT_HEAD(&lrp->free_objs, obj, links, __db_lockobj);
+}
+
+/*
+ * __lock_checklocker --
+ *     Optionally unlink lockp from its locker's list, then free the locker
+ *     object if it no longer has any locks on its heldby list.
+ */
+static void
+__lock_checklocker(lt, lockp, do_remove)
+       DB_LOCKTAB *lt;
+       struct __db_lock *lockp;
+       int do_remove;
+{
+       DB_LOCKOBJ *locker_obj;
+
+       if (do_remove) {
+               SH_LIST_REMOVE(lockp, locker_links, __db_lock);
+       }
+
+       /* Nothing more to do if the locker object cannot be obtained. */
+       if (__lock_getobj(lt,
+           lockp->holder, NULL, DB_LOCK_LOCKER, &locker_obj) != 0)
+               return;
+
+       /* The locker still has locks: keep it. */
+       if (SH_LIST_FIRST(&locker_obj->heldby, __db_lock) != NULL)
+               return;
+
+       /* The locker's list is empty: free up the object. */
+       __lock_freeobj(lt, locker_obj);
+       lt->region->nlockers--;
+}
+
+/*
+ * __lock_reset_region --
+ *     Recompute the lock table's cached pointers (conflict matrix, hash
+ *     table, shared memory area) and cached size from the current address
+ *     of the region.
+ */
+static void
+__lock_reset_region(lt)
+       DB_LOCKTAB *lt;
+{
+       DB_LOCKREGION *lrp;
+       u_int8_t *base;
+
+       lrp = lt->region;
+       base = (u_int8_t *)lrp;
+
+       /* The conflict matrix immediately follows the region header. */
+       lt->conflicts = base + sizeof(DB_LOCKREGION);
+       lt->hashtab = (DB_HASHTAB *)(base + lrp->hash_off);
+       lt->mem = (void *)(base + lrp->mem_off);
+       lt->reg_size = lrp->hdr.size;
+}
diff --git a/db2/lock/lock_conflict.c b/db2/lock/lock_conflict.c
new file mode 100644 (file)
index 0000000..ff0287f
--- /dev/null
@@ -0,0 +1,39 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)lock_conflict.c      10.2 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * The conflict arrays are set up such that the row is the lock you
+ * are holding and the column is the lock that is desired.
+ *
+ * Each array is a square matrix stored row by row in a flat array.
+ * Presumably a non-zero entry means that the two modes conflict --
+ * confirm against the code that consults the matrix.
+ */
+
+/* Three-mode matrix; rows and columns are labelled N, R and W below. */
+const u_int8_t db_rw_conflicts[] = {
+       /*              N   R   W */
+       /*   N */       0,  0,  0,
+       /*   R */       0,  0,  1,
+       /*   W */       0,  1,  1
+};
+
+/*
+ * Six-mode matrix, stored row by row; rows and columns are labelled
+ * N, S, X, IS, IX and SIX below.
+ * NOTE(review): the X row has a 1 in the N column while the N row is all
+ * zero, so the matrix is not symmetric -- confirm that this is intended.
+ */
+const u_int8_t db_riw_conflicts[] = {
+       /*              N       S       X       IS      IX      SIX */
+       /*   N */       0,      0,      0,      0,      0,      0,
+       /*   S */       0,      0,      1,      0,      1,      1,
+       /*   X */       1,      1,      1,      1,      1,      1,
+       /*  IS */       0,      0,      1,      0,      0,      0,
+       /*  IX */       0,      1,      1,      0,      0,      0,
+       /* SIX */       0,      1,      1,      0,      0,      0
+};
diff --git a/db2/lock/lock_deadlock.c b/db2/lock/lock_deadlock.c
new file mode 100644 (file)
index 0000000..54a73af
--- /dev/null
@@ -0,0 +1,496 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)lock_deadlock.c      10.20 (Sleepycat) 8/21/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "lock.h"
+#include "common_ext.h"
+
+/*
+ * Bitmap helpers for the waits-for matrix.  A map is an array of u_int32_t
+ * words holding 32 bits each; bit B lives in word B / 32 at position B % 32.
+ *
+ * The shifted constant is an unsigned 32-bit 1: shifting a signed int 1
+ * into bit 31 is undefined behavior.
+ */
+
+/* Non-zero if bit N is set in map M. */
+#define        ISSET_MAP(M, N) ((M)[(N) / 32] & ((u_int32_t)1 << ((N) % 32)))
+
+/* Zero the first N words of map M. */
+#define        CLEAR_MAP(M, N) {                                               \
+       u_int32_t __i;                                                  \
+       for (__i = 0; __i < (N); __i++)                                 \
+               (M)[__i] = 0;                                           \
+}
+
+/* Set, respectively clear, bit B in map M. */
+#define        SET_MAP(M, B)   ((M)[(B) / 32] |= ((u_int32_t)1 << ((B) % 32)))
+#define        CLR_MAP(M, B)   ((M)[(B) / 32] &= ~((u_int32_t)1 << ((B) % 32)))
+
+/* OR the first N words of map S into map D. */
+#define        OR_MAP(D, S, N) {                                               \
+       u_int32_t __i;                                                  \
+       for (__i = 0; __i < (N); __i++)                                 \
+               (D)[__i] |= (S)[__i];                                   \
+}
+
+/* Sentinel meaning "no locker selected to be aborted". */
+#define        BAD_KILLID      0xffffffff
+
+/*
+ * Per-locker entry of the id map built by __dd_build; the array is indexed
+ * by the deadlock detector id (dd_id) assigned to each locker object.
+ */
+typedef struct {
+       int             valid;          /* Set once the locker is seen as a
+                                          holder or waiter in __dd_build. */
+       u_int32_t       id;             /* The locker's id (lock holder). */
+       DB_LOCK         last_lock;      /* LOCK_TO_OFFSET of the first lock
+                                          on the locker's heldby list. */
+} locker_info;
+
+static int  __dd_abort __P((DB_ENV *, locker_info *));
+static int  __dd_build __P((DB_ENV *, u_int32_t **, int *, locker_info **));
+#ifdef DEBUG
+static void __dd_debug __P((DB_ENV *, locker_info *, u_int32_t *, int));
+#endif
+static u_int32_t
+          *__dd_find __P((u_int32_t *, locker_info *, u_int32_t));
+
+/*
+ * lock_detect --
+ *     Run one pass of the deadlock detector and, if some locker turns out
+ *     to be (transitively) waiting on itself, abort one of the lockers
+ *     involved.
+ *
+ * lt:    lock table to examine.
+ * flags: 0 or DB_LOCK_CONFLICT; with DB_LOCK_CONFLICT the pass is skipped
+ *        unless the region's need_dd flag is set.
+ * atype: victim selection policy: DB_LOCK_DEFAULT, DB_LOCK_RANDOM,
+ *        DB_LOCK_OLDEST or DB_LOCK_YOUNGEST.
+ *
+ * Returns 0 on success, EINVAL for an unknown atype when a deadlock was
+ * found, or the error returned by __db_fchk, __dd_build or __dd_abort.
+ */
+int
+lock_detect(lt, flags, atype)
+       DB_LOCKTAB *lt;
+       int flags;
+       u_int32_t atype;
+{
+       DB_ENV *dbenv;
+       locker_info *idmap;
+       u_int32_t *bitmap, *deadlock, killid;
+       int do_pass, i, nlockers, nentries, ret;
+
+       /* Validate arguments. */
+       if ((ret =
+           __db_fchk(lt->dbenv, "lock_detect", flags, DB_LOCK_CONFLICT)) != 0)
+               return (ret);
+
+       /* Check if a detector run is necessary. */
+       do_pass = 1;
+       dbenv = lt->dbenv;
+       if (LF_ISSET(DB_LOCK_CONFLICT)) {
+               /* Make a pass every time a lock waits. */
+               LOCK_LOCKREGION(lt);
+               do_pass = dbenv->lk_info->region->need_dd != 0;
+               UNLOCK_LOCKREGION(lt);
+       }
+
+       if (!do_pass)
+               return (0);
+
+       /* Build the waits-for bitmap. */
+       if ((ret = __dd_build(dbenv, &bitmap, &nlockers, &idmap)) != 0)
+               return (ret);
+
+       /* With no lockers __dd_build allocates nothing, so nothing to free. */
+       if (nlockers == 0)
+               return (0);
+#ifdef DEBUG
+       if (dbenv->db_verbose != 0)
+               __dd_debug(dbenv, idmap, bitmap, nlockers);
+#endif
+       /* Find a deadlock. */
+       deadlock = __dd_find(bitmap, idmap, nlockers);
+       nentries = ALIGN(nlockers, 32) / 32;
+       killid = BAD_KILLID;
+       if (deadlock != NULL) {
+               /* Kill someone. */
+               switch (atype) {
+               case DB_LOCK_OLDEST:
+                       /*
+                        * Find the first bit set in the current
+                        * array and then look for a lower tid in
+                        * the array.  The scan must stop at the first
+                        * set bit, otherwise the search below starts
+                        * after the last one and compares nothing.
+                        */
+                       for (i = 0; i < nlockers; i++)
+                               if (ISSET_MAP(deadlock, i)) {
+                                       killid = i;
+                                       break;
+                               }
+
+                       if (killid == BAD_KILLID) {
+                               __db_err(dbenv,
+                                   "warning: could not find %s",
+                                   "locker to abort");
+                               break;
+                       }
+
+                       /*
+                        * The oldest transaction has the lowest
+                        * transaction id.
+                        */
+                       for (i = killid + 1; i < nlockers; i++)
+                               if (ISSET_MAP(deadlock, i) &&
+                                   idmap[i].id < idmap[killid].id)
+                                       killid = i;
+                       break;
+               case DB_LOCK_DEFAULT:
+               case DB_LOCK_RANDOM:
+                       /*
+                        * We are trying to calculate the id of the
+                        * locker whose entry is indicated by deadlock.
+                        * We know that this is less than nlockers, so
+                        * the cast below is valid.
+                        */
+                       killid =
+                           (u_int32_t)((deadlock - bitmap) / nentries);
+                       break;
+               case DB_LOCK_YOUNGEST:
+                       /*
+                        * Find the first bit set in the current
+                        * array and then look for a higher tid in
+                        * the array.  As above, stop at the first set
+                        * bit so that the search below has work to do.
+                        */
+                       for (i = 0; i < nlockers; i++)
+                               if (ISSET_MAP(deadlock, i)) {
+                                       killid = i;
+                                       break;
+                               }
+
+                       if (killid == BAD_KILLID) {
+                               __db_err(dbenv,
+                                   "warning: could not find %s",
+                                   "locker to abort");
+                               break;
+                       }
+                       /*
+                        * The youngest transaction has the highest
+                        * transaction id.
+                        */
+                       for (i = killid + 1; i < nlockers; i++)
+                               if (ISSET_MAP(deadlock, i) &&
+                                   idmap[i].id > idmap[killid].id)
+                                       killid = i;
+                       break;
+               default:
+                       killid = BAD_KILLID;
+                       ret = EINVAL;
+               }
+
+               /* Kill the locker with lockid idmap[killid]. */
+               if (dbenv->db_verbose != 0 && killid != BAD_KILLID)
+                       __db_err(dbenv, "Aborting locker %lx",
+                           (u_long)idmap[killid].id);
+
+               if (killid != BAD_KILLID &&
+                   (ret = __dd_abort(dbenv, &idmap[killid])) != 0)
+                       __db_err(dbenv,
+                           "warning: unable to abort locker %lx",
+                           (u_long)idmap[killid].id);
+       }
+       free(bitmap);
+       free(idmap);
+
+       return (ret);
+}
+
+/*
+ * ========================================================================
+ * Utilities
+ */
+/*
+ * __dd_build --
+ *     Build the waits-for bit matrix and the locker id map used by the
+ *     deadlock detector.
+ *
+ * dbenv:    environment; the lock table is dbenv->lk_info.
+ * bmp:      on success, set to the calloc'ed matrix: one row of
+ *           ALIGN(count, 32) / 32 words per locker.
+ * nlockers: on success, set to the matrix dimension (the sampled locker
+ *           count plus slack), or to 0 when the region has no lockers, in
+ *           which case nothing is allocated.
+ * idmap:    on success, set to the calloc'ed locker_info array.
+ *
+ * The caller frees *bmp and *idmap.  Returns 0 on success, or ENOMEM.
+ */
+static int
+__dd_build(dbenv, bmp, nlockers, idmap)
+       DB_ENV *dbenv;
+       u_int32_t **bmp;
+       int *nlockers;
+       locker_info **idmap;
+{
+       DB_LOCKTAB *lt;
+       DB_LOCKOBJ *op, *lockerp;
+       struct __db_lock *lp;
+       u_int32_t *bitmap, count, *entryp, i, id, nentries, *tmpmap;
+       locker_info *id_array;
+       int is_first, ret;
+
+       lt = dbenv->lk_info;
+
+       /*
+        * We'll check how many lockers there are, add a few more in for
+        * good measure and then allocate all the structures.  Then we'll
+        * verify that we have enough room when we go back in and get the
+        * mutex the second time.
+        */
+       LOCK_LOCKREGION(lt);
+retry: count = lt->region->nlockers;
+       lt->region->need_dd = 0;
+       UNLOCK_LOCKREGION(lt);
+
+       if (count == 0) {
+               *nlockers = 0;
+               return (0);
+       }
+
+       if (dbenv->db_verbose)
+               __db_err(dbenv, "%lu lockers", (u_long)count);
+
+       count += 10;
+       nentries = ALIGN(count, 32) / 32;
+       /*
+        * Allocate enough space for a count by count bitmap matrix.
+        *
+        * XXX
+        * We can probably save the malloc's between iterations just
+        * reallocing if necessary because count grew by too much.
+        */
+       if ((bitmap = (u_int32_t *)calloc((size_t)count,
+           sizeof(u_int32_t) * nentries)) == NULL) {
+               __db_err(dbenv, "%s", strerror(ENOMEM));
+               return (ENOMEM);
+       }
+
+       if ((tmpmap =
+           (u_int32_t *)calloc(sizeof(u_int32_t), nentries)) == NULL) {
+               __db_err(dbenv, "%s", strerror(ENOMEM));
+               free(bitmap);
+               return (ENOMEM);
+       }
+
+       if ((id_array = (locker_info *)calloc((size_t)count,
+           sizeof(locker_info))) == NULL) {
+               __db_err(dbenv, "%s", strerror(ENOMEM));
+               free(bitmap);
+               free(tmpmap);
+               return (ENOMEM);
+       }
+
+       /*
+        * Now go back in and actually fill in the matrix.  If more lockers
+        * showed up than we allowed for, start over; the region lock is
+        * held at the jump, exactly as it is when retry is first reached.
+        */
+       LOCK_LOCKREGION(lt);
+       if (lt->region->nlockers > count) {
+               free(bitmap);
+               free(tmpmap);
+               free(id_array);
+               goto retry;
+       }
+
+       /*
+        * First we go through and assign each locker a deadlock detector id.
+        * Note that we fill in the idmap in the next loop since that's the
+        * only place where we conveniently have both the deadlock id and the
+        * actual locker.
+        */
+       for (id = 0, i = 0; i < lt->region->table_size; i++)
+               for (op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
+                   op != NULL; op = SH_TAILQ_NEXT(op, links, __db_lockobj))
+                       if (op->type == DB_LOCK_LOCKER)
+                               op->dd_id = id++;
+       /*
+        * We go through the hash table and find each object.  For each object,
+        * we traverse the waiters list and add an entry in the waitsfor matrix
+        * for each waiter/holder combination.
+        */
+       for (i = 0; i < lt->region->table_size; i++) {
+               for (op = SH_TAILQ_FIRST(&lt->hashtab[i], __db_lockobj);
+                   op != NULL; op = SH_TAILQ_NEXT(op, links, __db_lockobj)) {
+                       if (op->type != DB_LOCK_OBJTYPE)
+                               continue;
+                       CLEAR_MAP(tmpmap, nentries);
+
+                       /*
+                        * First we go through and create a bit map that
+                        * represents all the holders of this object.
+                        *
+                        * The result goes into ret, as in the waiters loop
+                        * below, rather than into errno: assigning it to
+                        * errno would overwrite the caller's errno, with 0
+                        * on every successful lookup.
+                        */
+                       for (lp = SH_TAILQ_FIRST(&op->holders, __db_lock);
+                           lp != NULL;
+                           lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+                               if ((ret = __lock_getobj(lt, lp->holder,
+                                   NULL, DB_LOCK_LOCKER, &lockerp)) != 0) {
+                                       __db_err(dbenv,
+                                           "warning unable to find object");
+                                       continue;
+                               }
+                               id_array[lockerp->dd_id].id = lp->holder;
+                               id_array[lockerp->dd_id].valid = 1;
+
+                               /*
+                                * If the holder has already been aborted, then
+                                * we should ignore it for now.
+                                */
+                               if (lp->status == DB_LSTAT_HELD)
+                                       SET_MAP(tmpmap, lockerp->dd_id);
+                       }
+
+                       /*
+                        * Next, for each waiter, we set its row in the matrix
+                        * equal to the map of holders we set up above.
+                        */
+                       for (is_first = 1,
+                           lp = SH_TAILQ_FIRST(&op->waiters, __db_lock);
+                           lp != NULL;
+                           is_first = 0,
+                           lp = SH_TAILQ_NEXT(lp, links, __db_lock)) {
+                               if ((ret = __lock_getobj(lt,
+                                   lp->holder, NULL, DB_LOCK_LOCKER, &lockerp))
+                                   != 0) {
+                                       __db_err(dbenv,
+                                           "warning unable to find object");
+                                       continue;
+                               }
+                               id_array[lockerp->dd_id].id = lp->holder;
+                               id_array[lockerp->dd_id].valid = 1;
+
+                               /*
+                                * If the transaction is pending abortion, then
+                                * ignore it on this iteration.
+                                */
+                               if (lp->status != DB_LSTAT_WAITING)
+                                       continue;
+
+                               entryp = bitmap + (nentries * lockerp->dd_id);
+                               OR_MAP(entryp, tmpmap, nentries);
+                               /*
+                                * If this is the first waiter on the queue,
+                                * then we remove the waitsfor relationship
+                                * with oneself.  However, if it's anywhere
+                                * else on the queue, then we have to keep
+                                * it and we have an automatic deadlock.
+                                */
+                               if (is_first)
+                                       CLR_MAP(entryp, lockerp->dd_id);
+                       }
+               }
+       }
+
+       /* Now for each locker; record its last lock. */
+       for (id = 0; id < count; id++) {
+               if (!id_array[id].valid)
+                       continue;
+               if ((ret = __lock_getobj(lt,
+                   id_array[id].id, NULL, DB_LOCK_LOCKER, &lockerp)) != 0) {
+                       __db_err(dbenv,
+                           "No locks for locker %lu", (u_long)id_array[id].id);
+                       continue;
+               }
+               lp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
+               if (lp != NULL)
+                       id_array[id].last_lock = LOCK_TO_OFFSET(lt, lp);
+       }
+
+       /* Pass complete, reset the deadlock detector bit. */
+       lt->region->need_dd = 0;
+       UNLOCK_LOCKREGION(lt);
+
+       /*
+        * Now we can release everything except the bitmap matrix that we
+        * created.  The loop above leaves id equal to count, which is the
+        * dimension the matrix was allocated with.
+        */
+       *nlockers = id;
+       *idmap = id_array;
+       *bmp = bitmap;
+       free(tmpmap);
+       return (0);
+}
+
+/*
+ * __dd_find --
+ *     Search the waits-for matrix for a locker that ends up waiting on
+ *     itself.  Rows are updated in place: each row absorbs the rows of the
+ *     lockers it waits on.  Returns a pointer to the row of the first
+ *     locker whose own bit becomes set, or NULL if there is none.
+ */
+static u_int32_t *
+__dd_find(bmp, idmap, nlockers)
+       u_int32_t *bmp;
+       locker_info *idmap;
+       u_int32_t nlockers;
+{
+       u_int32_t holder, *holder_row, *row, row_words, waiter;
+
+       /* Number of 32-bit words in one row of the matrix. */
+       row_words = ALIGN(nlockers, 32) / 32;
+
+       row = bmp;
+       for (waiter = 0; waiter < nlockers; row += row_words, waiter++) {
+               /* Skip slots that were never filled in. */
+               if (!idmap[waiter].valid)
+                       continue;
+
+               for (holder = 0; holder < nlockers; holder++) {
+                       if (!ISSET_MAP(row, holder))
+                               continue;
+
+                       /* Fold in everything this holder is waiting on. */
+                       holder_row = bmp + (row_words * holder);
+                       OR_MAP(row, holder_row, row_words);
+
+                       /* The waiter now waits on itself. */
+                       if (ISSET_MAP(row, waiter))
+                               return (row);
+               }
+       }
+
+       return (NULL);
+}
+
+/*
+ * __dd_abort --
+ *     Abort the lock a chosen locker is waiting on.
+ *
+ * dbenv: environment; the lock table is dbenv->lk_info.
+ * info:  id map entry of the locker selected by the detector.
+ *
+ * Nothing is done if the locker has no locks any more, if its first lock
+ * is not the one recorded in info->last_lock, or if that lock is no longer
+ * waiting.  Returns 0, or the error from __lock_getobj.
+ */
+static int
+__dd_abort(dbenv, info)
+       DB_ENV *dbenv;
+       locker_info *info;
+{
+       DB_LOCKTAB *lt;
+       DB_LOCKOBJ *lockerp, *sh_obj;
+       struct __db_lock *lockp;
+       int ret;
+
+       lt = dbenv->lk_info;
+       LOCK_LOCKREGION(lt);
+
+       /* Find the locker's last lock. */
+       if ((ret =
+           __lock_getobj(lt, info->id, NULL, DB_LOCK_LOCKER, &lockerp)) != 0)
+               goto out;
+
+       /*
+        * Test for an empty list first, so that no offset is computed from,
+        * and no field read through, a NULL lock pointer.
+        */
+       lockp = SH_LIST_FIRST(&lockerp->heldby, __db_lock);
+       if (lockp == NULL ||
+           LOCK_TO_OFFSET(lt, lockp) != info->last_lock ||
+           lockp->status != DB_LSTAT_WAITING)
+               goto out;
+
+       /* Abort lock, take it off list, and wake up this lock. */
+       lockp->status = DB_LSTAT_ABORTED;
+       lt->region->ndeadlocks++;
+       SH_LIST_REMOVE(lockp, locker_links, __db_lock);
+       sh_obj = (DB_LOCKOBJ *)((u_int8_t *)lockp + lockp->obj);
+       SH_TAILQ_REMOVE(&sh_obj->waiters, lockp, links, __db_lock);
+       (void)__db_mutex_unlock(&lockp->mutex, lt->fd);
+
+       ret = 0;
+
+out:   UNLOCK_LOCKREGION(lt);
+       return (ret);
+}
+
+#ifdef DEBUG
+/*
+ * __dd_debug --
+ *     Report the waits-for matrix through __db_err: one line per valid
+ *     locker giving its id, the ids of the lockers it waits on and its
+ *     last lock.
+ *
+ * Each line is assembled by appending at a cursor.  Passing the buffer to
+ * sprintf as both destination and "%s" argument is undefined behavior, and
+ * the finished text is passed to __db_err as data, never as the format.
+ */
+static void
+__dd_debug(dbenv, idmap, bitmap, nlockers)
+       DB_ENV *dbenv;
+       locker_info *idmap;
+       u_int32_t *bitmap;
+       int nlockers;
+{
+       u_int32_t *mymap;
+       int i, j, nentries;
+       char *msgbuf, *p;
+
+       __db_err(dbenv, "Waitsfor array");
+       __db_err(dbenv, "waiter\twaiting on");
+       /*
+        * Alloc space to print 10 bytes per item waited on.
+        */
+       if ((msgbuf = (char *)malloc((nlockers + 1) * 10 + 64)) == NULL) {
+               errno = ENOMEM;
+               __db_err(dbenv, "%s", strerror(errno));
+               return;
+       }
+
+       nentries = ALIGN(nlockers, 32) / 32;
+       for (mymap = bitmap, i = 0; i < nlockers; i++, mymap += nentries) {
+               if (!idmap[i].valid)
+                       continue;
+
+               /* Waiter. */
+               p = msgbuf;
+               p += sprintf(p, "%lx\t\t", (u_long)idmap[i].id);
+
+               /* Everyone the waiter is waiting on. */
+               for (j = 0; j < nlockers; j++)
+                       if (ISSET_MAP(mymap, j))
+                               p += sprintf(p,
+                                   " %lx", (u_long)idmap[j].id);
+
+               /* The waiter's last lock. */
+               (void)sprintf(p, " %lu", (u_long)idmap[i].last_lock);
+               __db_err(dbenv, "%s", msgbuf);
+       }
+
+       free(msgbuf);
+}
+#endif
diff --git a/db2/lock/lock_util.c b/db2/lock/lock_util.c
new file mode 100644 (file)
index 0000000..4063849
--- /dev/null
@@ -0,0 +1,103 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)lock_util.c  10.4 (Sleepycat) 7/22/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "hash.h"
+#include "lock.h"
+
+/*
+ * __lock_cmp --
+ *     Hash-table equality test between a DBT about to be entered and an
+ *     object already stored in the table.  The result is 1 when the
+ *     stored entry is of type DB_LOCK_OBJTYPE and holds the same bytes
+ *     as the DBT, 0 otherwise.  It carries no ordering information, so
+ *     it cannot be used as a sort function.
+ *
+ * PUBLIC: int __lock_cmp __P((DBT *, DB_LOCKOBJ *));
+ */
+int
+__lock_cmp(dbt, lock_obj)
+       DBT *dbt;
+       DB_LOCKOBJ *lock_obj;
+{
+       void *stored;
+
+       /* Entries of any other type never match an object DBT. */
+       if (lock_obj->type == DB_LOCK_OBJTYPE &&
+           dbt->size == lock_obj->lockobj.size) {
+               stored = SH_DBT_PTR(&lock_obj->lockobj);
+               if (memcmp(dbt->data, stored, dbt->size) == 0)
+                       return (1);
+       }
+       return (0);
+}
+
+/*
+ * __lock_locker_cmp --
+ *     Hash-table equality test for locker entries: 1 when the stored
+ *     entry is of type DB_LOCK_LOCKER and its data begins with the same
+ *     bytes as the given locker id, 0 otherwise.
+ *
+ * PUBLIC: int __lock_locker_cmp __P((u_int32_t, DB_LOCKOBJ *));
+ */
+int
+__lock_locker_cmp(locker, lock_obj)
+       u_int32_t locker;
+       DB_LOCKOBJ *lock_obj;
+{
+       void *stored;
+
+       if (lock_obj->type == DB_LOCK_LOCKER) {
+               stored = SH_DBT_PTR(&lock_obj->lockobj);
+               if (memcmp(&locker, stored, sizeof(locker)) == 0)
+                       return (1);
+       }
+       return (0);
+}
+
+/*
+ * __lock_ohash --
+ *     Hash the bytes of a DBT with __ham_func5.
+ *
+ * PUBLIC: int __lock_ohash __P((DBT *));
+ */
+int
+__lock_ohash(dbt)
+       DBT *dbt;
+{
+       int hashval;
+
+       hashval = __ham_func5(dbt->data, dbt->size);
+       return (hashval);
+}
+
+/*
+ * __lock_locker_hash --
+ *     Hash the raw bytes of a locker id with __ham_func5.
+ *
+ * PUBLIC: u_int32_t __lock_locker_hash __P((u_int32_t));
+ */
+u_int32_t
+__lock_locker_hash(locker)
+       u_int32_t locker;
+{
+       u_int32_t hashval;
+
+       hashval = __ham_func5(&locker, sizeof(u_int32_t));
+       return (hashval);
+}
+
+/*
+ * __lock_lhash --
+ *     Hash the data stored in a lock-table entry with __ham_func5.
+ *
+ * PUBLIC: u_int32_t __lock_lhash __P((DB_LOCKOBJ *));
+ */
+u_int32_t
+__lock_lhash(lock_obj)
+       DB_LOCKOBJ *lock_obj;
+{
+       void *stored;
+       u_int32_t hashval;
+
+       stored = SH_DBT_PTR(&lock_obj->lockobj);
+       hashval = __ham_func5(stored, lock_obj->lockobj.size);
+       return (hashval);
+}
+
diff --git a/db2/log/log.c b/db2/log/log.c
new file mode 100644 (file)
index 0000000..1684ce8
--- /dev/null
@@ -0,0 +1,438 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log.c        10.24 (Sleepycat) 8/16/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "txn_auto.h"
+#include "common_ext.h"
+
+static int __log_recover __P((DB_ENV *, DB_LOG *));
+
+/*
+ * log_open --
+ *     Initialize and/or join a log.
+ */
+int
+log_open(path, flags, mode, dbenv, lpp)
+       const char *path;
+       int flags;
+       int mode;
+       DB_ENV *dbenv;
+       DB_LOG **lpp;
+{
+       DB_LOG *dblp;
+       LOG *lp;
+       size_t len;
+       int fd, newregion, ret, retry_cnt;
+
+       /* Validate arguments. */
+#ifdef HAVE_SPINLOCKS
+#define        OKFLAGS (DB_CREATE | DB_THREAD)
+#else
+#define        OKFLAGS (DB_CREATE)
+#endif
+       if ((ret = __db_fchk(dbenv, "log_open", flags, OKFLAGS)) != 0)
+               return (ret);
+
+       /*
+        * We store 4-byte offsets into the file, so the maximum file
+        * size can't be larger than that.
+        */
+       if (dbenv != NULL && dbenv->lg_max > UINT32_T_MAX) {
+               __db_err(dbenv, "log_open: maximum file size too large");
+               return (EINVAL);
+       }
+
+       /* Create and initialize the DB_LOG structure. */
+       if ((dblp = (DB_LOG *)calloc(1, sizeof(DB_LOG))) == NULL)
+               return (ENOMEM);
+
+       dblp->dbenv = dbenv;
+       dblp->lfd = -1;
+       ZERO_LSN(dblp->c_lsn);
+       dblp->c_fd = -1;
+       if (LF_ISSET(DB_THREAD)) {
+               F_SET(dblp, DB_AM_THREAD);
+               (void)__db_mutex_init(&dblp->mutex, -1);
+       }
+
+       /*
+        * The log region isn't fixed size because we store the registered
+        * file names there.  Make it fairly large so that we don't have to
+        * grow it.
+        */
+       len = 30 * 1024;
+
+       /* Map in the region. */
+       retry_cnt = newregion = 0;
+retry: if (LF_ISSET(DB_CREATE)) {
+               ret = __db_rcreate(dbenv, DB_APP_LOG, path,
+                   DB_DEFAULT_LOG_FILE, mode, len, &fd, &dblp->maddr);
+               if (ret == 0) {
+                       /* Put the LOG structure first in the region. */
+                       lp = dblp->maddr;
+
+                       /* Initialize the rest of the region as free space. */
+                       dblp->addr = (u_int8_t *)dblp->maddr + sizeof(LOG);
+                       __db_shalloc_init(dblp->addr, len - sizeof(LOG));
+
+                       /* Initialize the LOG structure. */
+                       lp->persist.lg_max = dbenv == NULL ? 0 : dbenv->lg_max;
+                       if (lp->persist.lg_max == 0)
+                               lp->persist.lg_max = DEFAULT_MAX;
+                       lp->persist.magic = DB_LOGMAGIC;
+                       lp->persist.version = DB_LOGVERSION;
+                       lp->persist.mode = mode;
+                       SH_TAILQ_INIT(&lp->fq);
+
+                       /* Initialize LOG LSNs. */
+                       lp->lsn.file = 1;
+                       lp->lsn.offset = 0;
+
+                       newregion = 1;
+               } else if (ret != EEXIST)
+                       return (ret);
+       }
+
+       /* If we didn't or couldn't create the region, try and join it. */
+       if (!newregion &&
+           (ret = __db_ropen(dbenv, DB_APP_LOG,
+           path, DB_DEFAULT_LOG_FILE, 0, &fd, &dblp->maddr)) != 0) {
+               /*
+                * If we fail because the file isn't available, wait a
+                * second and try again.
+                */
+               if (ret == EAGAIN && ++retry_cnt < 3) {
+                       (void)__db_sleep(1, 0);
+                       goto retry;
+               }
+               return (ret);
+       }
+
+       /* Set up the common information. */
+       dblp->lp = dblp->maddr;
+       dblp->addr = (u_int8_t *)dblp->maddr + sizeof(LOG);
+       dblp->fd = fd;
+
+       /*
+        * If doing recovery, try and recover any previous log files
+        * before releasing the lock.
+        */
+       if (newregion) {
+               if ((ret = __log_recover(dbenv, dblp)) != 0) {
+                       log_unlink(path, 1, dbenv);
+                       return (ret);
+               }
+               UNLOCK_LOGREGION(dblp);
+       }
+       *lpp = dblp;
+       return (0);
+}
+
+/*
+ * __log_recover --
+ *     Recover a log.
+ *
+ *     Locates the highest-numbered valid log file, reads it to find the
+ *     end of the log and the most recent checkpoint record, and stores
+ *     the results in the shared LOG structure (lp->lsn, lp->s_lsn,
+ *     lp->c_lsn and the buffer offsets).  If the last file holds no
+ *     checkpoint, earlier files are scanned in descending order.
+ *
+ *     Returns 0 on success (including when no log files exist) or the
+ *     error from __log_find / __log_get.
+ */
+static int
+__log_recover(dbenv, dblp)
+       DB_ENV *dbenv;
+       DB_LOG *dblp;
+{
+       DBT dbt;
+       DB_LSN lsn;
+       LOG *lp;
+       u_int32_t chk;
+       int cnt, found_checkpoint, ret;
+
+       lp = dblp->lp;
+
+       /*
+        * Find a log file.  If none exist, we simply return, leaving
+        * everything initialized to a new log.
+        */
+       if ((ret = __log_find(dbenv, lp, &cnt)) != 0)
+               return (ret);
+       if (cnt == 0)
+               return (0);
+
+       /*
+        * We have a log file name, find the last one: probe successive
+        * file numbers until one fails validation or MAXLFNAME is reached.
+        */
+       while (cnt < MAXLFNAME)
+               if (__log_valid(dbenv, lp, ++cnt) != 0) {
+                       --cnt;
+                       break;
+               }
+
+       /*
+        * We have the last useful log file and we've loaded any persistent
+        * information.  Pretend that the log is larger than it can possibly
+        * be, and read this file, looking for a checkpoint and its end.
+        */
+       dblp->c_lsn.file = cnt;
+       dblp->c_lsn.offset = 0;
+       lsn = dblp->c_lsn;
+       lp->lsn.file = cnt + 1;
+       lp->lsn.offset = 0;
+
+       /* Set the cursor.  Shouldn't fail, leave error messages on. */
+       memset(&dbt, 0, sizeof(dbt));
+       if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0)
+               return (ret);
+
+       /*
+        * Read to the end of the file, saving checkpoints.  This will fail
+        * at some point, so turn off error messages.
+        */
+       found_checkpoint = 0;
+       while (__log_get(dblp, &lsn, &dbt, DB_NEXT, 1) == 0) {
+               /* Too short to carry a record type; skip it. */
+               if (dbt.size < sizeof(u_int32_t))
+                       continue;
+               /* The record type is the first u_int32_t of the record. */
+               memcpy(&chk, dbt.data, sizeof(u_int32_t));
+               if (chk == DB_txn_ckp) {
+                       lp->c_lsn = lsn;
+                       found_checkpoint = 1;
+               }
+       }
+
+       /*
+        * We know where the end of the log is.  Since that record is on disk,
+        * it's also the last-synced LSN.
+        */
+       lp->lsn = lsn;
+       lp->lsn.offset += dblp->c_len;
+       lp->s_lsn = lp->lsn;
+
+       /* Set up the current buffer information, too. */
+       lp->len = dblp->c_len;
+       lp->b_off = 0;
+       lp->w_off = lp->lsn.offset;
+
+       /*
+        * It's possible that we didn't find a checkpoint because there wasn't
+        * one in the last log file.  Start searching.
+        */
+       while (!found_checkpoint && cnt > 1) {
+               dblp->c_lsn.file = --cnt;
+               dblp->c_lsn.offset = 0;
+               lsn = dblp->c_lsn;
+
+               /* Set the cursor.  Shouldn't fail, leave error messages on. */
+               if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0)
+                       return (ret);
+
+               /*
+                * Read to the end of the file, saving checkpoints.  Shouldn't
+                * fail, leave error messages on.
+                */
+               while (__log_get(dblp, &lsn, &dbt, DB_NEXT, 0) == 0) {
+                       if (dbt.size < sizeof(u_int32_t))
+                               continue;
+                       memcpy(&chk, dbt.data, sizeof(u_int32_t));
+                       if (chk == DB_txn_ckp) {
+                               lp->c_lsn = lsn;
+                               found_checkpoint = 1;
+                       }
+               }
+       }
+
+       /*
+        * If we never find a checkpoint, that's okay, just reset the
+        * checkpoint LSN to the start of file 1.
+        */
+       if (!found_checkpoint) {
+               lp->c_lsn.file = 1;
+               lp->c_lsn.offset = 0;
+       }
+
+       __db_err(dbenv,
+           "Recovering the log: last valid LSN: file: %lu offset %lu",
+           (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
+
+       /* Reset the cursor.  */
+       ZERO_LSN(dblp->c_lsn);
+
+       return (0);
+}
+
+/*
+ * __log_find --
+ *     Try to find a log file.  Scans the log directory from the end of
+ *     the returned name list for an entry named "log.<number>" that
+ *     passes __log_valid, and stores that number in *valp (0 if there
+ *     is none).
+ *
+ * PUBLIC: int __log_find __P((DB_ENV *, LOG *, int *));
+ */
+int
+__log_find(dbenv, lp, valp)
+       DB_ENV *dbenv;
+       LOG *lp;
+       int *valp;
+{
+       int fcnt, idx, num, ret;
+       const char *dir;
+       char **names, *path, *sep;
+
+       /* Build the name of log file 1 and keep only its directory part. */
+       ret = __log_name(dbenv, 1, &path);
+       if (ret != 0)
+               return (ret);
+       sep = __db_rpath(path);
+       if (sep != NULL) {
+               *sep = '\0';
+               dir = path;
+       } else
+               dir = PATH_DOT;
+
+       /* Read the directory; the path is not needed after that. */
+       ret = __db_dir(dbenv, dir, &names, &fcnt);
+       FREES(path);
+       if (ret != 0)
+               return (ret);
+
+       /* Walk the list backwards; the first valid log file wins. */
+       *valp = 0;
+       for (idx = fcnt - 1; idx >= 0; --idx) {
+               if (strncmp(names[idx], "log.", sizeof("log.") - 1) != 0)
+                       continue;
+               num = atoi(names[idx] + 4);
+               if (num == 0 || __log_valid(dbenv, lp, num) != 0)
+                       continue;
+               *valp = num;
+               break;
+       }
+
+       /* Discard the list. */
+       __db_dirf(dbenv, names, fcnt);
+
+       return (0);
+}
+
+/*
+ * __log_valid --
+ *     Validate a log file: read the persistent header of log file
+ *     number cnt and check its magic number and version.  On success,
+ *     and if lp is not NULL, copy the file's lg_max and mode into lp.
+ *
+ * PUBLIC: int __log_valid __P((DB_ENV *, LOG *, int));
+ */
+int
+__log_valid(dbenv, lp, cnt)
+       DB_ENV *dbenv;
+       LOG *lp;
+       int cnt;
+{
+       LOGP hdr;
+       ssize_t nread;
+       int fd, ret;
+       char *fname;
+
+       if ((ret = __log_name(dbenv, cnt, &fname)) != 0)
+               return (ret);
+
+       /* Open the file, skip sizeof(HDR) bytes and read the LOGP. */
+       fd = -1;
+       ret = __db_fdopen(fname,
+           DB_RDONLY | DB_SEQUENTIAL,
+           DB_RDONLY | DB_SEQUENTIAL, 0, &fd);
+       if (ret == 0)
+               ret = __db_lseek(fd, 0, 0, sizeof(HDR), SEEK_SET);
+       if (ret == 0)
+               ret = __db_read(fd, &hdr, sizeof(LOGP), &nread);
+       if (ret == 0 && nread != sizeof(LOGP))
+               ret = EIO;                      /* Short read. */
+       if (ret != 0) {
+               /* Stay quiet when the file could not even be opened. */
+               if (fd != -1) {
+                       (void)__db_close(fd);
+                       __db_err(dbenv,
+                           "Ignoring log file: %s: %s", fname, strerror(ret));
+               }
+               goto done;
+       }
+       (void)__db_close(fd);
+
+       if (hdr.magic != DB_LOGMAGIC) {
+               __db_err(dbenv,
+                   "Ignoring log file: %s: magic number %lx, not %lx",
+                   fname, (u_long)hdr.magic, (u_long)DB_LOGMAGIC);
+               ret = EINVAL;
+               goto done;
+       }
+       if (hdr.version < DB_LOGOLDVER || hdr.version > DB_LOGVERSION) {
+               __db_err(dbenv,
+                   "Ignoring log file: %s: unsupported log version %lu",
+                   fname, (u_long)hdr.version);
+               ret = EINVAL;
+               goto done;
+       }
+
+       /* The file is usable; hand its settings back if asked to. */
+       if (lp != NULL) {
+               lp->persist.lg_max = hdr.lg_max;
+               lp->persist.mode = hdr.mode;
+       }
+       ret = 0;
+
+done:  FREES(fname);
+       return (ret);
+}
+
+/*
+ * log_close --
+ *     Close a log: detach from the region, close any open descriptors
+ *     and release the handle.  Returns the first error encountered, but
+ *     always carries out every step.
+ */
+int
+log_close(dblp)
+       DB_LOG *dblp;
+{
+       int ret, t_ret;
+
+       /* Close the region; its status is the first candidate result. */
+       ret = __db_rclose(dblp->dbenv, dblp->fd, dblp->maddr);
+
+       /* Close the log file descriptor, if one is open. */
+       if (dblp->lfd != -1) {
+               t_ret = __db_close(dblp->lfd);
+               if (t_ret != 0 && ret == 0)
+                       ret = t_ret;
+       }
+
+       /* Release the cursor's buffer and descriptor. */
+       if (dblp->c_dbt.data != NULL)
+               FREE(dblp->c_dbt.data, dblp->c_dbt.ulen);
+       if (dblp->c_fd != -1) {
+               t_ret = __db_close(dblp->c_fd);
+               if (t_ret != 0 && ret == 0)
+                       ret = t_ret;
+       }
+
+       /* Free the entry table and then the structure itself. */
+       if (dblp->dbentry != NULL)
+               FREE(dblp->dbentry, (dblp->dbentry_cnt * sizeof(DB_ENTRY)));
+       FREE(dblp, sizeof(DB_LOG));
+
+       return (ret);
+}
+
+/*
+ * log_unlink --
+ *     Exit a log: pass the request to __db_runlink for the log region
+ *     file and return its result.
+ */
+int
+log_unlink(path, force, dbenv)
+       const char *path;
+       int force;
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       ret = __db_runlink(dbenv,
+           DB_APP_LOG, path, DB_DEFAULT_LOG_FILE, force);
+       return (ret);
+}
diff --git a/db2/log/log.src b/db2/log/log.src
new file mode 100644 (file)
index 0000000..9f48291
--- /dev/null
@@ -0,0 +1,53 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)log.src     10.3 (Sleepycat) 8/20/97
+ *
+ * This is the source file used to create the logging functions for the
+ * log package.  Each access method (or set of routines wishing to register
+ * record types with the transaction system) should have a file like this.
+ * Each type of log record and its parameters is defined.  The basic
+ * format of a record definition is:
+ *
+ * BEGIN       <RECORD_TYPE>
+ * ARG|STRING|POINTER  <variable name> <variable type> <printf format>
+ * ...
+ * END
+ * ARG the argument is a simple parameter of the type specified.
+ * DBT the argument is a DBT (db.h) containing a length and pointer.
+ * PTR the argument is a pointer to the data type specified; the entire
+ *     type should be logged.
+ *
+ * There are a set of shell scripts of the form xxx.sh that generate c
+ * code and or h files to process these.  (This is probably better done
+ * in a single PERL script, but for now, this works.)
+ *
+ * The DB recovery system requires the following three fields appear in
+ * every record, and will assign them to the per-record-type structures
+ * as well as making them the first parameters to the appropriate logging
+ * call.
+ * rectype:    record-type, identifies the structure and log/read call
+ * txnid:      transaction id, a DBT in this implementation
+ * prev:       the last LSN for this transaction
+ */
+
+/*
+ * Use the argument of PREFIX as the prefix for all record types,
+ * routines, id numbers, etc.
+ */
+PREFIX log
+
+/* Used for registering new name/id translations. */
+BEGIN  register
+DBT    name            DBT             s
+DBT    uid             DBT             s
+ARG    id              u_int32_t       lu
+ARG    ftype           DBTYPE          lx
+END
+
+BEGIN  unregister
+ARG    id              u_int32_t       lu
+END
diff --git a/db2/log/log_archive.c b/db2/log/log_archive.c
new file mode 100644 (file)
index 0000000..d70d4c6
--- /dev/null
@@ -0,0 +1,413 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_archive.c        10.23 (Sleepycat) 8/23/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "db_dispatch.h"
+#include "shqueue.h"
+#include "log.h"
+#include "clib_ext.h"
+#include "common_ext.h"
+
+static int absname __P((char *, char *, char **));
+static int build_data __P((DB_LOG *, char *, char ***, void *(*)(size_t)));
+static int cmpfunc __P((const void *, const void *));
+static int usermem __P((char ***, void *(*)(size_t)));
+
+/*
+ * log_archive --
+ *     Supporting function for db_archive(1).
+ */
+int
+log_archive(logp, listp, flags, db_malloc)
+       DB_LOG *logp;
+       char ***listp;
+       int flags;
+       void *(*db_malloc) __P((size_t));
+{
+       DBT rec;
+       DB_LSN stable_lsn;
+       u_int32_t fnum;
+       int array_size, n, ret;
+       char **array, **arrayp, *name, *p, *pref, buf[MAXPATHLEN];
+
+       fnum = 0;                               /* XXX: Shut the compiler up. */
+
+#define        OKFLAGS (DB_ARCH_ABS | DB_ARCH_DATA | DB_ARCH_LOG)
+       if (flags != 0) {
+               if ((ret =
+                   __db_fchk(logp->dbenv, "log_archive", flags, OKFLAGS)) != 0)
+                       return (ret);
+               if ((ret =
+                   __db_fcchk(logp->dbenv,
+                       "log_archive", flags, DB_ARCH_DATA, DB_ARCH_LOG)) != 0)
+                       return (ret);
+       }
+
+       /*
+        * Get the absolute pathname of the current directory.  It would
+        * be nice to get the shortest pathname of the database directory,
+        * but that's just not possible.
+        */
+       if (LF_ISSET(DB_ARCH_ABS)) {
+               errno = 0;
+               if ((pref = getcwd(buf, sizeof(buf))) == NULL)
+                       return (errno == 0 ? ENOMEM : errno);
+       } else
+               pref = NULL;
+
+       switch (LF_ISSET(~DB_ARCH_ABS)) {
+       case DB_ARCH_DATA:
+               return (build_data(logp, pref, listp, db_malloc));
+       case DB_ARCH_LOG:
+               memset(&rec, 0, sizeof(rec));
+               if (F_ISSET(logp, DB_AM_THREAD))
+                       F_SET(&rec, DB_DBT_MALLOC);
+               if ((ret = log_get(logp, &stable_lsn, &rec, DB_LAST)) != 0)
+                       return (ret);
+               if (F_ISSET(logp, DB_AM_THREAD))
+                       free(rec.data);
+               fnum = stable_lsn.file;
+               break;
+       case 0:
+               if ((ret = __log_findckp(logp, &stable_lsn)) != 0) {
+                       if (ret != DB_NOTFOUND)
+                               return (ret);
+                       *listp = NULL;
+                       return (0);
+               }
+               /* Remove any log files before the last stable LSN. */
+               fnum = stable_lsn.file - 1;
+               break;
+       }
+
+#define        LIST_INCREMENT  64
+       /* Get some initial space. */
+       if ((array =
+           (char **)malloc(sizeof(char *) * (array_size = 10))) == NULL)
+               return (ENOMEM);
+       array[0] = NULL;
+
+       /* Build an array of the file names. */
+       for (n = 0; fnum > 0; --fnum) {
+               if ((ret = __log_name(logp->dbenv, fnum, &name)) != 0)
+                       goto err;
+               if (__db_exists(name, NULL) != 0)
+                       break;
+
+               if (n >= array_size - 1) {
+                       array_size += LIST_INCREMENT;
+                       if ((array = (char **)realloc(array,
+                           sizeof(char *) * array_size)) == NULL) {
+                               ret = ENOMEM;
+                               goto err;
+                       }
+               }
+
+               if (LF_ISSET(DB_ARCH_ABS)) {
+                       if ((ret = absname(pref, name, &array[n])) != 0)
+                               goto err;
+                       FREES(name);
+               } else if ((p = __db_rpath(name)) != NULL) {
+                       if ((array[n] = (char *)strdup(p + 1)) == NULL) {
+                               ret = ENOMEM;
+                               goto err;
+                       }
+                       FREES(name);
+               } else
+                       array[n] = name;
+
+               array[++n] = NULL;
+       }
+
+       /* If there's nothing to return, we're done. */
+       if (n == 0) {
+               *listp = NULL;
+               ret = 0;
+               goto err;
+       }
+
+       /* Sort the list. */
+       qsort(array, (size_t)n, sizeof(char *), cmpfunc);
+
+       /* Rework the memory. */
+       if ((ret = usermem(&array, db_malloc)) != 0)
+               goto err;
+
+       *listp = array;
+       return (0);
+
+err:   if (array != NULL) {
+               for (arrayp = array; *arrayp != NULL; ++arrayp)
+                       FREES(*arrayp);
+               free(array);
+       }
+       return (ret);
+}
+
+/*
+ * build_data --
+ *     Build a list of datafiles for return.
+ *
+ *     Scans the whole log for DB_log_register records, collects the
+ *     registered names, sorts them, drops duplicates and files that no
+ *     longer exist, and rewrites each survivor either as an absolute
+ *     path (pref != NULL) or as a name without its directory part.
+ *
+ *     On success *listp is a single allocation obtained from db_malloc
+ *     (malloc if NULL), or NULL if no names were found.  Returns 0 or
+ *     an errno-style value.
+ */
+static int
+build_data(logp, pref, listp, db_malloc)
+       DB_LOG *logp;
+       char *pref, ***listp;
+       void *(*db_malloc) __P((size_t));
+{
+       DBT rec;
+       DB_LSN lsn;
+       __log_register_args *argp;
+       u_int32_t rectype;
+       int array_size, last, n, nxt, ret;
+       char **array, **arrayp, **newarray, *p, *real_name;
+
+       /* Get some initial space. */
+       if ((array =
+           (char **)malloc(sizeof(char *) * (array_size = 10))) == NULL)
+               return (ENOMEM);
+       array[0] = NULL;
+
+       memset(&rec, 0, sizeof(rec));
+       if (F_ISSET(logp, DB_AM_THREAD))
+               F_SET(&rec, DB_DBT_MALLOC);
+
+       /*
+        * NOTE(review): any non-zero return from log_get ends the scan,
+        * not only end-of-log; the value is not examined afterwards.
+        */
+       for (n = 0, ret = log_get(logp, &lsn, &rec, DB_FIRST);
+           ret == 0; ret = log_get(logp, &lsn, &rec, DB_NEXT)) {
+               if (rec.size < sizeof(rectype)) {
+                       ret = EINVAL;
+                       __db_err(logp->dbenv, "log_archive: bad log record");
+                       goto lg_free;
+               }
+
+               memcpy(&rectype, rec.data, sizeof(rectype));
+               if (rectype != DB_log_register) {
+                       if (F_ISSET(logp, DB_AM_THREAD)) {
+                               free(rec.data);
+                               rec.data = NULL;
+                       }
+                       continue;
+               }
+               if ((ret = __log_register_read(rec.data, &argp)) != 0) {
+                       ret = EINVAL;
+                       __db_err(logp->dbenv,
+                           "log_archive: unable to read log record");
+                       goto lg_free;
+               }
+
+               if (n >= array_size - 1) {
+                       /*
+                        * Grow through a temporary so the old block, and
+                        * the names it holds, survive a failed realloc.
+                        */
+                       if ((newarray = (char **)realloc(array,
+                           sizeof(char *) *
+                           (array_size + LIST_INCREMENT))) == NULL) {
+                               free(argp);
+                               ret = ENOMEM;
+                               goto lg_free;
+                       }
+                       array = newarray;
+                       array_size += LIST_INCREMENT;
+               }
+
+               /* Copy the name, then drop the decoded record either way. */
+               array[n] = (char *)strdup(argp->name.data);
+               free(argp);
+               if (array[n] == NULL) {
+                       ret = ENOMEM;
+                       goto lg_free;
+               }
+               array[++n] = NULL;
+
+               if (F_ISSET(logp, DB_AM_THREAD)) {
+                       free(rec.data);
+                       rec.data = NULL;
+               }
+       }
+
+       /* If there's nothing to return, we're done. */
+       if (n == 0) {
+               ret = 0;
+               *listp = NULL;
+               goto err1;
+       }
+
+       /* Sort the list. */
+       qsort(array, (size_t)n, sizeof(char *), cmpfunc);
+
+       /*
+        * Build the real pathnames, discarding nonexistent files and
+        * duplicates.
+        *
+        * Throughout this loop every non-NULL slot in [0, n) is a live
+        * string owned by the array: slots below last are finished, slot
+        * last is the one being worked on, slots between last and nxt have
+        * been cleared, and slots from nxt on are still to be considered.
+        */
+       for (last = nxt = 0; nxt < n;) {
+               /*
+                * Discard duplicates.  Last is the next slot we're going
+                * to return to the user, nxt is the next slot that we're
+                * going to consider.
+                */
+               if (last != nxt) {
+                       array[last] = array[nxt];
+                       array[nxt] = NULL;
+               }
+               for (++nxt; nxt < n &&
+                   strcmp(array[last], array[nxt]) == 0; ++nxt) {
+                       FREES(array[nxt]);
+                       array[nxt] = NULL;
+               }
+
+               /* Get the real name. */
+               if ((ret = __db_appname(logp->dbenv,
+                   DB_APP_DATA, NULL, array[last], NULL, &real_name)) != 0)
+                       goto err2;
+
+               /* If the file doesn't exist, ignore it. */
+               if (__db_exists(real_name, NULL) != 0) {
+                       FREES(real_name);
+                       FREES(array[last]);
+                       array[last] = NULL;
+                       continue;
+               }
+
+               /* Rework the name as requested by the user. */
+               FREES(array[last]);
+               array[last] = NULL;
+               if (pref != NULL) {
+                       ret = absname(pref, real_name, &array[last]);
+                       FREES(real_name);
+                       if (ret != 0)
+                               goto err2;
+               } else if ((p = __db_rpath(real_name)) != NULL) {
+                       array[last] = (char *)strdup(p + 1);
+                       FREES(real_name);
+                       if (array[last] == NULL) {
+                               /* ret is still 0 from __db_appname. */
+                               ret = ENOMEM;
+                               goto err2;
+                       }
+               } else
+                       array[last] = real_name;
+               ++last;
+       }
+
+       /* NULL-terminate the list. */
+       array[last] = NULL;
+
+       /* Rework the memory. */
+       if ((ret = usermem(&array, db_malloc)) != 0)
+               goto err1;
+
+       *listp = array;
+       return (0);
+
+lg_free:
+       /* Release the record buffer if the log layer allocated it. */
+       if (F_ISSET(&rec, DB_DBT_MALLOC) && rec.data != NULL)
+               free(rec.data);
+       goto err1;
+
+err2:  /*
+        * The second pass may have punched NULL holes into the list, so
+        * a walk-until-NULL would miss entries or, worse, run into slots
+        * that were already released.  Free each remaining string exactly
+        * once by visiting every slot.
+        */
+       for (nxt = 0; nxt < n; ++nxt)
+               if (array[nxt] != NULL)
+                       FREES(array[nxt]);
+       free(array);
+       return (ret);
+
+err1:  /* Here the list is contiguous and NULL-terminated. */
+       if (array != NULL) {
+               for (arrayp = array; *arrayp != NULL; ++arrayp)
+                       FREES(*arrayp);
+               free(array);
+       }
+       return (ret);
+}
+
+/*
+ * absname --
+ *     Return an absolute path name for the file.
+ *
+ *     Concatenates pref and name into newly malloc'd memory, inserting
+ *     the first PATH_SEPARATOR character between them unless pref
+ *     already ends in a separator.  An empty pref yields a copy of name.
+ *     On success *newnamep is set and 0 is returned; on failure ENOMEM
+ *     is returned and *newnamep is left untouched.
+ */
+static int
+absname(pref, name, newnamep)
+       char *pref, *name, **newnamep;
+{
+       size_t l_pref, l_name;
+       char *newname;
+
+       l_pref = strlen(pref);
+       l_name = strlen(name);
+
+       /* Malloc space for concatenating the two. */
+       if ((newname = (char *)malloc(l_pref + l_name + 2)) == NULL)
+               return (ENOMEM);
+
+       /*
+        * Build the name.  Only look at the last prefix byte if there is
+        * one: with an empty prefix, l_pref - 1 would index before the
+        * start of the buffer.
+        */
+       memcpy(newname, pref, l_pref);
+       if (l_pref > 0 &&
+           strchr(PATH_SEPARATOR, newname[l_pref - 1]) == NULL)
+               newname[l_pref++] = PATH_SEPARATOR[0];
+       memcpy(newname + l_pref, name, l_name + 1);
+       *newnamep = newname;
+
+       return (0);
+}
+
+/*
+ * usermem --
+ *     Repack a NULL-terminated list of strings into one chunk of memory:
+ *     the pointer vector first, the string bytes right behind it.  The
+ *     chunk comes from the user's allocator if one was given, otherwise
+ *     from malloc.  On success the old strings and the old vector are
+ *     freed and *listp points at the new chunk; on failure ENOMEM is
+ *     returned and the old list is left as it was.
+ */
+static int
+usermem(listp, func)
+       char ***listp;
+       void *(*func) __P((size_t));
+{
+       size_t len, total;
+       char **dst, **newlist, **src, *strp;
+
+       /* One pointer plus the string and its NUL for every entry... */
+       total = 0;
+       for (src = *listp; *src != NULL; ++src)
+               total += sizeof(char *) + strlen(*src) + 1;
+       /* ...and one more pointer for the terminating NULL. */
+       total += sizeof(char *);
+
+       /*
+        * Allocate it and set up the pointers.
+        *
+        * XXX
+        * Don't simplify this expression, SunOS compilers don't like it.
+        */
+       if (func == NULL)
+               newlist = (char **)malloc(total);
+       else
+               newlist = (char **)func(total);
+       if (newlist == NULL)
+               return (ENOMEM);
+
+       /* src stopped on the terminator, so src - *listp is the count. */
+       strp = (char *)(newlist + (src - *listp) + 1);
+
+       /* Copy the original information into the new memory. */
+       dst = newlist;
+       src = *listp;
+       while (*src != NULL) {
+               len = strlen(*src) + 1;
+               memcpy(strp, *src, len);
+               *dst++ = strp;
+               strp += len;
+
+               FREES(*src);
+               ++src;
+       }
+
+       /* NULL-terminate the list. */
+       *dst = NULL;
+
+       free(*listp);
+       *listp = newlist;
+
+       return (0);
+}
+
+/*
+ * cmpfunc --
+ *     qsort comparison function for an array of char *: order the
+ *     elements by strcmp of the strings they point to.
+ */
+static int
+cmpfunc(p1, p2)
+       const void *p1, *p2;
+{
+       char *s1, *s2;
+
+       s1 = *((char **)p1);
+       s2 = *((char **)p2);
+       return (strcmp(s1, s2));
+}
diff --git a/db2/log/log_auto.c b/db2/log/log_auto.c
new file mode 100644 (file)
index 0000000..5940008
--- /dev/null
@@ -0,0 +1,351 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#include "config.h"
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "log.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * PUBLIC: int __log_register_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     DBT *, DBT *, u_int32_t, DBTYPE));
+ */
+int __log_register_log(logp, txnid, ret_lsnp, flags,
+       name, uid, id, ftype)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       DBT *name;
+       DBT *uid;
+       u_int32_t id;
+       DBTYPE ftype;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t zero;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_log_register;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(u_int32_t) + (name == NULL ? 0 : name->size)
+           + sizeof(u_int32_t) + (uid == NULL ? 0 : uid->size)
+           + sizeof(id)
+           + sizeof(ftype);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       if (name == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &name->size, sizeof(name->size));
+               bp += sizeof(name->size);
+               memcpy(bp, name->data, name->size);
+               bp += name->size;
+       }
+       if (uid == NULL) {
+               zero = 0;
+               memcpy(bp, &zero, sizeof(u_int32_t));
+               bp += sizeof(u_int32_t);
+       } else {
+               memcpy(bp, &uid->size, sizeof(uid->size));
+               bp += sizeof(uid->size);
+               memcpy(bp, uid->data, uid->size);
+               bp += uid->size;
+       }
+       memcpy(bp, &id, sizeof(id));
+       bp += sizeof(id);
+       memcpy(bp, &ftype, sizeof(ftype));
+       bp += sizeof(ftype);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = __log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __log_register_print --
+ *     Decode a DB_log_register record from dbtp->data and print it on
+ *     standard output.  name and uid are dumped byte by byte: printable
+ *     bytes and newline (0xa) are written as is, everything else in hex.
+ *     The bytes are read as unsigned values so that bytes above 0x7f are
+ *     valid arguments for isprint() and are not printed sign-extended.
+ *
+ *     Returns the error from __log_register_read if decoding fails,
+ *     otherwise 0.  The notused* parameters are ignored.
+ *
+ * PUBLIC: int __log_register_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__log_register_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __log_register_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;
+       c = 0;
+       /* Dummy assignments to the parameters this function does not use. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __log_register_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]log_register: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tname: ");
+       for (i = 0; i < argp->name.size; i++) {
+               c = ((u_int8_t *)argp->name.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tuid: ");
+       for (i = 0; i < argp->uid.size; i++) {
+               c = ((u_int8_t *)argp->uid.data)[i];
+               if (isprint(c) || c == 0xa)
+                       putchar(c);
+               else
+                       printf("%#x ", c);
+       }
+       printf("\n");
+       printf("\tid: %lu\n", (u_long)argp->id);
+       printf("\tftype: 0x%lx\n", (u_long)argp->ftype);
+       printf("\n");
+       /* argp was allocated by __log_register_read. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __log_register_read --
+ *     Unmarshal a DB_log_register record from recbuf into a freshly
+ *     allocated __log_register_args, returned through *argpp; the caller
+ *     releases it with free().  Returns ENOMEM if the allocation fails,
+ *     otherwise 0.
+ *
+ *     The name and uid data pointers refer directly into recbuf (the bytes
+ *     are not copied), so recbuf has to stay valid for as long as those
+ *     fields are used.
+ *
+ * PUBLIC: int __log_register_read __P((void *, __log_register_args **));
+ */
+int
+__log_register_read(recbuf, argpp)
+       void *recbuf;
+       __log_register_args **argpp;
+{
+       __log_register_args *rec;
+       u_int8_t *src;
+
+       /* One allocation: the args structure with a DB_TXN right behind it. */
+       rec = (__log_register_args *)
+           malloc(sizeof(__log_register_args) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Fixed header: record type, transaction id, previous LSN. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+
+       /* name: a u_int32_t length followed by that many bytes. */
+       memcpy(&rec->name.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->name.data = src;
+       src += rec->name.size;
+
+       /* uid: same encoding as name. */
+       memcpy(&rec->uid.size, src, sizeof(u_int32_t));
+       src += sizeof(u_int32_t);
+       rec->uid.data = src;
+       src += rec->uid.size;
+
+       /* Trailing fixed-size fields. */
+       memcpy(&rec->id, src, sizeof(rec->id));
+       src += sizeof(rec->id);
+       memcpy(&rec->ftype, src, sizeof(rec->ftype));
+       src += sizeof(rec->ftype);
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __log_unregister_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t));
+ */
+int __log_unregister_log(logp, txnid, ret_lsnp, flags,
+       id)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t id;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_log_unregister;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(id);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &id, sizeof(id));
+       bp += sizeof(id);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = __log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * __log_unregister_print --
+ *     Decode a DB_log_unregister record from dbtp->data and print it on
+ *     standard output.
+ *
+ *     Returns the error from __log_unregister_read if decoding fails,
+ *     otherwise 0.  The notused* parameters are ignored.
+ *
+ * PUBLIC: int __log_unregister_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+
+int
+__log_unregister_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __log_unregister_args *argp;
+       int ret;
+
+       /* Dummy assignments to the parameters this function does not use. */
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __log_unregister_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]log_unregister: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tid: %lu\n", (u_long)argp->id);
+       printf("\n");
+       /* argp was allocated by __log_unregister_read. */
+       free(argp);
+       return (0);
+}
+
+/*
+ * __log_unregister_read --
+ *     Unmarshal a DB_log_unregister record from recbuf into a freshly
+ *     allocated __log_unregister_args, returned through *argpp; the caller
+ *     releases it with free().  Returns ENOMEM if the allocation fails,
+ *     otherwise 0.
+ *
+ * PUBLIC: int __log_unregister_read __P((void *, __log_unregister_args **));
+ */
+int
+__log_unregister_read(recbuf, argpp)
+       void *recbuf;
+       __log_unregister_args **argpp;
+{
+       __log_unregister_args *rec;
+       u_int8_t *src;
+
+       /* One allocation: the args structure with a DB_TXN right behind it. */
+       rec = (__log_unregister_args *)
+           malloc(sizeof(__log_unregister_args) + sizeof(DB_TXN));
+       if (rec == NULL)
+               return (ENOMEM);
+       rec->txnid = (DB_TXN *)(rec + 1);
+
+       /* Fields are stored back to back: type, txnid, prev_lsn, id. */
+       src = recbuf;
+       memcpy(&rec->type, src, sizeof(rec->type));
+       src += sizeof(rec->type);
+       memcpy(&rec->txnid->txnid, src, sizeof(rec->txnid->txnid));
+       src += sizeof(rec->txnid->txnid);
+       memcpy(&rec->prev_lsn, src, sizeof(DB_LSN));
+       src += sizeof(DB_LSN);
+       memcpy(&rec->id, src, sizeof(rec->id));
+       src += sizeof(rec->id);
+
+       *argpp = rec;
+       return (0);
+}
+
+/*
+ * __log_init_print --
+ *     Register the print routines for the DB_log_register and
+ *     DB_log_unregister record types with __db_add_recovery.  Stops at the
+ *     first registration that fails and returns its error; returns 0 when
+ *     both succeed.
+ *
+ * PUBLIC: int __log_init_print __P((DB_ENV *));
+ */
+int
+__log_init_print(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       ret = __db_add_recovery(dbenv,
+           __log_register_print, DB_log_register);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __log_unregister_print, DB_log_unregister);
+       return (ret);
+}
+
+/*
+ * __log_init_recover --
+ *     Register the recovery routines for the DB_log_register and
+ *     DB_log_unregister record types with __db_add_recovery.  Stops at the
+ *     first registration that fails and returns its error; returns 0 when
+ *     both succeed.
+ *
+ * PUBLIC: int __log_init_recover __P((DB_ENV *));
+ */
+int
+__log_init_recover(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       ret = __db_add_recovery(dbenv,
+           __log_register_recover, DB_log_register);
+       if (ret == 0)
+               ret = __db_add_recovery(dbenv,
+                   __log_unregister_recover, DB_log_unregister);
+       return (ret);
+}
+
diff --git a/db2/log/log_compare.c b/db2/log/log_compare.c
new file mode 100644 (file)
index 0000000..601b25c
--- /dev/null
@@ -0,0 +1,34 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_compare.c        10.2 (Sleepycat) 6/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#endif
+
+#include "db_int.h"
+
+/*
+ * log_compare --
+ *     Compare two LSN's, ordering first by file and then by offset within
+ *     the file.  Returns -1 if lsn0 sorts before lsn1, 1 if it sorts after,
+ *     and 0 if the two are equal.
+ */
+int
+log_compare(lsn0, lsn1)
+       const DB_LSN *lsn0, *lsn1;
+{
+       /* The file number is the major key. */
+       if (lsn0->file < lsn1->file)
+               return (-1);
+       if (lsn0->file > lsn1->file)
+               return (1);
+
+       /* Same file: the offset decides. */
+       if (lsn0->offset < lsn1->offset)
+               return (-1);
+       if (lsn0->offset > lsn1->offset)
+               return (1);
+
+       return (0);
+}
diff --git a/db2/log/log_findckp.c b/db2/log/log_findckp.c
new file mode 100644 (file)
index 0000000..67fe9c9
--- /dev/null
@@ -0,0 +1,130 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_findckp.c        10.10 (Sleepycat) 7/30/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "log.h"
+#include "txn.h"
+#include "common_ext.h"
+
+/*
+ * __log_findckp --
+ *
+ * Looks for the most recent checkpoint that occurs before the most recent
+ * checkpoint LSN.  This is the point from which recovery can start and the
+ * point up to which archival/truncation can take place.  Checkpoints in
+ * the log look like:
+ *
+ * -------------------------------------------------------------------
+ *  | ckp A, ckplsn 100 |  .... record .... | ckp B, ckplsn 600 | ...
+ * -------------------------------------------------------------------
+ *         LSN 500                                 LSN 1000
+ *
+ * If we read what log returns from using the DB_CKP parameter to logput,
+ * we'll get the record at LSN 1000.  The checkpoint LSN there is 600.
+ * Now we have to scan backwards looking for a checkpoint before LSN 600.
+ * We find one at 500.  This means that we can truncate the log before
+ * 500 or run recovery beginning at 500.
+ *
+ * The LSN that was found is stored in *lsnp.  *lsnp is only written on the
+ * paths that reach the end of the function; the early error returns leave
+ * it untouched.
+ *
+ * Returns 0 if we find a checkpoint.
+ * Returns errno on error.
+ * Returns DB_NOTFOUND if we could not find a suitable start point and
+ * we should start from the beginning.
+ *
+ * PUBLIC: int __log_findckp __P((DB_LOG *, DB_LSN *));
+ */
+int
+__log_findckp(lp, lsnp)
+       DB_LOG *lp;
+       DB_LSN *lsnp;
+{
+       DBT data;
+       DB_LSN ckp_lsn, last_ckp, next_lsn;
+       __txn_ckp_args *ckp_args;
+       int ret, verbose;
+
+       /* Progress is reported through __db_err only if db_verbose is set. */
+       verbose = lp->dbenv != NULL && lp->dbenv->db_verbose != 0;
+
+       /*
+        * Need to find the appropriate point from which to begin
+        * recovery.
+        *
+        * For a DB_AM_THREAD handle the DBT is flagged DB_DBT_MALLOC, and
+        * every data.data obtained that way is free()d by this function
+        * before the next log_get call or before returning.
+        */
+       memset(&data, 0, sizeof(data));
+       if (F_ISSET(lp, DB_AM_THREAD))
+               F_SET(&data, DB_DBT_MALLOC);
+       /* ENOENT from log_get is reported to the caller as DB_NOTFOUND. */
+       if ((ret = log_get(lp, &last_ckp, &data, DB_CHECKPOINT)) != 0)
+               return (ret == ENOENT ? DB_NOTFOUND : ret);
+       ZERO_LSN(ckp_lsn);
+
+       next_lsn = last_ckp;
+       do {
+               /* Release the record returned by the previous log_get. */
+               if (F_ISSET(lp, DB_AM_THREAD))
+                       free(data.data);
+
+               if ((ret = log_get(lp, &next_lsn, &data, DB_SET)) != 0)
+                       return (ret);
+               if ((ret = __txn_ckp_read(data.data, &ckp_args)) != 0) {
+                       if (F_ISSET(lp, DB_AM_THREAD))
+                               free(data.data);
+                       return (ret);
+               }
+               /*
+                * Capture the checkpoint LSN while ckp_lsn is still unset;
+                * normally that is the first record read.
+                */
+               if (IS_ZERO_LSN(ckp_lsn))
+                       ckp_lsn = ckp_args->ckp_lsn;
+               if (verbose) {
+                       __db_err(lp->dbenv, "Checkpoint at: [%lu][%lu]",
+                           (u_long)last_ckp.file, (u_long)last_ckp.offset);
+                       __db_err(lp->dbenv, "Checkpoint LSN: [%lu][%lu]",
+                           (u_long)ckp_args->ckp_lsn.file,
+                           (u_long)ckp_args->ckp_lsn.offset);
+                       __db_err(lp->dbenv, "Previous checkpoint: [%lu][%lu]",
+                           (u_long)ckp_args->last_ckp.file,
+                           (u_long)ckp_args->last_ckp.offset);
+               }
+               /*
+                * Step backwards: remember the record just read and follow
+                * its pointer to the previous checkpoint.
+                */
+               last_ckp = next_lsn;
+               next_lsn = ckp_args->last_ckp;
+               free(ckp_args);
+       } while (!IS_ZERO_LSN(next_lsn) &&
+           log_compare(&last_ckp, &ckp_lsn) > 0);
+
+       /* Release the record left over from the last loop iteration. */
+       if (F_ISSET(lp, DB_AM_THREAD))
+               free(data.data);
+
+       /*
+        * At this point, either, next_lsn is ZERO or ckp_lsn is the
+        * checkpoint lsn and last_ckp is the LSN of the last checkpoint
+        * before ckp_lsn.  If the compare in the loop is still true, then
+        * next_lsn must be 0 and we need to roll forward from the
+        * beginning of the log.
+        */
+       if (log_compare(&last_ckp, &ckp_lsn) > 0) {
+               /* Only the LSN of the first record is wanted, not its data. */
+               if ((ret = log_get(lp, &last_ckp, &data, DB_FIRST)) != 0)
+                       return (ret);
+               if (F_ISSET(lp, DB_AM_THREAD))
+                       free(data.data);
+       } 
+       *lsnp = last_ckp;
+
+       if (verbose)
+               __db_err(lp->dbenv, "Rolling forward from [%lu][%lu]",
+                       (u_long)last_ckp.file, (u_long)last_ckp.offset);
+
+       return (IS_ZERO_LSN(last_ckp) ? DB_NOTFOUND : 0);
+}
diff --git a/db2/log/log_get.c b/db2/log/log_get.c
new file mode 100644 (file)
index 0000000..37eb5cb
--- /dev/null
@@ -0,0 +1,355 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_get.c    10.16 (Sleepycat) 8/19/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "log.h"
+#include "hash.h"
+#include "common_ext.h"
+
+/*
+ * log_get --
+ *     Get a log record.
+ *
+ *     Validates flags, then calls __log_get between LOCK_LOGREGION and
+ *     UNLOCK_LOGREGION.  A non-zero flags value must be exactly one of
+ *     DB_CHECKPOINT, DB_CURRENT, DB_FIRST, DB_LAST, DB_NEXT, DB_PREV or
+ *     DB_SET.  For a DB_AM_THREAD handle, DB_NEXT, DB_PREV and DB_CURRENT
+ *     are rejected and dbt must carry DB_DBT_USERMEM or DB_DBT_MALLOC.
+ *
+ *     Returns the error from the flag checks, otherwise the return value
+ *     of __log_get.
+ */
+int
+log_get(dblp, alsn, dbt, flags)
+       DB_LOG *dblp;
+       DB_LSN *alsn;
+       DBT *dbt;
+       int flags;
+{
+       LOG *lp;
+       int ret;
+
+       /* Validate arguments. */
+#define        OKFLAGS (DB_CHECKPOINT | \
+    DB_CURRENT | DB_FIRST | DB_LAST | DB_NEXT | DB_PREV | DB_SET)
+       if (flags != 0) {
+               if ((ret =
+                   __db_fchk(dblp->dbenv, "log_get", flags, OKFLAGS)) != 0)
+                       return (ret);
+               /*
+                * Exactly one of the values may be given.  The "case 0"
+                * label cannot be reached inside this flags != 0 branch.
+                */
+               switch (flags) {
+               case DB_CHECKPOINT:
+               case DB_CURRENT:
+               case DB_FIRST:
+               case DB_LAST:
+               case DB_NEXT:
+               case DB_PREV:
+               case DB_SET:
+               case 0:
+                       break;
+               default:
+                       return (__db_ferr(dblp->dbenv, "log_get", 1));
+               }
+       }
+       /* Extra restrictions that apply to DB_AM_THREAD handles only. */
+       if (F_ISSET(dblp, DB_AM_THREAD)) {
+               if (LF_ISSET(DB_NEXT | DB_PREV | DB_CURRENT))
+                       return (__db_ferr(dblp->dbenv, "log_get", 1));
+               if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC))
+                       return (__db_ferr(dblp->dbenv, "threaded data", 1));
+       }
+
+       lp = dblp->lp;
+
+       LOCK_LOGREGION(dblp);
+
+       /*
+        * If we get one of the log's header records, repeat the operation.
+        * This assumes that applications don't ever request the log header
+        * records by LSN, but that seems reasonable to me.
+        *
+        * A returned LSN with offset 0 is taken to be a header record.  For
+        * the retry DB_FIRST becomes DB_NEXT and DB_LAST becomes DB_PREV;
+        * any other flag value is passed again unchanged.
+        */
+       ret = __log_get(dblp, alsn, dbt, flags, 0);
+       if (ret == 0 && alsn->offset == 0) {
+               switch (flags) {
+               case DB_FIRST:
+                       flags = DB_NEXT;
+                       break;
+               case DB_LAST:
+                       flags = DB_PREV;
+                       break;
+               }
+               ret = __log_get(dblp, alsn, dbt, flags, 0);
+       }
+
+       UNLOCK_LOGREGION(dblp);
+
+       return (ret);
+}
+
+/*
+ * __log_get --
+ *     Get a log record; internal version.
+ *
+ *     flags selects the record (DB_CHECKPOINT, DB_NEXT, DB_FIRST,
+ *     DB_CURRENT, DB_PREV, DB_LAST or DB_SET, the latter reading *alsn).
+ *     The record body is copied into dbt from the in-memory log buffer,
+ *     from the log file, or from both when it straddles the two, and is
+ *     then checked against the checksum stored in its header.
+ *
+ *     On success returns 0, stores the record's LSN in *alsn and updates
+ *     the cursor fields (c_off, c_len, c_lsn) in dblp.  Returns DB_NOTFOUND
+ *     when the request is before the first or past the last record, ENOENT
+ *     when DB_CHECKPOINT is requested and no checkpoint is set, EIO when the
+ *     record looks corrupt, or the error of the call that failed.
+ *
+ *     If silent is non-zero the checksum-mismatch message and the message
+ *     printed at the err1 label are suppressed.
+ *
+ * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, int, int));
+ */
+int
+__log_get(dblp, alsn, dbt, flags, silent)
+       DB_LOG *dblp;
+       DB_LSN *alsn;
+       DBT *dbt;
+       int flags, silent;
+{
+       DB_LSN nlsn;
+       HDR hdr;
+       LOG *lp;
+       size_t len;
+       ssize_t nr;
+       int cnt, ret;
+       const char *fail;
+       char *np, *tbuf;
+       void *p, *shortp;
+
+       lp = dblp->lp;
+       fail = np = tbuf = NULL;
+
+       /* Start from the cursor position; the flags adjust it below. */
+       nlsn = dblp->c_lsn;
+       switch (flags) {
+       case DB_CHECKPOINT:
+               nlsn = dblp->lp->c_lsn;
+               if (IS_ZERO_LSN(nlsn)) {
+                       __db_err(dblp->dbenv,
+       "log_get: unable to find checkpoint record: no checkpoint set.");
+                       ret = ENOENT;
+                       goto err2;
+               }
+               break;
+       case DB_NEXT:                           /* Next log record. */
+               if (!IS_ZERO_LSN(nlsn)) {
+                       /* Increment the cursor by the cursor record size. */
+                       nlsn.offset += dblp->c_len;
+                       break;
+               }
+               /* FALLTHROUGH */
+       case DB_FIRST:                          /* Find the first log record. */
+               /*
+                * Find any log file.  Note, we may have only entered records
+                * in the buffer, and not yet written a log file.
+                */
+               if ((ret = __log_find(dblp->dbenv, lp, &cnt)) != 0) {
+                       __db_err(dblp->dbenv,
+       "log_get: unable to find the first record: no log files found.");
+                       goto err2;
+               }
+
+               /* If there's anything in the buffer, it belongs to file 1. */
+               if (cnt == 0)
+                       cnt = 1;
+
+               /* Now go backwards to find the smallest one. */
+               for (; cnt > 1; --cnt)
+                       if (__log_valid(dblp->dbenv, NULL, cnt) != 0) {
+                               ++cnt;
+                               break;
+                       }
+               nlsn.file = cnt;
+               nlsn.offset = 0;
+               break;
+       case DB_CURRENT:                        /* Current log record. */
+               break;
+       case DB_PREV:                           /* Previous log record. */
+               if (!IS_ZERO_LSN(nlsn)) {
+                       /* If at start-of-file, move to the previous file. */
+                       if (nlsn.offset == 0) {
+                               if (nlsn.file == 1)
+                                       return (DB_NOTFOUND);
+
+                               --nlsn.file;
+                               nlsn.offset = dblp->c_off;
+                       } else
+                               nlsn.offset = dblp->c_off;
+                       break;
+               }
+               /* FALLTHROUGH */
+       case DB_LAST:                           /* Last log record. */
+               nlsn.file = lp->lsn.file;
+               nlsn.offset = lp->lsn.offset - lp->len;
+               break;
+       case DB_SET:                            /* Set log record. */
+               nlsn = *alsn;
+               break;
+       }
+
+retry:
+       /* Return DB_NOTFOUND if the request is past end-of-file. */
+       if (nlsn.file > lp->lsn.file ||
+           (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
+               return (DB_NOTFOUND);
+
+       /* If we've switched files, discard the current fd. */
+       if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) {
+               (void)__db_close(dblp->c_fd);
+               dblp->c_fd = -1;
+       }
+
+       /* If the entire record is in the in-memory buffer, copy it out. */
+       if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
+               /* Copy the header. */
+               p = lp->buf + (nlsn.offset - lp->w_off);
+               memcpy(&hdr, p, sizeof(HDR));
+
+               /* Copy the record. */
+               len = hdr.len - sizeof(HDR);
+               if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR),
+                   len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
+                       goto err1;
+               goto cksum;
+       }
+
+       /*
+        * Move the file descriptor to the page that has the hdr.  We dealt
+        * with moving to a previous log file in the flags switch code, but
+        * we don't yet know if we'll need to move to a subsequent file.
+        *
+        * Acquire a file descriptor.
+        */
+       if (dblp->c_fd == -1) {
+               if ((ret = __log_name(dblp->dbenv, nlsn.file, &np)) != 0)
+                       goto err1;
+               if ((ret = __db_fdopen(np, DB_RDONLY | DB_SEQUENTIAL,
+                   DB_RDONLY | DB_SEQUENTIAL, 0, &dblp->c_fd)) != 0) {
+                       /* np is reported at err1 and released at err2. */
+                       fail = np;
+                       goto err1;
+               }
+               free(np);
+               np = NULL;
+       }
+
+       /* Seek to the header offset and read the header. */
+       if ((ret = __db_lseek(dblp->c_fd, 0, 0, nlsn.offset, SEEK_SET)) != 0) {
+               fail = "seek";
+               goto err1;
+       }
+       if ((ret = __db_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) {
+               fail = "read";
+               goto err1;
+       }
+       if (nr == sizeof(HDR))
+               shortp = NULL;
+       else {
+               /* If read returns EOF, try the next file. */
+               if (nr == 0) {
+                       if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
+                               goto corrupt;
+
+                       /* Move to the next file. */
+                       ++nlsn.file;
+                       nlsn.offset = 0;
+                       goto retry;
+               }
+
+               /*
+                * If read returns a short count the rest of the record has
+                * to be in the in-memory buffer.
+                */
+               if (lp->b_off < sizeof(HDR) - nr)
+                       goto corrupt;
+
+               /* Get the rest of the header from the in-memory buffer. */
+               memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr);
+               shortp = lp->buf + (sizeof(HDR) - nr);
+       }
+
+       /*
+        * Check for buffers of 0's, that's what we usually see during
+        * recovery, although it's certainly not something on which we
+        * can depend.
+        */
+       if (hdr.len <= sizeof(HDR))
+               goto corrupt;
+       len = hdr.len - sizeof(HDR);
+
+       /* If we've already moved to the in-memory buffer, fill from there. */
+       if (shortp != NULL) {
+               if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len)
+                       goto corrupt;
+               if ((ret = __db_retcopy(dbt, shortp, len,
+                   &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
+                       goto err1;
+               goto cksum;
+       }
+
+       /* Allocate temporary memory to hold the record. */
+       if ((tbuf = (char *)malloc(len)) == NULL) {
+               ret = ENOMEM;
+               goto err1;
+       }
+
+       /*
+        * Read the record into the buffer.  If read returns a short count,
+        * there was an error or the rest of the record is in the in-memory
+        * buffer.  Note, the information may be garbage if we're in recovery,
+        * so don't read past the end of the buffer's memory.
+        */
+       if ((ret = __db_read(dblp->c_fd, tbuf, len, &nr)) != 0) {
+               fail = "read";
+               goto err1;
+       }
+       if (len - nr > sizeof(lp->buf))
+               goto corrupt;
+       if (nr != (ssize_t)len) {
+               if (lp->b_off < len - nr)
+                       goto corrupt;
+
+               /* Get the rest of the record from the in-memory buffer. */
+               memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr);
+       }
+
+       /* Copy the record into the user's DBT. */
+       if ((ret = __db_retcopy(dbt, tbuf, len,
+           &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
+               goto err1;
+       free(tbuf);
+       /*
+        * Forget the pointer: a checksum mismatch below jumps to the error
+        * path, which frees tbuf whenever it is non-NULL.
+        */
+       tbuf = NULL;
+
+cksum: if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
+               if (!silent)
+                       __db_err(dblp->dbenv, "log_get: checksum mismatch");
+               goto corrupt;
+       }
+
+       /* Update the cursor and the return lsn. */
+       dblp->c_off = hdr.prev;
+       dblp->c_len = hdr.len;
+       dblp->c_lsn = *alsn = nlsn;
+
+       return (0);
+
+corrupt:/*
+        * This is the catchall -- for some reason we didn't find enough
+        * information or it wasn't reasonable information, and it wasn't
+        * because a system call failed.
+        */
+       ret = EIO;
+       fail = "read";
+
+err1:  if (!silent) {
+               if (fail == NULL)
+                       __db_err(dblp->dbenv, "log_get: %s", strerror(ret));
+               else
+                       __db_err(dblp->dbenv,
+                           "log_get: %s: %s", fail, strerror(ret));
+       }
+err2:  if (np != NULL)
+               free(np);
+       if (tbuf != NULL)
+               free(tbuf);
+       return (ret);
+}
diff --git a/db2/log/log_put.c b/db2/log/log_put.c
new file mode 100644 (file)
index 0000000..db31f9b
--- /dev/null
@@ -0,0 +1,484 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_put.c    10.12 (Sleepycat) 8/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "log.h"
+#include "hash.h"
+#include "common_ext.h"
+
+static int __log_fill __P((DB_LOG *, void *, u_int32_t));
+static int __log_newfd __P((DB_LOG *));
+static int __log_write __P((DB_LOG *, void *, u_int32_t));
+static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t));
+
+/*
+ * log_put --
+ *     Public entry point for appending a record to the log.  Checks the
+ *     flags argument, then does the write through __log_put() while the
+ *     log region is locked.  Returns 0 or the first error encountered.
+ */
+int
+log_put(dblp, lsn, dbt, flags)
+       DB_LOG *dblp;
+       DB_LSN *lsn;
+       const DBT *dbt;
+       int flags;
+{
+       int status;
+
+       /*
+        * Zero needs no checking.  Anything else is first vetted by
+        * __db_fchk() against OKFLAGS and must then be exactly one of
+        * DB_CHECKPOINT or DB_FLUSH.
+        */
+#define        OKFLAGS (DB_CHECKPOINT | DB_FLUSH)
+       if (flags != 0) {
+               status = __db_fchk(dblp->dbenv, "log_put", flags, OKFLAGS);
+               if (status != 0)
+                       return (status);
+               if (flags != DB_CHECKPOINT && flags != DB_FLUSH)
+                       return (__db_ferr(dblp->dbenv, "log_put", 1));
+       }
+
+       /* The internal routine runs with the region locked. */
+       LOCK_LOGREGION(dblp);
+       status = __log_put(dblp, lsn, dbt, flags);
+       UNLOCK_LOGREGION(dblp);
+
+       return (status);
+}
+
+/*
+ * __log_put --
+ *     Write a log record; internal version.
+ *
+ *     Takes no lock itself; log_put() calls it with the log region
+ *     locked.  On success *lsn holds the file number and offset at which
+ *     the caller's record starts.  Returns 0, EINVAL when the record
+ *     cannot fit in one log file, or the error from a failed write, sync
+ *     or file-registration record.
+ *
+ * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
+ */
+int
+__log_put(dblp, lsn, dbt, flags)
+       DB_LOG *dblp;
+       DB_LSN *lsn;
+       const DBT *dbt;
+       int flags;
+{
+       DBT t;
+       DBT fid_dbt;
+       DB_LSN r_unused;
+       FNAME *fnp;
+       LOG *lp;
+       u_int32_t lastoff;
+       int ret;
+
+       lp = dblp->lp;
+
+       /* If this information won't fit in the file, swap files. */
+       if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
+               /*
+                * Not even a fresh file (which also carries the persistent
+                * LOGP record) could hold it: reject the record.
+                */
+               if (sizeof(HDR) +
+                   sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
+                       __db_err(dblp->dbenv,
+                           "log_put: record larger than maximum file size");
+                       return (EINVAL);
+               }
+               /* Write out and sync whatever is buffered for the old file. */
+               if (lp->b_off != 0) {
+                       if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+                               return (ret);
+                       if ((ret = __db_fsync(dblp->lfd)) != 0)
+                               return (ret);
+                       lp->s_lsn.file = lp->lsn.file;
+                       lp->s_lsn.offset = lp->lsn.offset - 1;
+               }
+
+               /*
+                * Save the last known offset from the previous file, we'll
+                * need it to initialize the persistent header information.
+                */
+               lastoff = lp->lsn.offset;
+
+               /* Advance to the beginning of the next log file. */
+               ++lp->lsn.file;
+               lp->lsn.offset = 0;
+               lp->w_off = 0;
+       } else
+               lastoff = 0;
+
+       /*
+        * Insert persistent information as the first record in every file.
+        * Note that the previous length is wrong for the very first record
+        * of the log, but that's okay, we check for it during retrieval.
+        */
+       if (lp->lsn.offset == 0) {
+               t.data = &lp->persist;
+               t.size = sizeof(LOGP);
+               if ((ret = __log_putr(dblp,
+                   &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
+                       return (ret);
+       }
+
+       /* Initialize the LSN information returned to the user. */
+       lsn->file = lp->lsn.file;
+       lsn->offset = lp->lsn.offset;
+
+       /* Put out the user's record. */
+       if ((ret = __log_putr(dblp, dbt, lp->lsn.offset - lp->len)) != 0)
+               return (ret);
+
+       /*
+        * On a checkpoint, we:
+        *      Put out the checkpoint record (above).
+        *      Save the LSN of the checkpoint in the shared region.
+        *      Append the set of file name information into the log.
+        *      Flush the current buffer contents to disk.
+        *      Sync the log to disk.
+        *      Save the time the checkpoint was written.
+        *      Reset the bytes written since the last checkpoint.
+        */
+       if (flags == DB_CHECKPOINT) {
+               lp->c_lsn = *lsn;
+
+               /*
+                * Write one registration record for every entry on the
+                * shared file-name queue.
+                */
+               for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
+                   fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
+                       t.data = ADDR(dblp, fnp->name_off);
+                       t.size = strlen(t.data) + 1;
+                       memset(&fid_dbt, 0, sizeof(fid_dbt));
+                       fid_dbt.data = ADDR(dblp, fnp->fileid_off);
+                       fid_dbt.size = DB_FILE_ID_LEN;
+                       if ((ret = __log_register_log(dblp, NULL, &r_unused,
+                           0, &t, &fid_dbt, fnp->id, fnp->s_type)) != 0)
+                               return (ret);
+               }
+               if (lp->b_off != 0 &&
+                   (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+                       return (ret);
+               (void)time(&lp->chkpt);
+               lp->written = 0;
+
+               if ((ret = __db_fsync(dblp->lfd)) != 0)
+                       return (ret);
+               lp->s_lsn.file = lp->lsn.file;
+               lp->s_lsn.offset = lp->lsn.offset - 1;
+       }
+
+       /*
+        * We always flush on a checkpoint.  For DB_CHECKPOINT the buffer
+        * was already written and synced just above (leaving b_off at 0),
+        * so this pass only repeats the fsync and the s_lsn update.
+        */
+       if (flags == DB_FLUSH || flags == DB_CHECKPOINT) {
+               if (lp->b_off != 0 &&
+                   (ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+                       return (ret);
+
+               if ((ret = __db_fsync(dblp->lfd)) != 0)
+                       return (ret);
+               lp->s_lsn.file = lp->lsn.file;
+               lp->s_lsn.offset = lp->lsn.offset - 1;
+       }
+
+       /*
+        * If we just did I/O, i.e., this LSN could have spanned the start of
+        * the in-core buffer, we remember it so that we can flush correctly
+        * during a sync.
+        */
+       if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off)
+               lp->span_lsn = *lsn;
+       return (0);
+}
+
+/*
+ * __log_putr --
+ *     Append a single record -- a HDR followed by the caller's data --
+ *     to the log, advancing the current LSN offset as each part is
+ *     copied in.
+ */
+static int
+__log_putr(dblp, dbt, prev)
+       DB_LOG *dblp;
+       const DBT *dbt;
+       u_int32_t prev;
+{
+       HDR rec_hdr;
+       LOG *region;
+       int status;
+
+       region = dblp->lp;
+
+       /*
+        * Build the record header.  Right after a file switch lsn.offset
+        * is 0, so the back-pointer cannot be derived here; the caller
+        * passes the correct value in prev.
+        */
+       rec_hdr.prev = prev;
+       rec_hdr.len = sizeof(HDR) + dbt->size;
+       rec_hdr.cksum = __ham_func4(dbt->data, dbt->size);
+
+       /* Header first; the offset moves only after a successful copy. */
+       status = __log_fill(dblp, &rec_hdr, sizeof(HDR));
+       if (status != 0)
+               return (status);
+       region->lsn.offset += sizeof(HDR);
+
+       /* Then the payload. */
+       status = __log_fill(dblp, dbt->data, dbt->size);
+       if (status != 0)
+               return (status);
+       region->lsn.offset += dbt->size;
+
+       /* Record the total length of the record just written. */
+       region->len = sizeof(HDR) + dbt->size;
+       return (0);
+}
+
+/*
+ * log_flush --
+ *     Write all records less than or equal to the specified LSN.
+ *
+ *     A NULL lsn means "flush everything".  The whole function runs with
+ *     the log region locked.  Returns 0 on success (including when there
+ *     is nothing to do), EINVAL for an LSN past the current end of log,
+ *     or the error from a failed write, open or fsync.
+ */
+int
+log_flush(dblp, lsn)
+       DB_LOG *dblp;
+       const DB_LSN *lsn;
+{
+       DB_LSN t_lsn;
+       LOG *lp;
+       int ret;
+
+       ret = 0;
+       lp = dblp->lp;
+
+       LOCK_LOGREGION(dblp);
+
+       /* If no LSN specified, flush the entire log. */
+       if (lsn == NULL) {
+               /* lsn.offset minus the last record length: start of last record. */
+               t_lsn.file = lp->lsn.file;
+               t_lsn.offset = lp->lsn.offset - lp->len;
+               lsn = &t_lsn;
+       }
+
+       /* If it's a non-existent record, it's an error. */
+       if (lsn->file > lp->lsn.file ||
+           (lsn->file == lp->lsn.file && lsn->offset > lp->lsn.offset)) {
+               __db_err(dblp->dbenv, "log_flush: LSN past current end-of-log");
+               ret = EINVAL;
+               goto ret1;
+       }
+
+       /*
+        * If it's from a previous file, we're done because we sync each
+        * file when we move to a new one.
+        */
+       if (lsn->file < lp->lsn.file)
+               goto ret1;
+
+       /*
+        * If it's less than the last-sync'd offset, we've already sync'd
+        * this LSN.
+        */
+       if (lsn->offset <= lp->s_lsn.offset)
+               goto ret1;
+
+       /*
+        * We may need to write the current buffer.  We have to write the
+        * current buffer if the sync LSN is greater than or equal to the
+        * saved spanning-LSN.
+        *
+        * NOTE(review): unlike the callers in __log_put(), this write is
+        * not guarded by b_off != 0, so it may be issued with length 0.
+        */
+       if (lsn->file >= lp->span_lsn.file &&
+           lsn->offset >= lp->span_lsn.offset)
+               if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
+                       goto ret1;
+
+       /*
+        * Acquire a file descriptor if we don't have one.
+        *
+        * NOTE(review): __log_write() also tests lfd == -1 before reusing
+        * the descriptor; here only the file number is compared -- confirm
+        * that lfname can never match while lfd is still -1.
+        */
+       if (dblp->lfname != dblp->lp->lsn.file &&
+           (ret = __log_newfd(dblp)) != 0)
+               goto ret1;
+
+       if ((ret = __db_fsync(dblp->lfd)) != 0)
+               goto ret1;
+
+       /* Remember how far the on-disk log is now known to be in sync. */
+       lp->s_lsn.file = lp->lsn.file;
+       lp->s_lsn.offset = lsn->offset;
+
+ret1:  UNLOCK_LOGREGION(dblp);
+       return (ret);
+}
+
+/*
+ * __log_fill --
+ *     Copy len bytes starting at addr into the log.  Data is staged in
+ *     the region's buffer and written out whenever the buffer becomes
+ *     full; whole buffer-sized runs bypass the buffer when it is empty.
+ *     Returns 0 or the error from __log_write().
+ */
+static int
+__log_fill(dblp, addr, len)
+       DB_LOG *dblp;
+       void *addr;
+       u_int32_t len;
+{
+       LOG *region;
+       u_int32_t nbufs;
+       size_t ncopy, room;
+       int status;
+
+       region = dblp->lp;
+       while (len > 0) {
+               if (region->b_off == 0 && len >= sizeof(region->buf)) {
+                       /*
+                        * Empty buffer and at least one buffer's worth of
+                        * data: write the whole multiples straight from
+                        * the caller's memory.
+                        */
+                       nbufs = len / sizeof(region->buf);
+                       status = __log_write(dblp,
+                           addr, nbufs * sizeof(region->buf));
+                       if (status != 0)
+                               return (status);
+                       addr = (u_int8_t *)addr + nbufs * sizeof(region->buf);
+                       len -= nbufs * sizeof(region->buf);
+               } else {
+                       /* Copy as much as fits in what is left of the buffer. */
+                       room = sizeof(region->buf) - region->b_off;
+                       if (room > len)
+                               ncopy = len;
+                       else
+                               ncopy = room;
+                       memcpy(region->buf + region->b_off, addr, ncopy);
+                       addr = (u_int8_t *)addr + ncopy;
+                       len -= ncopy;
+                       region->b_off += ncopy;
+
+                       /* A buffer that just became full goes to disk. */
+                       if (region->b_off == sizeof(region->buf)) {
+                               status = __log_write(dblp,
+                                   region->buf, sizeof(region->buf));
+                               if (status != 0)
+                                       return (status);
+                       }
+               }
+       }
+       return (0);
+}
+
+/*
+ * __log_write --
+ *     Write len bytes from addr to the current log file at the region's
+ *     write offset, then advance the write offset and the bytes-written
+ *     counter and mark the in-memory buffer empty.  Returns 0, EIO on a
+ *     short write, or the error from the open, seek or write call.
+ */
+static int
+__log_write(dblp, addr, len)
+       DB_LOG *dblp;
+       void *addr;
+       u_int32_t len;
+{
+       LOG *region;
+       ssize_t nbytes;
+       int status;
+
+       region = dblp->lp;
+
+       /*
+        * Open the log file if there is no descriptor yet or if the
+        * descriptor we hold belongs to a different file number.
+        */
+       if ((dblp->lfd == -1 || dblp->lfname != region->lsn.file) &&
+           (status = __log_newfd(dblp)) != 0)
+               return (status);
+
+       /*
+        * Always reposition first: someone else may have written to the
+        * file since our last write.
+        */
+       status = __db_lseek(dblp->lfd, 0, 0, region->w_off, SEEK_SET);
+       if (status != 0)
+               return (status);
+       status = __db_write(dblp->lfd, addr, len, &nbytes);
+       if (status != 0)
+               return (status);
+       if (nbytes != (int32_t)len)
+               return (EIO);
+
+       /* Everything is on its way to disk: reset buffer, bump counters. */
+       region->b_off = 0;
+       region->w_off += len;
+       region->written += len;
+
+       return (0);
+}
+
+/*
+ * log_file --
+ *     Map a DB_LSN to a file name.
+ *
+ *     The name of the log file holding lsn is copied, NUL-terminated,
+ *     into the len-byte buffer at namep.  Returns 0 on success, the
+ *     error from __log_name() if the name cannot be built, or ENOMEM if
+ *     the buffer is too small (namep is then set to the empty string
+ *     when it has room for at least one byte).
+ */
+int
+log_file(dblp, lsn, namep, len)
+       DB_LOG *dblp;
+       const DB_LSN *lsn;
+       char *namep;
+       size_t len;
+{
+       int ret;
+       char *p;
+
+       LOCK_LOGREGION(dblp);
+
+       ret = __log_name(dblp->dbenv, lsn->file, &p);
+
+       UNLOCK_LOGREGION(dblp);
+
+       if (ret != 0)
+               return (ret);
+
+       /*
+        * Check to make sure there's enough room and copy the name.
+        * strcpy() also stores the terminating NUL, so the buffer must
+        * hold strlen(p) + 1 bytes.
+        */
+       if (len < strlen(p) + 1) {
+               /* Don't touch a zero-length buffer. */
+               if (len != 0)
+                       *namep = '\0';
+               ret = ENOMEM;
+       } else
+               (void)strcpy(namep, p);
+
+       /* The generated name is ours to release on every path. */
+       free(p);
+
+       return (ret);
+}
+
+/*
+ * __log_newfd --
+ *     Acquire a file descriptor for the current log file.
+ *
+ *     Any descriptor already held is closed first.  dblp->lfname is set
+ *     to the region's current file number and that file is opened
+ *     (created if necessary) into dblp->lfd.  Returns 0 or the error
+ *     from building the name or opening the file; an open failure is
+ *     also reported through __db_err().
+ */
+static int
+__log_newfd(dblp)
+       DB_LOG *dblp;
+{
+       int ret;
+       char *p;
+
+       /* Close any previous file descriptor. */
+       if (dblp->lfd != -1) {
+               (void)__db_close(dblp->lfd);
+               dblp->lfd = -1;
+       }
+
+       /* Get the path of the new file and open it. */
+       dblp->lfname = dblp->lp->lsn.file;
+       if ((ret = __log_name(dblp->dbenv, dblp->lfname, &p)) != 0)
+               return (ret);
+
+       /*
+        * Report the error code that was actually returned: errno may
+        * have been changed by the time we get here, and ret is what the
+        * caller sees.
+        */
+       if ((ret = __db_fdopen(p,
+           DB_CREATE | DB_SEQUENTIAL,
+           DB_CREATE | DB_SEQUENTIAL,
+           dblp->lp->persist.mode, &dblp->lfd)) != 0)
+               __db_err(dblp->dbenv,
+                   "log_put: %s: %s", p, strerror(ret));
+       FREES(p);
+       return (ret);
+}
+
+/*
+ * __log_name --
+ *     Build the path name of log file number fn.  The result is handed
+ *     back through np by __db_appname(), whose return value is passed on
+ *     unchanged.
+ *
+ * PUBLIC: int __log_name __P((DB_ENV *, int, char **));
+ */
+int
+__log_name(dbenv, fn, np)
+       DB_ENV *dbenv;
+       int fn;
+       char **np;
+{
+       char fname[sizeof(LFNAME) + 10];
+       int status;
+
+       /* Expand the LFNAME template with the file number. */
+       (void)snprintf(fname, sizeof(fname), LFNAME, fn);
+
+       /* Let the application-name machinery turn it into a full path. */
+       status = __db_appname(dbenv, DB_APP_LOG, NULL, fname, NULL, np);
+       return (status);
+}
diff --git a/db2/log/log_rec.c b/db2/log/log_rec.c
new file mode 100644 (file)
index 0000000..dbc5960
--- /dev/null
@@ -0,0 +1,332 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_rec.c    10.11 (Sleepycat) 8/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "common_ext.h"
+
+static int __log_open_file __P((DB_LOG *, 
+    u_int8_t *, char *, DBTYPE, u_int32_t));
+
+/*
+ * __log_register_recover --
+ *     Recovery handler for a file-registration log record: decode the
+ *     record and make sure the file it names is open and entered in the
+ *     dbentry table.  A file that cannot be found is not an error; a
+ *     warning is printed only when redo is TXN_OPENFILES.
+ *
+ * PUBLIC: int __log_register_recover
+ * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__log_register_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __log_register_args *argp;
+       int ret;
+
+#ifdef DEBUG_RECOVER
+       __log_register_print(logp, dbtp, lsnp, redo, info);
+#endif
+       info = info;                            /* XXX: Shut the compiler up. */
+       lsnp = lsnp;
+
+       /*
+        * The cleanup code at out: tests and frees argp, so it must have
+        * a defined value even if the read routine fails without storing
+        * through the pointer.
+        */
+       argp = NULL;
+
+       F_SET(logp, DB_AM_RECOVER);
+
+       if ((ret = __log_register_read(dbtp->data, &argp)) != 0)
+               goto out;
+
+       ret = __log_open_file(logp,
+           argp->uid.data, argp->name.data, argp->ftype, argp->id);
+       if (ret == ENOENT) {
+               if (redo == TXN_OPENFILES)
+                       __db_err(logp->dbenv,
+                           "warning: file %s not found", argp->name.data);
+               ret = 0;
+       }
+
+out:   F_CLR(logp, DB_AM_RECOVER);
+       if (argp != NULL)
+               free(argp);
+       return (ret);
+}
+
+/*
+ * __log_unregister_recover --
+ *     Recovery handler for a file-unregistration log record.  Nothing
+ *     is done for TXN_OPENFILES, TXN_BACKWARD_ROLL or TXN_UNDO.
+ *     Otherwise the reference count of the table entry named by the
+ *     record is dropped and the file is closed when it reaches zero.
+ *     Returns EINVAL if the record names no open entry.
+ *
+ * PUBLIC: int __log_unregister_recover
+ * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ */
+int
+__log_unregister_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __log_unregister_args *argp;
+       int ret;
+
+#ifdef DEBUG_RECOVER
+       __log_unregister_print(logp, dbtp, lsnp, redo, info);
+#endif
+       info = info;                            /* XXX: Shut the compiler up. */
+       lsnp = lsnp;
+
+       if (redo == TXN_OPENFILES ||
+           redo == TXN_BACKWARD_ROLL || redo == TXN_UNDO)
+               return (0);
+
+       /*
+        * The cleanup code at out: tests and frees argp, so it must have
+        * a defined value even if the read routine fails without storing
+        * through the pointer.
+        */
+       argp = NULL;
+
+       F_SET(logp, DB_AM_RECOVER);
+       if ((ret = __log_unregister_read(dbtp->data, &argp)) != 0)
+               goto out;
+
+       LOCK_LOGTHREAD(logp);
+       /* The id comes from the log; never index past the table. */
+       if (argp->id >= logp->dbentry_cnt ||
+           logp->dbentry[argp->id].dbp == NULL)
+               ret = EINVAL;
+       else if (--logp->dbentry[argp->id].refcount == 0) {
+               ret = logp->dbentry[argp->id].dbp->close(
+                   logp->dbentry[argp->id].dbp, 0);
+               logp->dbentry[argp->id].dbp = NULL;
+       }
+       UNLOCK_LOGTHREAD(logp);
+
+out:   F_CLR(logp, DB_AM_RECOVER);
+       if (argp != NULL)
+               free(argp);
+       return (ret);
+}
+
+/* Hand coded routines. */
+
+/*
+ * __log_open_file --
+ *     Used while recovering a log_register record: guarantee that slot
+ *     ndx of the dbentry table is populated.  If the slot is already in
+ *     use (open, or marked deleted) only its reference count is bumped.
+ *     Otherwise the file is opened and, if its file id matches uid,
+ *     entered in the table; a missing or mismatching file is entered as
+ *     a NULL handle.
+ *     Returns 0 on success, ENOENT if the file is not the one the log
+ *     record referred to, or the error from db_open().
+ */
+static int
+__log_open_file(lp, uid, name, ftype, ndx)
+       DB_LOG *lp;
+       u_int8_t *uid;
+       char *name;
+       DBTYPE ftype;
+       u_int32_t ndx;
+{
+       DB *handle;
+       int known, status;
+
+       /* Fast path: the slot is already accounted for. */
+       LOCK_LOGTHREAD(lp);
+       known = ndx < lp->dbentry_cnt &&
+           (lp->dbentry[ndx].deleted == 1 || lp->dbentry[ndx].dbp != NULL);
+       if (known)
+               lp->dbentry[ndx].refcount++;
+       UNLOCK_LOGTHREAD(lp);
+       if (known)
+               return (0);
+
+       /* Need to open file. */
+       handle = NULL;
+       status = db_open(name, ftype, 0, 0, lp->dbenv, NULL, &handle);
+
+       /*
+        * The name alone is not proof enough: the file id must equal the
+        * one stored in the log record, or we treat the file as missing.
+        */
+       if (status == 0 &&
+           memcmp(uid, handle->lock.fileid, DB_FILE_ID_LEN) != 0) {
+               (void)handle->close(handle, 0);
+               handle = NULL;
+               status = ENOENT;
+       }
+
+       /* Record the outcome (handle or NULL) in the table. */
+       if (status == 0 || status == ENOENT)
+               (void)__log_add_logid(lp, handle, ndx);
+
+       return (status);
+}
+
+/*
+ * __log_add_logid --
+ *     Enter dbp in slot ndx of the per-DB_LOG dbentry table, growing
+ *     the table so that ndx is always a valid index.  A NULL dbp marks
+ *     the slot as "deleted".  If the slot is already in use only its
+ *     reference count is incremented.
+ *
+ * This function returns:
+ *     0 SUCCESS (the entry was not previously set and is now set or the
+ *             entry was previously set and we just inced the ref count.
+ *     >0 on system error (returns errno value).
+ * PUBLIC: int __log_add_logid __P((DB_LOG *, DB *, u_int32_t));
+ */
+int
+__log_add_logid(logp, dbp, ndx)
+       DB_LOG *logp;
+       DB *dbp;
+       u_int32_t ndx;
+{
+       DB_ENTRY *temp_entryp;
+       u_int32_t i, newcnt;
+       int ret;
+
+       ret = 0;
+
+       LOCK_LOGTHREAD(logp);
+       /*
+        * Check if we need to grow the table.  Size it from ndx rather
+        * than from the old count: a single DB_GROW_SIZE step would not
+        * be enough when ndx lies further out.
+        */
+       if (logp->dbentry_cnt <= ndx) {
+               newcnt = ndx + DB_GROW_SIZE;
+               if (newcnt <= ndx) {            /* Arithmetic wrapped. */
+                       ret = ENOMEM;
+                       goto err;
+               }
+               if (logp->dbentry_cnt == 0)
+                       temp_entryp =
+                           (DB_ENTRY *)malloc(newcnt * sizeof(DB_ENTRY));
+               else
+                       temp_entryp = (DB_ENTRY *)realloc(logp->dbentry,
+                           newcnt * sizeof(DB_ENTRY));
+               /* On failure the old table is untouched and still valid. */
+               if (temp_entryp == NULL) {
+                       ret = ENOMEM;
+                       goto err;
+               }
+               logp->dbentry = temp_entryp;
+
+               /* Initialize the new entries. */
+               for (i = logp->dbentry_cnt; i < newcnt; i++) {
+                       logp->dbentry[i].dbp = NULL;
+                       logp->dbentry[i].deleted = 0;
+                       logp->dbentry[i].refcount = 0;
+               }
+
+               logp->dbentry_cnt = newcnt;
+       }
+
+       if (logp->dbentry[ndx].deleted == 0 && logp->dbentry[ndx].dbp == NULL) {
+               logp->dbentry[ndx].dbp = dbp;
+               logp->dbentry[ndx].refcount = 1;
+               logp->dbentry[ndx].deleted = dbp == NULL;
+       } else
+               logp->dbentry[ndx].refcount++;
+
+err:   UNLOCK_LOGTHREAD(logp);
+       return (ret);
+}
+
+
+/*
+ * __db_fileid_to_db --
+ *     Return the DB corresponding to the specified fileid.
+ *
+ *     Returns 0 with *dbpp set on success, DB_DELETED if the slot is
+ *     marked deleted (*dbpp is left alone), or ENOENT with *dbpp set to
+ *     NULL if ndx is outside the table or the slot holds no DB.
+ *
+ * PUBLIC: int __db_fileid_to_db __P((DB_LOG *, DB **, u_int32_t));
+ */
+int
+__db_fileid_to_db(logp, dbpp, ndx)
+       DB_LOG *logp;
+       DB **dbpp;
+       u_int32_t ndx;
+{
+       int ret;
+
+       ret = 0;
+       LOCK_LOGTHREAD(logp);
+
+       /* An id beyond the table has no entry; don't read past the end. */
+       if (ndx >= logp->dbentry_cnt) {
+               *dbpp = NULL;
+               ret = ENOENT;
+               goto err;
+       }
+
+       /*
+        * Return DB_DELETED if the file has been deleted
+        * (it's not an error).
+        */
+       if (logp->dbentry[ndx].deleted) {
+               ret = DB_DELETED;
+               goto err;
+       }
+
+       /*
+        * Otherwise return 0, but if we don't have a corresponding DB,
+        * it's an error.
+        */
+       if ((*dbpp = logp->dbentry[ndx].dbp) == NULL)
+               ret = ENOENT;
+
+err:   UNLOCK_LOGTHREAD(logp);
+       return (ret);
+}
+
+/*
+ * Close files that were opened by the recovery daemon.
+ *
+ * Every non-NULL handle in the dbentry table is closed and its slot is
+ * cleared, so no stale pointer to a closed DB is left behind.
+ *
+ * PUBLIC: void __log_close_files __P((DB_LOG *));
+ */
+void
+__log_close_files(logp)
+       DB_LOG *logp;
+{
+       u_int32_t i;
+
+       LOCK_LOGTHREAD(logp);
+       for (i = 0; i < logp->dbentry_cnt; i++)
+               if (logp->dbentry[i].dbp != NULL) {
+                       (void)logp->dbentry[i].dbp->close(
+                           logp->dbentry[i].dbp, 0);
+                       /* The handle is gone; don't keep a dangling pointer. */
+                       logp->dbentry[i].dbp = NULL;
+               }
+       UNLOCK_LOGTHREAD(logp);
+}
+
+/*
+ * __log_rem_logid --
+ *     Drop one reference to slot ndx of the dbentry table; when the
+ *     count reaches zero the slot is returned to the unused state.
+ *     An ndx outside the table is ignored.
+ *
+ * PUBLIC: void __log_rem_logid __P((DB_LOG *, u_int32_t));
+ */
+void
+__log_rem_logid(logp, ndx)
+       DB_LOG *logp;
+       u_int32_t ndx;
+{
+       LOCK_LOGTHREAD(logp);
+       /* Never touch memory beyond the table for an unknown id. */
+       if (ndx < logp->dbentry_cnt &&
+           --logp->dbentry[ndx].refcount == 0) {
+               logp->dbentry[ndx].dbp = NULL;
+               logp->dbentry[ndx].deleted = 0;
+       }
+       UNLOCK_LOGTHREAD(logp);
+}
diff --git a/db2/log/log_register.c b/db2/log/log_register.c
new file mode 100644 (file)
index 0000000..582eab9
--- /dev/null
@@ -0,0 +1,199 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)log_register.c       10.10 (Sleepycat) 8/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "log.h"
+#include "common_ext.h"
+
+/*
+ * log_register --
+ *     Register a file name.
+ *
+ *     If a file with the same file id (dbp->lock.fileid) is already on
+ *     the shared file-name queue its reference count is bumped and its
+ *     id reused.  Otherwise a new FNAME entry is allocated in the
+ *     region, queued, and -- unless DB_AM_RECOVER is set -- a
+ *     registration record is logged.  The id is stored through idp when
+ *     idp is not NULL.  Returns 0, EINVAL for an unknown file type, or
+ *     the error from the failing allocation/logging call.
+ */
+int
+log_register(dblp, dbp, name, type, idp)
+       DB_LOG *dblp;
+       DB *dbp;
+       const char *name;
+       DBTYPE type;
+       u_int32_t *idp;
+{
+       DBT r_name;
+       DBT fid_dbt;
+       DB_LSN r_unused;
+       FNAME *fnp;
+       size_t len;
+       u_int32_t fid;
+       int inserted, ret;
+       char *fullname;
+       void *fidp, *namep;
+
+       fid = 0;
+       inserted = 0;
+       fullname = NULL;
+       fnp = fidp = namep = NULL;
+
+       /* Check the arguments. */
+       if (type != DB_BTREE && type != DB_HASH && type != DB_RECNO) {
+               __db_err(dblp->dbenv, "log_register: unknown DB file type");
+               return (EINVAL);
+       }
+
+       /* Get the log file id. */
+       if ((ret = __db_appname(dblp->dbenv,
+           DB_APP_DATA, NULL, name, NULL, &fullname)) != 0)
+               return (ret);
+
+       LOCK_LOGREGION(dblp);
+
+       /*
+        * See if we've already got this file in the log, finding the
+        * next-to-lowest file id currently in use as we do it.
+        */
+       for (fid = 1, fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
+           fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
+               if (fid <= fnp->id)
+                       fid = fnp->id + 1;
+               if (!memcmp(dbp->lock.fileid,
+                   ADDR(dblp, fnp->fileid_off), DB_FILE_ID_LEN)) {
+                       ++fnp->ref;
+                       fid = fnp->id;
+                       /*
+                        * This entry is live on the shared queue, so a
+                        * failure must not reach the err: cleanup, which
+                        * would free it.  Just give the reference back.
+                        */
+                       if (!F_ISSET(dblp, DB_AM_RECOVER) &&
+                           (ret = __log_add_logid(dblp, dbp, fid)) != 0)
+                               --fnp->ref;
+                       goto ret1;
+               }
+       }
+
+       /* Allocate a new file name structure. */
+       if ((ret = __db_shalloc(dblp->addr, sizeof(FNAME), 0, &fnp)) != 0)
+               goto err;
+       fnp->ref = 1;
+       fnp->id = fid;
+       fnp->s_type = type;
+
+       if ((ret = __db_shalloc(dblp->addr, DB_FILE_ID_LEN, 0, &fidp)) != 0)
+               goto err;
+       /*
+        * XXX Now that uids are fixed size, we can put them in the fnp
+        * structure.
+        */
+       fnp->fileid_off = OFFSET(dblp, fidp);
+       memcpy(fidp, dbp->lock.fileid, DB_FILE_ID_LEN);
+
+       len = strlen(name) + 1;
+       if ((ret = __db_shalloc(dblp->addr, len, 0, &namep)) != 0)
+               goto err;
+       fnp->name_off = OFFSET(dblp, namep);
+       memcpy(namep, name, len);
+
+       SH_TAILQ_INSERT_HEAD(&dblp->lp->fq, fnp, q, __fname);
+       inserted = 1;
+
+       /* Log the registry. */
+       if (!F_ISSET(dblp, DB_AM_RECOVER)) {
+               r_name.data = (void *)name;             /* XXX: Yuck! */
+               r_name.size = strlen(name) + 1;
+               memset(&fid_dbt, 0, sizeof(fid_dbt));
+               fid_dbt.data = dbp->lock.fileid;
+               fid_dbt.size = DB_FILE_ID_LEN;
+               if ((ret = __log_register_log(dblp, NULL, &r_unused,
+                   0, &r_name, &fid_dbt, fid, type)) != 0)
+                       goto err;
+               if ((ret = __log_add_logid(dblp, dbp, fid)) != 0)
+                       goto err;
+       }
+
+       /*
+        * The block below is entered only through the err: label; it
+        * releases whatever this call allocated for a new entry.
+        */
+       if (0) {
+err:           /*
+                * XXX
+                * We should grow the region.
+                */
+               if (inserted)
+                       SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname);
+               if (namep != NULL)
+                       __db_shalloc_free(dblp->addr, namep);
+               if (fidp != NULL)
+                       __db_shalloc_free(dblp->addr, fidp);
+               if (fnp != NULL)
+                       __db_shalloc_free(dblp->addr, fnp);
+       }
+
+ret1:  UNLOCK_LOGREGION(dblp);
+
+       if (fullname != NULL)
+               FREES(fullname);
+
+       if (idp != NULL)
+               *idp = fid;
+       return (ret);
+}
+
+/*
+ * log_unregister --
+ *     Discard a registered file name.
+ *
+ *     Unless DB_AM_RECOVER is set an unregistration record is logged
+ *     first.  The entry with id fid then loses one reference; when it
+ *     was the last one the entry is removed from the shared queue and
+ *     freed, and the id is dropped from the process-local table.
+ *     Returns 0, EINVAL if fid is not registered, or the error from
+ *     writing the log record.  The region lock is released on every
+ *     return path.
+ */
+int
+log_unregister(dblp, fid)
+       DB_LOG *dblp;
+       u_int32_t fid;
+{
+       DB_LSN r_unused;
+       FNAME *fnp;
+       int ret;
+
+       ret = 0;
+       LOCK_LOGREGION(dblp);
+
+       /*
+        * Unlog the registry.  We hold the region lock here, so leave
+        * through ret1 rather than returning directly.
+        */
+       if (!F_ISSET(dblp, DB_AM_RECOVER) &&
+           (ret = __log_unregister_log(dblp, NULL, &r_unused, 0, fid)) != 0)
+               goto ret1;
+
+       /* Find the entry in the log. */
+       for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
+           fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname))
+               if (fid == fnp->id)
+                       break;
+       if (fnp == NULL) {
+               __db_err(dblp->dbenv, "log_unregister: non-existent file id");
+               ret = EINVAL;
+               goto ret1;
+       }
+
+       /* If more than 1 reference, decrement the reference and return. */
+       if (fnp->ref > 1) {
+               --fnp->ref;
+               goto ret1;
+       }
+
+       /* Free the unique file information, name and structure. */
+       __db_shalloc_free(dblp->addr, ADDR(dblp, fnp->fileid_off));
+       __db_shalloc_free(dblp->addr, ADDR(dblp, fnp->name_off));
+       SH_TAILQ_REMOVE(&dblp->lp->fq, fnp, q, __fname);
+       __db_shalloc_free(dblp->addr, fnp);
+
+       /* Remove from the process local table. */
+       __log_rem_logid(dblp, fid);
+
+ret1:  UNLOCK_LOGREGION(dblp);
+
+       return (ret);
+}
diff --git a/db2/makedb.c b/db2/makedb.c
new file mode 100644 (file)
index 0000000..68c9514
--- /dev/null
@@ -0,0 +1,363 @@
+/* Create simple DB database from textual input.
+   Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include <argp.h>
+#include <ctype.h>
+#include <db_185.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <libintl.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Get libc version number.  */
+#include "../version.h"
+
+#define PACKAGE _libc_intl_domainname
+
+/* If non-zero convert key to lower case.  */
+static int to_lowercase;
+
+/* If non-zero print content of input file, one entry per line.  */
+static int do_undo;
+
+/* If non-zero do not print informational messages.  */
+static int be_quiet;
+
+/* Name of output file.  */
+static const char *output_name;
+
+/* Name and version of program.  */
+static void print_version (FILE *stream, struct argp_state *state);
+void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
+
+/* Definitions of arguments for argp functions.  */
+static const struct argp_option options[] =
+{
+  { "fold-case", 'f', NULL, 0, N_("Convert key to lower case") },
+  { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
+  { "quiet", 'q', NULL, 0,
+    N_("Do not print messages while building database") },
+  { "undo", 'u', NULL, 0,
+    N_("Print content of database file, one entry a line") },
+  { NULL, 0, NULL, 0, NULL }
+};
+
+/* Short description of program.  */
+static const char doc[] = N_("Create simple DB database from textual input.");
+
+/* Strings for arguments in help texts.  */
+static const char args_doc[] = N_("\
+INPUT-FILE OUTPUT-FILE\n-o OUTPUT-FILE INPUT-FILE\n-u INPUT-FILE");
+
+/* Prototype for option handler.  */
+static error_t parse_opt __P ((int key, char *arg, struct argp_state *state));
+
+/* Function to print some extra text in the help message.  */
+static char *more_help __P ((int key, const char *text, void *input));
+
+/* Data structure to communicate with argp functions.  The initializers
+   are positional: the option table, the option parser, the synopsis of
+   the non-option arguments, the program description, the list of child
+   parsers (none here) and the help filter callback.  */
+static struct argp argp =
+{
+  options, parse_opt, args_doc, doc, NULL, more_help
+};
+
+
+/* Prototypes for local functions.  */
+static int process_input __P ((FILE *input, const char *inname, DB *output,
+                              int to_lowercase, int be_quiet));
+static int print_database __P ((DB *db));
+int main __P ((int argc, char *argv[]));
+
+
+/* Program entry point.  Parse the command line, then either print an
+   existing database to standard output (option -u) or build a new
+   database from a textual input file.  The return value is the status
+   produced by print_database or process_input; wrong arguments and
+   files which cannot be opened terminate the program.  */
+int
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  const char *input_name;
+  FILE *input_file;
+  DB *db_file;
+  int status;
+  int remaining;
+
+  /* Set locale via LC_ALL.  */
+  setlocale (LC_ALL, "");
+
+  /* Set the text message domain.  */
+  textdomain (_libc_intl_domainname);
+
+  /* Initialize local variables.  */
+  input_name = NULL;
+
+  /* Parse and process arguments.  REMAINING receives the index of the
+     first non-option argument.  */
+  argp_parse (&argp, argc, argv, 0, &remaining, NULL);
+
+  /* Determine file names.  With -u or -o exactly one file name must be
+     left, otherwise we need the input and the output file name.  */
+  if (do_undo || output_name != NULL)
+    {
+      if (remaining + 1 != argc)
+       {
+       wrong_arguments:
+         error (0, 0, gettext ("wrong number of arguments"));
+         argp_help (&argp, stdout, ARGP_HELP_SEE,
+                    program_invocation_short_name);
+         exit (1);
+       }
+      input_name = argv[remaining];
+    }
+  else
+    {
+      if (remaining + 2 != argc)
+       goto wrong_arguments;
+
+      input_name = argv[remaining++];
+      output_name = argv[remaining];
+    }
+
+  /* Special handling if we are asked to print the database.  */
+  if (do_undo)
+    {
+      db_file = dbopen (input_name, O_RDONLY, 0666, DB_BTREE, NULL);
+      if (db_file == NULL)
+       error (EXIT_FAILURE, 0, gettext ("cannot open database file `%s': %s"),
+              input_name,
+              errno == EINVAL ? gettext ("incorrectly formatted file")
+                              : strerror (errno));
+
+      status = print_database (db_file);
+
+      db_file->close (db_file);
+
+      return status;
+    }
+
+  /* Open input file.  */
+  if (strcmp (input_name, "-") == 0 || strcmp (input_name, "/dev/stdin") == 0)
+    input_file = stdin;
+  else
+    {
+      input_file = fopen (input_name, "r");
+      if (input_file == NULL)
+       error (EXIT_FAILURE, errno, gettext ("cannot open input file `%s'"),
+              input_name);
+    }
+
+  /* Open output file.  This must not be standard output so we don't
+     handle "-" and "/dev/stdout" special.  */
+  db_file = dbopen (output_name, O_CREAT | O_RDWR | O_TRUNC, 0666,
+                   DB_BTREE, NULL);
+  if (db_file == NULL)
+    /* The format contains a `%s' directive, so the file name has to be
+       passed along.  */
+    error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"),
+          output_name);
+
+  /* Start the real work.  */
+  status = process_input (input_file, input_name, db_file, to_lowercase,
+                         be_quiet);
+
+  /* Close files.  */
+  if (input_file != stdin)
+    fclose (input_file);
+  db_file->close (db_file);
+
+  return status;
+}
+
+
+/* Option callback for argp: record each recognized option in the
+   corresponding file-scope variable.  Keys we do not know are reported
+   back with ARGP_ERR_UNKNOWN.  */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+  if (key == 'f')
+    to_lowercase = 1;
+  else if (key == 'o')
+    output_name = arg;
+  else if (key == 'q')
+    be_quiet = 1;
+  else if (key == 'u')
+    do_undo = 1;
+  else
+    return ARGP_ERR_UNKNOWN;
+
+  return 0;
+}
+
+
+/* Help filter for argp.  For ARGP_KEY_HELP_EXTRA a freshly allocated
+   copy of the bug reporting hint is returned, every other text is
+   passed through unchanged.  */
+static char *
+more_help (int key, const char *text, void *input)
+{
+  if (key != ARGP_KEY_HELP_EXTRA)
+    return (char *) text;
+
+  /* We print some extra information.  */
+  return strdup (gettext ("\
+Report bugs using the `glibcbug' script to <bugs@gnu.ai.mit.edu>.\n"));
+}
+
+/* Write program name, version, copyright notice and author to STREAM.
+   Installed as argp's version hook.  */
+static void
+print_version (FILE *stream, struct argp_state *state)
+{
+  const char *years = "1996, 1997";
+  const char *author = "Ulrich Drepper";
+
+  fprintf (stream, "makedb (GNU %s) %s\n", PACKAGE, VERSION);
+  fprintf (stream, gettext ("\
+Copyright (C) %s Free Software Foundation, Inc.\n\
+This is free software; see the source for copying conditions.  There is NO\n\
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
+"), years);
+  fprintf (stream, gettext ("Written by %s.\n"), author);
+}
+
+
+/* Read INPUT line by line and store one record per line in OUTPUT.
+   The first whitespace separated word of a line is the key (converted
+   to lower case if TO_LOWERCASE is non-zero), the rest of the line after
+   the following whitespace is the value.  Empty lines and lines whose
+   first non-space character is '#' are ignored.  Duplicate keys are
+   skipped with a warning unless BE_QUIET is non-zero.  INNAME is only
+   used in messages.  Returns EXIT_SUCCESS, or EXIT_FAILURE if writing
+   the database or reading the input failed.  */
+static int
+process_input (input, inname, output, to_lowercase, be_quiet)
+     FILE *input;
+     const char *inname;
+     DB *output;
+     int to_lowercase;
+     int be_quiet;
+{
+  char *line;
+  size_t linelen;
+  int status;
+  size_t linenr;
+
+  line = NULL;
+  linelen = 0;
+  status = EXIT_SUCCESS;
+  linenr = 0;
+
+  while (!feof (input))
+    {
+      DBT key;
+      DBT val;
+      char *cp;
+      int n;
+
+      n = getline (&line, &linelen, input);
+      if (n < 0)
+       /* This means end of file or some bug.  */
+       break;
+      if (n == 0)
+       /* Short read.  Probably interrupted system call. */
+       continue;
+
+      ++linenr;
+
+      if (line[n - 1] == '\n')
+       /* Remove trailing newline.  */
+       line[--n] = '\0';
+
+      /* The <ctype.h> functions are only defined for values representable
+        as unsigned char (and EOF), hence the casts.  */
+      cp = line;
+      while (isspace ((unsigned char) *cp))
+       ++cp;
+
+      if (*cp == '#')
+       /* First non-space character in line '#': it's a comment.  */
+       continue;
+
+      key.data = cp;
+      while (*cp != '\0' && !isspace ((unsigned char) *cp))
+       {
+         if (to_lowercase)
+           *cp = tolower ((unsigned char) *cp);
+         ++cp;
+       }
+
+      if (key.data == cp)
+       /* It's an empty line.  */
+       continue;
+
+      key.size = cp - (char *) key.data;
+
+      while (isspace ((unsigned char) *cp))
+       ++cp;
+
+      /* The value is the rest of the line, without the terminating NUL.  */
+      val.data = cp;
+      val.size = &line[n] - cp;
+
+      /* Store the value.  */
+      status = output->put (output, &key, &val, R_NOOVERWRITE);
+      if (status != 0)
+       {
+         if (status == 1)
+           {
+             if (!be_quiet)
+               error_at_line (0, 0, inname, linenr,
+                              gettext ("duplicate key"));
+             /* This is no real error.  Just give a warning and go on
+                with the next line.  */
+             status = EXIT_SUCCESS;
+             continue;
+           }
+
+         error (0, errno, gettext ("while writing data base file"));
+         status = EXIT_FAILURE;
+
+         /* The write error is already reported; don't let a pending
+            stream error produce a second message below.  */
+         clearerr (input);
+         break;
+       }
+    }
+
+  if (ferror (input))
+    {
+      error (0, 0, gettext ("problems while reading `%s'"), inname);
+      status = EXIT_FAILURE;
+    }
+
+  /* The buffer was allocated by getline.  */
+  free (line);
+
+  return status;
+}
+
+
+/* Print all records of DB to standard output, one "KEY VALUE" pair per
+   line, in the order the seq method delivers them.  Returns EXIT_FAILURE
+   if the traversal ended with an error (-1), EXIT_SUCCESS otherwise.  */
+static int
+print_database (db)
+     DB *db;
+{
+  DBT key;
+  DBT val;
+  int rc;
+
+  /* Key and value are not NUL terminated, so print them with an
+     explicit length.  */
+  for (rc = db->seq (db, &key, &val, R_FIRST); rc == 0;
+       rc = db->seq (db, &key, &val, R_NEXT))
+    printf ("%.*s %.*s\n", (int) key.size, (char *) key.data,
+           (int) val.size, (char *) val.data);
+
+  if (rc != -1)
+    return EXIT_SUCCESS;
+
+  error (0, errno, gettext ("while reading database"));
+  return EXIT_FAILURE;
+}
diff --git a/db2/mp/mp_bh.c b/db2/mp/mp_bh.c
new file mode 100644 (file)
index 0000000..e1b68ce
--- /dev/null
@@ -0,0 +1,437 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_bh.c      10.12 (Sleepycat) 8/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+/*
+ * __memp_bhwrite --
+ *     Write the page associated with a given bucket header.
+ *
+ *     Locates (or, failing that, opens) a DB_MPOOLFILE handle for the file
+ *     and hands the buffer to __memp_pgwrite.  Returns 0 without writing
+ *     when no handle can be obtained; *restartp and *wrotep, if supplied,
+ *     are cleared first.
+ *
+ * PUBLIC: int __memp_bhwrite
+ * PUBLIC:     __P((DB_MPOOL *, MPOOLFILE *, BH *, int *, int *));
+ */
+int
+__memp_bhwrite(dbmp, mfp, bhp, restartp, wrotep)
+       DB_MPOOL *dbmp;
+       MPOOLFILE *mfp;
+       BH *bhp;
+       int *restartp, *wrotep;
+{
+       DBT dbt;
+       DB_MPOOLFILE *dbmfp;
+       DB_MPREG *mpreg;
+
+       if (restartp != NULL)
+               *restartp = 0;
+       if (wrotep != NULL)
+               *wrotep = 0;
+
+       /*
+        * Walk the process' DB_MPOOLFILE list and try and find a file
+        * descriptor for this file.
+        */
+       LOCKHANDLE(dbmp, &dbmp->mutex);
+       dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
+       while (dbmfp != NULL && dbmfp->mfp != mfp)
+               dbmfp = TAILQ_NEXT(dbmfp, q);
+       UNLOCKHANDLE(dbmp, &dbmp->mutex);
+
+       if (dbmfp == NULL) {
+               /*
+                * It's not a page from a file we've opened.  If the file
+                * requires input/output processing, see if this process has
+                * ever registered information as to how to write this type
+                * of file.  If not, there's nothing we can do.
+                */
+               if (mfp->ftype != 0) {
+                       LOCKHANDLE(dbmp, &dbmp->mutex);
+                       mpreg = LIST_FIRST(&dbmp->dbregq);
+                       while (mpreg != NULL && mpreg->ftype != mfp->ftype)
+                               mpreg = LIST_NEXT(mpreg, q);
+                       UNLOCKHANDLE(dbmp, &dbmp->mutex);
+                       if (mpreg == NULL)
+                               return (0);
+               }
+
+               /*
+                * Try and open the file; ignore any error, assume it's a
+                * permissions problem.
+                */
+               dbt.size = mfp->pgcookie_len;
+               dbt.data = ADDR(dbmp, mfp->pgcookie_off);
+               if (__memp_fopen(dbmp, ADDR(dbmp, mfp->path_off),
+                   mfp->ftype, 0, 0, mfp->stat.st_pagesize, mfp->lsn_off,
+                   &dbt, ADDR(dbmp, mfp->fileid_off), 0, &dbmfp) != 0)
+                       return (0);
+       }
+
+       /* We have a handle, one way or the other: write the page. */
+       return (__memp_pgwrite(dbmfp, bhp, restartp, wrotep));
+}
+
+/*
+ * __memp_pgread --
+ *     Read a page from a file.
+ *
+ *     Fills bhp->buf with page bhp->pgno of the file behind dbmfp.  If the
+ *     page cannot be read in full and can_create is non-zero, the missing
+ *     part is zero filled instead; without can_create that case is an
+ *     error.  Returns 0 on success, otherwise the error from the seek, the
+ *     read or the pgin function, or EINVAL.
+ *
+ *     The region lock is dropped for the duration of the I/O and taken
+ *     again before returning, so presumably the caller holds it on entry
+ *     (NOTE(review): confirm against the callers).
+ *
+ * PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, BH *, int));
+ */
+int
+__memp_pgread(dbmfp, bhp, can_create)
+       DB_MPOOLFILE *dbmfp;
+       BH *bhp;
+       int can_create;
+{
+       DB_MPOOL *dbmp;
+       MPOOLFILE *mfp;
+       size_t pagesize;
+       ssize_t nr;
+       int ret;
+
+       dbmp = dbmfp->dbmp;
+       mfp = dbmfp->mfp;
+       pagesize = mfp->stat.st_pagesize;
+
+       /*
+        * Flag the buffer as locked and its contents as not (yet) valid,
+        * take the buffer lock, and only then let go of the region lock.
+        */
+       F_SET(bhp, BH_LOCKED | BH_TRASH);
+       LOCKBUFFER(dbmp, bhp);
+       UNLOCKREGION(dbmp);
+
+       /*
+        * Temporary files may not yet have been created.
+        *
+        * Seek to the page location.
+        */
+       ret = 0;
+       LOCKHANDLE(dbmp, &dbmfp->mutex);
+       if (dbmfp->fd == -1 || (ret =
+           __db_lseek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0) {
+               if (!can_create) {
+                       /* No backing file at all: ret is still 0 here. */
+                       if (dbmfp->fd == -1)
+                               ret = EINVAL;
+                       UNLOCKHANDLE(dbmp, &dbmfp->mutex);
+                       __db_err(dbmp->dbenv,
+                           "%s: page %lu doesn't exist, create flag not set",
+                           dbmfp->path, (u_long)bhp->pgno);
+                       goto err;
+               }
+               UNLOCKHANDLE(dbmp, &dbmfp->mutex);
+
+               /* Clear any uninitialized data. */
+               memset(bhp->buf, 0, pagesize);
+               goto pgin;
+       }
+
+       /*
+        * Read the page; short reads are treated like creates, although
+        * any valid data is preserved.
+        */
+       ret = __db_read(dbmfp->fd, bhp->buf, pagesize, &nr);
+       UNLOCKHANDLE(dbmp, &dbmfp->mutex);
+       if (ret != 0)
+               goto err;
+
+       /*
+        * A complete page was read, so nothing was created: clear can_create,
+        * which from here on only selects the statistics counter below.
+        */
+       if (nr == (ssize_t)pagesize)
+               can_create = 0;
+       else {
+               if (!can_create) {
+                       ret = EINVAL;
+                       goto err;
+               }
+
+               /* Clear any uninitialized data. */
+               memset(bhp->buf + nr, 0, pagesize - nr);
+       }
+
+       /* Call any pgin function. */
+pgin:  ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1);
+
+       /* Reacquire the region lock. */
+       LOCKREGION(dbmp);
+
+       /* If the pgin function succeeded, the data is now valid. */
+       if (ret == 0)
+               F_CLR(bhp, BH_TRASH);
+
+       /*
+        * Update the statistics.  Note that this also happens when the pgin
+        * function failed.
+        */
+       if (can_create) {
+               ++dbmp->mp->stat.st_page_create;
+               ++mfp->stat.st_page_create;
+       } else {
+               ++dbmp->mp->stat.st_page_in;
+               ++mfp->stat.st_page_in;
+       }
+
+       /*
+        * The error paths jump into this never-taken block only to reacquire
+        * the region lock; the normal path already holds it and skips it.
+        */
+       if (0) {
+err:           LOCKREGION(dbmp);
+       }
+
+       /* Release the buffer. */
+       F_CLR(bhp, BH_LOCKED);
+       UNLOCKBUFFER(dbmp, bhp);
+
+       return (ret);
+}
+
+/*
+ * __memp_pgwrite --
+ *     Write a page to a file.
+ *
+ *     Flushes the log up to the page's LSN (if logging is configured), runs
+ *     any pgout function and writes bhp->buf to page bhp->pgno of the file
+ *     behind dbmfp.  If supplied, *restartp is set to 1 once the region lock
+ *     has been released and *wrotep is set to 1 after a successful write.
+ *     Returns 0 on success, otherwise an error number; a short write is
+ *     reported as EIO.
+ *
+ *     The region lock is dropped for the duration of the I/O and taken
+ *     again before returning, so presumably the caller holds it on entry
+ *     (NOTE(review): confirm against the callers).
+ *
+ * PUBLIC: int __memp_pgwrite __P((DB_MPOOLFILE *, BH *, int *, int *));
+ */
+int
+__memp_pgwrite(dbmfp, bhp, restartp, wrotep)
+       DB_MPOOLFILE *dbmfp;
+       BH *bhp;
+       int *restartp, *wrotep;
+{
+       DB_ENV *dbenv;
+       DB_LOG *lg_info;
+       DB_LSN lsn;
+       DB_MPOOL *dbmp;
+       MPOOL *mp;
+       MPOOLFILE *mfp;
+       size_t pagesize;
+       ssize_t nw;
+       int callpgin, ret;
+       const char *fail;
+
+       dbmp = dbmfp->dbmp;
+       dbenv = dbmp->dbenv;
+       mfp = dbmfp->mfp;
+
+       if (restartp != NULL)
+               *restartp = 0;
+       if (wrotep != NULL)
+               *wrotep = 0;
+       callpgin = 0;
+       pagesize = mfp->stat.st_pagesize;
+
+       /*
+        * Initialize up front so the compiler does not complain about a
+        * possibly uninitialized variable; it is always set to the name of
+        * the failing operation before syserr is reached.
+        */
+       fail = NULL;
+
+       F_SET(bhp, BH_LOCKED);
+       LOCKBUFFER(dbmp, bhp);
+       UNLOCKREGION(dbmp);
+
+       if (restartp != NULL)
+               *restartp = 1;
+
+       /* Copy the LSN off the page if we're going to need it. */
+       lg_info = dbenv->lg_info;
+       if (lg_info != NULL || F_ISSET(bhp, BH_WRITE))
+               memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN));
+
+       /* Ensure the appropriate log records are on disk. */
+       if (lg_info != NULL && (ret = log_flush(lg_info, &lsn)) != 0)
+               goto err;
+
+       /*
+        * Call any pgout function.  We set the callpgin flag so that on
+        * error we flag that the contents of the buffer may be trash.
+        */
+       if (mfp->ftype == 0)
+               ret = 0;
+       else {
+               callpgin = 1;
+               if ((ret = __memp_pg(dbmfp, bhp, 0)) != 0)
+                       goto err;
+       }
+
+       /*
+        * Temporary files may not yet have been created.
+        *
+        * NOTE(review): if __db_appname returns 0 but leaves fd at -1, ret
+        * is 0 on the way to err and the failure is reported to the caller
+        * as success -- confirm whether that combination can happen.
+        */
+       LOCKHANDLE(dbmp, &dbmfp->mutex);
+       if (dbmfp->fd == -1 && ((ret = __db_appname(dbenv, DB_APP_TMP,
+           NULL, NULL, &dbmfp->fd, NULL)) != 0 || dbmfp->fd == -1)) {
+               UNLOCKHANDLE(dbmp, &dbmfp->mutex);
+               __db_err(dbenv, "unable to create temporary backing file");
+               goto err;
+       }
+
+       /* Write the page out. */
+       if ((ret =
+           __db_lseek(dbmfp->fd, pagesize, bhp->pgno, 0, SEEK_SET)) != 0)
+               fail = "seek";
+       else if ((ret = __db_write(dbmfp->fd, bhp->buf, pagesize, &nw)) != 0)
+               fail = "write";
+       UNLOCKHANDLE(dbmp, &dbmfp->mutex);
+
+       /* ret != 0 implies that fail names the operation which failed. */
+       if (ret != 0)
+               goto syserr;
+
+       if (nw != (ssize_t)pagesize) {
+               ret = EIO;
+               fail = "write";
+               goto syserr;
+       }
+
+       if (wrotep != NULL)
+               *wrotep = 1;
+
+       /* Reacquire the region lock. */
+       LOCKREGION(dbmp);
+
+       /* Clean up the flags based on a successful write. */
+       F_SET(bhp, BH_CALLPGIN);
+       F_CLR(bhp, BH_DIRTY | BH_LOCKED);
+       UNLOCKBUFFER(dbmp, bhp);
+
+       /*
+        * If we wrote a buffer which a checkpoint is waiting for, update
+        * the count of pending buffers (both in the mpool as a whole and
+        * for this file).  If the count for this file goes to zero, flush
+        * the writes.
+        *
+        * XXX:
+        * We ignore errors from the sync -- it makes no sense to return an
+        * error to the calling process, so set a flag causing the sync to
+        * be retried later.
+        *
+        * If the buffer we wrote has a LSN larger than the current largest
+        * we've written for this checkpoint, update the saved value.
+        */
+       mp = dbmp->mp;
+       if (F_ISSET(bhp, BH_WRITE)) {
+               if (log_compare(&lsn, &mp->lsn) > 0)
+                       mp->lsn = lsn;
+               F_CLR(bhp, BH_WRITE);
+
+               --mp->lsn_cnt;
+               if (--mfp->lsn_cnt == 0) {
+                       /*
+                        * Don't lock -- there are no atomicity issues for
+                        * fsync(2).
+                        */
+                       if (__db_fsync(dbmfp->fd) != 0)
+                               F_SET(mp, MP_LSN_RETRY);
+               }
+       }
+
+       /* Update I/O statistics. */
+       ++mp->stat.st_page_out;
+       ++mfp->stat.st_page_out;
+
+       return (0);
+
+syserr:        __db_err(dbenv,
+           "%s: %s failed for page %lu", dbmfp->path, fail, (u_long)bhp->pgno);
+
+       /*
+        * Error exit: give up the buffer lock, reacquire the region lock and
+        * flag the buffer for pgin if the pgout function may have run.
+        */
+err:   UNLOCKBUFFER(dbmp, bhp);
+       LOCKREGION(dbmp);
+       if (callpgin)
+               F_SET(bhp, BH_CALLPGIN);
+       F_CLR(bhp, BH_LOCKED);
+       return (ret);
+}
+
+/*
+ * __memp_pg --
+ *     Call the pgin/pgout routine.
+ *
+ *     Looks up the conversion functions registered for the file's type and
+ *     calls the pgin function (is_pgin non-zero) or the pgout function on
+ *     the buffer.  Returns 0 if no matching registration or function exists
+ *     or the function succeeded, otherwise the function's return value.
+ *
+ * PUBLIC: int __memp_pg __P((DB_MPOOLFILE *, BH *, int));
+ */
+int
+__memp_pg(dbmfp, bhp, is_pgin)
+       DB_MPOOLFILE *dbmfp;
+       BH *bhp;
+       int is_pgin;
+{
+       DBT dbt, *dbtp;
+       DB_MPOOL *dbmp;
+       DB_MPREG *mpreg;
+       MPOOLFILE *mfp;
+       int ftype, ret;
+
+       dbmp = dbmfp->dbmp;
+       mfp = dbmfp->mfp;
+
+       LOCKHANDLE(dbmp, &dbmp->mutex);
+
+       ftype = mfp->ftype;
+       for (mpreg = LIST_FIRST(&dbmp->dbregq);
+           mpreg != NULL; mpreg = LIST_NEXT(mpreg, q)) {
+               if (ftype != mpreg->ftype)
+                       continue;
+
+               /* Hand the file's page cookie, if any, to the function. */
+               if (mfp->pgcookie_len == 0)
+                       dbtp = NULL;
+               else {
+                       dbt.size = mfp->pgcookie_len;
+                       dbt.data = ADDR(dbmp, mfp->pgcookie_off);
+                       dbtp = &dbt;
+               }
+
+               /* The handle mutex is not held across the callback. */
+               UNLOCKHANDLE(dbmp, &dbmp->mutex);
+
+               if (is_pgin) {
+                       if (mpreg->pgin != NULL && (ret =
+                           mpreg->pgin(bhp->pgno, bhp->buf, dbtp)) != 0)
+                               goto err;
+               } else
+                       if (mpreg->pgout != NULL && (ret =
+                           mpreg->pgout(bhp->pgno, bhp->buf, dbtp)) != 0)
+                               goto err;
+               break;
+       }
+
+       /* No registration matched, so the mutex is still held. */
+       if (mpreg == NULL)
+               UNLOCKHANDLE(dbmp, &dbmp->mutex);
+
+       return (0);
+
+       /*
+        * The handle mutex was already released before the callback ran, so
+        * it must not be released a second time here.
+        */
+err:   __db_err(dbmp->dbenv, "%s: %s failed for page %lu",
+           dbmfp->path, is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno);
+       return (ret);
+}
+
+/*
+ * __memp_bhfree --
+ *     Free a bucket header and its referenced data.
+ *
+ *     The header is always unlinked from its hash bucket and from the LRU
+ *     chain; its memory is only returned to the shared allocator when
+ *     free_mem is non-zero.
+ *
+ * PUBLIC: void __memp_bhfree __P((DB_MPOOL *, MPOOLFILE *, BH *, int));
+ */
+void
+__memp_bhfree(dbmp, mfp, bhp, free_mem)
+       DB_MPOOL *dbmp;
+       MPOOLFILE *mfp;
+       BH *bhp;
+       int free_mem;
+{
+       size_t bucket;
+
+       /* Unlink the buffer header from its MPOOL hash bucket ... */
+       bucket = BUCKET(dbmp->mp, OFFSET(dbmp, mfp), bhp->pgno);
+       SH_TAILQ_REMOVE(&dbmp->htab[bucket], bhp, mq, __bh);
+
+       /* ... and from the LRU chain. */
+       SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh);
+
+       /* The caller reuses the header immediately: keep the memory. */
+       if (!free_mem)
+               return;
+
+       /* Otherwise free the buffer header and data for real. */
+       __db_shalloc_free(dbmp->addr, bhp);
+}
diff --git a/db2/mp/mp_fget.c b/db2/mp/mp_fget.c
new file mode 100644 (file)
index 0000000..418802a
--- /dev/null
@@ -0,0 +1,359 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_fget.c    10.22 (Sleepycat) 8/19/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+int __sleep_on_every_page_get;         /* XXX: thread debugging option. */
+
+/*
+ * memp_fget --
+ *     Get a page from the file.
+ */
+int
+memp_fget(dbmfp, pgnoaddr, flags, addrp)
+       DB_MPOOLFILE *dbmfp;
+       db_pgno_t *pgnoaddr;
+       u_long flags;
+       void *addrp;
+{
+       BH *bhp, *tbhp;
+       DB_MPOOL *dbmp;
+       MPOOL *mp;
+       MPOOLFILE *mfp;
+       db_pgno_t lastpgno;
+       size_t bucket, mf_offset;
+       off_t size;
+       u_long cnt;
+       int b_incr, b_inserted, readonly_alloc, ret;
+       void *addr;
+
+       dbmp = dbmfp->dbmp;
+
+       /*
+        * Validate arguments.
+        *
+        * !!!
+        * Don't test for DB_MPOOL_CREATE and DB_MPOOL_NEW flags for readonly
+        * files here, and create non-existent pages in readonly files if the
+        * flags are set, later.  The reason is that the hash access method
+        * wants to get empty pages that don't really exist in readonly files.
+        * The only alternative is for hash to write the last "bucket" all the
+        * time, which we don't want to do because one of our big goals in life
+        * is to keep database files small.  It's sleazy as hell, but we catch
+        * any attempt to actually write the file in memp_fput().
+        */
+#define        OKFLAGS (DB_MPOOL_CREATE | DB_MPOOL_LAST | DB_MPOOL_NEW)
+       if (flags != 0) {
+               if ((ret =
+                   __db_fchk(dbmp->dbenv, "memp_fget", flags, OKFLAGS)) != 0)
+                       return (ret);
+
+               switch (flags) {
+               case DB_MPOOL_CREATE:
+               case DB_MPOOL_LAST:
+               case DB_MPOOL_NEW:
+               case 0:
+                       break;
+               default:
+                       return (__db_ferr(dbmp->dbenv, "memp_fget", 1));
+               }
+       }
+
+#ifdef DEBUG
+       /*
+        * XXX
+        * We want to switch threads as often as possible.  Sleep every time
+        * we get a new page to make it more likely.
+        */
+       if (__sleep_on_every_page_get && (dbmp->dbenv == NULL ||
+           dbmp->dbenv->db_yield == NULL || dbmp->dbenv->db_yield() != 0))
+               __db_sleep(0, 1);
+#endif
+
+       mp = dbmp->mp;
+       mfp = dbmfp->mfp;
+       mf_offset = OFFSET(dbmp, mfp);
+       addr = NULL;
+       bhp = NULL;
+       b_incr = b_inserted = readonly_alloc = ret = 0;
+
+       LOCKREGION(dbmp);
+
+       /*
+        * If mmap'ing the file, just return a pointer.  However, if another
+        * process has opened the file for writing since we mmap'd it, start
+        * playing the game by their rules, i.e. everything goes through the
+        * cache.  All pages previously returned should be safe, as long as
+        * a locking protocol was observed.
+        *
+        * XXX
+        * We don't discard the map because we don't know when all of the
+        * pages will have been discarded from the process' address space.
+        * It would be possible to do so by reference counting the open
+        * pages from the mmap, but it's unclear to me that it's worth it.
+        */
+       if (dbmfp->addr != NULL && dbmfp->mfp->can_mmap) {
+               lastpgno = dbmfp->len == 0 ?
+                   0 : (dbmfp->len - 1) / mfp->stat.st_pagesize;
+               if (LF_ISSET(DB_MPOOL_LAST))
+                       *pgnoaddr = lastpgno;
+               else {
+                       /*
+                        * !!!
+                        * Allocate a page that can never really exist.  See
+                        * the comment above about non-existent pages and the
+                        * hash access method.
+                        */
+                       if (LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))
+                               readonly_alloc = 1;
+                       else if (*pgnoaddr > lastpgno) {
+                               __db_err(dbmp->dbenv,
+                                   "%s: page %lu doesn't exist",
+                                   dbmfp->path, (u_long)*pgnoaddr);
+                               ret = EINVAL;
+                               goto err;
+                       }
+               }
+               if (!readonly_alloc) {
+                       addr = ADDR(dbmfp, *pgnoaddr * mfp->stat.st_pagesize);
+
+                       ++mp->stat.st_map;
+                       ++mfp->stat.st_map;
+
+                       goto mapret;
+               }
+       }
+
+       /*
+        * If requesting the last page or a new page, find the last page.  The
+        * tricky thing is that the user may have created a page already that's
+        * after any page that exists in the file.
+        */
+       if (LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
+               /*
+                * Temporary files may not yet have been created.
+                *
+                * Don't lock -- there are no atomicity issues for stat(2).
+                */
+               if (dbmfp->fd == -1)
+                       size = 0;
+               else if ((ret = __db_stat(dbmp->dbenv,
+                   dbmfp->path, dbmfp->fd, &size, NULL)) != 0)
+                       goto err;
+
+               *pgnoaddr = size == 0 ? 0 : (size - 1) / mfp->stat.st_pagesize;
+
+               /*
+                * Walk the list of BH's, looking for later pages.  Save the
+                * pointer if a later page is found so that we don't have to
+                * search the list twice.
+                *
+                * If requesting a new page, return the page one after the last
+                * page -- which we'll have to create.
+                */
+               for (tbhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
+                   tbhp != NULL; tbhp = SH_TAILQ_NEXT(tbhp, q, __bh))
+                       if (tbhp->pgno >= *pgnoaddr &&
+                           tbhp->mf_offset == mf_offset) {
+                               bhp = tbhp;
+                               *pgnoaddr = bhp->pgno;
+                       }
+               if (LF_ISSET(DB_MPOOL_NEW))
+                       ++*pgnoaddr;
+       }
+
+       /* If we already found the right buffer, return it. */
+       if (LF_ISSET(DB_MPOOL_LAST) && bhp != NULL) {
+               addr = bhp->buf;
+               goto found;
+       }
+
+       /* If we haven't checked the BH list yet, do the search. */
+       if (!LF_ISSET(DB_MPOOL_LAST | DB_MPOOL_NEW)) {
+               ++mp->stat.st_hash_searches;
+               bucket = BUCKET(mp, mf_offset, *pgnoaddr);
+               for (cnt = 0,
+                   bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+                   bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, mq, __bh)) {
+                       ++cnt;
+                       if (bhp->pgno == *pgnoaddr &&
+                           bhp->mf_offset == mf_offset) {
+                               addr = bhp->buf;
+                               if (cnt > mp->stat.st_hash_longest)
+                                       mp->stat.st_hash_longest = cnt;
+                               mp->stat.st_hash_examined += cnt;
+                               goto found;
+                       }
+               }
+               if (cnt > mp->stat.st_hash_longest)
+                       mp->stat.st_hash_longest = cnt;
+               mp->stat.st_hash_examined += cnt;
+       }
+
+       /*
+        * Allocate a new buffer header and data space, and mark the contents
+        * as useless.
+        */
+       if ((ret = __memp_ralloc(dbmp, sizeof(BH) -
+           sizeof(u_int8_t) + mfp->stat.st_pagesize, NULL, &bhp)) != 0)
+               goto err;
+       addr = bhp->buf;
+#ifdef DEBUG
+       if ((ALIGNTYPE)addr & (sizeof(size_t) - 1)) {
+               __db_err(dbmp->dbenv,
+                   "Internal error: BH data NOT size_t aligned.");
+               abort();
+       }
+#endif
+       memset(bhp, 0, sizeof(BH));
+       LOCKINIT(dbmp, &bhp->mutex);
+
+       /*
+        * Prepend the bucket header to the head of the appropriate MPOOL
+        * bucket hash list.  Append the bucket header to the tail of the
+        * MPOOL LRU chain.
+        *
+        * We have to do this before we read in the page so we can discard
+        * our region lock without screwing up the world.
+        */
+       bucket = BUCKET(mp, mf_offset, *pgnoaddr);
+       SH_TAILQ_INSERT_HEAD(&dbmp->htab[bucket], bhp, mq, __bh);
+       SH_TAILQ_INSERT_TAIL(&mp->bhq, bhp, q);
+       b_inserted = 1;
+
+       /* Set the page number, and associated MPOOLFILE. */
+       bhp->mf_offset = mf_offset;
+       bhp->pgno = *pgnoaddr;
+
+       /*
+        * If we know we created the page, zero it out and continue.
+        *
+        * !!!
+        * Note: DB_MPOOL_NEW deliberately doesn't call the pgin function.
+        * If DB_MPOOL_CREATE is used, then the application's pgin function
+        * has to be able to handle pages of 0's -- if it uses DB_MPOOL_NEW,
+        * it can detect all of its page creates, and not bother.
+        *
+        * Otherwise, read the page into memory, optionally creating it if
+        * DB_MPOOL_CREATE is set.
+        *
+        * Increment the reference count for created buffers, but importantly,
+        * increment the reference count for buffers we're about to read so
+        * that the buffer can't move.
+        */
+       ++bhp->ref;
+       b_incr = 1;
+
+       if (LF_ISSET(DB_MPOOL_NEW))
+               memset(addr, 0, mfp->stat.st_pagesize);
+       else {
+               /*
+                * It's possible for the read function to fail, which means
+                * that we fail as well.
+                */
+reread:                if ((ret = __memp_pgread(dbmfp,
+                   bhp, LF_ISSET(DB_MPOOL_CREATE | DB_MPOOL_NEW))) != 0)
+                       goto err;
+
+               /*
+                * !!!
+                * The __memp_pgread call discarded and reacquired the region
+                * lock.  Because the buffer reference count was incremented
+                * before the region lock was discarded the buffer didn't move.
+                */
+               ++mp->stat.st_cache_miss;
+               ++mfp->stat.st_cache_miss;
+       }
+
+       if (0) {
+found:         /* Increment the reference count. */
+               if (bhp->ref == UINT16_T_MAX) {
+                       __db_err(dbmp->dbenv,
+                           "%s: too many references to page %lu",
+                           dbmfp->path, bhp->pgno);
+                       ret = EAGAIN;
+                       goto err;
+               }
+               ++bhp->ref;
+               b_incr = 1;
+
+               /*
+                * Any found buffer might be trouble.
+                *
+                * BH_LOCKED --
+                * I/O in progress, wait for it to finish.  Because the buffer
+                * reference count was incremented before the region lock was
+                * discarded we know the buffer didn't move.
+                */
+               if (F_ISSET(bhp, BH_LOCKED)) {
+                       UNLOCKREGION(dbmp);
+                       LOCKBUFFER(dbmp, bhp);
+                       /* Waiting for I/O to finish... */
+                       UNLOCKBUFFER(dbmp, bhp);
+                       LOCKREGION(dbmp);
+               }
+
+               /*
+                * BH_TRASH --
+                * The buffer is garbage.
+                */
+               if (F_ISSET(bhp, BH_TRASH))
+                       goto reread;
+
+               /*
+                * BH_CALLPGIN --
+                * The buffer was written, and the contents need to be
+                * converted again.
+                */
+               if (F_ISSET(bhp, BH_CALLPGIN)) {
+                       if ((ret = __memp_pg(dbmfp, bhp, 1)) != 0)
+                               goto err;
+                       F_CLR(bhp, BH_CALLPGIN);
+               }
+
+               ++mp->stat.st_cache_hit;
+               ++mfp->stat.st_cache_hit;
+       }
+
+mapret:        LOCKHANDLE(dbmp, &dbmfp->mutex);
+       ++dbmfp->pinref;
+       UNLOCKHANDLE(dbmp, &dbmfp->mutex);
+
+       if (0) {
+err:           /*
+                * If no other process is already waiting on a created buffer,
+                * go ahead and discard it, it's not useful.
+                */
+               if (b_incr)
+                       --bhp->ref;
+               if (b_inserted && bhp->ref == 0)
+                       __memp_bhfree(dbmp, mfp, bhp, 1);
+       }
+
+       UNLOCKREGION(dbmp);
+
+       *(void **)addrp = addr;
+       return (ret);
+}
diff --git a/db2/mp/mp_fopen.c b/db2/mp/mp_fopen.c
new file mode 100644 (file)
index 0000000..7703847
--- /dev/null
@@ -0,0 +1,437 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_fopen.c   10.24 (Sleepycat) 8/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+static int __memp_mf_close __P((DB_MPOOL *, DB_MPOOLFILE *));
+static int __memp_mf_open __P((DB_MPOOL *, DB_MPOOLFILE *,
+    int, int, size_t, int, DBT *, u_int8_t *, int, MPOOLFILE **));
+
+/*
+ * memp_fopen --
+ *     Public entry point for attaching a backing file to a memory pool.
+ *
+ *     Checks that flags holds nothing beyond DB_CREATE, DB_NOMMAP and
+ *     DB_RDONLY, then hands every argument on to __memp_fopen with its
+ *     needlock argument set.  Returns 0 on success or the error value
+ *     produced by the flag check or by __memp_fopen.
+ */
+int
+memp_fopen(dbmp, path, ftype,
+    flags, mode, pagesize, lsn_offset, pgcookie, fileid, retp)
+       DB_MPOOL *dbmp;
+       const char *path;
+       int ftype, flags, mode, lsn_offset;
+       size_t pagesize;
+       DBT *pgcookie;
+       u_int8_t *fileid;
+       DB_MPOOLFILE **retp;
+{
+       int ret;
+
+       /* Reject any flag this call does not understand. */
+       ret = __db_fchk(dbmp->dbenv,
+           "memp_fopen", flags, DB_CREATE | DB_NOMMAP | DB_RDONLY);
+       if (ret != 0)
+               return (ret);
+
+       /* The literal 1 is needlock: let the internal version lock the region. */
+       ret = __memp_fopen(dbmp, path, ftype, flags,
+           mode, pagesize, lsn_offset, pgcookie, fileid, 1, retp);
+       return (ret);
+}
+
+/*
+ * __memp_fopen --
+ *     Open a backing file for the memory pool; internal version.
+ *
+ *     A NULL path requests a temporary file, which may not be opened
+ *     read-only.  If needlock is non-zero the region lock is acquired
+ *     around the lookup/allocation of the shared MPOOLFILE; otherwise
+ *     the region is not locked here.  On success the new handle is
+ *     appended to the pool's handle list, stored in *retp, and 0 is
+ *     returned.  On failure any allocated path, the open descriptor and
+ *     the handle itself are released and a non-zero error value is
+ *     returned.
+ *
+ * PUBLIC: int __memp_fopen __P((DB_MPOOL *, const char *, int, int,
+ * PUBLIC:    int, size_t, int, DBT *, u_int8_t *, int, DB_MPOOLFILE **));
+ */
+int
+__memp_fopen(dbmp, path,
+    ftype, flags, mode, pagesize, lsn_offset, pgcookie, fileid, needlock, retp)
+       DB_MPOOL *dbmp;
+       const char *path;
+       int ftype, flags, mode, lsn_offset, needlock;
+       size_t pagesize;
+       DBT *pgcookie;
+       u_int8_t *fileid;
+       DB_MPOOLFILE **retp;
+{
+       DB_ENV *dbenv;
+       DB_MPOOLFILE *dbmfp;
+       MPOOLFILE *mfp;
+       off_t size;
+       int ret;
+
+       dbenv = dbmp->dbenv;
+       ret = 0;
+
+       /* Require a non-zero pagesize; it is used as a divisor below. */
+       if (pagesize == 0) {
+               __db_err(dbenv, "memp_fopen: pagesize not specified");
+               return (EINVAL);
+       }
+
+       /*
+        * Allocate and initialize the per-process structure.  calloc leaves
+        * every field that is not explicitly set below zeroed.
+        */
+       if ((dbmfp =
+           (DB_MPOOLFILE *)calloc(1, sizeof(DB_MPOOLFILE))) == NULL) {
+               __db_err(dbenv, "%s: %s",
+                   path == NULL ? TEMPORARY : path, strerror(ENOMEM));
+               return (ENOMEM);
+       }
+       LOCKINIT(dbmp, &dbmfp->mutex);
+       dbmfp->dbmp = dbmp;
+       dbmfp->fd = -1;
+       if (LF_ISSET(DB_RDONLY))
+               F_SET(dbmfp, MP_READONLY);
+
+       if (path == NULL) {
+               if (LF_ISSET(DB_RDONLY)) {
+                       __db_err(dbenv,
+                           "memp_fopen: temporary files can't be readonly");
+                       ret = EINVAL;
+                       goto err;
+               }
+               dbmfp->path = (char *) TEMPORARY;
+               F_SET(dbmfp, MP_PATH_TEMP);
+       } else {
+               /*
+                * Calculate the real name for this file.  MP_PATH_ALLOC
+                * records that the path must be freed when the handle goes
+                * away.
+                */
+               if ((ret = __db_appname(dbenv,
+                   DB_APP_DATA, NULL, path, NULL, &dbmfp->path)) != 0)
+                       goto err;
+               F_SET(dbmfp, MP_PATH_ALLOC);
+
+
+               /* Open the file. */
+               if ((ret = __db_fdopen(dbmfp->path,
+                   LF_ISSET(DB_CREATE | DB_RDONLY), DB_CREATE | DB_RDONLY,
+                   mode, &dbmfp->fd)) != 0) {
+                       __db_err(dbenv, "%s: %s", dbmfp->path, strerror(ret));
+                       goto err;
+               }
+
+               /* Don't permit files that aren't a multiple of the pagesize. */
+               if ((ret = __db_stat(dbenv,
+                    dbmfp->path, dbmfp->fd, &size, NULL)) != 0)
+                       goto err;
+               if (size % pagesize) {
+                       __db_err(dbenv,
+                           "%s: file size not a multiple of the pagesize",
+                           dbmfp->path);
+                       ret = EINVAL;
+                       goto err;
+               }
+       }
+
+       /*
+        * Find/allocate the shared file object, taking the region lock only
+        * when the caller asked for it through needlock.
+        */
+       if (needlock)
+               LOCKREGION(dbmp);
+       ret = __memp_mf_open(dbmp, dbmfp, ftype,
+           F_ISSET(dbmfp, MP_READONLY), pagesize,
+           lsn_offset, pgcookie, fileid, F_ISSET(dbmfp, MP_PATH_TEMP), &mfp);
+       if (needlock)
+               UNLOCKREGION(dbmp);
+       if (ret != 0)
+               goto err;
+
+       dbmfp->mfp = mfp;
+
+       /*
+        * If a file:
+        *
+        *      + is read-only
+        *      + doesn't require any pgin/pgout support
+        *      + is less than mp_mmapsize bytes in size.
+        *      + and the DB_NOMMAP flag wasn't set
+        *
+        * we can mmap it instead of reading/writing buffers.  Don't do error
+        * checking based on the mmap call failure.  We want to do normal I/O
+        * on the file if the reason we failed was because the file was on an
+        * NFS mounted partition, and we can fail in buffer I/O just as easily
+        * as here.
+        *
+        * XXX
+        * We'd like to test to see if the file is too big to mmap.  Since we
+        * don't know what size or type off_t's or size_t's are, or the largest
+        * unsigned integral type is, or what random insanity the local C
+        * compiler will perpetrate, doing the comparison in a portable way is
+        * flatly impossible.  Hope that mmap fails if the file is too large.
+        *
+        * The size variable is only assigned when path is non-NULL, so the
+        * path test below must stay ahead of the size comparison.  When no
+        * environment is supplied, or its mp_mmapsize is 0, DB_MAXMMAPSIZE
+        * is the limit.
+        */
+#define        DB_MAXMMAPSIZE  (10 * 1024 * 1024)      /* 10 Mb. */
+       dbmfp->addr = NULL;
+       mfp->can_mmap = F_ISSET(dbmfp, MP_READONLY) &&
+           ftype == 0 && !LF_ISSET(DB_NOMMAP) && path != NULL &&
+           size <= (dbenv == NULL || dbenv->mp_mmapsize == 0 ?
+           DB_MAXMMAPSIZE : (off_t)dbenv->mp_mmapsize);
+       if (mfp->can_mmap) {
+               dbmfp->len = size;
+               if (__db_mmap(dbmfp->fd, dbmfp->len, 1, 1, &dbmfp->addr) != 0) {
+                       mfp->can_mmap = 0;
+                       dbmfp->addr = NULL;
+               }
+       }
+
+       LOCKHANDLE(dbmp, &dbmp->mutex);
+       TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q);
+       UNLOCKHANDLE(dbmp, &dbmp->mutex);
+
+       *retp = dbmfp;
+       return (0);
+
+err:   if (F_ISSET(dbmfp, MP_PATH_ALLOC))
+               FREES(dbmfp->path);
+       if (dbmfp->fd != -1)
+               (void)__db_close(dbmfp->fd);
+       if (dbmfp != NULL)
+               FREE(dbmfp, sizeof(DB_MPOOLFILE));
+       return (ret);
+}
+
+/*
+ * __memp_mf_open --
+ *     Open an MPOOLFILE.
+ *
+ *     Unless istemp is set, the shared list of MPOOLFILEs is searched for
+ *     an entry whose file id matches fileid (generated from dbmfp->path
+ *     when fileid is NULL).  A match must agree on ftype and pagesize; it
+ *     then gains a reference and loses its mmap-able status if this opener
+ *     is not read-only.  Otherwise a new MPOOLFILE is allocated, filled in
+ *     and prepended to the list.
+ *
+ *     Returns 0 with the MPOOLFILE stored in *retp, or a non-zero error
+ *     value with *retp set to NULL (*retp is left untouched if the file
+ *     id could not be generated).
+ */
+static int
+__memp_mf_open(dbmp, dbmfp,
+    ftype, readonly, pagesize, lsn_offset, pgcookie, fileid, istemp, retp)
+       DB_MPOOL *dbmp;
+       DB_MPOOLFILE *dbmfp;
+       int ftype, readonly, lsn_offset, istemp;
+       size_t pagesize;
+       DBT *pgcookie;
+       u_int8_t *fileid;
+       MPOOLFILE **retp;
+{
+       MPOOLFILE *mfp;
+       int ret;
+       u_int8_t idbuf[DB_FILE_ID_LEN];
+       void *p;
+
+       /*
+        * Every path that reaches ret1 reports ret, so it must start out
+        * as success; the "found a match" path never assigns it.
+        */
+       mfp = NULL;
+       ret = 0;
+
+       /* Temporary files can't match previous files. */
+       if (istemp)
+               goto alloc;
+
+       /*
+        * Get the file id if we weren't given one.  Generated file id's don't
+        * use timestamps, otherwise there'd be no chance of anyone joining
+        * the party.
+        */
+       if (fileid == NULL) {
+               if ((ret =
+                   __db_fileid(dbmp->dbenv, dbmfp->path, 0, idbuf)) != 0)
+                       return (ret);
+               fileid = idbuf;
+       }
+
+       /* Walk the list of MPOOLFILE's, looking for a matching file. */
+       for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+           mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
+               if (!memcmp(fileid,
+                   ADDR(dbmp, mfp->fileid_off), DB_FILE_ID_LEN)) {
+                       if (ftype != mfp->ftype ||
+                           pagesize != mfp->stat.st_pagesize) {
+                               __db_err(dbmp->dbenv,
+                                   "%s: ftype or pagesize changed",
+                                   dbmfp->path);
+                               ret = EINVAL;
+                               mfp = NULL;
+                               goto ret1;
+                       }
+                       /*
+                        * Found it: increment the reference count and update
+                        * the mmap-able status.
+                        */
+                       ++mfp->ref;
+                       if (!readonly)
+                               mfp->can_mmap = 0;
+                       goto ret1;
+               }
+
+       /* Allocate a new MPOOLFILE. */
+alloc: if ((ret = __memp_ralloc(dbmp, sizeof(MPOOLFILE), NULL, &mfp)) != 0) {
+               /* Don't hand the caller whatever the allocator left behind. */
+               mfp = NULL;
+               goto ret1;
+       }
+
+       /* Initialize the structure. */
+       memset(mfp, 0, sizeof(MPOOLFILE));
+       mfp->ref = 1;
+       mfp->ftype = ftype;
+       mfp->lsn_off = lsn_offset;
+       mfp->stat.st_pagesize = pagesize;
+
+       /* Copy the file path into shared memory. */
+       if ((ret = __memp_ralloc(dbmp,
+           strlen(dbmfp->path) + 1, &mfp->path_off, &p)) != 0)
+               goto err;
+       memcpy(p, dbmfp->path, strlen(dbmfp->path) + 1);
+
+       /* Copy the file identification string into shared memory. */
+       if (istemp)
+               mfp->fileid_off = 0;
+       else {
+               if ((ret = __memp_ralloc(dbmp,
+                   DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0)
+                       goto err;
+               memcpy(p, fileid, DB_FILE_ID_LEN);
+       }
+
+       /* Copy the page cookie into shared memory. */
+       if (pgcookie == NULL || pgcookie->size == 0) {
+               mfp->pgcookie_len = 0;
+               mfp->pgcookie_off = 0;
+       } else {
+               if ((ret = __memp_ralloc(dbmp,
+                   pgcookie->size, &mfp->pgcookie_off, &p)) != 0)
+                       goto err;
+               memcpy(p, pgcookie->data, pgcookie->size);
+               mfp->pgcookie_len = pgcookie->size;
+       }
+
+       /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */
+       SH_TAILQ_INSERT_HEAD(&dbmp->mp->mpfq, mfp, q, __mpoolfile);
+
+       if (0) {
+               /*
+                * Undo a partially built MPOOLFILE.  The structure was zeroed
+                * above, so an offset of 0 means that piece was never
+                * allocated; the pieces go first because their offsets are
+                * stored in the structure itself.
+                */
+err:           if (mfp->path_off != 0)
+                       __db_shalloc_free(dbmp->addr,
+                           ADDR(dbmp, mfp->path_off));
+               if (mfp->fileid_off != 0)
+                       __db_shalloc_free(dbmp->addr,
+                           ADDR(dbmp, mfp->fileid_off));
+               __db_shalloc_free(dbmp->addr, mfp);
+               mfp = NULL;
+       }
+
+ret1:  *retp = mfp;
+       return (ret);
+}
+
+/*
+ * memp_fclose --
+ *     Close a backing file for the memory pool.
+ *
+ *     Unlinks the handle from the pool's handle list, drops its reference
+ *     on the shared MPOOLFILE, unmaps and closes the file, and frees the
+ *     handle.  Every step is attempted even if an earlier one fails; the
+ *     first unmap/close error encountered is returned, 0 otherwise.
+ *     dbmfp must not be used after this call.
+ */
+int
+memp_fclose(dbmfp)
+       DB_MPOOLFILE *dbmfp;
+{
+       DB_MPOOL *dbmp;
+       int ret, t_ret;
+
+       dbmp = dbmfp->dbmp;
+       ret = 0;
+
+       /* Complain if pinned blocks never returned. */
+       if (dbmfp->pinref != 0)
+               __db_err(dbmp->dbenv, "%s: close: %lu blocks left pinned",
+                   dbmfp->path, (u_long)dbmfp->pinref);
+
+       /* Remove the DB_MPOOLFILE structure from the list. */
+       LOCKHANDLE(dbmp, &dbmp->mutex);
+       TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q);
+       UNLOCKHANDLE(dbmp, &dbmp->mutex);
+
+       /* Close the underlying MPOOLFILE. */
+       (void)__memp_mf_close(dbmp, dbmfp);
+
+       /* Discard any mmap information. */
+       if (dbmfp->addr != NULL &&
+           (ret = __db_munmap(dbmfp->addr, dbmfp->len)) != 0)
+               __db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(ret));
+
+       /*
+        * Close the file; temporary files may not yet have been created.
+        * Report the close failure unless an earlier error is already
+        * being returned.
+        */
+       if (dbmfp->fd != -1 && (t_ret = __db_close(dbmfp->fd)) != 0) {
+               __db_err(dbmp->dbenv, "%s: %s", dbmfp->path, strerror(t_ret));
+               if (ret == 0)
+                       ret = t_ret;
+       }
+
+       /* Potentially allocated path. */
+       if (F_ISSET(dbmfp, MP_PATH_ALLOC))
+               FREES(dbmfp->path);
+
+       /* Free the DB_MPOOLFILE structure. */
+       FREE(dbmfp, sizeof(DB_MPOOLFILE));
+
+       return (ret);
+}
+
+/*
+ * __memp_mf_close --
+ *     Close down an MPOOLFILE.
+ */
+static int
+__memp_mf_close(dbmp, dbmfp)
+       DB_MPOOL *dbmp;
+       DB_MPOOLFILE *dbmfp;
+{
+       BH *bhp, *nbhp;
+       MPOOL *mp;
+       MPOOLFILE *mfp;
+       size_t mf_offset;
+
+       mp = dbmp->mp;
+       mfp = dbmfp->mfp;
+
+       LOCKREGION(dbmp);
+
+       /* If more than a single reference, simply decrement. */
+       if (mfp->ref > 1) {
+               --mfp->ref;
+               goto ret1;
+       }
+
+       /*
+        * Move any BH's held by the file to the free list.  We don't free the
+        * memory itself because we may be discarding the memory pool, and it's
+        * fairly expensive to reintegrate the buffers back into the region for
+        * no purpose.
+        */
+       mf_offset = OFFSET(dbmp, mfp);
+       for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
+               nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
+
+#ifdef DEBUG_NO_DIRTY
+               /* Complain if we find any blocks that were left dirty. */
+               if (F_ISSET(bhp, BH_DIRTY))
+                       __db_err(dbmp->dbenv,
+                           "%s: close: pgno %lu left dirty; ref %lu",
+                           dbmfp->path, (u_long)bhp->pgno, (u_long)bhp->ref);
+#endif
+
+               if (bhp->mf_offset == mf_offset) {
+                       __memp_bhfree(dbmp, mfp, bhp, 0);
+                       SH_TAILQ_INSERT_HEAD(&mp->bhfq, bhp, q, __bh);
+               }
+       }
+
+       /* Delete from the list of MPOOLFILEs. */
+       SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile);
+
+       /* Free the space. */
+       __db_shalloc_free(dbmp->addr, mfp);
+       __db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->path_off));
+       if (mfp->fileid_off != 0)
+               __db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->fileid_off));
+       if (mfp->pgcookie_off != 0)
+               __db_shalloc_free(dbmp->addr, ADDR(dbmp, mfp->pgcookie_off));
+
+ret1:  UNLOCKREGION(dbmp);
+       return (0);
+}
diff --git a/db2/mp/mp_fput.c b/db2/mp/mp_fput.c
new file mode 100644 (file)
index 0000000..5fac8ae
--- /dev/null
@@ -0,0 +1,140 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_fput.c    10.10 (Sleepycat) 7/20/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+/*
+ * memp_fput --
+ *     Mpool file put function.
+ */
+int
+memp_fput(dbmfp, pgaddr, flags)
+       DB_MPOOLFILE *dbmfp;
+       void *pgaddr;
+       u_long flags;
+{
+       BH *bhp;
+       DB_MPOOL *dbmp;
+       MPOOLFILE *mfp;
+       int wrote, ret;
+
+       dbmp = dbmfp->dbmp;
+
+       /* Validate arguments. */
+       if (flags) {
+               if ((ret = __db_fchk(dbmp->dbenv, "memp_fput", flags,
+                   DB_MPOOL_CLEAN | DB_MPOOL_DIRTY | DB_MPOOL_DISCARD)) != 0)
+                       return (ret);
+               if ((ret = __db_fcchk(dbmp->dbenv, "memp_fput",
+                   flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
+                       return (ret);
+
+               if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
+                       __db_err(dbmp->dbenv,
+                           "%s: dirty flag set for readonly file page",
+                           dbmfp->path);
+                       return (EACCES);
+               }
+       }
+
+       /* Decrement the pinned reference count. */
+       LOCKHANDLE(dbmp, &dbmfp->mutex);
+       if (dbmfp->pinref == 0)
+               __db_err(dbmp->dbenv,
+                   "%s: put: more blocks returned than retrieved",
+                   dbmfp->path);
+       else
+               --dbmfp->pinref;
+       UNLOCKHANDLE(dbmp, &dbmfp->mutex);
+
+       /*
+        * If we're mapping the file, there's nothing to do.  Because we can
+        * quit mapping at any time, we have to check on each buffer to see
+        * if it's in the map region.
+        */
+       if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
+           (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
+               return (0);
+
+       /* Convert the page address to a buffer header. */
+       bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
+
+       LOCKREGION(dbmp);
+
+       /* Set/clear the page bits. */
+       if (LF_ISSET(DB_MPOOL_CLEAN))
+               F_CLR(bhp, BH_DIRTY);
+       if (LF_ISSET(DB_MPOOL_DIRTY))
+               F_SET(bhp, BH_DIRTY);
+       if (LF_ISSET(DB_MPOOL_DISCARD))
+               F_SET(bhp, BH_DISCARD);
+
+       /*
+        * If more than one reference to the page, we're done.  Ignore discard
+        * flags (for now) and leave it at its position in the LRU chain.  The
+        * rest gets done at last reference close.
+        */
+#ifdef DEBUG
+       if (bhp->ref == 0) {
+               __db_err(dbmp->dbenv,
+                   "Internal error: bhp->ref on page %lu went negative.",
+                   (u_long)bhp->pgno);
+               abort();
+       }
+#endif
+       if (--bhp->ref > 0) {
+               UNLOCKREGION(dbmp);
+               return (0);
+       }
+
+       /* Move the buffer to the head/tail of the LRU chain. */
+       SH_TAILQ_REMOVE(&dbmp->mp->bhq, bhp, q, __bh);
+       if (F_ISSET(bhp, BH_DISCARD))
+               SH_TAILQ_INSERT_HEAD(&dbmp->mp->bhq, bhp, q, __bh);
+       else
+               SH_TAILQ_INSERT_TAIL(&dbmp->mp->bhq, bhp, q);
+
+       /*
+        * If this buffer is scheduled for writing because of a checkpoint,
+        * write it now.  If we can't write it, set a flag so that the next
+        * time the memp_sync function is called we try writing it there,
+        * as the checkpoint application better be able to write all of the
+        * files.
+        */
+       if (F_ISSET(bhp, BH_WRITE))
+               if (F_ISSET(bhp, BH_DIRTY)) {
+                       if (__memp_bhwrite(dbmp,
+                           dbmfp->mfp, bhp, NULL, &wrote) != 0 || !wrote)
+                               F_SET(dbmp->mp, MP_LSN_RETRY);
+               } else {
+                       F_CLR(bhp, BH_WRITE);
+
+                       mfp = ADDR(dbmp, bhp->mf_offset);
+                       --mfp->lsn_cnt;
+
+                       --dbmp->mp->lsn_cnt;
+               }
+
+       UNLOCKREGION(dbmp);
+       return (0);
+}
diff --git a/db2/mp/mp_fset.c b/db2/mp/mp_fset.c
new file mode 100644 (file)
index 0000000..588085a
--- /dev/null
@@ -0,0 +1,72 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_fset.c    10.8 (Sleepycat) 8/19/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+/*
+ * memp_fset --
+ *     Mpool page set-flag routine.
+ *
+ *     flags may combine DB_MPOOL_CLEAN, DB_MPOOL_DIRTY and
+ *     DB_MPOOL_DISCARD; CLEAN and DIRTY are checked as a pair by
+ *     __db_fcchk, and DIRTY is refused with EACCES on a read-only file.
+ *     The matching bits are then set or cleared in the buffer header that
+ *     precedes pgaddr.  Pages inside the handle's file mapping have no
+ *     buffer header and are left alone.  Returns 0 on success or the
+ *     validation error.
+ */
+int
+memp_fset(dbmfp, pgaddr, flags)
+       DB_MPOOLFILE *dbmfp;
+       void *pgaddr;
+       u_long flags;
+{
+       BH *bhp;
+       DB_MPOOL *dbmp;
+       int ret;
+
+       dbmp = dbmfp->dbmp;
+
+       /* Validate arguments. */
+       if (flags != 0) {
+               if ((ret = __db_fchk(dbmp->dbenv, "memp_fset", flags,
+                   DB_MPOOL_DIRTY | DB_MPOOL_CLEAN | DB_MPOOL_DISCARD)) != 0)
+                       return (ret);
+               if ((ret = __db_fcchk(dbmp->dbenv, "memp_fset",
+                   flags, DB_MPOOL_CLEAN, DB_MPOOL_DIRTY)) != 0)
+                       return (ret);
+
+               if (LF_ISSET(DB_MPOOL_DIRTY) && F_ISSET(dbmfp, MP_READONLY)) {
+                       __db_err(dbmp->dbenv,
+                           "%s: dirty flag set for readonly file page",
+                           dbmfp->path);
+                       return (EACCES);
+               }
+       }
+
+       /*
+        * A page handed out of the file mapping is not preceded by a buffer
+        * header, so computing one from its address and writing flags
+        * through it would scribble on unrelated memory.  Use the same
+        * range test as memp_fput and treat such pages as a no-op.
+        */
+       if (dbmfp->addr != NULL && pgaddr >= dbmfp->addr &&
+           (u_int8_t *)pgaddr <= (u_int8_t *)dbmfp->addr + dbmfp->len)
+               return (0);
+
+       /* Convert the page address to a buffer header. */
+       bhp = (BH *)((u_int8_t *)pgaddr - SSZA(BH, buf));
+
+       LOCKREGION(dbmp);
+
+       if (LF_ISSET(DB_MPOOL_DIRTY))
+               F_SET(bhp, BH_DIRTY);
+       if (LF_ISSET(DB_MPOOL_CLEAN))
+               F_CLR(bhp, BH_DIRTY);
+       if (LF_ISSET(DB_MPOOL_DISCARD))
+               F_SET(bhp, BH_DISCARD);
+
+       UNLOCKREGION(dbmp);
+       return (0);
+}
diff --git a/db2/mp/mp_open.c b/db2/mp/mp_open.c
new file mode 100644 (file)
index 0000000..257ce1b
--- /dev/null
@@ -0,0 +1,176 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_open.c    10.12 (Sleepycat) 7/6/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+/*
+ * memp_open --
+ *     Initialize and/or join a memory pool.
+ *
+ *     flags may hold DB_CREATE, DB_MPOOL_PRIVATE, DB_NOMMAP and, when
+ *     spinlocks are available, DB_THREAD.  dbenv may be NULL, in which
+ *     case a cache size of 0 is passed to the region open.  On success
+ *     the new pool handle is stored in *retp and 0 is returned; on
+ *     failure the handle is freed and a non-zero error value is returned.
+ */
+int
+memp_open(path, flags, mode, dbenv, retp)
+       const char *path;
+       int flags, mode;
+       DB_ENV *dbenv;
+       DB_MPOOL **retp;
+{
+       DB_MPOOL *dbmp;
+       size_t cachesize;
+       int ret;
+
+       /* Validate arguments. */
+#ifdef HAVE_SPINLOCKS
+#define        OKFLAGS (DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP | DB_THREAD)
+#else
+#define        OKFLAGS (DB_CREATE | DB_MPOOL_PRIVATE | DB_NOMMAP)
+#endif
+       if ((ret = __db_fchk(dbenv, "memp_open", flags, OKFLAGS)) != 0)
+               return (ret);
+
+       /* Extract fields from DB_ENV structure. */
+       cachesize = dbenv == NULL ? 0 : dbenv->mp_size;
+
+       /* Create and initialize the DB_MPOOL structure. */
+       if ((dbmp = (DB_MPOOL *)calloc(1, sizeof(DB_MPOOL))) == NULL)
+               return (ENOMEM);
+       LOCKINIT(dbmp, &dbmp->mutex);
+       LIST_INIT(&dbmp->dbregq);
+       TAILQ_INIT(&dbmp->dbmfq);
+
+       dbmp->dbenv = dbenv;
+
+       /*
+        * Decide if it's possible for anyone else to access the pool.
+        * DB_MPOOL_PRIVATE is accepted in this function's flags argument,
+        * so it has to be honoured there.
+        *
+        * NOTE(review): the test against the DB_ENV flag word is retained so
+        * that every pool that used to be treated as private still is --
+        * confirm whether DB_ENV flags ever carry DB_MPOOL_PRIVATE.
+        */
+       if ((dbenv == NULL && path == NULL) ||
+           LF_ISSET(DB_MPOOL_PRIVATE) ||
+           (dbenv != NULL && F_ISSET(dbenv, DB_MPOOL_PRIVATE)))
+               F_SET(dbmp, MP_ISPRIVATE);
+
+       /*
+        * Map in the region.  We do locking regardless, as portions of it are
+        * implemented in common code (if we put the region in a file, that is).
+        */
+       F_SET(dbmp, MP_LOCKREGION);
+       if ((ret = __memp_ropen(dbmp, path, cachesize, mode, flags)) != 0)
+               goto err;
+       F_CLR(dbmp, MP_LOCKREGION);
+
+       /*
+        * If there's concurrent access, then we have to lock the region.
+        * If it's threaded, then we have to lock both the handles and the
+        * region.
+        */
+       if (!F_ISSET(dbmp, MP_ISPRIVATE))
+               F_SET(dbmp, MP_LOCKREGION);
+       if (LF_ISSET(DB_THREAD))
+               F_SET(dbmp, MP_LOCKHANDLE | MP_LOCKREGION);
+
+       *retp = dbmp;
+       return (0);
+
+       /* dbmp is known to be non-NULL here: the calloc failure returned. */
+err:   FREE(dbmp, sizeof(DB_MPOOL));
+       return (ret);
+}
+
+/*
+ * memp_close --
+ *     Tear down a memory pool handle.
+ *
+ *     Frees every registered pgin/pgout entry, closes every file handle
+ *     still on the pool's list, closes the region and frees the handle.
+ *     All steps are attempted; the first non-zero error seen is returned,
+ *     0 otherwise.  dbmp must not be used after this call.
+ */
+int
+memp_close(dbmp)
+       DB_MPOOL *dbmp;
+{
+       DB_MPOOLFILE *dbmfp;
+       DB_MPREG *mpreg;
+       int ret, t_ret;
+
+       ret = 0;
+
+       /* Unlink and free the registration entries one at a time. */
+       for (mpreg = LIST_FIRST(&dbmp->dbregq);
+           mpreg != NULL; mpreg = LIST_FIRST(&dbmp->dbregq)) {
+               LIST_REMOVE(mpreg, q);
+               FREE(mpreg, sizeof(DB_MPREG));
+       }
+
+       /*
+        * Close whatever file handles remain.  memp_fclose removes the
+        * handle from the list, so re-reading the head makes progress.
+        */
+       for (;;) {
+               dbmfp = TAILQ_FIRST(&dbmp->dbmfq);
+               if (dbmfp == NULL)
+                       break;
+               t_ret = memp_fclose(dbmfp);
+               if (t_ret != 0 && ret == 0)
+                       ret = t_ret;
+       }
+
+       /* Close the region, keeping any earlier error. */
+       t_ret = __memp_rclose(dbmp);
+       if (t_ret && ret == 0)
+               ret = t_ret;
+
+       /* Finally release the handle itself. */
+       FREE(dbmp, sizeof(DB_MPOOL));
+
+       return (ret);
+}
+
+/*
+ * memp_unlink --
+ *     Exit a memory pool.
+ *
+ *     Delegates to __db_runlink, passing DB_APP_NONE, the caller's path,
+ *     DB_DEFAULT_MPOOL_FILE and the force argument, and returns its result
+ *     unchanged.
+ */
+int
+memp_unlink(path, force, dbenv)
+       const char *path;
+       int force;
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       ret = __db_runlink(dbenv,
+           DB_APP_NONE, path, DB_DEFAULT_MPOOL_FILE, force);
+       return (ret);
+}
+
+/*
+ * memp_register --
+ *     Record the pgin and pgout routines to use for a file type.
+ *
+ *     Returns 0 on success, or ENOMEM if the registration entry cannot
+ *     be allocated.
+ */
+int
+memp_register(dbmp, ftype, pgin, pgout)
+       DB_MPOOL *dbmp;
+       int ftype;
+       int (*pgin) __P((db_pgno_t, void *, DBT *));
+       int (*pgout) __P((db_pgno_t, void *, DBT *));
+{
+       DB_MPREG *mpr;
+
+       mpr = (DB_MPREG *)malloc(sizeof(DB_MPREG));
+       if (mpr == NULL)
+               return (ENOMEM);
+
+       mpr->pgout = pgout;
+       mpr->pgin = pgin;
+       mpr->ftype = ftype;
+
+       /*
+        * New entries go on the front of the list.  Lookups walk the list
+        * linearly, so when a type is registered more than once the newest
+        * entry is the one found and no duplicate check is needed here.
+        */
+       LOCKHANDLE(dbmp, &dbmp->mutex);
+       LIST_INSERT_HEAD(&dbmp->dbregq, mpr, q);
+       UNLOCKHANDLE(dbmp, &dbmp->mutex);
+
+       return (0);
+}
diff --git a/db2/mp/mp_pr.c b/db2/mp/mp_pr.c
new file mode 100644 (file)
index 0000000..94eabf5
--- /dev/null
@@ -0,0 +1,313 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_pr.c      10.12 (Sleepycat) 7/29/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+
+void __memp_debug __P((DB_MPOOL *, FILE *, int));
+
+static void __memp_pbh __P((FILE *, DB_MPOOL *, BH *, int));
+static void __memp_pdbmf __P((FILE *, DB_MPOOLFILE *, int));
+static void __memp_pmf __P((FILE *, MPOOLFILE *, int));
+static void __memp_pmp __P((FILE *, DB_MPOOL *, MPOOL *, int));
+
+/*
+ * memp_stat --
+ *     Copy out MPOOL statistics.
+ *
+ *     If gspp is not NULL, *gspp is set to a newly allocated copy of the
+ *     global statistics.  If fspp is not NULL, *fspp is set to a newly
+ *     allocated, NULL-terminated array of per-file statistics, or left
+ *     NULL when the pool has no files.  Memory is obtained from db_malloc
+ *     when it is not NULL, and from malloc otherwise.
+ *
+ *     Returns 0 on success and ENOMEM if an allocation fails.  On failure,
+ *     anything already stored through gspp or fspp is left for the caller
+ *     to release; a partially built *fspp array is still NULL-terminated.
+ */
+int
+memp_stat(dbmp, gspp, fspp, db_malloc)
+       DB_MPOOL *dbmp;
+       DB_MPOOL_STAT **gspp;
+       DB_MPOOL_FSTAT ***fspp;
+       void *(*db_malloc) __P((size_t));
+{
+       DB_MPOOL_FSTAT **tfsp;
+       MPOOLFILE *mfp;
+       size_t cnt, len, nfiles, nlen;
+       char *name;
+
+       /* Allocate space for the global statistics. */
+       if (gspp != NULL) {
+               *gspp = NULL;
+
+               if ((*gspp = db_malloc == NULL ?
+                   (DB_MPOOL_STAT *)malloc(sizeof(**gspp)) :
+                   (DB_MPOOL_STAT *)db_malloc(sizeof(**gspp))) == NULL)
+                       return (ENOMEM);
+
+               LOCKREGION(dbmp);
+
+               /* Copy out the global statistics. */
+               **gspp = dbmp->mp->stat;
+               (*gspp)->st_hash_buckets = dbmp->mp->htab_buckets;
+
+               UNLOCKREGION(dbmp);
+       }
+
+       if (fspp != NULL) {
+               *fspp = NULL;
+
+               LOCKREGION(dbmp);
+
+               /* Count the MPOOLFILE structures. */
+               for (nfiles = 0,
+                   mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+                   mfp != NULL;
+                   ++nfiles, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile));
+
+               UNLOCKREGION(dbmp);
+
+               if (nfiles == 0)
+                       return (0);
+
+               /* Allocate space for the pointers, plus the terminator. */
+               len = (nfiles + 1) * sizeof(DB_MPOOL_FSTAT *);
+               if ((*fspp = db_malloc == NULL ?
+                   (DB_MPOOL_FSTAT **)malloc(len) :
+                   (DB_MPOOL_FSTAT **)db_malloc(len)) == NULL)
+                       return (ENOMEM);
+
+               LOCKREGION(dbmp);
+
+               /*
+                * Build each individual entry.  The region was unlocked
+                * while the array was allocated, so the list may have
+                * grown since it was counted; never fill more than the
+                * nfiles slots that were allocated.
+                */
+               for (cnt = 0, tfsp = *fspp,
+                   mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+                   mfp != NULL && cnt < nfiles;
+                   ++cnt, ++tfsp, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
+                       name = ADDR(dbmp, mfp->path_off);
+                       nlen = strlen(name);
+
+                       /* The file name is stored right after the struct. */
+                       len = sizeof(DB_MPOOL_FSTAT) + nlen + 1;
+                       if ((*tfsp = db_malloc == NULL ?
+                           (DB_MPOOL_FSTAT *)malloc(len) :
+                           (DB_MPOOL_FSTAT *)db_malloc(len)) == NULL) {
+                               /*
+                                * Don't return with the region locked.  The
+                                * failed slot holds NULL, which terminates
+                                * the entries built so far.
+                                */
+                               UNLOCKREGION(dbmp);
+                               return (ENOMEM);
+                       }
+                       **tfsp = mfp->stat;
+                       (*tfsp)->file_name = (char *)
+                           (u_int8_t *)*tfsp + sizeof(DB_MPOOL_FSTAT);
+                       memcpy((*tfsp)->file_name, name, nlen + 1);
+               }
+               *tfsp = NULL;
+
+               UNLOCKREGION(dbmp);
+       }
+       return (0);
+}
+
+/*
+ * __memp_debug --
+ *     Display MPOOL structures.
+ *
+ *     Writes the per-process view of the pool (the DB_MPOOLFILE list)
+ *     followed by the pool-wide statistics to fp, or to stderr if fp is
+ *     NULL.  A non-zero data requests the additional detail lines.
+ *
+ * PUBLIC: void __memp_debug __P((DB_MPOOL *, FILE *, int));
+ */
+void
+__memp_debug(dbmp, fp, data)
+       DB_MPOOL *dbmp;
+       FILE *fp;
+       int data;
+{
+       DB_MPOOLFILE *cur;
+       u_long nfiles;
+
+       /* A NULL stream keeps this easy to call from a debugger. */
+       if (fp == NULL)
+               fp = stderr;
+
+       /* Per-process banner. */
+       (void)fprintf(fp, "%s\nMpool per-process (%lu) statistics\n",
+           DB_LINE, (u_long)getpid());
+
+       if (data)
+               (void)fprintf(fp, "    fd: %d; addr %lx; maddr %lx\n",
+                   dbmp->fd, (u_long)dbmp->addr, (u_long)dbmp->maddr);
+
+       /* Count the process-local files, then describe each of them. */
+       nfiles = 0;
+       for (cur = TAILQ_FIRST(&dbmp->dbmfq);
+           cur != NULL; cur = TAILQ_NEXT(cur, q))
+               ++nfiles;
+       (void)fprintf(fp, "%lu process-local files\n", nfiles);
+
+       cur = TAILQ_FIRST(&dbmp->dbmfq);
+       while (cur != NULL) {
+               (void)fprintf(fp, "%s\n", cur->path);
+               __memp_pdbmf(fp, cur, data);
+               cur = TAILQ_NEXT(cur, q);
+       }
+
+       /* Pool-wide banner and statistics. */
+       (void)fprintf(fp, "\n%s\nMpool statistics\n", DB_LINE);
+       __memp_pmp(fp, dbmp, dbmp->mp, data);
+
+       /* Flush so the output is visible when called from a debugger. */
+       (void)fflush(fp);
+}
+
+/*
+ * __memp_pdbmf --
+ *     Display a DB_MPOOLFILE structure.
+ *
+ *     Prints the file descriptor and access mode; nothing is written
+ *     unless data is non-zero.
+ */
+static void
+__memp_pdbmf(fp, dbmfp, data)
+       FILE *fp;
+       DB_MPOOLFILE *dbmfp;
+       int data;
+{
+       const char *mode;
+
+       if (data) {
+               mode = F_ISSET(dbmfp, MP_READONLY) ? "readonly" : "read/write";
+               (void)fprintf(fp, "    fd: %d; %s\n", dbmfp->fd, mode);
+       }
+}
+
+/*
+ * __memp_pmp --
+ *     Display the MPOOL structure.
+ *
+ *     Writes the pool-wide counters and a summary of every MPOOLFILE to
+ *     fp.  When data is non-zero it also writes the hash table of buffer
+ *     headers, bucket by bucket, and the LRU list of buffer headers.
+ */
+static void
+__memp_pmp(fp, dbmp, mp, data)
+       FILE *fp;
+       DB_MPOOL *dbmp;
+       MPOOL *mp;
+       int data;
+{
+       BH *bhp;
+       MPOOLFILE *mfp;
+       size_t bucket;
+       int cnt;
+       const char *sep;
+
+       (void)fprintf(fp, "references: %lu; cachesize: %lu\n",
+           (u_long)mp->rlayout.refcnt, (u_long)mp->stat.st_cachesize);
+       (void)fprintf(fp,
+           "    %lu pages created\n", mp->stat.st_page_create);
+       (void)fprintf(fp,
+           "    %lu mmap pages returned\n", mp->stat.st_map);
+       (void)fprintf(fp, "    %lu I/O's (%lu read, %lu written)\n",
+           mp->stat.st_page_in + mp->stat.st_page_out,
+           mp->stat.st_page_in, mp->stat.st_page_out);
+
+       /* Skip the hit rate when there were no lookups (no divide by 0). */
+       if (mp->stat.st_cache_hit + mp->stat.st_cache_miss != 0)
+               (void)fprintf(fp,
+                   "    %.0f%% cache hit rate (%lu hit, %lu miss)\n",
+                   ((double)mp->stat.st_cache_hit /
+                   (mp->stat.st_cache_hit + mp->stat.st_cache_miss)) * 100,
+                   mp->stat.st_cache_hit, mp->stat.st_cache_miss);
+
+       /* Display the MPOOLFILE structures: first a count, then each one. */
+       for (cnt = 0, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+           mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile));
+       (void)fprintf(fp, "%d total files\n", cnt);
+       for (cnt = 1, mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+           mfp != NULL; ++cnt, mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) {
+               (void)fprintf(fp, "file %d\n", cnt);
+               __memp_pmf(fp, mfp, data);
+       }
+
+       if (!data)
+               return;
+
+       /* Display the hash table list of BH's. */
+       (void)fprintf(fp, "%s\nHASH table of BH's (%lu buckets):\n",
+           DB_LINE, (u_long)mp->htab_buckets);
+       (void)fprintf(fp,
+           "longest chain searched %lu\n", mp->stat.st_hash_longest);
+       (void)fprintf(fp, "average chain searched %lu (total/calls: %lu/%lu)\n",
+           mp->stat.st_hash_examined /
+           (mp->stat.st_hash_searches ? mp->stat.st_hash_searches : 1),
+           mp->stat.st_hash_examined, mp->stat.st_hash_searches);
+       for (bucket = 0; bucket < mp->htab_buckets; ++bucket) {
+               /* Only print a bucket heading for non-empty buckets. */
+               if (SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh) != NULL)
+                       (void)fprintf(fp, "%lu:\n", (u_long)bucket);
+               for (bhp = SH_TAILQ_FIRST(&dbmp->htab[bucket], __bh);
+                   bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, mq, __bh))
+                       __memp_pbh(fp, dbmp, bhp, data);
+       }
+
+       /* Display the LRU list of BH's. */
+       (void)fprintf(fp, "LRU list of BH's (pgno/offset):");
+       for (sep = "\n    ", bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
+           bhp != NULL; sep = ", ", bhp = SH_TAILQ_NEXT(bhp, q, __bh))
+               (void)fprintf(fp, "%s%lu/%lu", sep,
+                   (u_long)bhp->pgno, (u_long)OFFSET(dbmp, bhp));
+       (void)fprintf(fp, "\n");
+}
+
+/*
+ * __memp_pmf --
+ *     Display an MPOOLFILE structure.
+ *
+ *     The page and cache counters are always written; the reference
+ *     count, access method and page size only when data is non-zero.
+ */
+static void
+__memp_pmf(fp, mfp, data)
+       FILE *fp;
+       MPOOLFILE *mfp;
+       int data;
+{
+       const char *access;
+
+       (void)fprintf(fp,
+           "    %lu pages created\n", mfp->stat.st_page_create);
+       (void)fprintf(fp,
+           "    %lu I/O's (%lu read, %lu written)\n",
+           mfp->stat.st_page_in + mfp->stat.st_page_out,
+           mfp->stat.st_page_in,
+           mfp->stat.st_page_out);
+
+       /* No hit rate line for a file that was never looked up. */
+       if (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss != 0) {
+               (void)fprintf(fp,
+                   "    %.0f%% cache hit rate (%lu hit, %lu miss)\n",
+                   ((double)mfp->stat.st_cache_hit /
+                   (mfp->stat.st_cache_hit + mfp->stat.st_cache_miss)) * 100,
+                   mfp->stat.st_cache_hit, mfp->stat.st_cache_miss);
+       }
+
+       if (data) {
+               access = mfp->can_mmap ? "mmap" : "read/write";
+               (void)fprintf(fp, "    %d references; %s; pagesize: %lu\n",
+                   mfp->ref, access, (u_long)mfp->stat.st_pagesize);
+       }
+}
+
+/*
+ * __memp_pbh --
+ *     Display a BH structure.
+ *
+ *     Writes one line with the buffer's region offset, owning file offset,
+ *     page number, reference count and any of the dirty/chk_write flags.
+ *     Nothing is written unless data is non-zero.
+ */
+static void
+__memp_pbh(fp, dbmp, bhp, data)
+       FILE *fp;
+       DB_MPOOL *dbmp;
+       BH *bhp;
+       int data;
+{
+       int dirty;
+
+       if (data == 0)
+               return;
+
+       (void)fprintf(fp, "    BH @ %lu (mf: %lu): page %lu; ref %lu",
+           (u_long)OFFSET(dbmp, bhp),
+           (u_long)bhp->mf_offset, (u_long)bhp->pgno, (u_long)bhp->ref);
+
+       /* The first flag printed follows "; ", any later one ", ". */
+       dirty = F_ISSET(bhp, BH_DIRTY) ? 1 : 0;
+       if (dirty)
+               (void)fprintf(fp, "%sdirty", "; ");
+       if (F_ISSET(bhp, BH_WRITE))
+               (void)fprintf(fp, "%schk_write", dirty ? ", " : "; ");
+
+       (void)fprintf(fp, "\n");
+}
diff --git a/db2/mp/mp_region.c b/db2/mp/mp_region.c
new file mode 100644 (file)
index 0000000..a5c5212
--- /dev/null
@@ -0,0 +1,340 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_region.c  10.11 (Sleepycat) 8/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+/*
+ * __memp_ralloc --
+ *     Allocate some space in the mpool region.
+ *
+ *     On success returns 0, stores the chunk's address through retp (which
+ *     is treated as a void **) and, if offsetp is not NULL, the chunk's
+ *     region offset through offsetp.  When the region allocator has no
+ *     room, buffers are reclaimed -- first from the free list, then from
+ *     the LRU list, writing dirty buffers as needed -- and the allocation
+ *     is retried.  Once the LRU list has been walked without freeing
+ *     enough, one last attempt is made and its error is reported and
+ *     returned.  An error from __memp_bhwrite() is returned as is.
+ *
+ *     NOTE(review): the comments below talk about discarding and
+ *     reacquiring the region lock, so callers presumably hold it --
+ *     confirm against the callers.
+ *
+ * PUBLIC: int __memp_ralloc __P((DB_MPOOL *, size_t, size_t *, void *));
+ */
+int
+__memp_ralloc(dbmp, len, offsetp, retp)
+       DB_MPOOL *dbmp;
+       size_t len, *offsetp;
+       void *retp;
+{
+       BH *bhp, *nbhp;
+       MPOOL *mp;
+       MPOOLFILE *mfp;
+       size_t fsize, total;
+       int nomore, restart, ret, wrote;
+       void *p;
+
+       mp = dbmp->mp;
+
+       nomore = 0;
+alloc: if ((ret = __db_shalloc(dbmp->addr, len, MUTEX_ALIGNMENT, &p)) == 0) {
+               if (offsetp != NULL)
+                       *offsetp = OFFSET(dbmp, p);
+               *(void **)retp = p;
+               return (0);
+       }
+       if (nomore) {
+               __db_err(dbmp->dbenv, "%s", strerror(ret));
+               return (ret);
+       }
+
+       /* Look for a buffer on the free list that's the right size. */
+       for (bhp =
+           SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
+               nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
+
+               if (__db_shsizeof(bhp) == len) {
+                       SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
+                       if (offsetp != NULL)
+                               *offsetp = OFFSET(dbmp, bhp);
+                       *(void **)retp = bhp;
+                       return (0);
+               }
+       }
+
+       /* Discard from the free list until we've freed enough memory. */
+       total = 0;
+       for (bhp =
+           SH_TAILQ_FIRST(&mp->bhfq, __bh); bhp != NULL; bhp = nbhp) {
+               nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
+
+               SH_TAILQ_REMOVE(&mp->bhfq, bhp, q, __bh);
+               __db_shalloc_free(dbmp->addr, bhp);
+
+               /*
+                * Retry as soon as we've freed up sufficient space.  If we
+                * have to coalesce memory to satisfy the request, don't
+                * try until it's likely (possible?) that we'll succeed.
+                *
+                * NOTE(review): the size is read after the chunk has been
+                * handed back to the allocator -- confirm __db_shsizeof()
+                * is still meaningful at that point.
+                */
+               total += fsize = __db_shsizeof(bhp);
+               if (fsize >= len || total >= 3 * len)
+                       goto alloc;
+       }
+
+retry: /* Find a buffer we can flush; pure LRU. */
+       total = 0;
+       for (bhp =
+           SH_TAILQ_FIRST(&mp->bhq, __bh); bhp != NULL; bhp = nbhp) {
+               nbhp = SH_TAILQ_NEXT(bhp, q, __bh);
+
+               /* Ignore pinned or locked (I/O in progress) buffers. */
+               if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
+                       continue;
+
+               /*
+                * Only __memp_bhwrite() sets restart, and it's only called
+                * for dirty buffers.  Clear it here so that the test made
+                * after a clean buffer is freed never reads an unset value
+                * or one left over from an earlier walk.
+                */
+               restart = 0;
+
+               /* Find the associated MPOOLFILE. */
+               mfp = ADDR(dbmp, bhp->mf_offset);
+
+               /*
+                * Write the page if it's dirty.
+                *
+                * If we wrote the page, fall through and free the buffer.  We
+                * don't have to rewalk the list to acquire the buffer because
+                * it was never available for any other process to modify it.
+                * If we didn't write the page, but we discarded and reacquired
+                * the region lock, restart the buffer list walk.  If we neither
+                * wrote the buffer nor discarded the region lock, continue down
+                * the buffer list.
+                */
+               if (F_ISSET(bhp, BH_DIRTY)) {
+                       if ((ret = __memp_bhwrite(dbmp,
+                           mfp, bhp, &restart, &wrote)) != 0)
+                               return (ret);
+
+                       /*
+                        * It's possible that another process wants this buffer
+                        * and incremented the ref count while we were writing
+                        * it.
+                        */
+                       if (bhp->ref != 0)
+                               goto retry;
+
+                       if (wrote)
+                               ++mp->stat.st_rw_evict;
+                       else {
+                               if (restart)
+                                       goto retry;
+                               else
+                                       continue;
+                       }
+               } else
+                       ++mp->stat.st_ro_evict;
+
+               /*
+                * Check to see if the buffer is the size we're looking for.
+                * If it is, simply reuse it.
+                */
+               total += fsize = __db_shsizeof(bhp);
+               if (fsize == len) {
+                       __memp_bhfree(dbmp, mfp, bhp, 0);
+
+                       if (offsetp != NULL)
+                               *offsetp = OFFSET(dbmp, bhp);
+                       *(void **)retp = bhp;
+                       return (0);
+               }
+
+               /* Free the buffer. */
+               __memp_bhfree(dbmp, mfp, bhp, 1);
+
+               /*
+                * Retry as soon as we've freed up sufficient space.  If we
+                * have to coalesce memory to satisfy the request, don't
+                * try until it's likely (possible?) that we'll succeed.
+                */
+               if (fsize >= len || total >= 3 * len)
+                       goto alloc;
+
+               /* Restart the walk if we discarded the region lock. */
+               if (restart)
+                       goto retry;
+       }
+
+       /* Nothing left to reclaim: one final attempt, then give up. */
+       nomore = 1;
+       goto alloc;
+}
+
+/*
+ * __memp_ropen --
+ *     Attach to, and optionally create, the mpool region.
+ *
+ *     With DB_CREATE set in flags the region is created: by malloc for a
+ *     private pool (MP_ISPRIVATE), otherwise through __db_rcreate().  A
+ *     new region is initialized with the MPOOL structure first, followed
+ *     by free space from which the buffer hash table is allocated.  If the
+ *     region wasn't created here it is joined with __db_ropen(), retrying
+ *     a couple of times, a second apart, on EAGAIN.
+ *
+ *     A cachesize of 0 selects DB_CACHESIZE_DEF and anything else below
+ *     DB_CACHESIZE_MIN is raised to DB_CACHESIZE_MIN.
+ *
+ *     Returns 0 on success, with dbmp->maddr, mp, addr, htab and fd set,
+ *     or the error from the failing step.
+ *
+ * PUBLIC: int __memp_ropen
+ * PUBLIC:    __P((DB_MPOOL *, const char *, size_t, int, int));
+ */
+int
+__memp_ropen(dbmp, path, cachesize, mode, flags)
+       DB_MPOOL *dbmp;
+       const char *path;
+       size_t cachesize;
+       int mode, flags;
+{
+       MPOOL *mp;
+       size_t rlen;
+       int fd, newregion, ret, retry_cnt;
+
+       /*
+        * A private pool is malloc'd and never gets a file descriptor, so
+        * start from a known "no descriptor" value rather than leaving fd
+        * unset for the error path and the dbmp->fd assignment below.
+        */
+       fd = -1;
+
+       /*
+        * Unlike other DB subsystems, mpool can't simply grow the region
+        * because it returns pointers into the region to its clients.  To
+        * "grow" the region, we'd have to allocate a new region and then
+        * store a region number in the structures that reference regional
+        * objects.  It's reasonable that we fail regardless, as clients
+        * shouldn't have every page in the region pinned, so the only
+        * "failure" mode should be a performance penalty because we don't
+        * find a page in the cache that we'd like to have found.
+        *
+        * Up the user's cachesize by 25% to account for our overhead.
+        */
+       if (cachesize < DB_CACHESIZE_MIN) {
+               if (cachesize == 0)
+                       cachesize = DB_CACHESIZE_DEF;
+               else
+                       cachesize = DB_CACHESIZE_MIN;
+       }
+       rlen = cachesize + cachesize / 4;
+
+       /* Map in the region. */
+       retry_cnt = newregion = 0;
+retry: if (LF_ISSET(DB_CREATE)) {
+               /*
+                * If it's a private mpool, use malloc, it's a lot faster than
+                * instantiating a region.
+                *
+                * XXX
+                * If we're doing locking and don't have spinlocks for this
+                * architecture, we'd have to instantiate the file, we need
+                * the file descriptor for locking.  However, it should not
+                * be possible for DB_THREAD to be set if HAVE_SPINLOCKS aren't
+                * defined.
+                */
+               if (F_ISSET(dbmp, MP_ISPRIVATE))
+                       ret = (dbmp->maddr = malloc(rlen)) == NULL ? ENOMEM : 0;
+               else
+                       ret = __db_rcreate(dbmp->dbenv, DB_APP_NONE, path,
+                           DB_DEFAULT_MPOOL_FILE, mode, rlen, &fd,
+                           &dbmp->maddr);
+               if (ret == 0) {
+                       /* Put the MPOOL structure first in the region. */
+                       mp = dbmp->maddr;
+
+                       SH_TAILQ_INIT(&mp->bhq);
+                       SH_TAILQ_INIT(&mp->bhfq);
+                       SH_TAILQ_INIT(&mp->mpfq);
+
+                       /* Initialize the rest of the region as free space. */
+                       dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
+                       __db_shalloc_init(dbmp->addr, rlen - sizeof(MPOOL));
+
+                       /*
+                        * Pretend that the cache will be broken up into 4K
+                        * pages, and that we want to keep it under, say, 10
+                        * pages on each chain.  This means a 256MB cache will
+                        * allocate ~6500 offset pairs.
+                        */
+                       mp->htab_buckets =
+                           __db_tablesize((cachesize / (4 * 1024)) / 10);
+
+                       /* Allocate hash table space and initialize it. */
+                       if ((ret = __db_shalloc(dbmp->addr,
+                           mp->htab_buckets * sizeof(DB_HASHTAB),
+                           0, &dbmp->htab)) != 0)
+                               goto err;
+                       __db_hashinit(dbmp->htab, mp->htab_buckets);
+                       mp->htab = OFFSET(dbmp, dbmp->htab);
+
+                       memset(&mp->stat, 0, sizeof(mp->stat));
+                       mp->stat.st_cachesize = cachesize;
+
+                       mp->flags = 0;
+
+                       newregion = 1;
+               } else if (ret != EEXIST)
+                       return (ret);
+       }
+
+       /* If we didn't or couldn't create the region, try and join it. */
+       if (!newregion &&
+           (ret = __db_ropen(dbmp->dbenv, DB_APP_NONE,
+           path, DB_DEFAULT_MPOOL_FILE, 0, &fd, &dbmp->maddr)) != 0) {
+               /*
+                * If we failed because the file wasn't available, wait a
+                * second and try again.
+                */
+               if (ret == EAGAIN && ++retry_cnt < 3) {
+                       (void)__db_sleep(1, 0);
+                       goto retry;
+               }
+               return (ret);
+       }
+
+       /* Set up the common pointers. */
+       dbmp->mp = dbmp->maddr;
+       dbmp->addr = (u_int8_t *)dbmp->maddr + sizeof(MPOOL);
+
+       /*
+        * If not already locked, lock the region -- if it's a new region,
+        * then either __db_rcreate() locked it for us or we malloc'd it
+        * instead of creating a region, neither of which requires locking
+        * here.
+        */
+       if (!newregion)
+               LOCKREGION(dbmp);
+
+       /*
+        * Get the hash table address; it's on the shared page, so we have
+        * to lock first.
+        */
+       dbmp->htab = ADDR(dbmp, dbmp->mp->htab);
+
+       dbmp->fd = fd;
+
+       /* If we locked the region, release it now. */
+       if (!F_ISSET(dbmp, MP_ISPRIVATE))
+               UNLOCKREGION(dbmp);
+       return (0);
+
+       /*
+        * Only reached when the hash table couldn't be allocated in a region
+        * we just created.  Release it: a private region has no descriptor
+        * but still has to be freed, which __memp_rclose() does.
+        */
+err:   if (fd != -1 || F_ISSET(dbmp, MP_ISPRIVATE)) {
+               dbmp->fd = fd;
+               (void)__memp_rclose(dbmp);
+       }
+
+       if (newregion)
+               (void)memp_unlink(path, 1, dbmp->dbenv);
+       return (ret);
+}
+
+/*
+ * __memp_rclose --
+ *     Close the mpool region.
+ *
+ *     A private (MP_ISPRIVATE) region is simply freed and 0 is returned;
+ *     any other region is closed with __db_rclose(), whose result is
+ *     returned.
+ *
+ * PUBLIC: int __memp_rclose __P((DB_MPOOL *));
+ */
+int
+__memp_rclose(dbmp)
+       DB_MPOOL *dbmp;
+{
+       if (!F_ISSET(dbmp, MP_ISPRIVATE))
+               return (__db_rclose(dbmp->dbenv, dbmp->fd, dbmp->maddr));
+
+       free(dbmp->maddr);
+       return (0);
+}
diff --git a/db2/mp/mp_sync.c b/db2/mp/mp_sync.c
new file mode 100644 (file)
index 0000000..4f12056
--- /dev/null
@@ -0,0 +1,205 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mp_sync.c    10.8 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_shash.h"
+#include "mp.h"
+#include "common_ext.h"
+
+/*
+ * memp_sync --
+ *     Mpool sync function.
+ *
+ *     Marks every dirty or pinned buffer in the pool as needing to be
+ *     written for the checkpoint identified by lsnp, then writes the ones
+ *     it can.
+ *
+ *     Returns 0 when no marked buffers remain, DB_INCOMPLETE when some
+ *     still do, EINVAL if the environment has no logging, EPERM if a
+ *     buffer couldn't be flushed, or the error from __memp_bhwrite().
+ */
+int
+memp_sync(dbmp, lsnp)
+       DB_MPOOL *dbmp;
+       DB_LSN *lsnp;
+{
+       BH *bhp;
+       DB_ENV *dbenv;
+       MPOOL *mp;
+       MPOOLFILE *mfp;
+       int can_write, wrote, lsn_cnt, restart, ret;
+
+       dbenv = dbmp->dbenv;
+
+       if (dbenv->lg_info == NULL) {
+               __db_err(dbenv, "memp_sync requires logging");
+               return (EINVAL);
+       }
+
+       LOCKREGION(dbmp);
+
+       /*
+        * If the application is asking about a previous call, and we haven't
+        * found any buffers that the application holding the pin couldn't
+        * write, return yes or no based on the current count.  Note, if the
+        * application is asking about a LSN *smaller* than one we've already
+        * handled, then we return based on the count for that LSN.
+        */
+       mp = dbmp->mp;
+       if (!F_ISSET(mp, MP_LSN_RETRY) && log_compare(lsnp, &mp->lsn) <= 0) {
+               if (mp->lsn_cnt == 0) {
+                       *lsnp = mp->lsn;
+                       ret = 0;
+               } else
+                       ret = DB_INCOMPLETE;
+
+               UNLOCKREGION(dbmp);
+               return (ret);
+       }
+
+       /* Else, it's a new checkpoint. */
+       F_CLR(mp, MP_LSN_RETRY);
+
+       /*
+        * Save the LSN.  We know that it's a new LSN or larger than the one
+        * for which we were already doing a checkpoint.  (BTW, I don't expect
+        * to see multiple LSN's from the same or multiple processes, but You
+        * Just Never Know.  Responding as if they all called with the largest
+        * of the LSNs specified makes everything work.)
+        *
+        * We don't currently use the LSN we save.  We could potentially save
+        * the last-written LSN in each buffer header and use it to determine
+        * what buffers need to be written.  The problem with this is that it's
+        * sizeof(LSN) more bytes of buffer header.  We currently write all the
+        * dirty buffers instead.
+        *
+        * Walk the list of shared memory segments clearing the count of
+        * buffers waiting to be written.
+        */
+       mp->lsn = *lsnp;
+       mp->lsn_cnt = 0;
+       for (mfp = SH_TAILQ_FIRST(&dbmp->mp->mpfq, __mpoolfile);
+           mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile))
+               mfp->lsn_cnt = 0;
+
+       /*
+        * Walk the list of buffers and mark all dirty buffers to be written
+        * and all pinned buffers to be potentially written.  We do this in a
+        * single fell swoop while holding the region locked so that processes
+        * can't make new buffers dirty, causing us to never finish.  Since
+        * the application may have restarted the sync, clear any BH_WRITE
+        * flags that appear to be left over.
+        */
+       can_write = lsn_cnt = 0;
+       for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
+           bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh))
+               if (F_ISSET(bhp, BH_DIRTY) || bhp->ref != 0) {
+                       F_SET(bhp, BH_WRITE);
+
+                       /* An unpinned buffer is one we can write ourselves. */
+                       if (bhp->ref == 0)
+                               can_write = 1;
+
+                       mfp = ADDR(dbmp, bhp->mf_offset);
+                       ++mfp->lsn_cnt;
+
+                       ++lsn_cnt;
+               } else
+                       F_CLR(bhp, BH_WRITE);
+
+       mp->lsn_cnt = lsn_cnt;
+
+       /*
+        * If there are no buffers we can write, we're done.  Work out the
+        * result before releasing the region: mp points into the shared
+        * region and mustn't be read once the lock is dropped.
+        */
+       if (!can_write) {
+               ret = mp->lsn_cnt ? DB_INCOMPLETE : 0;
+               UNLOCKREGION(dbmp);
+               return (ret);
+       }
+
+       /*
+        * Write any buffers that we can.  Restart the walk after each write,
+        * __memp_pgwrite() discards and reacquires the region lock during I/O.
+        */
+retry: for (bhp = SH_TAILQ_FIRST(&mp->bhq, __bh);
+           bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
+               /* Ignore unmarked, pinned or locked buffers. */
+               if (!F_ISSET(bhp, BH_WRITE) ||
+                   bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED))
+                       continue;
+
+               mfp = ADDR(dbmp, bhp->mf_offset);
+               if ((ret =
+                   __memp_bhwrite(dbmp, mfp, bhp, &restart, &wrote)) != 0)
+                       goto err;
+               if (wrote) {
+                       if (restart)
+                               goto retry;
+                       continue;
+               }
+               __db_err(dbenv, "%s: unable to flush page: %lu",
+                   ADDR(dbmp, mfp->path_off), (u_long)bhp->pgno);
+               ret = EPERM;
+               goto err;
+       }
+       ret = mp->lsn_cnt ? DB_INCOMPLETE : 0;
+
+err:   UNLOCKREGION(dbmp);
+       return (ret);
+}
+
+/*
+ * memp_fsync --
+ *     Mpool file sync function.
+ *
+ *     Writes out the dirty buffers that belong to dbmfp's file.  Buffers
+ *     that are pinned, locked, or that __memp_pgwrite() didn't write are
+ *     counted rather than written.
+ *
+ *     Returns 0 if nothing was left unwritten (always for a temporary
+ *     file, which is never synced), DB_INCOMPLETE if any buffer was
+ *     counted, or the error from __memp_pgwrite().
+ */
+int
+memp_fsync(dbmfp)
+       DB_MPOOLFILE *dbmfp;
+{
+       BH *bhp;
+       DB_MPOOL *dbmp;
+       size_t mf_offset;
+       int pincnt, restart, ret, wrote;
+
+       /* We don't sync temporary files -- what's the use? */
+       if (F_ISSET(dbmfp, MP_PATH_TEMP))
+               return (0);
+
+       dbmp = dbmfp->dbmp;
+       ret = 0;
+
+       mf_offset = OFFSET(dbmp, dbmfp->mfp);
+
+       LOCKREGION(dbmp);
+
+       /*
+        * Walk the list of buffer headers for the MPOOLFILE, and write out any
+        * dirty buffers that we can.
+        */
+retry: pincnt = 0;
+       for (bhp = SH_TAILQ_FIRST(&dbmp->mp->bhq, __bh);
+           bhp != NULL; bhp = SH_TAILQ_NEXT(bhp, q, __bh)) {
+               /* Only dirty buffers belonging to this file are of interest. */
+               if (!F_ISSET(bhp, BH_DIRTY) || bhp->mf_offset != mf_offset)
+                       continue;
+
+               if (bhp->ref != 0 || F_ISSET(bhp, BH_LOCKED)) {
+                       ++pincnt;
+                       continue;
+               }
+               if ((ret =
+                   __memp_pgwrite(dbmfp, bhp, &restart, &wrote)) != 0)
+                       goto err;
+               if (!wrote)
+                       ++pincnt;
+               if (restart)
+                       goto retry;
+       }
+
+       /* Success and failure both leave through here: always unlock. */
+err:   UNLOCKREGION(dbmp);
+       return (ret == 0 ? (pincnt ? DB_INCOMPLETE : 0) : ret);
+}
diff --git a/db2/mutex/68020.gcc b/db2/mutex/68020.gcc
new file mode 100644 (file)
index 0000000..9d8be64
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * @(#)68020.gcc       10.1 (Sleepycat) 4/12/97
+ *
+ * For gcc/68K, 0 is clear, 1 is set.
+ */
+/*
+ * TSL_SET --
+ *     Test-and-set the lock that tsl points to: a `tas' on the lock
+ *     followed by a `seq' into a local int, whose low bit is the value of
+ *     the macro.  Written as a gcc statement expression.
+ *
+ *     NOTE(review): presumably the value is non-zero when the lock was
+ *     clear beforehand, i.e. when it was acquired -- confirm against the
+ *     68020 tas/seq definitions and the code that uses TSL_SET.
+ */
+#define TSL_SET(tsl) ({                                                        \
+       register tsl_t *__l = (tsl);                                    \
+       int __r;                                                        \
+           asm volatile("tas  %1; \n                                   \
+                         seq  %0"                                      \
+               : "=dm" (__r), "=m" (*__l)                              \
+               : "1" (*__l)                                            \
+               );                                                      \
+       __r & 1;                                                        \
+})
+
+/* Release the lock by storing 0 (clear); initializing it is the same. */
+#define        TSL_UNSET(tsl)  (*(tsl) = 0)
+#define        TSL_INIT(tsl)   TSL_UNSET(tsl)
+
diff --git a/db2/mutex/README b/db2/mutex/README
new file mode 100644 (file)
index 0000000..30d6b6a
--- /dev/null
@@ -0,0 +1,105 @@
+# @(#)README   10.1 (Sleepycat) 4/12/97
+
+Resource locking routines: lock based on a db_mutex_t.  All this gunk
+(including trying to make assembly code portable), is necessary because
+System V semaphores require system calls for uncontested locks and we
+don't want to make two system calls per resource lock.
+
+First, this is how it works.  The db_mutex_t structure contains a resource
+test-and-set lock (tsl), a file offset, a pid for debugging and statistics
+information.
+
+If HAVE_SPINLOCKS is defined (i.e. we know how to do test-and-sets for
+this compiler/architecture combination), we try and lock the resource tsl
+TSL_DEFAULT_SPINS times.  If we can't acquire the lock that way, we use
+a system call to sleep for 10ms, 20ms, 40ms, etc.  (The time is bounded
+at 1 second, just in case.)  Using the timer backoff means that there are
+two assumptions: that locks are held for brief periods (never over system
+calls or I/O) and that locks are not hotly contested.
+
+If HAVE_SPINLOCKS is not defined, i.e. we can't do test-and-sets, we use
+a file descriptor to do byte locking on a file at a specified offset.  In
+this case, ALL of the locking is done in the kernel.  Because file
+descriptors are allocated per process, we have to provide the file
+descriptor as part of the lock/unlock call.  We still have to do timer
+backoff because we need to be able to block ourselves, i.e. the lock
+manager causes processes to wait by having the process acquire a mutex
+and then attempting to re-acquire the mutex.  There's no way to use kernel
+locking to block yourself, i.e. if you hold a lock and attempt to
+re-acquire it, the attempt will succeed.
+
+Next, let's talk about why it doesn't work the way a reasonable person
+would think it should work.
+
+Ideally, we'd have the ability to try to lock the resource tsl, and if
+that fails, increment a counter of waiting processes, then block in the
+kernel until the tsl is released.  The process holding the resource tsl
+would see the wait counter when it went to release the resource tsl, and
+would wake any waiting processes up after releasing the lock.  This would
+actually require both another tsl (call it the mutex tsl) and
+synchronization between the call that blocks in the kernel and the actual
+resource tsl.  The mutex tsl would be used to protect accesses to the
+db_mutex_t itself.  Locking the mutex tsl would be done by a busy loop,
+which is safe because processes would never block holding that tsl (all
+they would do is try to obtain the resource tsl and set/check the wait
+count).  The problem in this model is that the blocking call into the
+kernel requires a blocking semaphore, i.e. one whose normal state is
+locked.
+
+The only portable forms of locking under UNIX are fcntl(2) on a file
+descriptor/offset, and System V semaphores.  Neither of these locking
+methods is sufficient to solve the problem.
+
+The problem with fcntl locking is that only the process that obtained the
+lock can release it.  Remember, we want the normal state of the kernel
+semaphore to be locked.  So, if the creator of the db_mutex_t were to
+initialize the lock to "locked", then a second process locks the resource
+tsl, and then a third process needs to block, waiting for the resource
+tsl, when the second process wants to wake up the third process, it can't
+because it's not the holder of the lock!  For the second process to be
+the holder of the lock, we would have to make a system call per
+uncontested lock, which is what we were trying to get away from in the
+first place.
+
+There are some hybrid schemes, such as signaling the holder of the lock,
+or using a different blocking offset depending on which process is
+holding the lock, but it gets complicated fairly quickly.  I'm open to
+suggestions, but I'm not holding my breath.
+
+Regardless, we use this form of locking when HAVE_SPINLOCKS is not
+defined, (i.e. we're locking in the kernel) because it doesn't have the
+limitations found in System V semaphores, and because the normal state of
+the kernel object in that case is unlocked, so the process releasing the
+lock is also the holder of the lock.
+
+The System V semaphore design has a number of other limitations that make
+it inappropriate for this task.  Namely:
+
+First, the semaphore key name space is separate from the file system name
+space (although there exist methods for using file names to create
+semaphore keys).  If we use a well-known key, there's no reason to believe
+that any particular key will not already be in use, either by another
+instance of the DB application or some other application, in which case
+the DB application will fail.  If we create a key, then we have to use a
+file system name to rendezvous and pass around the key.
+
+Second, System V semaphores traditionally have compile-time, system-wide
+limits on the number of semaphore keys that you can have.  Typically, that
+number is far too low for any practical purpose.  Since the semaphores
+permit more than a single slot per semaphore key, we could try and get
+around that limit by using multiple slots, but that means that the file
+that we're using for rendezvous is going to have to contain slot
+information as well as semaphore key information, and we're going to be
+reading/writing it on every db_mutex_t init or destroy operation.  Anyhow,
+similar compile-time, system-wide limits on the numbers of slots per
+semaphore key kick in, and you're right back where you started.
+
+My fantasy is that once POSIX.1 standard mutexes are in wide-spread use,
+we can switch to them.  My guess is that it won't happen, because the
+POSIX semaphores are only required to work for threads within a process,
+and not independent processes.
+
+Note: there are races in the statistics code, but since it's only
+statistics, I didn't bother fixing them.  (The fix requires a mutex tsl,
+so, when/if this code is fixed to do rational locking (see above), then
+change the statistics update code to acquire/release the mutex tsl.)
diff --git a/db2/mutex/alpha.dec b/db2/mutex/alpha.dec
new file mode 100644 (file)
index 0000000..83ed371
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * @(#)alpha.dec       8.3 (Sleepycat Software) 1/18/97
+ *
+ * The DEC C asm acts as a pseudo-call.  The first argument is the assembly
+ * code, and the remaining arguments are assigned as in a procedure call, to
+ * r16, r17, etc. (represented in asm as %a0, %a1, and so forth).
+ *
+ * From: Dave Butenhof.
+ */
+
+#include <c_asm.h>
+
+#define        TSL_SET(tsl)    (asm ("mb;                                      \
+    10:        ldl_l   %v0,(%a0) ;                                             \
+       bne     %v0,30f ;                                               \
+       or      %v0,1,%r1 ;                                             \
+       stl_c   %r1,(%a0) ;                                             \
+       beq     %r1,20f ;                                               \
+       mb      ;                                                       \
+       br      %r31,30f ;                                              \
+    20:        br      %r31,10b ;                                              \
+    30:        ", (tsl)))
+
+THIS WAS NOT CONVERTED TO TAKE A POINTER AS AN ARGUMENT...
+#define        TSL_UNSET(tsl)  (asm ("mb"), *(tsl) = 0)
diff --git a/db2/mutex/alpha.gcc b/db2/mutex/alpha.gcc
new file mode 100644 (file)
index 0000000..247d04c
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * @(#)alpha.gcc       10.1 (Sleepycat) 4/12/97
+ *
+ * The code appearing below is taken from Richard L. Sites, ed.  "Alpha
+ * Architecture Reference Manual", Digital Press, 1992, page 5-7 and 5-8.
+ * There are 2 modifications:
+ *
+ * 1. The jump from blbs __r1,30f to !__r1, which is dictated by the way the
+ * TSL_SET macro is used.  The code suggested in Sites includes the main loop
+ * of the spin lock, whereas in this code the rest of the loop is specified in C.
+ * The generated code might be suboptimal if the compiler generates a forward
+ * branch for the usual case in which the mutex is uncontested.
+ *
+ * 2. At label 20, Sites suggests including code for testing for an excessive
+ * number of _processor_ lock conflicts.  (The stq_c instruction stores its
+ * first argument provided that no other processor has written to a byte range
+ * including its memory-location argument.)  Absent such checking the code
+ * below could conceivably stall silently on a multiprocessor alpha, depending
+ * on how often processor/processor conflicts occur in a particular byte range.
+ *
+ * Note that the mb ("memory-barrier") instruction in TSL_UNSET is critical to
+ * correct operation in a multiprocessor alpha (as is, of course, the mb in
+ * the TSL_SET macro).  Without the mb, changes to shared memory that occurred
+ * inside the critical section (before the TSL_UNSET) might reach shared memory
+ * _after_ the change of tsl to 0, thereby permitting another processor to see
+ * an inconsistent view of the data protected by the mutex.
+ *
+ * For gcc/alpha, 0 is clear, 1 is set.
+ */
+#define TSL_SET(tsl) ({                                                        \
+       register tsl_t *__l = (tsl);                                    \
+       register tsl_t __r1, __r2;                                      \
+       __asm__ volatile("                                              \n\
+          10: ldq_l %0,(%2)                                            \n\
+              blbs  %0,30f                                             \n\
+              or    %0,1,%1                                            \n\
+              stq_c %1,(%2)                                            \n\
+              beq   %1,20f                                             \n\
+              mb                                                       \n\
+              br    30f                                                \n\
+          20: br    10b                                                \n\
+          30: "                                                        \
+         : "=&r" (__r1), "=&r" (__r2)                                  \
+         : "r" (__l));                                                 \
+       !__r1;                                                          \
+})
+
+#define TSL_UNSET(tsl) ({                                              \
+       register tsl_t *__l = (tsl);                                    \
+       __asm__ volatile("mb; stq $31,(%0);" : : "r" (__l));            \
+})
+#define        TSL_INIT(tsl)   TSL_UNSET(tsl)
diff --git a/db2/mutex/mutex.c b/db2/mutex/mutex.c
new file mode 100644 (file)
index 0000000..b23f738
--- /dev/null
@@ -0,0 +1,280 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)mutex.c      10.22 (Sleepycat) 8/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "common_ext.h"
+
+#ifdef HAVE_SPINLOCKS
+
+#ifdef HAVE_FUNC_AIX
+#define        TSL_INIT(x)
+#define        TSL_SET(x)      (!_check_lock(x, 0, 1))
+#define        TSL_UNSET(x)    _clear_lock(x, 0)
+#endif
+
+#ifdef HAVE_ASSEM_MC68020_GCC
+#include "68020.gcc"
+#endif
+
+#if defined(HAVE_FUNC_MSEM)
+/*
+ * XXX
+ * Should we not use MSEM_IF_NOWAIT and let the system block for us?
+ * I've no idea if this will block all threads in the process or not.
+ */
+#define        TSL_INIT(x)     msem_init(x, MSEM_UNLOCKED)
+#define        TSL_SET(x)      (!msem_lock(x, MSEM_IF_NOWAIT))
+#define        TSL_UNSET(x)    msem_unlock(x, 0)
+#endif
+
+#ifdef HAVE_FUNC_SGI
+#define        TSL_INIT(x)     init_lock(x)
+#define        TSL_SET(x)      (!acquire_lock(x))
+#define        TSL_UNSET(x)    release_lock(x)
+#endif
+
+#ifdef HAVE_FUNC_SOLARIS
+/*
+ * Semaphore calls don't work on Solaris 5.5.
+ *
+ * #define     TSL_INIT(x)     sema_init(x, 1, USYNC_PROCESS, NULL)
+ * #define     TSL_SET(x)      (sema_wait(x) == 0)
+ * #define     TSL_UNSET(x)    sema_post(x)
+ */
+#define        TSL_INIT(x)
+#define        TSL_SET(x)      (_lock_try(x))
+#define        TSL_UNSET(x)    _lock_clear(x)
+#endif
+
+#ifdef HAVE_ASSEM_SPARC_GCC
+#include "sparc.gcc"
+#endif
+
+#ifdef HAVE_ASSEM_UTS4_CC
+#define TSL_INIT(x)
+#define TSL_SET(x)     (!uts_lock(x, 1))
+#define TSL_UNSET(x)   (*(x) = 0)
+#endif
+
+#ifdef HAVE_ASSEM_X86_GCC
+#include "x86.gcc"
+#endif
+
+#if defined(_WIN32)
+/* DBDB this needs to be byte-aligned!! */
+#define        TSL_INIT(tsl)
+#define        TSL_SET(tsl)    (!InterlockedExchange((PLONG)tsl, 1))
+#define        TSL_UNSET(tsl)  (*(tsl) = 0)
+#endif
+
+#ifdef macintosh
+/* Mac spinlocks are simple because we cannot possibly be preempted. */
+#define        TSL_INIT(tsl)
+#define        TSL_SET(tsl)    (*(tsl) = 1)
+#define        TSL_UNSET(tsl)  (*(tsl) = 0)
+#endif
+
+#endif /* HAVE_SPINLOCKS */
+
+#ifdef MORE_THAN_ONE_PROCESSOR
+#define        TSL_DEFAULT_SPINS       5       /* Default spins before block. */
+#else
+#define        TSL_DEFAULT_SPINS       1       /* Default spins before block. */
+#endif
+
+/*
+ * __db_mutex_init --
+ *     Initialize a DB mutex structure.
+ *
+ *     mp:  the mutex to initialize; the whole structure is zeroed first.
+ *     off: file offset of the byte that __db_mutex_lock locks with
+ *          fcntl(2).  It is only recorded when HAVE_SPINLOCKS is not
+ *          defined.
+ *
+ *     With HAVE_SPINLOCKS the resource tsl is set up through TSL_INIT
+ *     instead.  When DEBUG is defined, a mutex that is not aligned to
+ *     MUTEX_ALIGNMENT bytes is reported on stderr and the process aborts.
+ *
+ * PUBLIC: void __db_mutex_init __P((db_mutex_t *, off_t));
+ */
+void
+__db_mutex_init(mp, off)
+       db_mutex_t *mp;
+       off_t off;
+{
+#ifdef DEBUG
+       if ((ALIGNTYPE)mp & (MUTEX_ALIGNMENT - 1)) {
+               (void)fprintf(stderr,
+                   "MUTEX ERROR: mutex NOT %d-byte aligned!\n",
+                   MUTEX_ALIGNMENT);
+               abort();
+       }
+#endif
+       memset(mp, 0, sizeof(db_mutex_t));
+
+#ifdef HAVE_SPINLOCKS
+       TSL_INIT(&mp->tsl_resource);
+#else
+       mp->off = off;
+#endif
+}
+
+#define        MS(n)           ((n) * 1000)    /* Milliseconds to micro-seconds. */
+#define        SECOND          (MS(1000))      /* A second's worth of micro-seconds. */
+
+/*
+ * __db_mutex_lock
+ *     Lock on a mutex, logically blocking if necessary.
+ *
+ *     mp:    the mutex to acquire.
+ *     fd:    descriptor of the file used for fcntl(2) byte locking; it is
+ *            only used when HAVE_SPINLOCKS is not defined.
+ *     yield: optional function called to give up the processor between
+ *            attempts.  If it is NULL or returns non-zero we sleep
+ *            instead, starting at 10ms and doubling up to 1 second.
+ *
+ *     Returns 0 once the mutex is held.  The spinlock version has no
+ *     failure return (under DEBUG it aborts if the acquired lock already
+ *     records a pid); the fcntl version returns 1 if either fcntl(2) call
+ *     fails.
+ *
+ * PUBLIC: int __db_mutex_lock __P((db_mutex_t *, int, int (*)(void)));
+ */
+int
+__db_mutex_lock(mp, fd, yield)
+       db_mutex_t *mp;
+       int fd;
+       int (*yield) __P((void));
+{
+       u_long usecs;
+
+#ifdef HAVE_SPINLOCKS
+       int nspins;
+
+       for (usecs = MS(10);;) {
+               /*
+                * Try and acquire the uncontested resource lock for
+                * TSL_DEFAULT_SPINS.
+                */
+               for (nspins = TSL_DEFAULT_SPINS; nspins > 0; --nspins)
+                       if (TSL_SET(&mp->tsl_resource)) {
+#ifdef DEBUG
+                               if (mp->pid != 0) {
+                                       (void)fprintf(stderr,
+                   "MUTEX ERROR: __db_mutex_lock: lock currently locked\n");
+                                       abort();
+                               }
+                               mp->pid = getpid();
+#endif
+#ifdef MUTEX_STATISTICS
+                               if (usecs == MS(10))
+                                       ++mp->mutex_set_nowait;
+                               else
+                                       ++mp->mutex_set_wait;
+#endif
+                               return (0);
+                       }
+
+               /* Yield the processor; wait 10ms initially, up to 1 second. */
+               if (yield == NULL || yield() != 0) {
+                       (void)__db_sleep(0, usecs);
+                       if ((usecs <<= 1) > SECOND)
+                               usecs = SECOND;
+               }
+       }
+       /* NOTREACHED */
+
+#else /* !HAVE_SPINLOCKS */
+       struct flock k_lock;
+       pid_t mypid;
+       int locked;
+
+       /* Initialize the lock. */
+       k_lock.l_whence = SEEK_SET;
+       k_lock.l_start = mp->off;
+       k_lock.l_len = 1;
+
+       for (locked = 0, mypid = getpid();;) {
+               /*
+                * Wait for the lock to become available; wait 10ms initially,
+                * up to 1 second.
+                */
+               for (usecs = MS(10); mp->pid != 0;)
+                       if (yield == NULL || yield() != 0) {
+                               (void)__db_sleep(0, usecs);
+                               if ((usecs <<= 1) > SECOND)
+                                       usecs = SECOND;
+                       }
+
+               /* Acquire an exclusive kernel lock. */
+               k_lock.l_type = F_WRLCK;
+               if (fcntl(fd, F_SETLKW, &k_lock))
+                       return (1);
+
+               /* If the resource tsl is still available, it's ours. */
+               if (mp->pid == 0) {
+                       locked = 1;
+                       mp->pid = mypid;
+               }
+
+               /* Release the kernel lock. */
+               k_lock.l_type = F_UNLCK;
+               if (fcntl(fd, F_SETLK, &k_lock))
+                       return (1);
+
+               /*
+                * If we got the resource tsl we're done.
+                *
+                * !!!
+                * We can't check to see if the lock is ours, because we may
+                * be trying to block ourselves in the lock manager, and so
+                * the holder of the lock that's preventing us from getting
+                * the lock may be us!  (Seriously.)
+                */
+               if (locked)
+                       break;
+       }
+
+#ifdef MUTEX_STATISTICS
+       ++mp->mutex_set_wait;
+#endif
+       return (0);
+#endif /* !HAVE_SPINLOCKS */
+}
+
+/*
+ * __db_mutex_unlock --
+ *     Release a lock.
+ *
+ *     mp: the mutex to release.
+ *     fd: not used by this function.
+ *
+ *     Always returns 0.  With HAVE_SPINLOCKS the resource tsl is cleared
+ *     through TSL_UNSET; otherwise the mutex is released by storing a pid
+ *     of 0.  When DEBUG is defined, unlocking a mutex whose pid is already
+ *     0 is reported on stderr and the process aborts.
+ *
+ * PUBLIC: int __db_mutex_unlock __P((db_mutex_t *, int));
+ */
+int
+__db_mutex_unlock(mp, fd)
+       db_mutex_t *mp;
+       int fd;
+{
+#ifdef DEBUG
+       if (mp->pid == 0) {
+               (void)fprintf(stderr,
+           "MUTEX ERROR: __db_mutex_unlock: lock already unlocked\n");
+               abort();
+       }
+#endif
+
+#ifdef HAVE_SPINLOCKS
+#ifdef DEBUG
+       mp->pid = 0;
+#endif
+
+       /* Release the resource tsl. */
+       TSL_UNSET(&mp->tsl_resource);
+#else
+       /*
+        * Release the resource tsl.  We don't have to acquire any locks
+        * because processes trying to acquire the lock are checking for
+        * a pid of 0, not a specific value.
+        */
+       mp->pid = 0;
+#endif
+       return (0);
+}
diff --git a/db2/mutex/parisc.gcc b/db2/mutex/parisc.gcc
new file mode 100644 (file)
index 0000000..e15f6f2
--- /dev/null
@@ -0,0 +1,40 @@
+/* 
+ * @(#)parisc.gcc      8.5 (Sleepycat) 1/18/97
+ *
+ * Copyright (c) 1996-1997, The University of Utah and the Computer Systems
+ * Laboratory at the University of Utah (CSL).  All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the Computer
+ * Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ */
+
+/*
+ * The PA-RISC has a "load and clear" instead of a "test and set" instruction.
+ * The 32-bit word used by that instruction must be 16-byte aligned hence we
+ * allocate 16 bytes for a tsl_t and use the word that is properly aligned.
+ * We could use the "aligned" attribute in GCC but that doesn't work for stack
+ * variables.
+ */
+#define        TSL_SET(tsl) ({                                                 \
+       int *__l = (int *)(((int)(tsl)+15)&~15);                        \
+       int __r;                                                        \
+       asm volatile("ldcws 0(%1),%0" : "=r" (__r) : "r" (__l));        \
+       __r & 1;                                                        \
+})
+
+#define        TSL_UNSET(tsl) ({                                               \
+       int *__l = (int *)(((int)(tsl)+15)&~15);                        \
+       *__l = -1;                                                      \
+})
diff --git a/db2/mutex/parisc.hp b/db2/mutex/parisc.hp
new file mode 100644 (file)
index 0000000..d10807b
--- /dev/null
@@ -0,0 +1,29 @@
+/* 
+ * @(#)parisc.hp       8.5 (Sleepycat) 1/18/97
+ *
+ * Copyright (c) 1996-1997, The University of Utah and the Computer Systems
+ * Laboratory at the University of Utah (CSL).  All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the Computer
+ * Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION.  THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ */
+
+/*
+ * The PA-RISC has a "load and clear" instead of a "test and set" instruction.
+ * The 32-bit word used by that instruction must be 16-byte aligned hence we
+ * allocate 16 bytes for a tsl_t and use the word that is properly aligned.
+ */
+#define        TSL_SET(tsl)    tsl_set(tsl)
+#define        TSL_UNSET(tsl)  tsl_unset(tsl)
diff --git a/db2/mutex/sparc.gcc b/db2/mutex/sparc.gcc
new file mode 100644 (file)
index 0000000..8445a06
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * @(#)sparc.gcc       10.1 (Sleepycat) 4/12/97
+ *
+ * The ldstub instruction takes the location specified by its first argument
+ * (a register containing a memory address) and loads its contents into its
+ * second argument (a register) and atomically sets the contents of the
+ * location specified by its first argument to a byte of 1s.  (The value in
+ * the second argument is never read, but only overwritten.)
+ *
+ * The membar instructions are needed to ensure that writes to the lock are
+ * correctly ordered with writes that occur later in the instruction stream.
+ *
+ * For gcc/sparc, 0 is clear, 1 is set.
+ */
+
+#if defined(__sparcv9__)
+Does the following code need membar instructions for V9 processors?
+#endif
+
+#define        TSL_SET(tsl) ({                                                 \
+       register tsl_t *__l = (tsl);                                    \
+       register tsl_t __r;                                             \
+       __asm__ volatile                                                \
+           ("ldstub [%1],%0"                                           \
+           : "=r"( __r) : "r" (__l));                                  \
+       !__r;                                                           \
+})
+
+#define        TSL_UNSET(tsl) ({                                               \
+         register tsl_t *__l = (tsl);                                  \
+        __asm__ volatile ("stb %%g0,[%0]" : : "r" (__l));              \
+})
+#define        TSL_INIT(tsl)   TSL_UNSET(tsl)
diff --git a/db2/mutex/uts4.cc.s b/db2/mutex/uts4.cc.s
new file mode 100644 (file)
index 0000000..ee5f414
--- /dev/null
@@ -0,0 +1,21 @@
+ /
+ / int uts_lock ( int *p, int i );
+ /             Update the lock word pointed to by p with the
+ /             value i, using compare-and-swap.
+ /             Returns 0 if update was successful.
+ /             Returns 1 if update failed.
+ /
+         entry   uts_lock
+ uts_lock:
+         using   .,r15
+         st      r2,8(sp)        / Save R2
+         l       r2,64+0(sp)     / R2 -> word to update
+         slr     r0, r0          / R0 = current lock value must be 0
+         l       r1,64+4(sp)     / R1 = new lock value
+         cs      r0,r1,0(r2)     / Try the update ...
+         be      x               /  ... Success.  Return 0
+         la      r0,1            /  ... Failure.  Return 1
+ x:                              /
+         l       r2,8(sp)        / Restore R2
+         b       2(,r14)         / Return to caller
+         drop    r15
diff --git a/db2/mutex/x86.gcc b/db2/mutex/x86.gcc
new file mode 100644 (file)
index 0000000..886a681
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * @(#)x86.gcc 10.2 (Sleepycat) 6/21/97
+ *
+ * For gcc/x86, 0 is clear, 1 is set.
+ *
+ * TSL_SET(tsl) --
+ *     Make a single attempt to acquire the lock that tsl points to.  %eax
+ *     is loaded with 1 and xchgb swaps %al with the lock byte, which
+ *     leaves the lock byte set and its previous value in %al.  The xorl
+ *     then flips the low bit, so the macro evaluates to 1 if the lock was
+ *     clear (the caller now holds it) and 0 if it was already set.
+ *
+ * TSL_UNSET(tsl) stores 0 into the lock; TSL_INIT(tsl) is the same
+ * operation.
+ */
+#define        TSL_SET(tsl) ({                                                 \
+       register tsl_t *__l = (tsl);                                    \
+       int __r;                                                        \
+       asm volatile("movl $1,%%eax; xchgb %1,%%al; xorl $1,%%eax"      \
+           : "=&a" (__r), "=m" (*__l)                                  \
+           : "1" (*__l)                                                \
+           );                                                          \
+       __r & 1;                                                        \
+})
+
+#define        TSL_UNSET(tsl)  (*(tsl) = 0)
+#define        TSL_INIT(tsl)   TSL_UNSET(tsl)
diff --git a/db2/os/db_os_abs.c b/db2/os/db_os_abs.c
new file mode 100644 (file)
index 0000000..8795205
--- /dev/null
@@ -0,0 +1,82 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_abs.c  10.5 (Sleepycat) 7/5/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+
+/*
+ * __db_abspath --
+ *     Return if a path is an absolute path.
+ *
+ *     path: NUL-terminated path name to examine; it is not modified.
+ *
+ *     Returns non-zero if the path is absolute and 0 if it is relative.
+ *     What counts as absolute depends on the platform the code is built
+ *     for (see the comments in each branch below).
+ *
+ * PUBLIC: int __db_abspath __P((const char *));
+ */
+int
+__db_abspath(path)
+       const char *path;
+{
+#ifdef _WIN32
+       /*
+        * !!!
+        * Check for drive specifications, e.g., "C:".  In addition, the path
+        * separator used by the win32 DB (PATH_SEPARATOR) is \; look for both
+        * / and \ since these are user-input paths.
+        *
+        * The character is converted to unsigned char before it is handed
+        * to isalpha(): plain char may be signed, and passing a negative
+        * value other than EOF to a <ctype.h> function is undefined.
+        */
+       if (isalpha((unsigned char)path[0]) && path[1] == ':')
+               path += 2;
+       return (path[0] == '/' || path[0] == '\\');
+#else
+#ifdef macintosh
+       /*
+        * !!!
+        * Absolute pathnames always start with a volume name, which must be
+        * followed by a colon, thus they are of the form:
+        *      volume: or volume:dir1:dir2:file
+        *
+        * Relative pathnames are either a single name without colons or a
+        * path starting with a colon, thus of the form:
+        *      file or :file or :dir1:dir2:file
+        */
+       return (strchr(path, ':') != NULL && path[0] != ':');
+#else
+       /* Everywhere else a path is absolute iff it starts with a slash. */
+       return (path[0] == '/');
+#endif
+#endif
+}
+
+/*
+ * __db_rpath --
+ *     Find the final path separator in a path.
+ *
+ *     Returns a pointer to the last character of path that is one of the
+ *     characters in PATH_SEPARATOR, or NULL if path contains none.
+ *
+ * PUBLIC: char *__db_rpath __P((const char *));
+ */
+char *
+__db_rpath(path)
+       const char *path;
+{
+       const char *cp, *found;
+       int multi;
+
+       /* Is more than one separator character configured? */
+       multi = PATH_SEPARATOR[1] != '\0';
+
+       found = NULL;
+       for (cp = path; *cp != '\0'; ++cp)
+               if (multi ?
+                   strchr(PATH_SEPARATOR, *cp) != NULL :
+                   *cp == PATH_SEPARATOR[0])
+                       found = cp;
+       return ((char *)found);
+}
diff --git a/db2/os/db_os_dir.c b/db2/os/db_os_dir.c
new file mode 100644 (file)
index 0000000..23a6a45
--- /dev/null
@@ -0,0 +1,136 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_dir.c  10.7 (Sleepycat) 8/23/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+#  include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+#  include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+#  include <ndir.h>
+# endif
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+#include "common_ext.h"
+
+/*
+ * __db_dir --
+ *     Return a list of the files in a directory.
+ *
+ *     dbenv:  environment handed to __db_err when reporting errors.
+ *     dir:    name of the directory to read (on _WIN32 it is passed to
+ *             _findfirst as given).
+ *     namesp: on success, set to a malloc'd array of malloc'd copies of
+ *             the entry names; release it with __db_dirf.
+ *     cntp:   on success, set to the number of entries in *namesp.
+ *
+ *     Returns 0 on success, the errno value from opening the directory if
+ *     that fails, or ENOMEM if memory runs out.  On failure nothing is
+ *     stored through namesp or cntp, and the directory handle and any
+ *     names collected so far are released before returning.
+ *
+ * PUBLIC: int __db_dir __P((DB_ENV *, const char *, char ***, int *));
+ */
+int
+__db_dir(dbenv, dir, namesp, cntp)
+       DB_ENV *dbenv;
+       const char *dir;
+       char ***namesp;
+       int *cntp;
+{
+       int arraysz, cnt, ret;
+       char **names, **tnames;
+#ifdef _WIN32
+       struct _finddata_t fdata;
+       long dirhandle;
+       int finished;
+
+       if ((dirhandle = _findfirst(dir,&fdata)) == -1) {
+               /* Save errno: reporting the error may overwrite it. */
+               ret = errno;
+               __db_err(dbenv, "%s: %s", dir, strerror(ret));
+               return (ret);
+       }
+
+       names = NULL;
+       finished = 0;
+       for (arraysz = cnt = 0; finished != 1; ++cnt) {
+               if (cnt >= arraysz) {
+                       arraysz += 100;
+                       /*
+                        * Grow through a temporary so that the old array,
+                        * and the names it holds, can still be freed if
+                        * the allocation fails.
+                        */
+                       tnames = (char **)(names == NULL ?
+                           malloc(arraysz * sizeof(names[0])) :
+                           realloc(names, arraysz * sizeof(names[0])));
+                       if (tnames == NULL) {
+                               _findclose(dirhandle);
+                               goto nomem;
+                       }
+                       names = tnames;
+               }
+               if ((names[cnt] = (char *)strdup(fdata.name)) == NULL) {
+                       _findclose(dirhandle);
+                       goto nomem;
+               }
+               if (_findnext(dirhandle,&fdata) != 0)
+                       finished = 1;
+       }
+       _findclose(dirhandle);
+#else /* !_WIN32 */
+       struct dirent *dp;
+       DIR *dirp;
+
+       if ((dirp = opendir(dir)) == NULL) {
+               /* Save errno: reporting the error may overwrite it. */
+               ret = errno;
+               __db_err(dbenv, "%s: %s", dir, strerror(ret));
+               return (ret);
+       }
+       names = NULL;
+       for (arraysz = cnt = 0; (dp = readdir(dirp)) != NULL; ++cnt) {
+               if (cnt >= arraysz) {
+                       arraysz += 100;
+                       /*
+                        * Grow through a temporary so that the old array,
+                        * and the names it holds, can still be freed if
+                        * the allocation fails.
+                        */
+                       tnames = (char **)(names == NULL ?
+                           malloc(arraysz * sizeof(names[0])) :
+                           realloc(names, arraysz * sizeof(names[0])));
+                       if (tnames == NULL) {
+                               (void)closedir(dirp);
+                               goto nomem;
+                       }
+                       names = tnames;
+               }
+               if ((names[cnt] = (char *)strdup(dp->d_name)) == NULL) {
+                       (void)closedir(dirp);
+                       goto nomem;
+               }
+       }
+       (void)closedir(dirp);
+#endif /* !_WIN32 */
+
+       *namesp = names;
+       *cntp = cnt;
+       return (0);
+
+       /*
+        * Out of memory.  The directory handle has already been closed;
+        * names[0] through names[cnt - 1] are the entries copied so far.
+        */
+nomem: if (names != NULL)
+               __db_dirf(dbenv, names, cnt);
+       __db_err(dbenv, "%s", strerror(ENOMEM));
+       return (ENOMEM);
+}
+
+/*
+ * __db_dirf --
+ *     Release a list of file names.
+ *
+ *     Frees each of the cnt strings in names, last to first, and then the
+ *     array itself.  dbenv is accepted but not used.
+ *
+ * PUBLIC: void __db_dirf __P((DB_ENV *, char **, int));
+ */
+void
+__db_dirf(dbenv, names, cnt)
+       DB_ENV *dbenv;
+       char **names;
+       int cnt;
+{
+       int i;
+
+       dbenv = dbenv;                  /* XXX: Shut the compiler up. */
+
+       /* Walk the array backwards, releasing one name per step. */
+       for (i = cnt; i > 0; --i)
+               free(names[i - 1]);
+       free(names);
+}
diff --git a/db2/os/db_os_fid.c b/db2/os/db_os_fid.c
new file mode 100644 (file)
index 0000000..8fa55fa
--- /dev/null
@@ -0,0 +1,126 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_fid.c  10.7 (Sleepycat) 8/21/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "os_ext.h"
+#include "common_ext.h"
+
+/*
+ * __db_fileid --
+ *     Return a unique identifier for a file.
+ *
+ *     The DB_FILE_ID_LEN-byte buffer fidp is zeroed and then filled with
+ *     the file's identity: on UNIX the st_ino and st_dev bytes, each in
+ *     reverse order; on Win32 the file index and volume serial number.
+ *     If timestamp is non-zero, the bytes of the current time are then
+ *     stored in reverse order at the current position in the buffer (but
+ *     see the Win32 note in the body).  Returns 0 on success or an errno
+ *     value on failure.
+ *
+ * PUBLIC: int __db_fileid __P((DB_ENV *, const char *, int, u_int8_t *));
+ */
+int
+__db_fileid(dbenv, fname, timestamp, fidp)
+       DB_ENV *dbenv;
+       const char *fname;
+       int timestamp;
+       u_int8_t *fidp;
+{
+       time_t now;
+       u_int8_t *p;
+       unsigned int i;
+
+#ifdef _WIN32
+       /*
+        * The documentation for GetFileInformationByHandle() states that the
+        * inode-type numbers are not constant between processes.  Actually,
+        * they are, they're the NTFS MFT indexes.  So, this works on NTFS,
+        * but perhaps not on other platforms, and perhaps not over a network.
+        * Can't think of a better solution right now.
+        */
+       int fd = 0;
+       HANDLE fh = 0;
+       BY_HANDLE_FILE_INFORMATION fi;
+       BOOL retval = FALSE;
+
+       /* Clear the buffer. */
+       memset(fidp, 0, DB_FILE_ID_LEN);
+
+       /* first we open the file, because we're not given a handle to it */
+       fd = open(fname,_O_RDONLY,_S_IREAD);
+       if (-1 == fd) {
+               /* If we can't open it, we're in trouble */
+               return (errno);
+       }
+
+       /* File open, get its info */
+       fh = (HANDLE)_get_osfhandle(fd);
+       if ((HANDLE)(-1) != fh) {
+               retval = GetFileInformationByHandle(fh,&fi);
+       }
+       close(fd);
+
+       /*
+        * We want the three 32-bit words which tell us the volume ID and
+        * the file ID.  We make a crude attempt to copy the bytes over to
+        * the caller's buffer.
+        *
+        * DBDB: really we should ensure that the bytes get packed the same
+        * way on all compilers, platforms etc.
+        *
+        * NOTE(review): fidp is not advanced past the volume serial number,
+        * so a requested timestamp is written over it; and if the file
+        * information cannot be read the ID stays all zeroes while 0 is
+        * still returned.  Confirm whether either is intended.
+        */
+       if ( ((HANDLE)(-1) != fh) && (TRUE == retval) ) {
+               memcpy(fidp, &fi.nFileIndexLow, sizeof(u_int32_t));
+               fidp += sizeof(u_int32_t);
+               memcpy(fidp, &fi.nFileIndexHigh, sizeof(u_int32_t));
+               fidp += sizeof(u_int32_t);
+               memcpy(fidp, &fi.dwVolumeSerialNumber, sizeof(u_int32_t));
+       }
+#else
+       struct stat sb;
+
+       /* Clear the buffer. */
+       memset(fidp, 0, DB_FILE_ID_LEN);
+
+       /* Check for the unthinkable. */
+       if (sizeof(sb.st_ino) +
+           sizeof(sb.st_dev) + sizeof(time_t) > DB_FILE_ID_LEN)
+               return (EINVAL);
+
+       /* On UNIX, use a dev/inode pair. */
+       if (stat(fname, &sb)) {
+               int ret = errno;        /* Save: __db_err may change errno. */
+
+               __db_err(dbenv, "%s: %s", fname, strerror(ret));
+               return (ret);
+       }
+
+       /*
+        * Use the inode first and in reverse order, hopefully putting the
+        * distinguishing information early in the string.
+        */
+       for (p = (u_int8_t *)&sb.st_ino +
+           sizeof(sb.st_ino), i = 0; i < sizeof(sb.st_ino); ++i)
+               *fidp++ = *--p;
+       for (p = (u_int8_t *)&sb.st_dev +
+           sizeof(sb.st_dev), i = 0; i < sizeof(sb.st_dev); ++i)
+               *fidp++ = *--p;
+#endif
+       /* Optionally add the current time, again in reverse byte order. */
+       if (timestamp) {
+               (void)time(&now);
+               for (p = (u_int8_t *)&now +
+                   sizeof(now), i = 0; i < sizeof(now); ++i)
+                       *fidp++ = *--p;
+       }
+       return (0);
+}
diff --git a/db2/os/db_os_lseek.c b/db2/os/db_os_lseek.c
new file mode 100644 (file)
index 0000000..cecf0e1
--- /dev/null
@@ -0,0 +1,60 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_lseek.c        10.3 (Sleepycat) 6/28/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+
+/*
+ * __db_lseek --
+ *     Seek to a page/byte offset in the file.
+ *
+ *     The target offset is pgsize * pageno + relative, applied according
+ *     to whence.  The product is computed in the offset type (64 bits
+ *     where the platform provides it) rather than in the narrower types
+ *     of the operands, so it does not wrap for files larger than size_t
+ *     can describe.  Returns 0 on success, errno if the seek fails.
+ *
+ * PUBLIC: int __db_lseek __P((int, size_t, db_pgno_t, u_long, int));
+ */
+int
+__db_lseek(fd, pgsize, pageno, relative, whence)
+       int fd;
+       size_t pgsize;
+       db_pgno_t pageno;
+       u_long relative;
+       int whence;
+{
+       /* 64-bit offsets are done differently by different vendors. */
+#undef __LSEEK_SET
+#ifdef HAVE_LLSEEK
+#define        __LSEEK_SET
+       offset_t offset;                        /* Solaris. */
+
+       /* Widen before multiplying so the product cannot wrap. */
+       offset = (offset_t)pgsize * pageno + relative;
+       return (llseek(fd, offset, whence) == -1 ? errno : 0);
+#endif
+#ifdef HAVE_LSEEKI
+#define        __LSEEK_SET
+       __int64 offset;                         /* WNT */
+
+       /* Widen before multiplying so the product cannot wrap. */
+       offset = (__int64)pgsize * pageno + relative;
+       return (_lseeki64(fd, offset, whence) == -1 ? errno : 0);
+#endif
+#ifndef        __LSEEK_SET
+       off_t offset;                           /* Default. */
+
+       /* Widen before multiplying so the product cannot wrap. */
+       offset = (off_t)pgsize * pageno + relative;
+       return (lseek(fd, offset, whence) == -1 ? errno : 0);
+#endif
+}
diff --git a/db2/os/db_os_mmap.c b/db2/os/db_os_mmap.c
new file mode 100644 (file)
index 0000000..0cd8fad
--- /dev/null
@@ -0,0 +1,106 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_mmap.c 10.4 (Sleepycat) 6/28/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include <errno.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+
+/*
+ * __db_mmap --
+ *     Map in some shared memory backed by a file descriptor.
+ *
+ *     Maps len bytes starting at offset 0 of fd, read-only if rdonly is
+ *     non-zero and read/write otherwise, and stores the mapped address
+ *     through addr (which is treated as a void **).  On non-Win32 systems
+ *     is_private selects MAP_PRIVATE instead of MAP_SHARED.  Returns 0 on
+ *     success or errno on failure.
+ *
+ * PUBLIC: int __db_mmap __P((int, size_t, int, int, void *));
+ */
+int
+__db_mmap(fd, len, is_private, rdonly, addr)
+       int fd, is_private, rdonly;
+       size_t len;
+       void *addr;
+{
+#ifdef _WIN32
+       /*
+        * We have not implemented copy-on-write here: is_private is ignored.
+        *
+        * NOTE(review): the result of MapViewOfFile is stored without a
+        * failure check, and errno is returned after a failed Win32 API
+        * call -- confirm both are intended.
+        */
+       void * pMemory = 0;
+       HANDLE hFile = (HANDLE)_get_osfhandle(fd);
+       HANDLE hMemory = CreateFileMapping(
+             hFile,
+             0,
+             (rdonly ? PAGE_READONLY : PAGE_READWRITE),
+             0,
+             len, /* This code fails if the library is ever compiled on a 64-bit machine */
+             0
+             );
+       if (NULL == hMemory)
+       {
+             return errno;
+       }
+       pMemory = MapViewOfFile(
+             hMemory,
+             (rdonly ? FILE_MAP_READ : FILE_MAP_ALL_ACCESS),
+             0,
+             0,
+             len
+             );
+       CloseHandle(hMemory);
+       *(void **)addr = pMemory;
+       return 0;
+
+#else /* !_WIN32 */
+
+       void *p;
+       int flags, prot;
+
+       flags = is_private ? MAP_PRIVATE : MAP_SHARED;
+#ifdef MAP_HASSEMAPHORE
+       flags += MAP_HASSEMAPHORE;
+#endif
+       prot = PROT_READ | (rdonly ? 0 : PROT_WRITE);
+
+#ifndef MAP_FAILED                     /* XXX: Mmap(2) failure return. */
+#define        MAP_FAILED      -1
+#endif
+       if ((p =
+           mmap(NULL, len, prot, flags, fd, (off_t)0)) == (void *)MAP_FAILED)
+               return (errno);
+
+       *(void **)addr = p;
+       return (0);
+#endif /* _WIN32 */
+}
+
+/*
+ * __db_munmap --
+ *     Release the specified shared memory.  Returns 0 on success or
+ *     errno on failure.  The len argument is not used on Win32.
+ *
+ * PUBLIC: int __db_munmap __P((void *, size_t));
+ */
+int
+__db_munmap(addr, len)
+       void *addr;
+       size_t len;
+{
+       /*
+        * !!!
+        * The argument len is always the same length as was mapped.
+        */
+#ifdef _WIN32
+       return (!UnmapViewOfFile(addr) ? errno : 0);
+#else
+       return (munmap(addr, len) ? errno : 0);
+#endif
+}
diff --git a/db2/os/db_os_open.c b/db2/os/db_os_open.c
new file mode 100644 (file)
index 0000000..1d67ef9
--- /dev/null
@@ -0,0 +1,147 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_open.c 10.14 (Sleepycat) 7/5/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+
+/*
+ * __db_oflags --
+ *     Convert open(2) flags to DB flags.
+ *
+ *     O_CREAT maps to DB_CREATE and O_TRUNC to DB_TRUNCATE.  DB_RDONLY is
+ *     set when neither O_RDWR nor O_WRONLY is present, or when the
+ *     O_RDONLY bit itself is set.  All other bits in oflags are ignored.
+ *
+ * PUBLIC: int __db_oflags __P((int));
+ */
+int
+__db_oflags(oflags)
+       int oflags;
+{
+       int dbflags;
+
+       /*
+        * XXX
+        * Convert POSIX 1003.1 open(2) flags to DB flags.  Not an exact
+        * science as most POSIX implementations don't have a flag value
+        * for O_RDONLY, it's simply the lack of a write flag.
+        */
+       dbflags = 0;
+       if (oflags & O_CREAT)
+               dbflags |= DB_CREATE;
+       if (!(oflags & (O_RDWR | O_WRONLY)) || oflags & O_RDONLY)
+               dbflags |= DB_RDONLY;
+       if (oflags & O_TRUNC)
+               dbflags |= DB_TRUNCATE;
+       return (dbflags);
+}
+
+/*
+ * __db_fdopen --
+ *     Open a file descriptor.
+ *
+ *     arg_flags holds DB_* open flags and must be a subset of ok_flags,
+ *     otherwise EINVAL is returned.  The DB flags are translated to
+ *     open(2) flags and name is opened with the given mode.  On success
+ *     the descriptor is stored in *fdp and 0 is returned; on failure an
+ *     errno value is returned and *fdp is not written.
+ *
+ * PUBLIC: int __db_fdopen __P((const char *, int, int, int, int *));
+ */
+int
+__db_fdopen(name, arg_flags, ok_flags, mode, fdp)
+       const char *name;
+       int arg_flags, ok_flags, mode, *fdp;
+{
+       int fd, flags;
+
+       if (arg_flags & ~ok_flags)
+               return (EINVAL);
+
+       flags = 0;
+       if (arg_flags & DB_CREATE)
+               flags |= O_CREAT;
+
+       if (arg_flags & DB_EXCL)
+               flags |= O_EXCL;
+
+       if (arg_flags & DB_RDONLY)
+               flags |= O_RDONLY;
+       else
+               flags |= O_RDWR;
+
+#ifdef _WIN32
+#ifdef _MSC_VER
+       if (arg_flags & DB_SEQUENTIAL)
+               flags |= _O_SEQUENTIAL;
+       else
+               flags |= _O_RANDOM;
+
+       if (arg_flags & DB_TEMPORARY)
+               flags |= _O_TEMPORARY;
+#endif
+       flags |= O_BINARY | O_NOINHERIT;
+#endif
+
+       if (arg_flags & DB_TRUNCATE)
+               flags |= O_TRUNC;
+
+       /* Open the file. */
+       if ((fd = open(name, flags, mode)) == -1)
+               return (errno);
+
+#ifndef _WIN32
+       /* Delete any temporary file; done for Win32 by _O_TEMPORARY. */
+       if (arg_flags & DB_TEMPORARY)
+               (void)unlink(name);
+#endif
+
+#if !defined(_WIN32) && !defined(macintosh)
+       /*
+        * Deny access to any child process; done for Win32 by O_NOINHERIT,
+        * MacOS has neither child processes nor fd inheritance.
+        */
+       if (fcntl(fd, F_SETFD, 1) == -1) {
+               int ret = errno;        /* Saved before __db_close runs. */
+
+               (void)__db_close(fd);
+               return (ret);
+       }
+#endif
+       *fdp = fd;
+       return (0);
+}
+
+/*
+ * __db_fsync --
+ *     Flush a file descriptor with fsync(2).  Returns 0 on success or
+ *     errno on failure.
+ *
+ * PUBLIC: int __db_fsync __P((int));
+ */
+int
+__db_fsync(fd)
+       int fd;
+{
+       return (fsync(fd) ? errno : 0);
+}
+
+/*
+ * __db_close --
+ *     Close a file descriptor with close(2).  Returns 0 on success or
+ *     errno on failure.
+ *
+ * PUBLIC: int __db_close __P((int));
+ */
+int
+__db_close(fd)
+       int fd;
+{
+       return (close(fd) ? errno : 0);
+}
diff --git a/db2/os/db_os_rw.c b/db2/os/db_os_rw.c
new file mode 100644 (file)
index 0000000..5a6c219
--- /dev/null
@@ -0,0 +1,75 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_rw.c   10.4 (Sleepycat) 6/28/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+
+/*
+ * __db_read --
+ *     Read from a file handle.
+ *
+ *     Calls read(2) repeatedly until len bytes have been stored at addr
+ *     or read(2) returns 0.  The number of bytes actually read is stored
+ *     in *nrp and 0 is returned.  If a read fails, errno is returned and
+ *     *nrp is not written.
+ *
+ * PUBLIC: int __db_read __P((int, void *, size_t, ssize_t *));
+ */
+int
+__db_read(fd, addr, len, nrp)
+       int fd;
+       void *addr;
+       size_t len;
+       ssize_t *nrp;
+{
+       size_t offset;
+       ssize_t nr;
+       u_int8_t *taddr;
+
+       for (taddr = addr,
+           offset = 0; offset < len; taddr += nr, offset += nr) {
+               if ((nr = read(fd, taddr, len - offset)) < 0)
+                       return (errno);
+               if (nr == 0)            /* Nothing more to read. */
+                       break;
+       }
+       *nrp = taddr - (u_int8_t *)addr;
+       return (0);
+}
+
+/*
+ * __db_write --
+ *     Write to a file handle.
+ *
+ *     Calls write(2) repeatedly until all len bytes at addr have been
+ *     written, then stores len in *nwp and returns 0.  If a write fails,
+ *     errno is returned and *nwp is not written.
+ *
+ * PUBLIC: int __db_write __P((int, void *, size_t, ssize_t *));
+ */
+int
+__db_write(fd, addr, len, nwp)
+       int fd;
+       void *addr;
+       size_t len;
+       ssize_t *nwp;
+{
+       size_t offset;
+       ssize_t nw;
+       u_int8_t *taddr;
+
+       for (taddr = addr,
+           offset = 0; offset < len; taddr += nw, offset += nw)
+               if ((nw = write(fd, taddr, len - offset)) < 0)
+                       return (errno);
+       *nwp = len;
+       return (0);
+}
diff --git a/db2/os/db_os_sleep.c b/db2/os/db_os_sleep.c
new file mode 100644 (file)
index 0000000..5591789
--- /dev/null
@@ -0,0 +1,62 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_sleep.c        10.6 (Sleepycat) 6/28/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+
+#include <errno.h>
+#ifndef HAVE_SYS_TIME_H
+#include <time.h>
+#endif
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+
+/*
+ * __db_sleep --
+ *     Yield the processor for a period of time.
+ *
+ *     usecs may be one second or more; the excess is carried into secs.
+ *     On Win32 the wait uses Sleep() with the time converted to
+ *     milliseconds and 0 is always returned.  Elsewhere the wait is a
+ *     select(2) call with no descriptors, and errno is returned if that
+ *     call fails.
+ *
+ * PUBLIC: int __db_sleep __P((u_long, u_long));
+ */
+int
+__db_sleep(secs, usecs)
+       u_long secs, usecs;             /* Seconds and microseconds. */
+{
+#ifndef _WIN32
+       struct timeval t;
+#endif
+
+       /* Don't require that the values be normalized. */
+       for (; usecs >= 1000000; ++secs, usecs -= 1000000);
+
+       /*
+        * It's important that we yield the processor here so that other
+        * processes or threads are permitted to run.
+        */
+#ifdef _WIN32
+       Sleep(secs * 1000 + usecs / 1000);
+       return (0);
+#else
+       t.tv_sec = secs;
+       t.tv_usec = usecs;
+       return (select(0, NULL, NULL, NULL, &t) == -1 ? errno : 0);
+#endif
+}
diff --git a/db2/os/db_os_stat.c b/db2/os/db_os_stat.c
new file mode 100644 (file)
index 0000000..7929b6b
--- /dev/null
@@ -0,0 +1,84 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_stat.c 10.6 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+#include "common_ext.h"
+
+/*
+ * __db_exists --
+ *     Return whether the file exists: 0 if stat(2) on path succeeds,
+ *     otherwise the errno value from stat(2).  If isdirp is not NULL and
+ *     the file exists, *isdirp is set to whether path is a directory.
+ *
+ * PUBLIC: int __db_exists __P((const char *, int *));
+ */
+int
+__db_exists(path, isdirp)
+       const char *path;
+       int *isdirp;
+{
+       struct stat sb;
+
+       if (stat(path, &sb) != 0)
+               return (errno);
+       if (isdirp != NULL)
+               *isdirp = S_ISDIR(sb.st_mode);
+       return (0);
+}
+
+/*
+ * __db_stat --
+ *     Return file size and I/O size; abstracted to make it easier
+ *     to replace.
+ *
+ * PUBLIC: int __db_stat __P((DB_ENV *, const char *, int, off_t *, off_t *));
+ */
+int
+__db_stat(dbenv, path, fd, sizep, iop)
+       DB_ENV *dbenv;
+       const char *path;
+       int fd;
+       off_t *sizep, *iop;
+{
+       struct stat sb;
+
+       if (fstat(fd, &sb) == -1) {
+               __db_err(dbenv, "%s: fstat: %s", path, strerror(errno));
+               return (errno);
+       }
+
+       /* Return the size of the file. */
+       if (sizep != NULL)
+               *sizep = sb.st_size;
+
+       /*
+        * Return the underlying filesystem blocksize, if available.  Default
+        * to 8K on the grounds that most OS's use less than 8K as their VM
+        * page size.
+        */
+#ifdef HAVE_ST_BLKSIZE
+       if (iop != NULL)
+               *iop = sb.st_blksize;
+#else
+       if (iop != NULL)
+               *iop = 8 * 1024;
+#endif
+       return (0);
+}
diff --git a/db2/os/db_os_unlink.c b/db2/os/db_os_unlink.c
new file mode 100644 (file)
index 0000000..872beba
--- /dev/null
@@ -0,0 +1,35 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)db_os_unlink.c       10.2 (Sleepycat) 6/28/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "os_ext.h"
+
+/*
+ * __db_unlink --
+ *     Remove a file with unlink(2).  Returns 0 on success or errno on
+ *     failure.
+ *
+ * PUBLIC: int __db_unlink __P((const char *));
+ */
+int
+__db_unlink(path)
+       const char *path;
+{
+       return (unlink(path) == -1 ? errno : 0);
+}
diff --git a/db2/progs/db_archive/db_archive.c b/db2/progs/db_archive/db_archive.c
new file mode 100644 (file)
index 0000000..136cf2c
--- /dev/null
@@ -0,0 +1,165 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_archive.c 10.12 (Sleepycat) 7/25/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "log.h"
+#include "db_dispatch.h"
+#include "clib_ext.h"
+#include "common_ext.h"
+
+DB_ENV *db_init __P((char *, int));
+void    onint __P((int));
+void    siginit __P((void));
+void    usage __P((void));
+int     main __P((int, char *[]));
+
+int     interrupted;
+const char *progname = "db_archive";                   /* Program name. */
+
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB_ENV *dbenv;
+       int ch, flags, verbose;
+       char *home, **list;
+
+       flags = verbose = 0;
+       home = NULL;
+       while ((ch = getopt(argc, argv, "ah:lsv")) != EOF)
+               switch (ch) {
+               case 'a':
+                       flags |= DB_ARCH_ABS;
+                       break;
+               case 'h':
+                       home = optarg;
+                       break;
+               case 'l':
+                       flags |= DB_ARCH_LOG;
+                       break;
+               case 's':
+                       flags |= DB_ARCH_DATA;
+                       break;
+               case 'v':
+                       verbose = 1;
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       if (argc != 0)                  /* No operands are accepted. */
+               usage();
+
+       /* Initialize the environment. */
+       dbenv = db_init(home, verbose);
+
+       /* Get the list of names selected by the -a, -l and -s flags. */
+       if ((errno = log_archive(dbenv->lg_info, &list, flags, NULL)) != 0) {
+               (void)db_appexit(dbenv);
+               err(1, "log_archive");
+       }
+
+       /* Print the names, one per line, if a list was returned. */
+       if (list != NULL)
+               for (; *list != NULL; ++list)
+                       printf("%s\n", *list);
+
+       return (db_appexit(dbenv) ? 1 : 0);
+}
+
+/*
+ * db_init --
+ *     Initialize the environment: allocate a zeroed DB_ENV that reports
+ *     errors on stderr prefixed with the program name, open home with
+ *     DB_CREATE, DB_INIT_LOG, DB_INIT_TXN and DB_USE_ENVIRON, and install
+ *     the signal handlers.  Failures are reported through err(1, ...).
+ */
+DB_ENV *
+db_init(home, verbose)
+       char *home;
+       int verbose;
+{
+       DB_ENV *dbenv;
+
+       if ((dbenv = (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+       dbenv->db_errfile = stderr;
+       dbenv->db_errpfx = progname;
+       dbenv->db_verbose = verbose;
+
+       if ((errno = db_appinit(home, NULL, dbenv,
+           DB_CREATE | DB_INIT_LOG | DB_INIT_TXN | DB_USE_ENVIRON)) != 0)
+               err(1, "db_appinit");
+
+       siginit();
+
+       return (dbenv);
+}
+
+/*
+ * siginit --
+ *     Initialize the set of signals for which we want to clean up.
+ *     Generally, we try not to leave the shared regions locked if
+ *     we can.
+ *
+ *     SIGHUP and SIGKILL are only registered where the macros exist.
+ *     NOTE(review): POSIX does not permit catching SIGKILL, so that
+ *     signal() call is expected to fail (its result is discarded).
+ *     NOTE(review): onint only records the signal in "interrupted", and
+ *     nothing in db_archive.c reads that variable -- confirm intent.
+ */
+void
+siginit()
+{
+#ifdef SIGHUP
+       (void)signal(SIGHUP, onint);
+#endif
+       (void)signal(SIGINT, onint);
+#ifdef SIGKILL
+       (void)signal(SIGKILL, onint);
+#endif
+       (void)signal(SIGTERM, onint);
+}
+
+/*
+ * onint --
+ *     Interrupt signal handler: record the signal number in the global
+ *     "interrupted", substituting SIGINT if signo is 0.
+ */
+void
+onint(signo)
+       int signo;
+{
+       if ((interrupted = signo) == 0)
+               interrupted = SIGINT;
+}
+
+void
+usage()                                /* Print the synopsis on stderr, then exit. */
+{
+       (void)fprintf(stderr, "usage: db_archive [-alsv] [-h home]\n");
+       exit(1);                        /* Exit status 1: usage error. */
+}
diff --git a/db2/progs/db_checkpoint/db_checkpoint.c b/db2/progs/db_checkpoint/db_checkpoint.c
new file mode 100644 (file)
index 0000000..586b4b9
--- /dev/null
@@ -0,0 +1,246 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_checkpoint.c      10.9 (Sleepycat) 7/4/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "log.h"
+#include "btree.h"
+#include "hash.h"
+#include "clib_ext.h"
+#include "common_ext.h"
+
+char   *check __P((DB_ENV *, long, long));
+int     checkpoint __P((DB_ENV *, char *, int));
+DB_ENV *db_init __P((char *));
+int     logpid __P((char *, int));
+void    onint __P((int));
+void    siginit __P((void));
+void    usage __P((void));
+int     main __P((int, char *[]));
+
+int     interrupted;
+time_t  now;                                   /* Checkpoint time. */
+const char *progname = "db_checkpoint";                /* Program name. */
+
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB_ENV *dbenv;
+       time_t now;                     /* NOTE: shadows the file-scope "now". */
+       long kbytes, minutes, seconds;
+       int ch, rval, verbose;
+       char *home, *logfile;
+
+       home = logfile = NULL;
+       kbytes = minutes = 0;
+       verbose = 0;
+       while ((ch = getopt(argc, argv, "h:k:L:p:v")) != EOF)
+               switch (ch) {
+               case 'h':
+                       home = optarg;
+                       break;
+               case 'k':
+                       get_long(optarg, 1, LONG_MAX, &kbytes);
+                       break;
+               case 'L':
+                       logfile = optarg;
+                       break;
+               case 'p':
+                       get_long(optarg, 1, LONG_MAX, &minutes);
+                       break;
+               case 'v':
+                       verbose = 1;
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       if (argc != 0)                  /* No operands are accepted. */
+               usage();
+
+       if (kbytes == 0 && minutes == 0) {
+               warnx("at least one of -k and -p must be specified");
+               usage();
+       }
+
+       /* Initialize the environment. */
+       dbenv = db_init(home);
+
+       /* Record that we are running in the -L file, if one was given. */
+       if (logfile != NULL && logpid(logfile, 1)) {
+               (void)db_appexit(dbenv);
+               return (1);
+       }
+
+       /*
+        * If we have only a time delay, then we'll sleep the right amount
+        * to wake up when a checkpoint is necessary.  If we have a "kbytes"
+        * field set, then we'll check every 30 seconds.
+        *
+        * NOTE(review): the loop below retries while txn_checkpoint returns
+        * a positive value and gives up on a negative one -- confirm this
+        * matches txn_checkpoint's return convention.
+        */
+       rval = 0;
+       seconds = kbytes != 0 ? 30 : minutes * 60;
+       while (!interrupted) {
+               (void)__db_sleep(seconds, 0);
+
+               if (verbose) {
+                       (void)time(&now);
+                       printf("checkpoint: %s", ctime(&now));
+               }
+               rval = txn_checkpoint(dbenv->tx_info, kbytes, minutes);
+               if (rval < 0)
+                       break;
+
+               while (rval > 0) {
+                       if (verbose)
+                               __db_err(dbenv,
+                                   "checkpoint did not finish, retrying");
+                       (void)__db_sleep(2, 0);
+                       rval = txn_checkpoint(dbenv->tx_info, 0, 0);
+               }
+               if (rval < 0)
+                       break;
+       }
+
+       /* Remove the -L file again. */
+       if (logfile != NULL && logpid(logfile, 0))
+               rval = 1;
+
+       /* Re-deliver the recorded signal with its default action. */
+       if (interrupted) {
+               (void)signal(interrupted, SIG_DFL);
+               (void)raise(interrupted);
+               /* NOTREACHED */
+       }
+
+       return (db_appexit(dbenv) || rval ? 1 : 0);
+}
+
+/*
+ * db_init --
+ *     Initialize the environment: allocate a zeroed DB_ENV that reports
+ *     errors on stderr prefixed with the program name, open home with
+ *     DB_INIT_LOG, DB_INIT_TXN, DB_INIT_MPOOL and DB_USE_ENVIRON, register
+ *     the btree and hash pgin/pgout functions with memp_register, and
+ *     install the signal handlers.  Failures are reported through
+ *     err(1, ...) or errx(1, ...).
+ */
+DB_ENV *
+db_init(home)
+       char *home;
+{
+       DB_ENV *dbenv;
+
+       if ((dbenv = (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+       dbenv->db_errfile = stderr;
+       dbenv->db_errpfx = progname;
+
+       if ((errno = db_appinit(home, NULL, dbenv,
+          DB_INIT_LOG | DB_INIT_TXN | DB_INIT_MPOOL | DB_USE_ENVIRON)) != 0)
+               err(1, "db_appinit");
+
+       if (memp_register(dbenv->mp_info,
+           DB_FTYPE_BTREE, __bam_pgin, __bam_pgout) ||
+           memp_register(dbenv->mp_info,
+           DB_FTYPE_HASH, __ham_pgin, __ham_pgout)) {
+               (void)db_appexit(dbenv);
+               errx(1,
+                   "db_appinit: failed to register access method functions");
+       }
+
+       siginit();
+
+       return (dbenv);
+}
+
+/*
+ * logpid --
+ *     Log that we're running.
+ *
+ *     If is_open is non-zero, open fname for writing and store a line
+ *     holding the program name, process ID and current time; otherwise
+ *     remove fname.  Returns 1 if the file cannot be opened and 0 in all
+ *     other cases (the results of fprintf, fclose and remove are not
+ *     checked).
+ */
+int
+logpid(fname, is_open)
+       char *fname;
+       int is_open;
+{
+       FILE *fp;
+       time_t now;
+
+       if (is_open) {
+               if ((fp = fopen(fname, "w")) == NULL) {
+                       warn("%s", fname);
+                       return (1);
+               }
+               (void)time(&now);
+               fprintf(fp,
+                   "%s: %lu %s", progname, (u_long)getpid(), ctime(&now));
+               fclose(fp);
+       } else
+               (void)remove(fname);
+       return (0);
+}
+
+/*
+ * siginit --
+ *     Initialize the set of signals for which we want to clean up.
+ *     Generally, we try not to leave the shared regions locked if
+ *     we can.
+ *
+ *     SIGHUP and SIGKILL are only registered where the macros exist.
+ *     NOTE(review): POSIX does not permit catching SIGKILL, so that
+ *     signal() call is expected to fail (its result is discarded).
+ */
+void
+siginit()
+{
+#ifdef SIGHUP
+       (void)signal(SIGHUP, onint);
+#endif
+       (void)signal(SIGINT, onint);
+#ifdef SIGKILL
+       (void)signal(SIGKILL, onint);
+#endif
+       (void)signal(SIGTERM, onint);
+}
+
+/*
+ * onint --
+ *     Interrupt signal handler: record the signal number in the global
+ *     "interrupted", substituting SIGINT if signo is 0.  The main loop
+ *     stops once "interrupted" is non-zero.
+ */
+void
+onint(signo)
+       int signo;
+{
+       if ((interrupted = signo) == 0)
+               interrupted = SIGINT;
+}
+
+void
+usage()                                /* Print the synopsis on stderr, then exit. */
+{
+       (void)fprintf(stderr,
+    "usage: db_checkpoint [-v] [-h home] [-k kbytes] [-L file] [-p min]\n");
+       exit(1);                        /* Exit status 1: usage error. */
+}
diff --git a/db2/progs/db_deadlock/db_deadlock.c b/db2/progs/db_deadlock/db_deadlock.c
new file mode 100644 (file)
index 0000000..9437e35
--- /dev/null
@@ -0,0 +1,236 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_deadlock.c        10.13 (Sleepycat) 7/20/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "clib_ext.h"
+#include "common_ext.h"
+
+#define        BAD_KILLID      0xffffffff
+
+DB_ENV *db_init __P((char *, int));
+void    onint __P((int));
+void    siginit __P((void));
+void    usage __P((void));
+int     logpid __P((char *, int));
+int     main __P((int, char *[]));
+
+int     interrupted;
+const char *progname = "db_deadlock";                  /* Program name. */
+
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB_ENV *dbenv;
+       u_int32_t atype;
+       time_t now;
+       long seconds;
+       int ch, flags, verbose;
+       char *home, *logfile;
+
+       atype = DB_LOCK_DEFAULT;
+       home = logfile = NULL;
+       seconds = 0;
+       flags = verbose = 0;
+       while ((ch = getopt(argc, argv, "a:h:L:t:vw")) != EOF)
+               switch (ch) {
+               case 'a':
+                       /* Only the single letters "o" and "y" are accepted. */
+                       switch (optarg[0]) {
+                       case 'o':
+                               atype = DB_LOCK_OLDEST;
+                               break;
+                       case 'y':
+                               atype = DB_LOCK_YOUNGEST;
+                               break;
+                       default:
+                               usage();
+                               /* NOTREACHED */
+                       }
+                       if (optarg[1] != '\0')
+                               usage();
+                       break;
+               case 'h':
+                       home = optarg;
+                       break;
+               case 'L':
+                       logfile = optarg;
+                       break;
+               case 't':
+                       get_long(optarg, 1, LONG_MAX, &seconds);
+                       break;
+               case 'v':
+                       verbose = 1;
+                       break;
+               case 'w':
+                       LF_SET(DB_LOCK_CONFLICT);
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       if (argc != 0)                  /* No operands are accepted. */
+               usage();
+
+       if (seconds == 0 && !LF_ISSET(DB_LOCK_CONFLICT)) {
+               warnx("at least one of -t and -w must be specified");
+               usage();
+       }
+
+       /*
+        * We detect every second when we're running in DB_LOCK_CONFLICT mode
+        * and no -t interval was given.
+        */
+       if (seconds == 0)
+               seconds = 1;
+
+       /* Initialize the deadlock detector by opening the lock manager. */
+       dbenv = db_init(home, verbose);
+
+       /* Record that we are running in the -L file, if one was given. */
+       if (logfile != NULL && logpid(logfile, 1)) {
+               (void)db_appexit(dbenv);
+               return (1);
+       }
+
+       while (!interrupted) {
+               if (dbenv->db_verbose != 0) {
+                       time(&now);
+                       __db_err(dbenv, "Running at %s", ctime(&now));
+               }
+
+               /* Stop on the first detector error; errno holds its code. */
+               if ((errno = lock_detect(dbenv->lk_info, flags, atype)) != 0)
+                       break;
+
+               /* Make a pass every "seconds" seconds. */
+               (void)__db_sleep(seconds, 0);
+       }
+
+       /* Remove the -L file again. */
+       if (logfile != NULL)
+               (void)logpid(logfile, 0);
+
+       /* Re-deliver the recorded signal with its default action. */
+       if (interrupted) {
+               (void)signal(interrupted, SIG_DFL);
+               (void)raise(interrupted);
+               /* NOTREACHED */
+       }
+
+       return (db_appexit(dbenv));
+}
+
+DB_ENV *
+db_init(home, verbose)                 /* Initialize the environment. */
+       char *home;
+       int verbose;
+{
+       DB_ENV *dbenv;
+
+       if ((dbenv = (DB_ENV *)calloc(sizeof(DB_ENV), 1)) == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+       dbenv->db_errfile = stderr;     /* Errors go to stderr ... */
+       dbenv->db_errpfx = progname;    /* ... prefixed with the program name. */
+       dbenv->db_verbose = verbose;
+
+       /* Only the lock subsystem is requested. */
+       if ((errno = db_appinit(home,
+           NULL, dbenv, DB_INIT_LOCK | DB_USE_ENVIRON)) != 0)
+               err(1, "db_appinit");
+
+       siginit();
+
+       return (dbenv);
+}
+
+/*
+ * logpid --
+ *     Log that we're running.
+ *
+ *     If is_open is non-zero, open fname for writing and store a line
+ *     holding the program name, process ID and current time; otherwise
+ *     remove fname.  Returns 1 if the file cannot be opened and 0 in all
+ *     other cases (the results of fprintf, fclose and remove are not
+ *     checked).
+ */
+int
+logpid(fname, is_open)
+       char *fname;
+       int is_open;
+{
+       FILE *fp;
+       time_t now;
+
+       if (is_open) {
+               if ((fp = fopen(fname, "w")) == NULL) {
+                       warn("%s", fname);
+                       return (1);
+               }
+               (void)time(&now);
+               fprintf(fp,
+                   "%s: %lu %s", progname, (u_long)getpid(), ctime(&now));
+               fclose(fp);
+       } else
+               (void)remove(fname);
+       return (0);
+}
+
+/*
+ * siginit --
+ *     Initialize the set of signals for which we want to clean up.
+ *     Generally, we try not to leave the shared regions locked if
+ *     we can.
+ *
+ *     SIGHUP and SIGKILL are only registered where the macros exist.
+ *     NOTE(review): POSIX does not permit catching SIGKILL, so that
+ *     signal() call is expected to fail (its result is discarded).
+ */
+void
+siginit()
+{
+#ifdef SIGHUP
+       (void)signal(SIGHUP, onint);
+#endif
+       (void)signal(SIGINT, onint);
+#ifdef SIGKILL
+       (void)signal(SIGKILL, onint);
+#endif
+       (void)signal(SIGTERM, onint);
+}
+
+/*
+ * onint --
+ *     Interrupt signal handler: record the signal number in the global
+ *     "interrupted", substituting SIGINT if signo is 0.  The main loop
+ *     stops once "interrupted" is non-zero.
+ */
+void
+onint(signo)
+       int signo;
+{
+       if ((interrupted = signo) == 0)
+               interrupted = SIGINT;
+}
+
+void
+usage()                                /* Print the synopsis on stderr, then exit. */
+{
+       /* -a takes only "o" or "y": main() rejects every other letter. */
+       (void)fprintf(stderr,
+    "usage: db_deadlock [-vw] [-a o | y] [-h home] [-L file] [-t sec]\n");
+       exit(1);                        /* Exit status 1: usage error. */
+}
diff --git a/db2/progs/db_dump/db_dump.c b/db2/progs/db_dump/db_dump.c
new file mode 100644 (file)
index 0000000..d60aa9b
--- /dev/null
@@ -0,0 +1,280 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_dump.c    10.13 (Sleepycat) 8/19/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "btree.h"
+#include "hash.h"
+#include "clib_ext.h"
+
+void   configure __P((char *));
+DB_ENV *db_init __P((char *));
+void   dbt_dump __P((DBT *));
+void   dbt_print __P((DBT *));
+void   pheader __P((DB *, int));
+void   usage __P((void));
+int    main __P((int, char *[]));
+
+const char *progname = "db_dump";              /* Program name. */
+
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB *db;
+       DBC *cursor;
+       DBT key, data;
+       DB_ENV *env;
+       void (*emit) __P((DBT *));
+       int opt, dflag, pflag;
+       char *home;
+
+       home = NULL;
+       dflag = 0;
+       pflag = 0;
+       while ((opt = getopt(argc, argv, "df:h:p")) != EOF) {
+               if (opt == 'd')
+                       dflag = 1;
+               else if (opt == 'f') {
+                       /* Send everything written to stdout to the file. */
+                       if (freopen(optarg, "w", stdout) == NULL)
+                               err(1, "%s", optarg);
+               } else if (opt == 'h')
+                       home = optarg;
+               else if (opt == 'p')
+                       pflag = 1;
+               else
+                       usage();
+       }
+       argc -= optind;
+       argv += optind;
+
+       /* Exactly one database file must remain on the command line. */
+       if (argc != 1)
+               usage();
+
+       /* -d (the __db_dump() path) excludes both -h and -p. */
+       if (dflag && home != NULL)
+               errx(1, "the -d and -h options may not both be specified");
+       if (dflag && pflag)
+               errx(1, "the -d and -p options may not both be specified");
+
+       /* Initialize the environment, unless -d was given. */
+       if (dflag)
+               env = NULL;
+       else
+               env = db_init(home);
+
+       /* Open the DB file read-only, passing DB_UNKNOWN as its type. */
+       errno = db_open(argv[0], DB_UNKNOWN, DB_RDONLY, 0, env, NULL, &db);
+       if (errno != 0)
+               err(1, "%s", argv[0]);
+
+       /* DB dump: hand the handle to __db_dump(), close, and finish. */
+       if (dflag) {
+               (void)__db_dump(db, NULL, 1);
+               errno = db->close(db, 0);
+               if (errno != 0)
+                       err(1, "close");
+               exit (0);
+       }
+
+       /* Get a cursor for stepping through the database. */
+       errno = db->cursor(db, NULL, &cursor);
+       if (errno != 0) {
+               (void)db->close(db, 0);
+               err(1, "cursor");
+       }
+
+       /* Print out the header. */
+       pheader(db, pflag);
+
+       /* Choose the item writer once; the cursor walk is the same. */
+       emit = pflag ? dbt_print : dbt_dump;
+
+       /* Print out the key/data pairs. */
+       memset(&key, 0, sizeof(key));
+       memset(&data, 0, sizeof(data));
+       for (;;) {
+               errno = cursor->c_get(cursor, &key, &data, DB_NEXT);
+               if (errno != 0)
+                       break;
+               /* Recno databases are written without their keys. */
+               if (db->type != DB_RECNO)
+                       emit(&key);
+               emit(&data);
+       }
+       /* Only DB_NOTFOUND is an acceptable way for the walk to end. */
+       if (errno != DB_NOTFOUND)
+               err(1, "cursor get");
+
+       errno = db->close(db, 0);
+       if (errno != 0)
+               err(1, "close");
+       return (0);
+}
+
+/*
+ * db_init --
+ *     Initialize the environment.
+ */
+DB_ENV *
+db_init(home)
+       char *home;
+{
+       DB_ENV *env;
+
+       /* calloc hands back a zero-filled structure. */
+       env = (DB_ENV *)calloc(sizeof(DB_ENV), 1);
+       if (env == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+
+       /* Library diagnostics go to stderr, prefixed with our name. */
+       env->db_errfile = stderr;
+       env->db_errpfx = progname;
+
+       /* db_appinit() returns its error code; park it in errno for err(). */
+       errno = db_appinit(home, NULL, env, DB_CREATE | DB_USE_ENVIRON);
+       if (errno != 0)
+               err(1, "db_appinit");
+       return (env);
+}
+
+/*
+ * pheader --
+ *     Write out the header information.
+ */
+void
+pheader(dbp, pflag)
+       DB *dbp;
+       int pflag;
+{
+       DB_BTREE_STAT *btsp;
+       HASHHDR *hdr;
+       db_pgno_t pgno;
+
+       /* The format line tells the reader how the items are encoded. */
+       printf("format=%s\n", pflag ? "print" : "bytevalue");
+       switch (dbp->type) {
+       case DB_BTREE:
+               printf("type=btree\n");
+               if ((errno = dbp->stat(dbp, &btsp, NULL, 0)) != 0)
+                       err(1, "dbp->stat");
+               if (F_ISSET(dbp, DB_BT_RECNUM))
+                       printf("recnum=1\n");
+               if (btsp->bt_maxkey != 0)
+                       printf("bt_maxkey=%lu\n", (u_long)btsp->bt_maxkey);
+               if (btsp->bt_minkey != 0)
+                       printf("bt_minkey=%lu\n", (u_long)btsp->bt_minkey);
+               break;
+       case DB_HASH:
+               printf("type=hash\n");
+               /*
+                * The hash parameters are read from the metadata page.  If
+                * the page cannot be fetched they are silently left out.
+                */
+               pgno = PGNO_METADATA;
+               if (memp_fget(dbp->mpf, &pgno, 0, &hdr) == 0) {
+                       if (hdr->ffactor != 0)
+                               printf("h_ffactor=%lu\n", (u_long)hdr->ffactor);
+                       if (hdr->nelem != 0)
+                               printf("h_nelem=%lu\n", (u_long)hdr->nelem);
+                       (void)memp_fput(dbp->mpf, hdr, 0);
+               }
+               break;
+       case DB_RECNO:
+               printf("type=recno\n");
+               if (F_ISSET(dbp, DB_RE_RENUMBER))
+                       printf("renumber=1\n");
+               /*
+                * re_len and re_pad come out of the statistics structure,
+                * so it has to be fetched before btsp is dereferenced.
+                */
+               if (F_ISSET(dbp, DB_RE_FIXEDLEN) ||
+                   F_ISSET(dbp, DB_RE_PAD)) {
+                       if ((errno = dbp->stat(dbp, &btsp, NULL, 0)) != 0)
+                               err(1, "dbp->stat");
+                       if (F_ISSET(dbp, DB_RE_FIXEDLEN))
+                               printf("re_len=%lu\n",
+                                   (u_long)btsp->bt_re_len);
+                       if (F_ISSET(dbp, DB_RE_PAD))
+                               printf("re_pad=%#x\n", btsp->bt_re_pad);
+               }
+               break;
+       case DB_UNKNOWN:
+               abort();
+               /* NOTREACHED */
+       }
+
+       if (F_ISSET(dbp, DB_AM_DUP))
+               printf("duplicates=1\n");
+
+       if (dbp->dbenv->db_lorder != 0)
+               printf("db_lorder=%lu\n", (u_long)dbp->dbenv->db_lorder);
+
+       /* The page size is written only when DB_AM_PGDEF is not set. */
+       if (!F_ISSET(dbp, DB_AM_PGDEF))
+               printf("db_pagesize=%lu\n", (u_long)dbp->pgsize);
+
+       /* Marks the end of the header for whoever reads this output. */
+       printf("HEADER=END\n");
+}
+
+static char hex[] = "0123456789abcdef";
+
+/*
+ * dbt_dump --
+ *     Write out a key or data item using byte values.
+ */
+void
+dbt_dump(dbtp)
+       DBT *dbtp;
+{
+       u_int32_t left;
+       u_int8_t *cur, byte;
+
+       /* Each byte becomes two lowercase hex digits, high nibble first. */
+       cur = dbtp->data;
+       for (left = dbtp->size; left > 0; --left) {
+               byte = *cur++;
+               (void)printf("%c%c", hex[byte >> 4], hex[byte & 0x0f]);
+       }
+       printf("\n");
+}
+
+/*
+ * dbt_print --
+ *     Write out a key or data item using printable characters.
+ */
+void
+dbt_print(dbtp)
+       DBT *dbtp;
+{
+       u_int32_t left;
+       u_int8_t *cur, byte;
+
+       cur = dbtp->data;
+       for (left = dbtp->size; left > 0; --left) {
+               byte = *cur++;
+               if (!isprint(byte)) {
+                       /* Unprintable: a backslash and two hex digits. */
+                       (void)printf("\\%c%c",
+                           hex[byte >> 4], hex[byte & 0x0f]);
+                       continue;
+               }
+               /* A literal backslash is written doubled. */
+               if (byte == '\\')
+                       (void)printf("\\");
+               (void)printf("%c", byte);
+       }
+       printf("\n");
+}
+
+/*
+ * usage --
+ *     Display the usage message.
+ */
+void
+usage()
+{
+       static const char synopsis[] =
+           "usage: db_dump [-dp] [-f file] [-h home] db_file\n";
+
+       /* Print the synopsis on stderr and exit with a failure status. */
+       (void)fputs(synopsis, stderr);
+       exit(1);
+}
diff --git a/db2/progs/db_dump185/db_dump185.c b/db2/progs/db_dump185/db_dump185.c
new file mode 100644 (file)
index 0000000..f3c1187
--- /dev/null
@@ -0,0 +1,322 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_dump185.c 10.5 (Sleepycat) 7/2/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#endif
+
+#include "db_185.h"
+#include "clib_ext.h"
+
+/* Hash Table Information */
+/*
+ * Local declaration of a hash header.  db_185_hash() reaches it through
+ * dbp->internal and reads ffactor, lorder and bsize.
+ * NOTE(review): presumably this has to match the DB 1.85 library's own
+ * private layout field for field -- confirm before reordering anything.
+ */
+typedef struct hashhdr {               /* Disk resident portion */
+       int             magic;          /* Magic NO for hash tables */
+       int             version;        /* Version ID */
+       u_int32_t       lorder;         /* Byte Order */
+       int             bsize;          /* Bucket/Page Size */
+       int             bshift;         /* Bucket shift */
+       int             dsize;          /* Directory Size */
+       int             ssize;          /* Segment Size */
+       int             sshift;         /* Segment shift */
+       int             ovfl_point;     /* Where overflow pages are being
+                                        * allocated */
+       int             last_freed;     /* Last overflow page freed */
+       int             max_bucket;     /* ID of Maximum bucket in use */
+       int             high_mask;      /* Mask to modulo into entire table */
+       int             low_mask;       /* Mask to modulo into lower half of
+                                        * table */
+       int             ffactor;        /* Fill factor */
+       int             nkeys;          /* Number of keys in hash table */
+} HASHHDR;
+
+/*
+ * db_185_hash() assigns dbp->internal to a pointer of this type and only
+ * ever looks at the leading header, so nothing after it is declared here.
+ */
+typedef struct htab     {              /* Memory resident data structure */
+       HASHHDR         hdr;            /* Header */
+} HTAB;
+
+/* A page number paired with an index on that page. */
+typedef struct _epgno {
+       u_int32_t pgno;                 /* the page number */
+       u_int16_t index;                /* the index on the page */
+} EPGNO;
+
+/* A page pointer paired with an index on that page. */
+typedef struct _epg {
+       void    *page;                  /* the (pinned) page */
+       u_int16_t index;                /* the index on the page */
+} EPG;
+
+/*
+ * Cursor state.  The CURS_* values are the bits kept in the flags member.
+ * NOTE(review): the B:/R:/RB: tags in the comments presumably mean
+ * btree-only, recno-only and shared -- confirm against the 1.85 sources.
+ */
+typedef struct _cursor {
+       EPGNO    pg;                    /* B: Saved tree reference. */
+       DBT      key;                   /* B: Saved key, or key.data == NULL. */
+       u_int32_t rcursor;              /* R: recno cursor (1-based) */
+
+#define        CURS_ACQUIRE    0x01            /*  B: Cursor needs to be reacquired. */
+#define        CURS_AFTER      0x02            /*  B: Unreturned cursor after key. */
+#define        CURS_BEFORE     0x04            /*  B: Unreturned cursor before key. */
+#define        CURS_INIT       0x08            /* RB: Cursor initialized. */
+       u_int8_t flags;
+} CURSOR;
+
+/* The in-memory btree/recno data structure. */
+/*
+ * db_185_btree() assigns dbp->internal to a pointer of this type and reads
+ * bt_lorder, bt_psize and flags (testing B_NODUPS) from it.
+ * NOTE(review): presumably this has to match the DB 1.85 library's own
+ * private layout field for field -- confirm before reordering anything.
+ */
+typedef struct _btree {
+       void     *bt_mp;                /* memory pool cookie */
+
+       void     *bt_dbp;               /* pointer to enclosing DB */
+
+       EPG       bt_cur;               /* current (pinned) page */
+       void     *bt_pinned;            /* page pinned across calls */
+
+       CURSOR    bt_cursor;            /* cursor */
+
+       EPGNO     bt_stack[50];         /* stack of parent pages */
+       EPGNO    *bt_sp;                /* current stack pointer */
+
+       DBT       bt_rkey;              /* returned key */
+       DBT       bt_rdata;             /* returned data */
+
+       int       bt_fd;                /* tree file descriptor */
+
+       u_int32_t bt_free;              /* next free page */
+       u_int32_t bt_psize;             /* page size */
+       u_int16_t bt_ovflsize;          /* cut-off for key/data overflow */
+       int       bt_lorder;            /* byte order */
+                                       /* sorted order */
+       enum { NOT, BACK, FORWARD } bt_order;
+       EPGNO     bt_last;              /* last insert */
+
+                                       /* B: key comparison function */
+       int     (*bt_cmp) __P((const DBT *, const DBT *));
+                                       /* B: prefix comparison function */
+       size_t  (*bt_pfx) __P((const DBT *, const DBT *));
+                                       /* R: recno input function */
+       int     (*bt_irec) __P((struct _btree *, u_int32_t));
+
+       FILE     *bt_rfp;               /* R: record FILE pointer */
+       int       bt_rfd;               /* R: record file descriptor */
+
+       void     *bt_cmap;              /* R: current point in mapped space */
+       void     *bt_smap;              /* R: start of mapped space */
+       void     *bt_emap;              /* R: end of mapped space */
+       size_t    bt_msize;             /* R: size of mapped region. */
+
+       u_int32_t bt_nrecs;             /* R: number of records */
+       size_t    bt_reclen;            /* R: fixed record length */
+       u_char    bt_bval;              /* R: delimiting byte/pad character */
+
+/*
+ * NB:
+ * B_NODUPS and R_RECNO are stored on disk, and may not be changed.
+ */
+#define        B_INMEM         0x00001         /* in-memory tree */
+#define        B_METADIRTY     0x00002         /* need to write metadata */
+#define        B_MODIFIED      0x00004         /* tree modified */
+#define        B_NEEDSWAP      0x00008         /* if byte order requires swapping */
+#define        B_RDONLY        0x00010         /* read-only tree */
+
+#define        B_NODUPS        0x00020         /* no duplicate keys permitted */
+#define        R_RECNO         0x00080         /* record oriented tree */
+
+#define        R_CLOSEFP       0x00040         /* opened a file pointer */
+#define        R_EOF           0x00100         /* end of input file reached. */
+#define        R_FIXLEN        0x00200         /* fixed length records */
+#define        R_MEMMAPPED     0x00400         /* memory mapped file. */
+#define        R_INMEM         0x00800         /* in-memory file */
+#define        R_MODIFIED      0x01000         /* modified file */
+#define        R_RDONLY        0x02000         /* read-only file */
+
+#define        B_DB_LOCK       0x04000         /* DB_LOCK specified. */
+#define        B_DB_SHMEM      0x08000         /* DB_SHMEM specified. */
+#define        B_DB_TXN        0x10000         /* DB_TXN specified. */
+       u_int32_t flags;
+} BTREE;
+
+void db_185_btree __P((DB *, int));
+void db_185_hash __P((DB *, int));
+void dbt_dump __P((DBT *));
+void dbt_print __P((DBT *));
+void usage __P((void));
+int main __P((int, char *[]));
+
+const char *progname = "db_dump185";           /* Program name. */
+
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB *dbp;
+       DBT key, data;
+       int ch, pflag, rval;
+
+       pflag = 0;
+       while ((ch = getopt(argc, argv, "f:p")) != EOF)
+               switch (ch) {
+               case 'f':
+                       /* Send everything written to stdout to the file. */
+                       if (freopen(optarg, "w", stdout) == NULL)
+                               err(1, "%s", optarg);
+                       break;
+               case 'p':
+                       pflag = 1;
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       /* Exactly one database file must remain on the command line. */
+       if (argc != 1)
+               usage();
+
+       /*
+        * Try the file as a btree first and fall back to hash.  If neither
+        * open succeeds, report the failure rather than exiting silently.
+        */
+       if ((dbp = dbopen(argv[0], O_RDONLY, 0, DB_BTREE, NULL)) == NULL) {
+               if ((dbp = dbopen(argv[0], O_RDONLY, 0, DB_HASH, NULL)) == NULL)
+                       err(1, "%s", argv[0]);
+               db_185_hash(dbp, pflag);
+       } else
+               db_185_btree(dbp, pflag);
+
+       /*
+        * !!!
+        * DB 1.85 DBTs are a subset of DB 2.0 DBTs, so we just use the
+        * new dump/print routines.
+        */
+       if (pflag)
+               while (!(rval = dbp->seq(dbp, &key, &data, R_NEXT))) {
+                       dbt_print(&key);
+                       dbt_print(&data);
+               }
+       else
+               while (!(rval = dbp->seq(dbp, &key, &data, R_NEXT))) {
+                       dbt_dump(&key);
+                       dbt_dump(&data);
+               }
+
+       /* Of the nonzero results that end the walk, only -1 is an error. */
+       if (rval == -1)
+               err(1, "seq");
+       return (0);
+}
+
+/*
+ * db_185_hash --
+ *     Dump out hash header information.
+ */
+void
+db_185_hash(dbp, pflag)
+       DB *dbp;
+       int pflag;
+{
+       HTAB *table;
+       HASHHDR *hdr;
+
+       /* View the handle's private data through the local HTAB layout. */
+       table = dbp->internal;
+       hdr = &table->hdr;
+
+       printf("format=%s\n", pflag ? "print" : "bytevalue");
+       printf("type=hash\n");
+       printf("h_ffactor=%lu\n", (u_long)hdr->ffactor);
+#ifdef NOT_AVAILABLE_IN_DB_185
+       printf("h_nelem=%lu\n", (u_long)hdr->nelem);
+#endif
+       /* A zero byte order is left out of the header. */
+       if (hdr->lorder != 0)
+               printf("db_lorder=%lu\n", (u_long)hdr->lorder);
+       printf("db_pagesize=%lu\n", (u_long)hdr->bsize);
+       printf("HEADER=END\n");
+}
+
+/*
+ * db_185_btree --
+ *     Dump out btree header information.
+ */
+void
+db_185_btree(dbp, pflag)
+       DB *dbp;
+       int pflag;
+{
+       BTREE *tree;
+       int allows_dups;
+
+       /* View the handle's private data through the local BTREE layout. */
+       tree = dbp->internal;
+       allows_dups = (tree->flags & B_NODUPS) == 0;
+
+       printf("format=%s\n", pflag ? "print" : "bytevalue");
+       printf("type=btree\n");
+#ifdef NOT_AVAILABLE_IN_185
+       printf("bt_minkey=%lu\n", (u_long)XXX);
+       printf("bt_maxkey=%lu\n", (u_long)XXX);
+#endif
+       /* A zero byte order is left out of the header. */
+       if (tree->bt_lorder != 0)
+               printf("db_lorder=%lu\n", (u_long)tree->bt_lorder);
+       printf("db_pagesize=%lu\n", (u_long)tree->bt_psize);
+       if (allows_dups)
+               printf("duplicates=1\n");
+       printf("HEADER=END\n");
+}
+
+static char hex[] = "0123456789abcdef";
+
+/*
+ * dbt_dump --
+ *     Write out a key or data item using byte values.
+ */
+void
+dbt_dump(dbtp)
+       DBT *dbtp;
+{
+       size_t left;
+       u_int8_t *cur, byte;
+
+       /* Each byte becomes two lowercase hex digits, high nibble first. */
+       cur = dbtp->data;
+       for (left = dbtp->size; left > 0; --left) {
+               byte = *cur++;
+               (void)printf("%c%c", hex[byte >> 4], hex[byte & 0x0f]);
+       }
+       printf("\n");
+}
+
+/*
+ * dbt_print --
+ *     Write out a key or data item using printable characters.
+ */
+void
+dbt_print(dbtp)
+       DBT *dbtp;
+{
+       size_t left;
+       u_int8_t *cur, byte;
+
+       cur = dbtp->data;
+       for (left = dbtp->size; left > 0; --left) {
+               byte = *cur++;
+               if (!isprint(byte)) {
+                       /* Unprintable: a backslash and two hex digits. */
+                       (void)printf("\\%c%c",
+                           hex[byte >> 4], hex[byte & 0x0f]);
+                       continue;
+               }
+               /* A literal backslash is written doubled. */
+               if (byte == '\\')
+                       (void)printf("\\");
+               (void)printf("%c", byte);
+       }
+       printf("\n");
+}
+
+/*
+ * usage --
+ *     Display the usage message.
+ */
+void
+usage()
+{
+       /* Name this program, db_dump185, in the synopsis; then fail. */
+       (void)fprintf(stderr, "usage: db_dump185 [-p] [-f file] db_file\n");
+       exit(1);
+}
diff --git a/db2/progs/db_load/db_load.c b/db2/progs/db_load/db_load.c
new file mode 100644 (file)
index 0000000..cc90e7b
--- /dev/null
@@ -0,0 +1,457 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_load.c    10.9 (Sleepycat) 8/19/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "clib_ext.h"
+
+void   badnum __P((void));
+void   configure __P((DB_INFO *, char **));
+DB_ENV *db_init __P((char *));
+int    dbt_rdump __P((DBT *));
+int    dbt_rprint __P((DBT *));
+int    digitize __P((int));
+void   rheader __P((DBTYPE *, int *, DB_INFO *));
+void   usage __P((void));
+int    main __P((int, char *[]));
+
+const char *progname = "db_load";              /* Program name. */
+
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB *dbp;
+       DBT key, data;
+       DBTYPE argtype, headertype;
+       DB_ENV *dbenv;
+       DB_INFO dbinfo;
+       db_recno_t recno;
+       int ch, pflag;
+       char **clist, **clp, *home;
+
+       /*
+        * Allocate enough room for configuration arguments: there can be
+        * no more -c options than argv entries, and calloc's zero fill
+        * provides the NULL that ends the list for configure().
+        */
+       if ((clp = clist = calloc(argc + 1, sizeof(char *))) == NULL)
+               err(1, NULL);
+
+       home = NULL;
+       argtype = DB_UNKNOWN;
+       while ((ch = getopt(argc, argv, "c:f:h:t:")) != EOF)
+               switch (ch) {
+               case 'c':
+                       /* Saved now, applied after the header is read. */
+                       *clp++ = optarg;
+                       break;
+               case 'f':
+                       /* Take the input from the file instead of stdin. */
+                       if (freopen(optarg, "r", stdin) == NULL)
+                               err(1, "%s", optarg);
+                       break;
+               case 'h':
+                       home = optarg;
+                       break;
+               case 't':
+                       if (strcmp(optarg, "btree") == 0) {
+                               argtype = DB_BTREE;
+                               break;
+                       }
+                       if (strcmp(optarg, "hash") == 0) {
+                               argtype = DB_HASH;
+                               break;
+                       }
+                       usage();
+                       /* NOTREACHED */
+               case '?':
+               default:
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       /* Exactly one database file must remain on the command line. */
+       if (argc != 1)
+               usage();
+
+       /* Initialize the environment. */
+       dbenv = db_init(home);
+       memset(&dbinfo, 0, sizeof(DB_INFO));
+
+       /* Read the header. */
+       rheader(&headertype, &pflag, &dbinfo);
+
+       /* Apply command-line configuration changes. */
+       configure(&dbinfo, clist);
+
+       /* Conversion to/from recno is prohibited. */
+       if (argtype != DB_UNKNOWN) {
+               if (headertype == DB_RECNO)
+                       errx(1, "databases of type recno may not be converted");
+               headertype = argtype;
+       }
+
+       /* Open the DB file. */
+       if ((errno = db_open(argv[0], headertype, DB_CREATE | DB_TRUNCATE,
+           S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH,
+           dbenv, &dbinfo, &dbp)) != 0)
+               err(1, "%s", argv[0]);
+
+       /* Initialize the key/data pair. */
+       memset(&key, 0, sizeof(DBT));
+       if ((key.data = (void *)malloc(key.ulen = 1024)) == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+       memset(&data, 0, sizeof(DBT));
+       if ((data.data = (void *)malloc(data.ulen = 1024)) == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+
+       /* Get each key/data pair and add them to the database. */
+       if (headertype == DB_RECNO) {
+               /* Recno input has data lines only; the key is a counter. */
+               key.data = &recno;
+               key.size = sizeof(recno);
+               for (recno = 1;; ++recno) {
+                       if (pflag ? dbt_rprint(&data) : dbt_rdump(&data))
+                               break;
+                       if ((errno = dbp->put(dbp, NULL, &key, &data, 0)) != 0)
+                               err(1, "%s", argv[0]);
+               }
+       } else
+               for (;;) {
+                       if (pflag ? dbt_rprint(&key) : dbt_rdump(&key))
+                               break;
+                       /*
+                        * A key with no data line after it is a problem
+                        * with the input, not a system error, so errx()
+                        * is used to keep strerror(errno) out of it.
+                        */
+                       if (pflag ? dbt_rprint(&data) : dbt_rdump(&data))
+                               errx(1, "odd number of key/data pairs");
+                       if ((errno = dbp->put(dbp, NULL, &key, &data, 0)) != 0)
+                               err(1, "%s", argv[0]);
+               }
+
+       if ((errno = dbp->close(dbp, 0)) != 0)
+               err(1, "%s", argv[0]);
+       return (0);
+}
+
+/*
+ * db_init --
+ *     Initialize the environment.
+ */
+DB_ENV *
+db_init(home)
+       char *home;
+{
+       DB_ENV *env;
+
+       /* calloc hands back a zero-filled structure. */
+       env = (DB_ENV *)calloc(sizeof(DB_ENV), 1);
+       if (env == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+
+       /* Library diagnostics go to stderr, prefixed with our name. */
+       env->db_errfile = stderr;
+       env->db_errpfx = progname;
+
+       /* db_appinit() returns its error code; park it in errno for err(). */
+       errno = db_appinit(home, NULL, env, DB_CREATE | DB_USE_ENVIRON);
+       if (errno != 0)
+               err(1, "db_appinit");
+       return (env);
+}
+
+/*
+ * Keyword handlers shared by configure() and rheader().  Each one compares
+ * name with keyword and, on a match, stores the setting and executes
+ * "continue" -- so they may only be expanded inside a loop, and falling
+ * through all of them means the keyword was not recognized.  They rely on
+ * the expanding function having a DB_INFO pointer named dbinfop; NUMBER
+ * also needs a long named val.
+ *
+ *     FLAG    value starts with '1' (set flag) or '0' (clear flag);
+ *             anything else goes to badnum().
+ *     NUMBER  value is converted by get_long() with bounds 1..LONG_MAX
+ *             and stored in field; flag, if nonzero, is also set.
+ *     STRING  only the first character of value is stored in field;
+ *             flag, if nonzero, is also set.
+ */
+#define        FLAG(name, value, keyword, flag)                                \
+       if (strcmp(name, keyword) == 0) {                               \
+               switch (*value) {                                       \
+               case '1':                                               \
+                       dbinfop->flags |= (flag);                       \
+                       break;                                          \
+               case '0':                                               \
+                       dbinfop->flags &= ~(flag);                      \
+                       break;                                          \
+               default:                                                \
+                       badnum();                                       \
+                       /* NOTREACHED */                                \
+               }                                                       \
+               continue;                                               \
+       }
+#define        NUMBER(name, value, keyword, field, flag)                       \
+       if (strcmp(name, keyword) == 0) {                               \
+               get_long(value, 1, LONG_MAX, &val);                     \
+               dbinfop->field = val;                                   \
+               if (flag != 0)                                          \
+                       dbinfop->flags |= (flag);                       \
+               continue;                                               \
+       }
+#define        STRING(name, value, keyword, field, flag)                       \
+       if (strcmp(name, keyword) == 0) {                               \
+               dbinfop->field = value[0];                              \
+               if (flag != 0)                                          \
+                       dbinfop->flags |= (flag);                       \
+               continue;                                               \
+       }
+
+/*
+ * configure --
+ *     Handle command-line configuration options.
+ */
+void
+configure(dbinfop, clp)
+       DB_INFO *dbinfop;
+       char **clp;
+{
+       long val;
+       char *name, *value;
+
+       /* clp is a NULL-terminated list of "name=value" strings. */
+       for (; (name = *clp) != NULL; ++clp) {
+               if ((value = strchr(name, '=')) == NULL)
+                       errx(1,
+                   "command-line configuration uses name=value format");
+               /* Split the argument in place at the '='. */
+               *value++ = '\0';
+
+               /*
+                * Each macro below ends in "continue" when its keyword
+                * matches, so control only reaches the errx() at the
+                * bottom when none of them did.
+                */
+               NUMBER(name, value, "bt_maxkey", bt_maxkey, 0);
+               NUMBER(name, value, "bt_minkey", bt_minkey, 0);
+               NUMBER(name, value, "db_lorder", db_lorder, 0);
+               NUMBER(name, value, "db_pagesize", db_pagesize, 0);
+               FLAG(name, value, "duplicates", DB_DUP);
+               NUMBER(name, value, "h_ffactor", h_ffactor, 0);
+               NUMBER(name, value, "h_nelem", h_nelem, 0);
+               NUMBER(name, value, "re_len", re_len, DB_FIXEDLEN);
+               STRING(name, value, "re_pad", re_pad, DB_PAD);
+               FLAG(name, value, "recnum", DB_RECNUM);
+               FLAG(name, value, "renumber", DB_RENUMBER);
+
+               errx(1, "unknown command-line configuration keyword");
+       }
+}
+
+/*
+ * rheader --
+ *     Read the header message.
+ */
+void
+rheader(dbtypep, pflagp, dbinfop)
+       DBTYPE *dbtypep;
+       int *pflagp;
+       DB_INFO *dbinfop;
+{
+       long lineno, val;
+       char name[256], value[256];
+
+       /* Defaults in case the header has no "type" or "format" line. */
+       *dbtypep = DB_UNKNOWN;
+       *pflagp = 0;
+
+       for (lineno = 1;; ++lineno) {
+               /*
+                * The field widths keep an over-long name or value from
+                * running past the 256-byte buffers (255 + the NUL).
+                */
+               if (fscanf(stdin, "%255[^=]=%255s\n", name, value) != 2)
+                       errx(1, "line %ld: unexpected line", lineno);
+               /* A line whose name is HEADER ends the header. */
+               if (strcmp(name, "HEADER") == 0)
+                       break;
+
+               if (strcmp(name, "format") == 0) {
+                       if (strcmp(value, "bytevalue") == 0) {
+                               *pflagp = 0;
+                               continue;
+                       }
+                       if (strcmp(value, "print") == 0) {
+                               *pflagp = 1;
+                               continue;
+                       }
+                       errx(1, "line %ld: unknown format", lineno);
+               }
+               if (strcmp(name, "type") == 0) {
+                       if (strcmp(value, "btree") == 0) {
+                               *dbtypep = DB_BTREE;
+                               continue;
+                       }
+                       if (strcmp(value, "hash") == 0) {
+                               *dbtypep = DB_HASH;
+                               continue;
+                       }
+                       if (strcmp(value, "recno") == 0) {
+                               *dbtypep = DB_RECNO;
+                               continue;
+                       }
+                       errx(1, "line %ld: unknown type", lineno);
+               }
+               /*
+                * Each macro below ends in "continue" when its keyword
+                * matches, so control only reaches the errx() at the
+                * bottom when none of them did.
+                */
+               NUMBER(name, value, "bt_maxkey", bt_maxkey, 0);
+               NUMBER(name, value, "bt_minkey", bt_minkey, 0);
+               NUMBER(name, value, "db_lorder", db_lorder, 0);
+               NUMBER(name, value, "db_pagesize", db_pagesize, 0);
+               FLAG(name, value, "duplicates", DB_DUP);
+               NUMBER(name, value, "h_ffactor", h_ffactor, 0);
+               NUMBER(name, value, "h_nelem", h_nelem, 0);
+               NUMBER(name, value, "re_len", re_len, DB_FIXEDLEN);
+               STRING(name, value, "re_pad", re_pad, DB_PAD);
+               FLAG(name, value, "recnum", DB_RECNUM);
+               FLAG(name, value, "renumber", DB_RENUMBER);
+
+               errx(1, "unknown input-file header configuration keyword");
+       }
+}
+
+/*
+ * dbt_rprint --
+ *     Read a printable line into a DBT structure.
+ */
+int
+dbt_rprint(dbtp)
+       DBT *dbtp;
+{
+       u_int32_t len;
+       u_int8_t *p;
+       int c1, c2, escape;
+
+       /*
+        * Reads one line from stdin into dbtp->data, growing the buffer as
+        * needed, and sets dbtp->size.  Returns 1 if EOF is hit before any
+        * byte was stored, 0 otherwise.
+        */
+       escape = 0;
+       for (p = dbtp->data, len = 0; (c1 = getchar()) != '\n';) {
+               if (c1 == EOF) {
+                       if (len == 0)
+                               return (1);
+                       err(1, "unexpected end of key/data pair");
+               }
+               if (escape) {
+                       /*
+                        * After a backslash: a second backslash is a
+                        * literal one, anything else starts a two-digit
+                        * hex byte.
+                        */
+                       if (c1 != '\\') {
+                               if ((c2 = getchar()) == EOF)
+                                       err(1,
+                                           "unexpected end of key/data pair");
+                               c1 = digitize(c1) << 4 | digitize(c2);
+                       }
+                       escape = 0;
+               } else
+                       if (c1 == '\\') {
+                               escape = 1;
+                               continue;
+                       }
+               /*
+                * Grow before storing.  len is still the number of bytes
+                * already stored here, so data + len is the next free
+                * slot in the reallocated buffer.
+                */
+               if (len >= dbtp->ulen - 10) {
+                       dbtp->ulen *= 2;
+                       if ((dbtp->data =
+                           (void *)realloc(dbtp->data, dbtp->ulen)) == NULL) {
+                               errno = ENOMEM;
+                               err(1, NULL);
+                       }
+                       p = (u_int8_t *)dbtp->data + len;
+               }
+               *p++ = c1;
+               ++len;
+       }
+       dbtp->size = len;
+       return (0);
+}
+
+/*
+ * digitize --
+ *     Convert a character to an integer.
+ */
+int
+digitize(c)
+       int c;
+{
+       /* Maps one lowercase hex digit to its value, 0 through 15. */
+       switch (c) {                    /* Don't depend on ASCII ordering. */
+       case '0': return (0);
+       case '1': return (1);
+       case '2': return (2);
+       case '3': return (3);
+       case '4': return (4);
+       case '5': return (5);
+       case '6': return (6);
+       case '7': return (7);
+       case '8': return (8);
+       case '9': return (9);
+       case 'a': return (10);
+       case 'b': return (11);
+       case 'c': return (12);
+       case 'd': return (13);
+       case 'e': return (14);
+       case 'f': return (15);
+       }
+
+       /* Bad input, not a system error: errx() leaves strerror out. */
+       errx(1, "unexpected hexadecimal value");
+       /* NOTREACHED */
+
+       return (0);
+}
+
+/*
+ * dbt_rdump --
+ *     Read a byte dump line into a DBT structure.
+ */
+int
+dbt_rdump(dbtp)
+       DBT *dbtp;
+{
+       u_int32_t len;
+       u_int8_t *p;
+       int c1, c2;
+
+       /*
+        * Reads one line of hex digit pairs from stdin into dbtp->data,
+        * growing the buffer as needed, and sets dbtp->size.  Returns 1 if
+        * EOF is hit before any byte was stored, 0 otherwise.
+        */
+       for (p = dbtp->data, len = 0; (c1 = getchar()) != '\n';) {
+               if (c1 == EOF) {
+                       if (len == 0)
+                               return (1);
+                       err(1, "unexpected end of key/data pair");
+               }
+               if ((c2 = getchar()) == EOF)
+                       err(1, "unexpected end of key/data pair");
+               /*
+                * Grow before storing.  len is still the number of bytes
+                * already stored here, so data + len is the next free
+                * slot in the reallocated buffer.
+                */
+               if (len >= dbtp->ulen - 10) {
+                       dbtp->ulen *= 2;
+                       if ((dbtp->data =
+                           (void *)realloc(dbtp->data, dbtp->ulen)) == NULL) {
+                               errno = ENOMEM;
+                               err(1, NULL);
+                       }
+                       p = (u_int8_t *)dbtp->data + len;
+               }
+               *p++ = digitize(c1) << 4 | digitize(c2);
+               ++len;
+       }
+       dbtp->size = len;
+       return (0);
+}
+
+/*
+ * badnum --
+ *     Display the bad number message.
+ */
+void
+badnum()
+{
+       /* Bad input, not a system error: errx() leaves strerror out. */
+       errx(1, "boolean name=value pairs require a value of 0 or 1");
+}
+
+/*
+ * usage --
+ *     Display the usage message.
+ */
+void
+usage()
+{
+       static const char synopsis[] =
+"usage: db_load [-c name=value] [-f file] [-h home] [-t btree | hash] db_file\n";
+
+       /* Print the synopsis on stderr and exit with a failure status. */
+       (void)fputs(synopsis, stderr);
+       exit(1);
+}
diff --git a/db2/progs/db_printlog/db_printlog.c b/db2/progs/db_printlog/db_printlog.c
new file mode 100644 (file)
index 0000000..12c3655
--- /dev/null
@@ -0,0 +1,160 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_printlog.c        10.8 (Sleepycat) 7/15/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "btree.h"
+#include "hash.h"
+#include "log.h"
+#include "txn.h"
+#include "db_am.h"
+#include "clib_ext.h"
+
+DB_ENV *db_init __P((char *));
+void   onint __P((int));
+void   usage __P((void));
+
+int     interrupted;
+char   *progname = "db_printlog";                      /* Program name. */
+
+/*
+ * main --
+ *     Walk the log of the database environment with log_get(DB_NEXT) and
+ *     hand every record to __db_dispatch, until the log is exhausted, an
+ *     error occurs, or SIGINT is received.
+ *
+ *     The home directory comes from -h or, for backward compatibility,
+ *     from a single positional argument; giving both is a usage error.
+ */
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB_ENV *dbenv;
+       DBT data;
+       DB_LSN key;
+       int ch, eval;
+       char *home;
+
+       home = NULL;
+       while ((ch = getopt(argc, argv, "h:")) != EOF)
+               switch (ch) {
+               case 'h':
+                       home = optarg;
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       if ((home != NULL && argc > 0) || argc > 1)
+               usage();
+
+       /* XXX: backward compatibility, first argument is home. */
+       if (argc == 1)
+               home = argv[0];
+
+       dbenv = db_init(home);
+
+       /*
+        * Run the per-subsystem *_init_print calls.  If any of them fails,
+        * release the environment and stop here: dbenv must not be used,
+        * or passed to db_appexit a second time, once it has been released.
+        */
+       if ((errno = __bam_init_print(dbenv)) != 0 ||
+           (errno = __db_init_print(dbenv)) != 0 ||
+           (errno = __ham_init_print(dbenv)) != 0 ||
+           (errno = __log_init_print(dbenv)) != 0 ||
+           (errno = __txn_init_print(dbenv)) != 0) {
+               warn("initialization");
+               (void)db_appexit(dbenv);
+               exit (1);
+       }
+       eval = 0;
+
+       (void)signal(SIGINT, onint);
+
+       memset(&data, 0, sizeof(data));
+       while (!interrupted) {
+               if ((errno =
+                   log_get(dbenv->lg_info, &key, &data, DB_NEXT)) != 0) {
+                       /* DB_NOTFOUND ends the loop without an error. */
+                       if (errno == DB_NOTFOUND)
+                               break;
+                       eval = 1;
+                       warn("log_get");
+                       break;
+               }
+               if ((errno =
+                   __db_dispatch(dbenv->lg_info, &data, &key, 0, NULL)) != 0) {
+                       eval = 1;
+                       warn("dispatch");
+                       break;
+               }
+       }
+
+       (void)db_appexit(dbenv);
+
+       /* Re-raise the interrupt with the default disposition restored. */
+       if (interrupted) {
+               (void)signal(SIGINT, SIG_DFL);
+               (void)raise(SIGINT);
+               /* NOTREACHED */
+       }
+       exit (eval);
+}
+
+/*
+ * db_init --
+ *     Allocate a zeroed DB_ENV, direct its error output to stderr with
+ *     the program name as prefix, and pass it to db_appinit with
+ *     DB_CREATE | DB_INIT_LOG.  Any failure is fatal.
+ */
+DB_ENV *
+db_init(home)
+       char *home;
+{
+       DB_ENV *envp;
+       int ret;
+
+       envp = (DB_ENV *)calloc(sizeof(DB_ENV), 1);
+       if (envp == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+       envp->db_errpfx = progname;
+       envp->db_errfile = stderr;
+
+       /* err() reports through errno, so store the return value there. */
+       ret = db_appinit(home, NULL, envp, DB_CREATE | DB_INIT_LOG);
+       errno = ret;
+       if (ret != 0)
+               err(1, "db_appinit");
+       return (envp);
+}
+
+/*
+ * onint --
+ *     Interrupt signal handler: set the interrupted flag that the main
+ *     loop tests.
+ */
+void
+onint(signo)
+       int signo;
+{
+       interrupted = 1;
+       signo = 1;                      /* XXX: Shut the compiler up. */
+}
+
+/*
+ * usage --
+ *     Write the db_printlog command-line synopsis to stderr and exit with
+ *     status 1.
+ */
+void
+usage()
+{
+       (void)fputs("usage: db_printlog [-h home]\n", stderr);
+       exit(1);
+}
diff --git a/db2/progs/db_recover/db_recover.c b/db2/progs/db_recover/db_recover.c
new file mode 100644 (file)
index 0000000..4ac5925
--- /dev/null
@@ -0,0 +1,122 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_recover.c 10.12 (Sleepycat) 7/27/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <time.h>
+#endif
+
+#include "db_int.h"
+#include "txn.h"
+#include "common_ext.h"
+#include "clib_ext.h"
+
+DB_ENV *db_init __P((char *, int, int));
+void    usage __P((void));
+int     main __P((int, char *[]));
+
+const char *progname = "db_recover";                   /* Program name. */
+
+/*
+ * main --
+ *     Initialize the database environment with the recovery flags (see
+ *     db_init), optionally report the outcome, and exit with the return
+ *     value of db_appexit.
+ *
+ *     -c  request DB_RECOVER_FATAL instead of DB_RECOVER
+ *     -h  database home directory
+ *     -v  verbose: report completion time and transaction region state
+ */
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB_ENV *dbenv;
+       time_t now;
+       int ch, flags, verbose;
+       char *home;
+
+       home = NULL;
+       flags = verbose = 0;
+       while ((ch = getopt(argc, argv, "ch:v")) != EOF)
+               switch (ch) {
+               case 'c':
+                       LF_SET(DB_RECOVER_FATAL);
+                       break;
+               case 'h':
+                       home = optarg;
+                       break;
+               case 'v':
+                       verbose = 1;
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       if (argc != 0)
+               usage();
+
+       dbenv = db_init(home, flags, verbose);
+       if (verbose) {
+               /* Take the timestamp now; ctime() needs an initialized time_t. */
+               (void)time(&now);
+               __db_err(dbenv, "Recovery complete at %s", ctime(&now));
+               __db_err(dbenv, "%s %lu %s [%lu][%lu]",
+                   "Maximum transaction id",
+                   (u_long)dbenv->tx_info->region->last_txnid,
+                   "Recovery checkpoint",
+                   (u_long)dbenv->tx_info->region->last_ckp.file,
+                   (u_long)dbenv->tx_info->region->last_ckp.offset);
+       }
+
+       exit (db_appexit(dbenv));
+}
+
+/*
+ * db_init --
+ *     Allocate a zeroed DB_ENV and run db_appinit on it with recovery
+ *     requested: DB_RECOVER_FATAL when the caller's flags include it,
+ *     DB_RECOVER otherwise.  Any failure is fatal.
+ */
+DB_ENV *
+db_init(home, flags, verbose)
+       char *home;
+       int flags, verbose;
+{
+       DB_ENV *envp;
+       int init_flags, ret;
+
+       envp = (DB_ENV *)calloc(sizeof(DB_ENV), 1);
+       if (envp == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+       envp->db_verbose = verbose;
+       envp->db_errpfx = "db_recover";
+       envp->db_errfile = stderr;
+
+       /* Subsystems to initialize, plus the kind of recovery to run. */
+       init_flags = DB_CREATE | DB_INIT_LOG |
+           DB_INIT_MPOOL | DB_INIT_LOCK | DB_INIT_TXN | DB_USE_ENVIRON;
+       init_flags |=
+           LF_ISSET(DB_RECOVER_FATAL) ? DB_RECOVER_FATAL : DB_RECOVER;
+
+       /* err() reports through errno, so store the return value there. */
+       ret = db_appinit(home, NULL, envp, init_flags);
+       errno = ret;
+       if (ret != 0)
+               err(1, "appinit failed");
+
+       return (envp);
+}
+
+/*
+ * usage --
+ *     Write the db_recover command-line synopsis to stderr and exit with
+ *     status 1.
+ */
+void
+usage()
+{
+       (void)fputs("usage: db_recover [-cv] [-h home]\n", stderr);
+       exit(1);
+}
diff --git a/db2/progs/db_stat/db_stat.c b/db2/progs/db_stat/db_stat.c
new file mode 100644 (file)
index 0000000..5c7044d
--- /dev/null
@@ -0,0 +1,434 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char copyright[] =
+"@(#) Copyright (c) 1997\n\
+       Sleepycat Software Inc.  All rights reserved.\n";
+static const char sccsid[] = "@(#)db_stat.c    8.17 (Sleepycat) 8/24/97";
+#endif
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#endif
+
+#include "db_int.h"
+#include "clib_ext.h"
+
+#define        DIVIDER "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+
+typedef enum { T_NOTSET, T_DB, T_MPOOL, T_TXN } test_t;
+
+void   bstat __P((DB *));
+DB_ENV *db_init __P((char *, test_t));
+void   hstat __P((DB *));
+void   mstat __P((DB_ENV *));
+void   prflags __P((u_int32_t, const FN *));
+void   onint __P((int));
+void   tstat __P((DB_ENV *));
+int    txn_compare __P((const void *, const void *));
+void   usage __P((void));
+int    main __P((int, char *[]));
+
+int     interrupted;
+const char *progname = "db_stat";                      /* Program name. */
+
+/*
+ * main --
+ *     Display statistics for exactly one of: a database file (-d), the
+ *     memory pool (-m) or the transaction region (-t).  -h names the
+ *     database home.  When several of -d/-m/-t are given the last one
+ *     on the command line decides what is shown.
+ */
+int
+main(argc, argv)
+       int argc;
+       char *argv[];
+{
+       extern char *optarg;
+       extern int optind;
+       DB *dbp;
+       DB_ENV *dbenv;
+       test_t ttype;
+       int ch;
+       char *db, *home;
+
+       ttype = T_NOTSET;
+       db = home = NULL;
+       while ((ch = getopt(argc, argv, "d:h:mt")) != EOF)
+               switch (ch) {
+               case 'd':
+                       db = optarg;
+                       ttype = T_DB;
+                       break;
+               case 'h':
+                       home = optarg;
+                       break;
+               case 'm':
+                       ttype = T_MPOOL;
+                       break;
+               case 't':
+                       ttype = T_TXN;
+                       break;
+               case '?':
+               default:
+                       usage();
+               }
+       argc -= optind;
+       argv += optind;
+
+       /* No positional arguments are accepted and a mode is mandatory. */
+       if (argc != 0 || ttype == T_NOTSET)
+               usage();
+
+       dbenv = db_init(home, ttype);
+
+       (void)signal(SIGINT, onint);
+
+       switch (ttype) {
+       case T_DB:
+               /*
+                * NOTE(review): on failure this returns 1 without printing
+                * anything here and without calling db_appexit -- confirm
+                * that db_open reports the error itself.
+                */
+               if ((errno = db_open(db, DB_UNKNOWN,
+                   DB_RDONLY, 0, dbenv, NULL, &dbp)) != 0)
+                       return (1);
+               /* Btree and recno share one display routine. */
+               switch (dbp->type) {
+               case DB_BTREE:
+               case DB_RECNO:
+                       bstat(dbp);
+                       break;
+               case DB_HASH:
+                       hstat(dbp);
+                       break;
+               case DB_UNKNOWN:
+                       abort();                /* Impossible. */
+                       /* NOTREACHED */
+               }
+               (void)dbp->close(dbp, 0);
+               break;
+       case T_MPOOL:
+               mstat(dbenv);
+               break;
+       case T_TXN:
+               tstat(dbenv);
+               break;
+       case T_NOTSET:
+               abort();                        /* Impossible. */
+               /* NOTREACHED */
+       }
+
+       (void)db_appexit(dbenv);
+
+       /* Re-raise the interrupt with the default disposition restored. */
+       if (interrupted) {
+               (void)signal(SIGINT, SIG_DFL);
+               (void)raise(SIGINT);
+               /* NOTREACHED */
+       }
+       return (0);
+}
+
+/*
+ * bstat --
+ *     Display btree/recno statistics.
+ *
+ *     Fetches a DB_BTREE_STAT through dbp->stat and prints one
+ *     "value<TAB>description" line per statistic on stdout.  A failing
+ *     stat call is fatal.
+ */
+void
+bstat(dbp)
+       DB *dbp;
+{
+       static const FN fn[] = {
+               { DB_DUP,       "DB_DUP" },
+               { DB_FIXEDLEN,  "DB_FIXEDLEN" },
+               { DB_RECNUM,    "DB_RECNUM" },
+               { DB_RENUMBER,  "DB_RENUMBER" },
+               { 0 }
+       };
+       DB_BTREE_STAT *sp;
+
+       if (dbp->stat(dbp, &sp, NULL, 0))
+               err(1, "dbp->stat");
+
+/*
+ * PCT(f, t): percentage of the bytes in t pages that are in use, given f
+ * free bytes; 0 when there are no pages.  The byte total is computed in
+ * double so that pages * pagesize cannot overflow an integer type.
+ */
+#define        PCT(f, t)                                                       \
+    ((t) == 0 ? 0 :                                                    \
+    (((((double)(t) * sp->bt_pagesize) - (f)) /                        \
+    ((double)(t) * sp->bt_pagesize)) * 100))
+
+       prflags(sp->bt_flags, fn);
+       if (dbp->type == DB_BTREE) {
+#ifdef NOT_IMPLEMENTED
+               printf("%lu\tMaximum keys per-page.\n", (u_long)sp->bt_maxkey);
+#endif
+               printf("%lu\tMinimum keys per-page.\n", (u_long)sp->bt_minkey);
+       }
+       if (dbp->type == DB_RECNO) {
+               printf("%lu\tFixed-length record size.\n",
+                   (u_long)sp->bt_re_len);
+               /* Show the pad byte as a character only when printable. */
+               if (isprint(sp->bt_re_pad))
+                       printf("%c\tFixed-length record pad.\n",
+                           (int)sp->bt_re_pad);
+               else
+                       printf("0x%x\tFixed-length record pad.\n",
+                           (int)sp->bt_re_pad);
+       }
+       printf("%lu\tUnderlying tree page size.\n", (u_long)sp->bt_pagesize);
+       printf("%lu\tNumber of levels in the tree.\n", (u_long)sp->bt_levels);
+       printf("%lu\tNumber of keys in the tree.\n", (u_long)sp->bt_nrecs);
+       printf("%lu\tNumber of tree internal pages.\n", (u_long)sp->bt_int_pg);
+       printf("%lu\tNumber of tree leaf pages.\n", (u_long)sp->bt_leaf_pg);
+       printf("%lu\tNumber of tree duplicate pages.\n",
+           (u_long)sp->bt_dup_pg);
+       printf("%lu\tNumber of tree overflow pages.\n",
+           (u_long)sp->bt_over_pg);
+       printf("%lu\tNumber of pages on the free list.\n",
+           (u_long)sp->bt_free);
+       printf("%lu\tNumber of pages freed for reuse.\n",
+           (u_long)sp->bt_freed);
+       /* The four lines below pair a free-byte count with its PCT value. */
+       printf(
+           "%lu\tNumber of bytes free in tree internal pages (%.0f%% ff).\n",
+           (u_long)sp->bt_int_pgfree,
+           PCT(sp->bt_int_pgfree, sp->bt_int_pg));
+       printf("%lu\tNumber of bytes free in tree leaf pages (%.0f%% ff).\n",
+           (u_long)sp->bt_leaf_pgfree,
+           PCT(sp->bt_leaf_pgfree, sp->bt_leaf_pg));
+       printf(
+           "%lu\tNumber of bytes free in tree duplicate pages (%.0f%% ff).\n",
+           (u_long)sp->bt_dup_pgfree,
+           PCT(sp->bt_dup_pgfree, sp->bt_dup_pg));
+       printf(
+           "%lu\tNumber of bytes free in tree overflow pages (%.0f%% ff).\n",
+           (u_long)sp->bt_over_pgfree,
+           PCT(sp->bt_over_pgfree, sp->bt_over_pg));
+       printf("%lu\tNumber of bytes saved by prefix compression.\n",
+           (u_long)sp->bt_pfxsaved);
+       printf("%lu\tTotal number of tree page splits.\n",
+           (u_long)sp->bt_split);
+       printf("%lu\tNumber of root page splits.\n", (u_long)sp->bt_rootsplit);
+       printf("%lu\tNumber of fast splits.\n", (u_long)sp->bt_fastsplit);
+       printf("%lu\tNumber of hits in tree fast-insert code.\n",
+           (u_long)sp->bt_cache_hit);
+       printf("%lu\tNumber of misses in tree fast-insert code.\n",
+           (u_long)sp->bt_cache_miss);
+       printf("%lu\tNumber of keys added.\n", (u_long)sp->bt_added);
+       printf("%lu\tNumber of keys deleted.\n", (u_long)sp->bt_deleted);
+}
+
+/*
+ * hstat --
+ *     Display hash statistics.
+ *
+ *     Currently a stub: it returns immediately and prints nothing, and
+ *     dbp is not examined.
+ */
+void
+hstat(dbp)
+       DB *dbp;
+{
+       return;
+}
+
+/*
+ * mstat --
+ *     Display mpool statistics.
+ *
+ *     Obtains the global statistics (gsp) and the per-file statistics
+ *     array (fsp) from memp_stat, prints the global figures and then one
+ *     section per file, each preceded by a DIVIDER line.  A failing
+ *     memp_stat call is fatal.
+ *
+ *     NOTE(review): most counters below are handed to "%lu" without the
+ *     (u_long) cast used for st_cachesize and st_pagesize -- confirm that
+ *     those fields are declared as u_long.
+ */
+void
+mstat(dbenv)
+       DB_ENV *dbenv;
+{
+       DB_MPOOL_FSTAT **fsp;
+       DB_MPOOL_STAT *gsp;
+
+       if (memp_stat(dbenv->mp_info, &gsp, &fsp, NULL))
+               err(1, NULL);
+
+       printf("%lu\tCache size (%luK).\n",
+           (u_long)gsp->st_cachesize, (u_long)gsp->st_cachesize / 1024);
+       printf("%lu\tRequested pages found in the cache", gsp->st_cache_hit);
+       /* Hit percentage is printed only when there was at least one request. */
+       if (gsp->st_cache_hit + gsp->st_cache_miss != 0)
+               printf(" (%.0f%%)", ((double)gsp->st_cache_hit /
+                   (gsp->st_cache_hit + gsp->st_cache_miss)) * 100);
+       printf(".\n");
+       printf("%lu\tRequested pages mapped into the process' address space.\n",
+           gsp->st_map);
+       printf("%lu\tRequested pages not found in the cache.\n",
+           gsp->st_cache_miss);
+       printf("%lu\tPages created in the cache.\n", gsp->st_page_create);
+       printf("%lu\tPages read into the cache.\n", gsp->st_page_in);
+       printf("%lu\tPages written from the cache to the backing file.\n",
+           gsp->st_page_out);
+       printf("%lu\tRead-only pages forced from the cache.\n",
+           gsp->st_ro_evict);
+       printf("%lu\tRead-write pages forced from the cache.\n",
+           gsp->st_rw_evict);
+       printf("%lu\tNumber of hash buckets used for page location.\n",
+           gsp->st_hash_buckets);
+       printf("%lu\tTotal number of times hash chains searched for a page.\n",
+           gsp->st_hash_searches);
+       printf("%lu\tThe longest hash chain searched for a page.\n",
+           gsp->st_hash_longest);
+       printf(
+           "%lu\tTotal number of hash buckets examined for page location.\n",
+           gsp->st_hash_examined);
+
+       /* Per-file sections: fsp is walked until a NULL entry (or NULL fsp). */
+       for (; fsp != NULL && *fsp != NULL; ++fsp) {
+               printf("%s\n", DIVIDER);
+               printf("%s\n", (*fsp)->file_name);
+               printf("%lu\tPage size.\n", (u_long)(*fsp)->st_pagesize);
+               printf("%lu\tRequested pages found in the cache",
+                   (*fsp)->st_cache_hit);
+               if ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss != 0)
+                       printf(" (%.0f%%)", ((double)(*fsp)->st_cache_hit /
+                           ((*fsp)->st_cache_hit + (*fsp)->st_cache_miss)) *
+                           100);
+               printf(".\n");
+       printf("%lu\tRequested pages mapped into the process' address space.\n",
+                   (*fsp)->st_map);
+               printf("%lu\tRequested pages not found in the cache.\n",
+                   (*fsp)->st_cache_miss);
+               printf("%lu\tPages created in the cache.\n",
+                   (*fsp)->st_page_create);
+               printf("%lu\tPages read into the cache.\n", (*fsp)->st_page_in);
+       printf("%lu\tPages written from the cache to the backing file.\n",
+                   (*fsp)->st_page_out);
+       }
+}
+
+/*
+ * tstat --
+ *     Display transaction statistics.
+ *
+ *     Obtains a DB_TXN_STAT from txn_stat, prints the checkpoint
+ *     information and the counters, then lists the active transactions
+ *     sorted by transaction ID.  A failing txn_stat call is fatal.
+ */
+void
+tstat(dbenv)
+       DB_ENV *dbenv;
+{
+       DB_TXN_STAT *sp;
+       unsigned int i;
+       const char *p;
+
+       if (txn_stat(dbenv->tx_info, &sp, NULL))
+               err(1, NULL);
+
+       /* Both descriptions already end in a period; the format adds none. */
+       p = sp->st_last_ckp.file == 0 ?
+           "No checkpoint LSN." : "File/offset for last checkpoint LSN.";
+       printf("%lu/%lu\t%s\n", (u_long)sp->st_last_ckp.file,
+           (u_long)sp->st_last_ckp.offset, p);
+       p = sp->st_pending_ckp.file == 0 ?
+           "No pending checkpoint LSN." :
+           "File/offset for last pending checkpoint LSN.";
+       printf("%lu/%lu\t%s\n",
+           (u_long)sp->st_pending_ckp.file,
+           (u_long)sp->st_pending_ckp.offset, p);
+       if (sp->st_time_ckp == 0)
+               printf("0\tNo checkpoint timestamp.\n");
+       else
+               /* %.24s drops the newline that ctime() appends. */
+               printf("%.24s\tCheckpoint timestamp.\n",
+                   ctime(&sp->st_time_ckp));
+       printf("%lx\tLast transaction ID allocated.\n",
+           (u_long)sp->st_last_txnid);
+       printf("%lu\tMaximum number of active transactions.\n",
+           (u_long)sp->st_maxtxns);
+       printf("%lu\tNumber of transactions begun.\n",
+           (u_long)sp->st_nbegins);
+       printf("%lu\tNumber of transactions aborted.\n",
+           (u_long)sp->st_naborts);
+       printf("%lu\tNumber of transactions committed.\n",
+           (u_long)sp->st_ncommits);
+       printf("%lu\tActive transactions.\n", (u_long)sp->st_nactive);
+       qsort(sp->st_txnarray,
+           sp->st_nactive, sizeof(sp->st_txnarray[0]), txn_compare);
+       for (i = 0; i < sp->st_nactive; ++i)
+               printf("\tid: %lx; initial LSN file/offset %lu/%lu\n",
+                   (u_long)sp->st_txnarray[i].txnid,
+                   (u_long)sp->st_txnarray[i].lsn.file,
+                   (u_long)sp->st_txnarray[i].lsn.offset);
+}
+
+/*
+ * txn_compare --
+ *     qsort comparison routine: orders DB_TXN_ACTIVE entries by ascending
+ *     transaction ID.
+ */
+int
+txn_compare(a1, b1)
+       const void *a1, *b1;
+{
+       const DB_TXN_ACTIVE *lhs, *rhs;
+
+       lhs = a1;
+       rhs = b1;
+
+       if (lhs->txnid == rhs->txnid)
+               return (0);
+       return (lhs->txnid > rhs->txnid ? 1 : -1);
+}
+
+/*
+ * prflags --
+ *     Print "Flags:" followed by the names of all table entries whose
+ *     mask intersects flags, then a newline.  The first name is preceded
+ *     by a space and later ones by ", ".  The table ends at the first
+ *     entry with a zero mask.
+ */
+void
+prflags(flags, fn)
+       u_int32_t flags;
+       FN const *fn;
+{
+       const char *sep;
+
+       printf("Flags:");
+       for (sep = " "; fn->mask != 0; ++fn) {
+               if ((fn->mask & flags) == 0)
+                       continue;
+               printf("%s%s", sep, fn->name);
+               sep = ", ";
+       }
+       printf("\n");
+}
+
+/*
+ * db_init --
+ *     Initialize the environment for the requested kind of statistics:
+ *     DB_USE_ENVIRON always, plus DB_INIT_MPOOL for T_MPOOL or
+ *     DB_INIT_TXN for T_TXN.  Any failure is fatal.
+ */
+DB_ENV *
+db_init(home, ttype)
+       char *home;
+       test_t ttype;
+{
+       DB_ENV *envp;
+       int flags, ret;
+
+       flags = DB_USE_ENVIRON;
+       if (ttype == T_MPOOL)
+               flags |= DB_INIT_MPOOL;
+       else if (ttype == T_TXN)
+               flags |= DB_INIT_TXN;
+
+       envp = (DB_ENV *)calloc(sizeof(DB_ENV), 1);
+       if (envp == NULL) {
+               errno = ENOMEM;
+               err(1, NULL);
+       }
+       envp->db_errpfx = progname;
+       envp->db_errfile = stderr;
+
+       /* err() reports through errno, so store the return value there. */
+       ret = db_appinit(home, NULL, envp, flags);
+       errno = ret;
+       if (ret != 0)
+               err(1, "db_appinit");
+       return (envp);
+}
+
+/*
+ * onint --
+ *     Interrupt signal handler: set the interrupted flag that main tests
+ *     before returning.
+ */
+void
+onint(signo)
+       int signo;
+{
+       interrupted = 1;
+       signo = 1;                      /* XXX: Shut the compiler up. */
+}
+
+/*
+ * usage --
+ *     Write the db_stat command-line synopsis to stderr and exit with
+ *     status 1.
+ */
+void
+usage()
+{
+       (void)fputs("usage: db_stat [-mt] [-d file] [-h home]\n", stderr);
+       exit(1);
+}
diff --git a/db2/txn/txn.c b/db2/txn/txn.c
new file mode 100644 (file)
index 0000000..b20697b
--- /dev/null
@@ -0,0 +1,809 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1995, 1996
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Margo Seltzer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)txn.c        10.20 (Sleepycat) 8/24/97";
+#endif /* not lint */
+
+
+/*
+ * This file contains the top level routines of the transaction library.
+ * It assumes that a lock manager and log manager that conform to the db_log(3)
+ * and db_lock(3) interfaces exist.
+ */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#endif
+
+#include "shqueue.h"
+#include "db_int.h"
+#include "db_page.h"
+#include "db_shash.h"
+#include "txn.h"
+#include "db_dispatch.h"
+#include "lock.h"
+#include "log.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+static int __txn_check_running __P((const DB_TXN *));
+
+static int     __txn_create __P((DB_ENV *, const char *, u_int));
+static int     __txn_grow_region __P((DB_TXNMGR *));
+static int     __txn_validate_region __P((DB_TXNMGR *));
+static int      __txn_end __P((DB_TXN *, int));
+static int      __txn_undo __P((DB_TXN *));
+
+/*
+ * Create and initialize a transaction region in shared memory.
+ * 0 means, success.
+ * +1 means that the db_create failed, so we did not create the region.
+ * -1 means that we got some sort of system error.
+ *
+ * NOTE(review): the code below returns whatever __db_rcreate or
+ * __db_rclose returned; confirm that the +1/-1 description above still
+ * matches those routines.
+ *
+ * The region is sized for dbenv->tx_max transactions, or 1000 when
+ * tx_max is 0.
+ */
+static int
+__txn_create(dbenv, path, mode)
+       DB_ENV *dbenv;
+       const char *path;
+       u_int mode;
+{
+       DB_TXNREGION *txn_region;
+       TXN_DETAIL *txnp;
+       time_t now;
+       int fd, i, maxtxns, ret;
+
+       maxtxns = dbenv->tx_max != 0 ? dbenv->tx_max : 1000;
+       (void)time(&now);
+
+       ret = __db_rcreate(dbenv, DB_APP_NONE, path,
+           DEFAULT_TXN_FILE, mode, TXN_REGION_SIZE(maxtxns), &fd, &txn_region);
+
+       /* Region may have existed.  If it didn't, the open will fail. */
+       if (ret != 0)
+               return (ret);
+
+       txn_region->magic = DB_TXNMAGIC;
+       txn_region->version = DB_TXNVERSION;
+       txn_region->maxtxns = maxtxns;
+       txn_region->last_txnid = TXN_MINIMUM;
+       /* XXX If we ever do more types of locking and logging, this changes. */
+       txn_region->logtype = 0;
+       txn_region->locktype = 0;
+       txn_region->free_txn = 0;
+       txn_region->time_ckp = now;
+       ZERO_LSN(txn_region->last_ckp);
+       ZERO_LSN(txn_region->pending_ckp);
+
+       /*
+        * Chain every table entry into the free list: while an entry is
+        * unallocated its txnid holds the index of the next free entry, and
+        * the last entry terminates the list with TXN_INVALID.
+        */
+       for (txnp = &txn_region->table[0], i = 0; i < maxtxns; i++, txnp++) {
+               ZERO_LSN(txnp->begin_lsn);
+               txnp->status = TXN_UNALLOC;
+               txnp->txnid = i + 1;
+       }
+       txn_region->table[maxtxns - 1].txnid = TXN_INVALID;
+
+       /* Unlock the region. */
+       (void)__db_mutex_unlock(&txn_region->hdr.lock, fd);
+
+       /* Now unmap and close the region. */
+       if ((ret = __db_rclose(dbenv, fd, txn_region)) != 0) {
+               (void)txn_unlink(path, 1 /* force */, dbenv);
+               return (ret);
+       }
+
+       return (0);
+}
+
+/*
+ * txn_open --
+ *     Open (creating it first when DB_CREATE is set) the transaction
+ *     region and return a transaction manager handle through mgrpp.
+ *
+ *     path, mode: handed to the region create/open routines.
+ *     flags: validated against DB_CREATE | DB_TXN_NOSYNC, plus DB_THREAD
+ *         when HAVE_SPINLOCKS is defined.
+ *     dbenv: required; EINVAL is returned when it is NULL.
+ *
+ *     Returns 0 on success and an error value otherwise.  On failure the
+ *     region is closed again if it had been opened and, when DB_CREATE is
+ *     still set in flags, unlinked.
+ */
+int
+txn_open(path, flags, mode, dbenv, mgrpp)
+       const char *path;
+       int flags, mode;
+       DB_ENV *dbenv;
+       DB_TXNMGR **mgrpp;
+{
+       DB_TXNMGR *tmgrp;
+       DB_TXNREGION *txn_regionp;
+       int fd, ret, retry_cnt;
+
+       tmgrp = NULL;
+       txn_regionp = NULL;
+       fd = -1;
+
+       /* Validate arguments. */
+       if (dbenv == NULL)
+               return (EINVAL);
+#ifdef HAVE_SPINLOCKS
+#define        OKFLAGS (DB_CREATE | DB_THREAD | DB_TXN_NOSYNC)
+#else
+#define        OKFLAGS (DB_CREATE | DB_TXN_NOSYNC)
+#endif
+       if ((ret = __db_fchk(dbenv, "txn_open", flags, OKFLAGS)) != 0)
+               return (ret);
+
+       retry_cnt = 0;
+retry: if (LF_ISSET(DB_CREATE) &&
+           (ret = __txn_create(dbenv, path, mode)) != 0) {
+               /*
+                * Retry an EAGAIN a bounded number of times, with the same
+                * limit and delay as the open below.
+                */
+               if (ret == EAGAIN && ++retry_cnt < 3) {
+                       (void)__db_sleep(1, 0);
+                       goto retry;
+               }
+               /* We did not really create the region. */
+               flags &= ~DB_CREATE;
+       }
+
+       retry_cnt = 0;
+retry1:        if ((ret = __db_ropen(dbenv, DB_APP_NONE, path, DEFAULT_TXN_FILE,
+           flags & ~(DB_CREATE | DB_THREAD | DB_TXN_NOSYNC),
+           &fd, &txn_regionp)) != 0) {
+               if (ret == EAGAIN && ++retry_cnt < 3) {
+                       (void)__db_sleep(1, 0);
+                       goto retry1;
+               }
+               goto out;
+       }
+
+       /* Check if valid region. */
+       if (txn_regionp->magic != DB_TXNMAGIC) {
+               __db_err(dbenv, "txn_open: Bad magic number");
+               ret = EINVAL;
+               goto out;
+       }
+
+       /* Now, create the transaction manager structure and set its fields. */
+       if ((tmgrp = (DB_TXNMGR *)malloc(sizeof(DB_TXNMGR))) == NULL) {
+               __db_err(dbenv, "txn_open: %s", strerror(errno));
+               ret = ENOMEM;
+               goto out;
+       }
+
+       tmgrp->dbenv = dbenv;
+       /* Use the application's recovery function when one was supplied. */
+       tmgrp->recover =
+           dbenv->tx_recover == NULL ? __db_dispatch : dbenv->tx_recover;
+       tmgrp->region = txn_regionp;
+       tmgrp->reg_size = txn_regionp->hdr.size;
+       tmgrp->fd = fd;
+       tmgrp->flags = LF_ISSET(DB_TXN_NOSYNC | DB_THREAD);
+       TAILQ_INIT(&tmgrp->txn_chain);
+       if (LF_ISSET(DB_THREAD))
+               __db_mutex_init(&tmgrp->mutex, -1);
+       *mgrpp = tmgrp;
+       return (0);
+
+       /* Error exit: undo whatever had been set up so far. */
+out:   if (txn_regionp != NULL)
+               (void)__db_rclose(dbenv, fd, txn_regionp);
+       if (flags & DB_CREATE)
+               (void)txn_unlink(path, 1, dbenv);
+       if (tmgrp != NULL)
+               free(tmgrp);
+       return (ret);
+}
+
+/*
+ * txn_begin --
+ *     Begin a new transaction.
+ *
+ * Internally, we use TXN_DETAIL structures, but we allocate and return
+ * DB_TXN structures that provide access to the transaction ID and the
+ * offset in the transaction region of the TXN_DETAIL structure.
+ *
+ * On success 0 is returned and the new handle is stored through txnpp;
+ * otherwise an error value is returned and *txnpp is not written.  Every
+ * failure path releases the region lock, and the wrap-around, allocation
+ * and logging failures give the region slot back to the free list.
+ */
+int
+txn_begin(tmgrp, parent, txnpp)
+       DB_TXNMGR *tmgrp;
+       DB_TXN *parent;
+       DB_TXN **txnpp;
+{
+       TXN_DETAIL *txnp;
+       DB_TXN *retp;
+       int id, index, ret;
+
+       LOCK_TXNREGION(tmgrp);
+
+       if ((ret = __txn_validate_region(tmgrp)) != 0) {
+               UNLOCK_TXNREGION(tmgrp);
+               return (ret);
+       }
+
+       /* Remove element from free list. */
+       if (tmgrp->region->free_txn == TXN_INVALID &&
+           (ret = __txn_grow_region(tmgrp)) != 0) {
+               UNLOCK_TXNREGION(tmgrp);
+               return (ret);
+       }
+
+       index = tmgrp->region->free_txn;
+       txnp = &tmgrp->region->table[index];
+       tmgrp->region->free_txn = txnp->txnid;
+
+       if (txnp->status != TXN_UNALLOC) {
+               UNLOCK_TXNREGION(tmgrp);
+               return (EINVAL);
+       }
+
+       /*
+        * Make sure that last_txnid is not going to wrap around.  The slot
+        * is still unallocated and its txnid still links to the next free
+        * entry, so giving it back only needs the list head restored.
+        */
+       if (tmgrp->region->last_txnid == TXN_INVALID) {
+               tmgrp->region->free_txn = index;
+               UNLOCK_TXNREGION(tmgrp);
+               return (EINVAL);
+       }
+
+       if ((retp = (DB_TXN *)malloc(sizeof(DB_TXN))) == NULL) {
+               __db_err(tmgrp->dbenv, "txn_begin : %s", strerror(ENOMEM));
+               tmgrp->region->free_txn = index;
+               UNLOCK_TXNREGION(tmgrp);
+               return (ENOMEM);
+       }
+
+       id = ++tmgrp->region->last_txnid;
+       tmgrp->region->nbegins++;
+
+       txnp->txnid = id;
+       txnp->last_lock = 0;
+       txnp->status = TXN_RUNNING;
+       ZERO_LSN(txnp->last_lsn);
+       ZERO_LSN(txnp->begin_lsn);
+
+       UNLOCK_TXNREGION(tmgrp);
+
+       ZERO_LSN(retp->last_lsn);
+       retp->txnid = id;
+       retp->parent = parent;
+       /* The handle records the slot as a byte offset into the region. */
+       retp->off = (u_int8_t *)txnp - (u_int8_t *)tmgrp->region;
+       retp->mgrp = tmgrp;
+
+       if (tmgrp->dbenv->lg_info != NULL &&
+           (ret = __txn_regop_log(tmgrp->dbenv->lg_info,
+               retp, &txnp->begin_lsn, 0, TXN_BEGIN)) != 0) {
+
+               /*
+                * Deallocate transaction.  The slot must be marked
+                * unallocated again, otherwise the next txn_begin that
+                * takes it off the free list fails its status check.
+                */
+               LOCK_TXNREGION(tmgrp);
+               txnp->status = TXN_UNALLOC;
+               txnp->txnid = tmgrp->region->free_txn;
+               tmgrp->region->free_txn = txnp - &tmgrp->region->table[0];
+               UNLOCK_TXNREGION(tmgrp);
+               free (retp);
+               return (ret);
+       }
+
+       LOCK_TXNTHREAD(tmgrp);
+       TAILQ_INSERT_TAIL(&tmgrp->txn_chain, retp, links);
+       UNLOCK_TXNTHREAD(tmgrp);
+
+       *txnpp  = retp;
+       return (0);
+}
+
+/*
+ * txn_commit --
+ *     The db_txn(3) man page describes txn_commit.
+ *
+ *     When a log is configured a TXN_COMMIT record is written first,
+ *     flushed unless the manager has DB_TXN_NOSYNC set; the transaction
+ *     is then ended through __txn_end(txnp, 1).
+ */
+int
+txn_commit(txnp)
+       DB_TXN *txnp;
+{
+       DB_LOG *logp;
+       int ret;
+
+       ret = __txn_check_running(txnp);
+       if (ret != 0)
+               return (ret);
+
+       /* Sync the log. */
+       logp = txnp->mgrp->dbenv->lg_info;
+       if (logp != NULL) {
+               ret = __txn_regop_log(logp, txnp, &txnp->last_lsn,
+                   F_ISSET(txnp->mgrp, DB_TXN_NOSYNC) ? 0 : DB_FLUSH,
+                   TXN_COMMIT);
+               if (ret != 0)
+                       return (ret);
+       }
+
+       return (__txn_end(txnp, 1));
+}
+
+/*
+ * txn_abort --
+ *     Roll back a running transaction; see the db_txn(3) man page.
+ *
+ * Returns the __txn_check_running result if txnp is not a running
+ * transaction, the __txn_undo error (also reported through __db_err) if
+ * the undo pass fails, and otherwise whatever __txn_end(txnp, 0) returns.
+ */
+int
+txn_abort(txnp)
+       DB_TXN *txnp;
+{
+       int rc;
+
+       rc = __txn_check_running(txnp);
+       if (rc != 0)
+               return (rc);
+
+       rc = __txn_undo(txnp);
+       if (rc == 0)
+               return (__txn_end(txnp, 0));
+
+       __db_err(txnp->mgrp->dbenv,
+           "txn_abort: Log undo failed %s", strerror(rc));
+       return (rc);
+}
+
+/*
+ * Flush the log so a future commit is guaranteed to succeed.
+ *
+ * Returns the __txn_check_running result if txnp is not a running
+ * transaction.  When a log is configured, returns the result of log_flush
+ * (reporting a failure through __db_err).  Without a log, marks the
+ * transaction's region entry TXN_PREPARED and returns 0.
+ */
+int
+txn_prepare(txnp)
+       DB_TXN *txnp;
+{
+       int ret;
+       TXN_DETAIL *tp;
+
+       if ((ret = __txn_check_running(txnp)) != 0)
+               return (ret);
+
+       if (txnp->mgrp->dbenv->lg_info) {
+               ret = log_flush(txnp->mgrp->dbenv->lg_info, &txnp->last_lsn);
+               /* The failure code is log_flush's return value, not errno. */
+               if (ret)
+                       __db_err(txnp->mgrp->dbenv,
+                           "txn_prepare: log_flush failed %s\n",
+                           strerror(ret));
+               /*
+                * NOTE(review): this return is unconditional, so with a log
+                * configured the status below is never set to TXN_PREPARED.
+                * It is left as is because __txn_check_running only accepts
+                * TXN_RUNNING, so a prepared transaction could not then be
+                * committed or aborted -- confirm the intended behavior.
+                */
+               return (ret);
+       }
+
+       LOCK_TXNTHREAD(txnp->mgrp);
+       tp = (TXN_DETAIL *)((u_int8_t *)txnp->mgrp->region + txnp->off);
+       tp->status = TXN_PREPARED;
+       UNLOCK_TXNTHREAD(txnp->mgrp);
+       return (ret);
+}
+
+/*
+ * Return the transaction ID associated with a particular transaction
+ *
+ * txnp is dereferenced without a NULL check, so the caller must pass a
+ * valid DB_TXN handle.
+ */
+u_int32_t
+txn_id(txnp)
+       DB_TXN *txnp;
+{
+       return (txnp->txnid);
+}
+
+/*
+ * The db_txn(3) man page describes txn_close. Currently the caller should
+ * arrange a checkpoint before calling txn_close.
+ *
+ * Aborts every transaction still on the manager's chain, flushes the log
+ * (if one is configured) and closes the region.  Returns the first error
+ * seen, or 0.  The manager handle is freed only when everything succeeded.
+ */
+int
+txn_close(tmgrp)
+       DB_TXNMGR *tmgrp;
+{
+       DB_TXN *txnp;
+       int ret, t_ret;
+
+       /*
+        * This function had better only be called once per process
+        * (i.e., not per thread), so there should be no synchronization
+        * required.
+        */
+       for (ret = 0, txnp = TAILQ_FIRST(&tmgrp->txn_chain);
+           txnp != TAILQ_END(&tmgrp->txn_chain);
+           txnp = TAILQ_FIRST(&tmgrp->txn_chain)) {
+               if ((t_ret = txn_abort(txnp)) == 0)
+                       continue;
+               if (ret == 0)
+                       ret = t_ret;
+
+               /*
+                * txn_abort can fail before __txn_end has taken the
+                * transaction off the chain (it is not running, or the
+                * undo failed).  The loop would then see the same head
+                * element forever, so end it forcibly.  If it is no longer
+                * the head, __txn_end already unlinked it and must not be
+                * called a second time.
+                */
+               if (TAILQ_FIRST(&tmgrp->txn_chain) == txnp)
+                       (void)__txn_end(txnp, 0);
+       }
+
+       if (tmgrp->dbenv->lg_info && (t_ret =
+           log_flush(tmgrp->dbenv->lg_info, NULL)) != 0 &&
+           ret == 0)
+               ret = t_ret;
+
+       if ((t_ret = __db_rclose(tmgrp->dbenv, tmgrp->fd, tmgrp->region)) != 0
+           && ret == 0)
+               ret = t_ret;
+
+       if (ret == 0)
+               free (tmgrp);
+       return (ret);
+}
+
+/*
+ * The db_txn(3) man page describes txn_unlink.  Right now it is up to
+ * txn_close to write the final checkpoint record.
+ *
+ * Thin wrapper: forwards dbenv, path and force to __db_runlink together
+ * with DB_APP_NONE and DEFAULT_TXN_FILE, and returns its result unchanged.
+ */
+int
+txn_unlink(path, force, dbenv)
+       const char *path;
+       int force;
+       DB_ENV *dbenv;
+{
+       return (__db_runlink(dbenv,
+           DB_APP_NONE, path, DEFAULT_TXN_FILE, force));
+}
+
+/* Internal routines. */
+
+/*
+ * __txn_check_running --
+ *     Validate a transaction handle.  Returns 0 when txnp, its manager and
+ *     the manager's region are all non-NULL and the region entry located at
+ *     txnp->off has status TXN_RUNNING; returns EINVAL otherwise.
+ */
+static int
+__txn_check_running(txnp)
+       const DB_TXN *txnp;
+{
+       TXN_DETAIL *detail;
+
+       /* Reject handles that cannot be mapped to a region entry. */
+       if (txnp == NULL || txnp->mgrp == NULL || txnp->mgrp->region == NULL)
+               return (EINVAL);
+
+       detail = (TXN_DETAIL *)((u_int8_t *)txnp->mgrp->region + txnp->off);
+       return (detail->status == TXN_RUNNING ? 0 : EINVAL);
+}
+
+/*
+ * __txn_end --
+ *     Common tail of commit and abort: unlink txnp from the manager's
+ *     chain, release its locks (when a lock manager is configured), return
+ *     its region entry to the free list, bump the commit or abort counter
+ *     and free the handle.  is_commit selects the counter and the name used
+ *     in the error message.  Returns 0, or the lock_vec error.
+ */
+static int
+__txn_end(txnp, is_commit)
+       DB_TXN *txnp;
+       int is_commit;
+{
+       DB_TXNMGR *mgr;
+       TXN_DETAIL *tp;
+       DB_LOCKREQ request;
+       int ret;
+       u_int32_t locker;
+
+       mgr = txnp->mgrp;
+
+       LOCK_TXNTHREAD(mgr);
+       TAILQ_REMOVE(&mgr->txn_chain, txnp, links);
+       UNLOCK_TXNTHREAD(mgr);
+
+       /* Release the locks. */
+       locker = txnp->txnid;
+       request.op = DB_LOCK_PUT_ALL;   /* Only op is set in the request. */
+
+       if (mgr->dbenv->lk_info) {
+               ret = lock_vec(mgr->dbenv->lk_info, locker, 0,
+                   &request, 1, NULL);
+               /* A deadlock result is tolerated only on the abort path. */
+               if (ret != 0 && (ret != DB_LOCK_DEADLOCK || is_commit)) {
+                       __db_err(mgr->dbenv, "%s: release locks failed %s",
+                           is_commit ? "txn_commit" : "txn_abort",
+                           strerror(ret));
+                       /*
+                        * NOTE(review): txnp is already off the chain here
+                        * but is neither freed nor is its region entry
+                        * released -- looks like a leak; confirm.
+                        */
+                       return (ret);
+               }
+       }
+
+       /* End the transaction. */
+       LOCK_TXNREGION(mgr);
+       tp = (TXN_DETAIL *)((u_int8_t *)mgr->region + txnp->off);
+       tp->status = TXN_UNALLOC;
+       tp->txnid = mgr->region->free_txn;      /* Link to old free head. */
+       mgr->region->free_txn = tp - &mgr->region->table[0];
+       if (is_commit)
+               mgr->region->ncommits++;
+       else
+               mgr->region->naborts++;
+       UNLOCK_TXNREGION(mgr);
+
+       FREE(txnp, sizeof(*txnp));
+
+       return (0);
+}
+
+
+/*
+ * Undo the transaction described by txnp by walking its log records
+ * backwards from txnp->last_lsn.  Returns 0 on success (including when no
+ * log is configured); on failure returns the non-zero value produced by
+ * log_get or by the manager's recover function.
+ */
+static int
+__txn_undo(txnp)
+       DB_TXN *txnp;
+{
+       DB_TXNMGR *mgr;
+       DB_LOG *logp;
+       DBT rdbt;
+       DB_LSN key_lsn;
+       int ret;
+
+       mgr = txnp->mgrp;
+       logp = mgr->dbenv->lg_info;
+       if (logp == NULL)
+               return (0);
+
+       /*
+        * This is the simplest way to code this, but if the mallocs during
+        * recovery turn out to be a performance issue, we can do the
+        * allocation here and use DB_DBT_USERMEM.
+        */
+       memset(&rdbt, 0, sizeof(rdbt));
+       if (F_ISSET(logp, DB_AM_THREAD))
+               F_SET(&rdbt, DB_DBT_MALLOC);
+
+       key_lsn = txnp->last_lsn;               /* structure assignment */
+       for (ret = 0; ret == 0 && !IS_ZERO_LSN(key_lsn);) {
+               /*
+                * The dispatch routine returns the lsn of the record
+                * before the current one in the key_lsn argument.
+                */
+               if ((ret = log_get(logp, &key_lsn, &rdbt, DB_SET)) == 0) {
+                       ret =
+                           mgr->recover(logp, &rdbt, &key_lsn, TXN_UNDO, NULL);
+                       /* Free the record only when DB_DBT_MALLOC was set. */
+                       if (F_ISSET(logp, DB_AM_THREAD) && rdbt.data != NULL) {
+                               free(rdbt.data);
+                               rdbt.data = NULL;
+                       }
+               }
+               if (ret != 0)
+                       return (ret);
+       }
+
+       return (ret);
+}
+
+/*
+ * Transaction checkpoint.
+ * If either kbytes or minutes is non-zero, then we only take the checkpoint
+ * if at least "minutes" minutes have passed since the last checkpoint or if
+ * at least "kbytes" of log data have been written since the last checkpoint.
+ * When taking a checkpoint, find the oldest active transaction and figure out
+ * its first LSN.  This is the lowest LSN we can checkpoint, since any record
+ * written after that point may be involved in a transaction and may
+ * therefore need to be undone in the case of an abort.
+ *
+ * Returns EINVAL for a negative argument, 0 when no checkpoint was due or
+ * the checkpoint completed, a positive error from memp_sync or
+ * __txn_ckp_log, or the negative memp_sync value that means "buffers still
+ * to flush, try again".
+ */
+int
+txn_checkpoint(mgr, kbytes, minutes)
+       const DB_TXNMGR *mgr;
+       long kbytes, minutes;
+{
+       TXN_DETAIL *txnp;
+       DB_LSN ckp_lsn, last_ckp;
+       DB_LOG *dblp;
+       u_int32_t bytes_written, i;
+       time_t last_ckp_time, now;
+       int ret;
+
+       /* Check usage. */
+       if (kbytes < 0 || minutes < 0)
+               return (EINVAL);
+
+       /*
+        * Check if we need to take a checkpoint.
+        */
+       ZERO_LSN(ckp_lsn);
+       if (minutes != 0) {
+               (void)time(&now);
+
+               LOCK_TXNREGION(mgr);
+               last_ckp_time = mgr->region->time_ckp;
+               UNLOCK_TXNREGION(mgr);
+
+               if (now - last_ckp_time >= (time_t)(minutes * 60))
+                       goto do_ckp;
+       }
+
+       /*
+        * NOTE(review): lg_info is dereferenced here and below without a
+        * NULL check, although the end of this function tests it against
+        * NULL -- confirm whether a log is guaranteed on these paths.
+        */
+       if (kbytes != 0) {
+               dblp = mgr->dbenv->lg_info;
+               LOCK_LOGREGION(dblp);
+               bytes_written = dblp->lp->written;
+               ckp_lsn = dblp->lp->lsn;
+               UNLOCK_LOGREGION(dblp);
+               if (bytes_written >= (u_int32_t)(kbytes * 1024))
+                       goto do_ckp;
+       }
+
+       /*
+        * If we checked time and data and didn't go to checkpoint,
+        * we're done.
+        */
+       if (minutes != 0 || kbytes != 0)
+               return (0);
+
+do_ckp:
+       /*
+        * The time-triggered path arrives here without having read the
+        * log, so pick up the current end-of-log LSN for every path that
+        * has not set ckp_lsn yet.  Otherwise the scan below would compare
+        * against a zero LSN and a zero checkpoint LSN would be recorded.
+        */
+       if (IS_ZERO_LSN(ckp_lsn)) {
+               dblp = mgr->dbenv->lg_info;
+               LOCK_LOGREGION(dblp);
+               ckp_lsn = dblp->lp->lsn;
+               UNLOCK_LOGREGION(dblp);
+       }
+
+       /*
+        * We have to find an LSN such that all transactions begun
+        * before that LSN are complete.
+        */
+       LOCK_TXNREGION(mgr);
+
+       if (!IS_ZERO_LSN(mgr->region->pending_ckp))
+               ckp_lsn = mgr->region->pending_ckp;
+       else
+               for (txnp = &mgr->region->table[0], i = 0;
+                   i < mgr->region->maxtxns; i++, txnp++) {
+
+                       /*
+                        * Look through the transaction table for the LSN of
+                        * the transaction that is in-use (e.g., not
+                        * TXN_UNALLOC) and whose begin lsn is the lowest.
+                        */
+                       if (txnp->status != TXN_UNALLOC &&
+                           !IS_ZERO_LSN(txnp->begin_lsn) &&
+                           log_compare(&txnp->begin_lsn, &ckp_lsn) < 0)
+                               ckp_lsn = txnp->begin_lsn;
+               }
+
+       mgr->region->pending_ckp = ckp_lsn;
+       UNLOCK_TXNREGION(mgr);
+
+       ret = memp_sync(mgr->dbenv->mp_info, &ckp_lsn);
+       if (ret > 0) {
+               __db_err(mgr->dbenv,
+                   "txn_checkpoint: system failure in memp_sync %s\n",
+                   strerror(ret));
+       } else if (ret == 0 && mgr->dbenv->lg_info != NULL) {
+               LOCK_TXNREGION(mgr);
+               last_ckp = mgr->region->last_ckp;
+               ZERO_LSN(mgr->region->pending_ckp);
+               UNLOCK_TXNREGION(mgr);
+
+               /* ckp_lsn is also the output LSN of the new record. */
+               if ((ret = __txn_ckp_log(mgr->dbenv->lg_info,
+                  NULL, &ckp_lsn, DB_CHECKPOINT, &ckp_lsn, &last_ckp)) != 0) {
+                       __db_err(mgr->dbenv,
+                           "txn_checkpoint: log failed at LSN [%ld %ld] %s\n",
+                           (long)ckp_lsn.file, (long)ckp_lsn.offset,
+                           strerror(ret));
+                       return (ret);
+               }
+
+               LOCK_TXNREGION(mgr);
+               mgr->region->last_ckp = ckp_lsn;
+               (void)time(&mgr->region->time_ckp);
+               UNLOCK_TXNREGION(mgr);
+       }
+       /*
+        * ret < 0 means that there are still buffers to flush; the
+        * checkpoint is not complete. Back off and try again.
+        */
+       return (ret);
+}
+
+/*
+ * __txn_validate_region --
+ *     Called at every interface to notice that the shared region has
+ *     changed size.  When the size cached in tp->reg_size differs from the
+ *     size recorded in the region header, remap the region (which resets
+ *     tp->region) and refresh the cached size.  Returns 0, or the
+ *     __db_rremap error.
+ */
+static int
+__txn_validate_region(tp)
+       DB_TXNMGR *tp;
+{
+       int rc;
+
+       if (tp->reg_size != tp->region->hdr.size) {
+               /* Remap at the size the region header now reports. */
+               rc = __db_rremap(tp->dbenv, tp->region,
+                   tp->reg_size, tp->region->hdr.size, tp->fd, &tp->region);
+               if (rc != 0)
+                       return (rc);
+
+               tp->reg_size = tp->region->hdr.size;
+       }
+
+       return (0);
+}
+
+/*
+ * __txn_grow_region --
+ *     Double the number of entries in the region's transaction table:
+ *     grow and remap the region, mark the new entries TXN_UNALLOC, chain
+ *     them into a free list through their txnid fields and point free_txn
+ *     at the first new entry.  Returns 0, or the __db_rgrow/__db_rremap
+ *     error.
+ */
+static int
+__txn_grow_region(tp)
+       DB_TXNMGR *tp;
+{
+       TXN_DETAIL *tx;
+       size_t incr;
+       u_int32_t i, oldmax;
+       int ret;
+
+       oldmax = tp->region->maxtxns;
+       /*
+        * NOTE(review): the table entries written below are TXN_DETAIL, but
+        * the growth is sized with sizeof(DB_TXN) -- presumably safe only
+        * if DB_TXN is at least as large; verify against txn.h.
+        */
+       incr = oldmax * sizeof(DB_TXN);
+
+       if ((ret = __db_rgrow(tp->dbenv, tp->fd, incr)) != 0)
+               return (ret);
+
+       if ((ret = __db_rremap(tp->dbenv, tp->region,
+           tp->reg_size, tp->reg_size + incr, tp->fd, &tp->region)) != 0)
+               return (ret);
+       tp->reg_size += incr;
+
+       /*
+        * Initialize all the new transactions and up the transaction count.
+        * While an entry is free, its txnid holds the index of the next free
+        * entry; the last one is terminated with TXN_INVALID below.
+        */
+       for (i = 0, tx = &tp->region->table[oldmax]; i < oldmax; i++, tx++) {
+               ZERO_LSN(tx->begin_lsn);
+               tx->status = TXN_UNALLOC;
+               tx->txnid = oldmax + i + 1;
+       }
+       tp->region->free_txn = oldmax;
+       tp->region->maxtxns = 2 * oldmax;
+       tp->region->table[tp->region->maxtxns - 1].txnid = TXN_INVALID;
+
+       return (0);
+}
+
+/*
+ * txn_stat --
+ *     Allocate and fill a DB_TXN_STAT snapshot of the transaction region,
+ *     followed in the same allocation by an array describing the entries
+ *     whose status is not TXN_UNALLOC.  The memory comes from db_malloc
+ *     when it is non-NULL, otherwise from malloc.  On success stores the
+ *     result in *statp and returns 0; returns ENOMEM if allocation fails.
+ */
+int
+txn_stat(mgr, statp, db_malloc)
+       DB_TXNMGR *mgr;
+       DB_TXN_STAT **statp;
+       void *(*db_malloc) __P((size_t));
+{
+       DB_TXN_STAT *stats;
+       size_t nbytes;
+       u_int32_t nactive;
+       unsigned int i, ndx;
+
+       /* First pass: estimate the active count to size the allocation. */
+       LOCK_TXNREGION(mgr);
+       nactive = mgr->region->nbegins -
+           mgr->region->naborts - mgr->region->ncommits;
+       UNLOCK_TXNREGION(mgr);
+
+       /*
+        * Allocate a bunch of extra active structures to handle any
+        * that have been created since we unlocked the region.
+        */
+       nbytes = sizeof(DB_TXN_STAT) + sizeof(DB_TXN_ACTIVE) * (nactive + 200);
+       if (db_malloc == NULL)
+               stats = (DB_TXN_STAT *)malloc(nbytes);
+       else
+               stats = (DB_TXN_STAT *)db_malloc(nbytes);
+
+       if (stats == NULL)
+               return (ENOMEM);
+
+       LOCK_TXNREGION(mgr);
+       stats->st_last_txnid = mgr->region->last_txnid;
+       stats->st_last_ckp = mgr->region->last_ckp;
+       stats->st_maxtxns = mgr->region->maxtxns;
+       stats->st_naborts = mgr->region->naborts;
+       stats->st_nbegins = mgr->region->nbegins;
+       stats->st_ncommits = mgr->region->ncommits;
+       stats->st_pending_ckp = mgr->region->pending_ckp;
+       stats->st_time_ckp = mgr->region->time_ckp;
+       stats->st_nactive = stats->st_nbegins -
+           stats->st_naborts - stats->st_ncommits;
+       /* Clamp to the number of array slots that were allocated. */
+       if (stats->st_nactive > nactive + 200)
+               stats->st_nactive = nactive + 200;
+       stats->st_txnarray = (DB_TXN_ACTIVE *)&stats[1];
+
+       /* Copy id and begin LSN of each in-use entry, up to st_nactive. */
+       for (ndx = 0, i = 0; i < mgr->region->maxtxns; i++)
+               if (mgr->region->table[i].status != TXN_UNALLOC) {
+                       stats->st_txnarray[ndx].txnid =
+                           mgr->region->table[i].txnid;
+                       stats->st_txnarray[ndx].lsn =
+                           mgr->region->table[i].begin_lsn;
+                       ndx++;
+
+                       if (ndx >= stats->st_nactive)
+                               break;
+               }
+
+       UNLOCK_TXNREGION(mgr);
+       *statp = stats;
+       return (0);
+}
diff --git a/db2/txn/txn.src b/db2/txn/txn.src
new file mode 100644 (file)
index 0000000..40bb63e
--- /dev/null
@@ -0,0 +1,31 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ *
+ *     @(#)txn.src     10.1 (Sleepycat) 4/12/97
+ *
+ * This is the source file used to create the logging functions for the
+ * transaction system.
+ */
+PREFIX txn
+
+/*
+ * Everything except for checkpointing takes the same logging routine.
+ */
+BEGIN  regop
+ARG    opcode          u_int32_t       lu
+END
+
+/*
+ * This is the checkpoint record. It contains the lsn that the checkpoint
+ * guarantees and a pointer to the last checkpoint so that we can walk
+ * backwards by checkpoint.
+ * ckp_lsn:    the lsn that this checkpoint guarantees.
+ * last_ckp:   the lsn of the previous checkpoint record.
+ */
+BEGIN  ckp
+POINTER        ckp_lsn         DB_LSN *        lu
+POINTER        last_ckp        DB_LSN *        lu
+END
diff --git a/db2/txn/txn_auto.c b/db2/txn/txn_auto.c
new file mode 100644 (file)
index 0000000..c7f277e
--- /dev/null
@@ -0,0 +1,308 @@
+/* Do not edit: automatically built by dist/db_gen.sh. */
+#include "config.h"
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <ctype.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "shqueue.h"
+#include "db_page.h"
+#include "db_dispatch.h"
+#include "txn.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * PUBLIC: int __txn_regop_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     u_int32_t));
+ *
+ * Build a txn_regop log record (record type, transaction id, the
+ * transaction's previous LSN, opcode) in a temporary buffer and write it
+ * with log_put.  A NULL txnid is logged as id 0 with a zero previous LSN.
+ * Returns ENOMEM if the buffer cannot be allocated, otherwise the log_put
+ * result.
+ */
+int __txn_regop_log(logp, txnid, ret_lsnp, flags,
+       opcode)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       u_int32_t opcode;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_txn_regop;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(opcode);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;       /* Fields are packed back to back. */
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(bp, &opcode, sizeof(opcode));
+       bp += sizeof(opcode);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)      /* Done even when log_put returned an error. */
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __txn_regop_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * Decode the txn_regop record in dbtp->data and print it to stdout,
+ * prefixed by the LSN in lsnp.  Returns 0, or the __txn_regop_read error.
+ * The notused* arguments are ignored.
+ */
+
+int
+__txn_regop_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __txn_regop_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;                  /* i and c are assigned but never read. */
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __txn_regop_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]txn_regop: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\topcode: %lu\n", (u_long)argp->opcode);
+       printf("\n");
+       free(argp);             /* Allocated by __txn_regop_read. */
+       return (0);
+}
+
+/*
+ * PUBLIC: int __txn_regop_read __P((void *, __txn_regop_args **));
+ *
+ * Unpack a txn_regop record from recbuf into a newly allocated
+ * __txn_regop_args, stored in *argpp.  The structure and the DB_TXN that
+ * argp->txnid points at share one malloc, so a single free of *argpp
+ * releases both.  Returns 0, or ENOMEM.  No length is passed in, so recbuf
+ * is trusted to hold a complete record.
+ */
+int
+__txn_regop_read(recbuf, argpp)
+       void *recbuf;
+       __txn_regop_args **argpp;
+{
+       __txn_regop_args *argp;
+       u_int8_t *bp;
+
+       argp = (__txn_regop_args *)malloc(sizeof(__txn_regop_args) +
+           sizeof(DB_TXN));
+       if (argp == NULL)
+               return (ENOMEM);
+       argp->txnid = (DB_TXN *)&argp[1];       /* DB_TXN follows the args. */
+       bp = recbuf;
+       memcpy(&argp->type, bp, sizeof(argp->type));
+       bp += sizeof(argp->type);
+       memcpy(&argp->txnid->txnid,  bp, sizeof(argp->txnid->txnid));
+       bp += sizeof(argp->txnid->txnid);
+       memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(&argp->opcode, bp, sizeof(argp->opcode));
+       bp += sizeof(argp->opcode);
+       *argpp = argp;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __txn_ckp_log
+ * PUBLIC:     __P((DB_LOG *, DB_TXN *, DB_LSN *, u_int32_t,
+ * PUBLIC:     DB_LSN *, DB_LSN *));
+ *
+ * Build a txn_ckp log record (record type, transaction id, the
+ * transaction's previous LSN, ckp_lsn, last_ckp) in a temporary buffer and
+ * write it with log_put.  A NULL txnid is logged as id 0 with a zero
+ * previous LSN; a NULL ckp_lsn or last_ckp is logged as zero bytes.
+ * Returns ENOMEM if the buffer cannot be allocated, otherwise the log_put
+ * result.
+ */
+int __txn_ckp_log(logp, txnid, ret_lsnp, flags,
+       ckp_lsn, last_ckp)
+       DB_LOG *logp;
+       DB_TXN *txnid;
+       DB_LSN *ret_lsnp;
+       u_int32_t flags;
+       DB_LSN * ckp_lsn;
+       DB_LSN * last_ckp;
+{
+       DBT logrec;
+       DB_LSN *lsnp, null_lsn;
+       u_int32_t rectype, txn_num;
+       int ret;
+       u_int8_t *bp;
+
+       rectype = DB_txn_ckp;
+       txn_num = txnid == NULL ? 0 : txnid->txnid;
+       if (txnid == NULL) {
+               null_lsn.file = 0;
+               null_lsn.offset = 0;
+               lsnp = &null_lsn;
+       } else
+               lsnp = &txnid->last_lsn;
+       logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN)
+           + sizeof(*ckp_lsn)
+           + sizeof(*last_ckp);
+       if ((logrec.data = (void *)malloc(logrec.size)) == NULL)
+               return (ENOMEM);
+
+       bp = logrec.data;       /* Fields are packed back to back. */
+       memcpy(bp, &rectype, sizeof(rectype));
+       bp += sizeof(rectype);
+       memcpy(bp, &txn_num, sizeof(txn_num));
+       bp += sizeof(txn_num);
+       memcpy(bp, lsnp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       if (ckp_lsn != NULL)
+               memcpy(bp, ckp_lsn, sizeof(*ckp_lsn));
+       else
+               memset(bp, 0, sizeof(*ckp_lsn));
+       bp += sizeof(*ckp_lsn);
+       if (last_ckp != NULL)
+               memcpy(bp, last_ckp, sizeof(*last_ckp));
+       else
+               memset(bp, 0, sizeof(*last_ckp));
+       bp += sizeof(*last_ckp);
+#ifdef DEBUG
+       if ((u_int32_t)(bp - (u_int8_t *)logrec.data) != logrec.size)
+               fprintf(stderr, "Error in log record length");
+#endif
+       ret = log_put(logp, ret_lsnp, (DBT *)&logrec, flags);
+       if (txnid != NULL)      /* Done even when log_put returned an error. */
+               txnid->last_lsn = *ret_lsnp;
+       free(logrec.data);
+       return (ret);
+}
+
+/*
+ * PUBLIC: int __txn_ckp_print
+ * PUBLIC:    __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * Decode the txn_ckp record in dbtp->data and print it to stdout, prefixed
+ * by the LSN in lsnp.  Returns 0, or the __txn_ckp_read error.  The
+ * notused* arguments are ignored.
+ */
+
+int
+__txn_ckp_print(notused1, dbtp, lsnp, notused3, notused4)
+       DB_LOG *notused1;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int notused3;
+       void *notused4;
+{
+       __txn_ckp_args *argp;
+       u_int32_t i;
+       int c, ret;
+
+       i = 0;                  /* i and c are assigned but never read. */
+       c = 0;
+       notused1 = NULL;
+       notused3 = 0;
+       notused4 = NULL;
+
+       if((ret = __txn_ckp_read(dbtp->data, &argp)) != 0)
+               return (ret);
+       printf("[%lu][%lu]txn_ckp: rec: %lu txnid %lx prevlsn [%lu][%lu]\n",
+           (u_long)lsnp->file,
+           (u_long)lsnp->offset,
+           (u_long)argp->type,
+           (u_long)argp->txnid->txnid,
+           (u_long)argp->prev_lsn.file,
+           (u_long)argp->prev_lsn.offset);
+       printf("\tckp_lsn: [%lu][%lu]\n",
+           (u_long)argp->ckp_lsn.file, (u_long)argp->ckp_lsn.offset);
+       printf("\tlast_ckp: [%lu][%lu]\n",
+           (u_long)argp->last_ckp.file, (u_long)argp->last_ckp.offset);
+       printf("\n");
+       free(argp);             /* Allocated by __txn_ckp_read. */
+       return (0);
+}
+
+/*
+ * PUBLIC: int __txn_ckp_read __P((void *, __txn_ckp_args **));
+ *
+ * Unpack a txn_ckp record from recbuf into a newly allocated
+ * __txn_ckp_args, stored in *argpp.  The structure and the DB_TXN that
+ * argp->txnid points at share one malloc, so a single free of *argpp
+ * releases both.  Returns 0, or ENOMEM.  No length is passed in, so recbuf
+ * is trusted to hold a complete record.
+ */
+int
+__txn_ckp_read(recbuf, argpp)
+       void *recbuf;
+       __txn_ckp_args **argpp;
+{
+       __txn_ckp_args *argp;
+       u_int8_t *bp;
+
+       argp = (__txn_ckp_args *)malloc(sizeof(__txn_ckp_args) +
+           sizeof(DB_TXN));
+       if (argp == NULL)
+               return (ENOMEM);
+       argp->txnid = (DB_TXN *)&argp[1];       /* DB_TXN follows the args. */
+       bp = recbuf;
+       memcpy(&argp->type, bp, sizeof(argp->type));
+       bp += sizeof(argp->type);
+       memcpy(&argp->txnid->txnid,  bp, sizeof(argp->txnid->txnid));
+       bp += sizeof(argp->txnid->txnid);
+       memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN));
+       bp += sizeof(DB_LSN);
+       memcpy(&argp->ckp_lsn, bp,  sizeof(argp->ckp_lsn));
+       bp += sizeof(argp->ckp_lsn);
+       memcpy(&argp->last_ckp, bp,  sizeof(argp->last_ckp));
+       bp += sizeof(argp->last_ckp);
+       *argpp = argp;
+       return (0);
+}
+
+/*
+ * PUBLIC: int __txn_init_print __P((DB_ENV *));
+ *
+ * Register the txn print routines with __db_add_recovery under the
+ * DB_txn_regop and DB_txn_ckp record types.  Returns 0, or the first
+ * registration error.
+ */
+int
+__txn_init_print(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       if ((ret = __db_add_recovery(dbenv,
+           __txn_regop_print, DB_txn_regop)) != 0)
+               return (ret);
+       if ((ret = __db_add_recovery(dbenv,
+           __txn_ckp_print, DB_txn_ckp)) != 0)
+               return (ret);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __txn_init_recover __P((DB_ENV *));
+ *
+ * Register the txn recovery routines with __db_add_recovery under the
+ * DB_txn_regop and DB_txn_ckp record types.  Returns 0, or the first
+ * registration error.
+ */
+int
+__txn_init_recover(dbenv)
+       DB_ENV *dbenv;
+{
+       int ret;
+
+       if ((ret = __db_add_recovery(dbenv,
+           __txn_regop_recover, DB_txn_regop)) != 0)
+               return (ret);
+       if ((ret = __db_add_recovery(dbenv,
+           __txn_ckp_recover, DB_txn_ckp)) != 0)
+               return (ret);
+       return (0);
+}
+
diff --git a/db2/txn/txn_rec.c b/db2/txn/txn_rec.c
new file mode 100644 (file)
index 0000000..1fe720a
--- /dev/null
@@ -0,0 +1,131 @@
+/*-
+ * See the file LICENSE for redistribution information.
+ *
+ * Copyright (c) 1996, 1997
+ *     Sleepycat Software.  All rights reserved.
+ */
+/*
+ * Copyright (c) 1996
+ *     The President and Fellows of Harvard University.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *     This product includes software developed by the University of
+ *     California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#ifndef lint
+static const char sccsid[] = "@(#)txn_rec.c    10.4 (Sleepycat) 7/2/97";
+#endif /* not lint */
+
+#ifndef NO_SYSTEM_INCLUDES
+#include <sys/types.h>
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#endif
+
+#include "db_int.h"
+#include "db_page.h"
+#include "shqueue.h"
+#include "txn.h"
+#include "db_dispatch.h"
+#include "db_am.h"
+#include "common_ext.h"
+
+/*
+ * PUBLIC: int __txn_regop_recover
+ * PUBLIC:     __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * Recovery handler for txn_regop records.  For TXN_COMMIT the transaction
+ * id is added to the list in info unless it is already there; for
+ * TXN_PREPARE and TXN_BEGIN the list is only searched.  Other opcodes are
+ * ignored.  On return *lsnp holds the record's previous LSN.  Returns 0,
+ * or the __txn_regop_read error.
+ */
+int
+__txn_regop_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+        void *info;
+{
+       __txn_regop_args *argp;
+       int ret;
+
+#ifdef DEBUG_RECOVER
+       (void)__txn_regop_print(logp, dbtp, lsnp, redo, info);
+#endif
+       logp = logp;                    /* XXX: Shut the compiler up. */
+       redo = redo;
+
+       if ((ret = __txn_regop_read(dbtp->data, &argp)) != 0)
+               return (ret);
+
+       switch (argp->opcode) {
+       case TXN_COMMIT:
+               /* NOTE(review): the __db_txnlist_add result is not checked. */
+               if (__db_txnlist_find(info,
+                   argp->txnid->txnid) == DB_NOTFOUND)
+                       __db_txnlist_add(info, argp->txnid->txnid);
+               break;
+       case TXN_PREPARE:       /* Nothing to do. */
+       case TXN_BEGIN:
+               /* Call find so that we update the maxid. */
+               (void)__db_txnlist_find(info, argp->txnid->txnid);
+               break;
+       }
+
+       *lsnp = argp->prev_lsn;
+       free (argp);
+       return (0);
+}
+
+/*
+ * PUBLIC: int __txn_ckp_recover __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
+ *
+ * Recovery handler for txn_ckp records: decodes the record and stores its
+ * last_ckp LSN (the previous checkpoint) in *lsnp.  Returns the
+ * __txn_ckp_read error on failure and 1, not 0, on success.
+ * NOTE(review): the 1 is presumably a signal to the dispatcher that a
+ * checkpoint was seen -- confirm against the caller.
+ */
+int
+__txn_ckp_recover(logp, dbtp, lsnp, redo, info)
+       DB_LOG *logp;
+       DBT *dbtp;
+       DB_LSN *lsnp;
+       int redo;
+       void *info;
+{
+       __txn_ckp_args *argp;
+       int ret;
+
+#ifdef DEBUG_RECOVER
+       __txn_ckp_print(logp, dbtp, lsnp, redo, info);
+#endif
+       logp = logp;                    /* XXX: Shut the compiler up. */
+       redo = redo;
+       info = info;
+
+       if ((ret = __txn_ckp_read(dbtp->data, &argp)) != 0)
+               return (ret);
+
+       *lsnp = argp->last_ckp;
+       free(argp);
+       return (1);
+}
index a04f7746ef56a6a19b9f4a0a4c8c47f7da424aca..61e23a0b66dc5b3188db389c906d6d44d51909b2 100644 (file)
@@ -1 +1 @@
-#include <db/db.h>
+#include <db2/db.h>
diff --git a/include/db_185.h b/include/db_185.h
new file mode 100644 (file)
index 0000000..d303a1c
--- /dev/null
@@ -0,0 +1 @@
+#include <db2/db_185.h>
index 2062fa4f23d6b7928ff674a5e01d93a87e77d443..c420db8f84afd45022b605b04aa802f3b8424f59 100644 (file)
@@ -52,7 +52,7 @@ typedef struct _IO_FILE FILE;
 #include <libio.h>
 
 #ifdef __cplusplus
-# define __STDIO_INLINE __inline
+# define __STDIO_INLINE inline
 #else
 # define __STDIO_INLINE extern __inline
 #endif
index 6a936fdbe43cba79a9847e8749649b04db4bd3aa..84bd6b7bc8d5fb7c91bbee61fed18568308777f0 100644 (file)
@@ -968,6 +968,11 @@ Porting the GNU C Library
 @include memory.texi
 @include ctype.texi
 @include string.texi
+@include mbyte.texi
+@include locale.texi
+@include message.texi
+@include search.texi
+@include pattern.texi
 @include io.texi
 @include stdio.texi
 @include llio.texi
@@ -977,12 +982,7 @@ Porting the GNU C Library
 @include terminal.texi
 @include math.texi
 @include arith.texi
-@include search.texi
-@include pattern.texi
 @include time.texi
-@include mbyte.texi
-@include locale.texi
-@include message.texi
 @include setjmp.texi
 @include signal.texi
 @include startup.texi
index 54e7a754c334c5122d08250affe8e372802e39a6..2f781bf3d9318dd7c91ca4a6d73ff5c75bbc0a8d 100644 (file)
@@ -17,7 +17,7 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-#include <db.h>
+#include <db_185.h>
 #include <fcntl.h>
 #include <bits/libc-lock.h>
 #include "nsswitch.h"
index 810fc3aba6a46c414dc58cc21305f4747ce668ca..32ba4b5f4f8fc5b445f00e19657513e6ae445c28 100644 (file)
@@ -21,7 +21,7 @@
 #include <aliases.h>
 #include <alloca.h>
 #include <ctype.h>
-#include <db.h>
+#include <db_185.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <bits/libc-lock.h>
index c301789f74e44c24ec59dde74a2100d8a98abb54..4691c0fb9b82ce7aef3809fb15f569cd6cfd5078 100644 (file)
@@ -18,7 +18,7 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-#include <db.h>
+#include <db_185.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <string.h>
index 86bc35335a6b801dbe5ea7b7c16475adc9aa646d..2a7724af69d065944e0a4b1a2c1ca88803bff443 100644 (file)
--- a/po/de.po
+++ b/po/de.po
@@ -4,9 +4,9 @@
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: GNU libc 2.0.3\n"
-"POT-Creation-Date: 1997-03-30 19:08+0200\n"
-"PO-Revision-Date: 1997-04-27 21:16 MET DST\n"
+"Project-Id-Version: GNU libc 2.0.5\n"
+"POT-Creation-Date: 1997-08-21 04:13+0200\n"
+"PO-Revision-Date: 1997-10-23 11:14 MET DST\n"
 "Last-Translator: Jochen Hein <jochen.hein@delphi.central.de>\n"
 "Language-Team: German <de@li.org>\n"
 "MIME-Version: 1.0\n"
@@ -15,11 +15,11 @@ msgstr ""
 
 #: sunrpc/rpcinfo.c:612
 msgid "       rpcinfo -b prognum versnum\n"
-msgstr "       rpcinfo -b Programm-Nummer Versions-Nummer\n"
+msgstr "       rpcinfo -b Programmnummer Versionsnummer\n"
 
 #: sunrpc/rpcinfo.c:613
 msgid "       rpcinfo -d prognum versnum\n"
-msgstr "       rpcinfo -d Programm-Nummer Versions-Nummer\n"
+msgstr "       rpcinfo -d Programmnummer Versionsnummer\n"
 
 #: sunrpc/rpcinfo.c:611
 msgid "       rpcinfo -p [ host ]\n"
@@ -28,14 +28,14 @@ msgstr "       rpcinfo -p [ Rechner ]\n"
 #: sunrpc/rpcinfo.c:610
 msgid "       rpcinfo [ -n portnum ] -t host prognum [ versnum ]\n"
 msgstr ""
-"       rpcinfo [ -n Port-Nummer ] -t Rechner Programm-Nummer "
-"[Versions-Nummer]\n"
+"       rpcinfo [ -n Portnummer ] -t Rechner Programmnummer [ Versionsnummer "
+"]\n"
 
 #: sunrpc/rpcinfo.c:517
 msgid "   program vers proto   port\n"
 msgstr "   Program Vers Proto   Port\n"
 
-#: time/zic.c:419
+#: time/zic.c:421
 #, c-format
 msgid " (rule from \"%s\", line %d)"
 msgstr " (Regel aus Datei »%s«, Zeile %d)"
@@ -44,22 +44,22 @@ msgstr " (Regel aus Datei 
 msgid " done\n"
 msgstr " fertig\n"
 
-#: time/zic.c:416
+#: time/zic.c:418
 #, c-format
 msgid "\"%s\", line %d: %s"
 msgstr "»%s«, Zeile %d: %s"
 
-#: time/zic.c:943
+#: time/zic.c:947
 #, c-format
 msgid "\"Zone %s\" line and -l option are mutually exclusive"
 msgstr "»Zone %s«-Zeile und die Option »-l« schliessen sich aus"
 
-#: time/zic.c:951
+#: time/zic.c:955
 #, c-format
 msgid "\"Zone %s\" line and -p option are mutually exclusive"
 msgstr "»Zone %s«-Zeile und die Option »-p« schliessen sich aus"
 
-#: time/zic.c:754
+#: time/zic.c:758
 #, c-format
 msgid "%s in ruleless zone"
 msgstr "%s in einer regellosen Zone"
@@ -80,52 +80,53 @@ msgid "%s%sUnknown signal %d\n"
 msgstr "%s%sUnbekanntes Signal %d\n"
 
 # Ob diese Übersetzung so korrekt ist? - jh
-#: time/zic.c:2172
+# definitiv nicht, aber hoffentlich ist es so besser - Klaus Espenlaub
+#: time/zic.c:2185
 #, c-format
 msgid "%s: %d did not sign extend correctly\n"
-msgstr "%s: %d Das Vorzeichen der extended-Zahl ist nicht korrekt\n"
+msgstr "%s: Vorzeichenerweiterung von %d fehlgeschlagen\n"
 
 #: locale/programs/charmap.c:176
 #, c-format
 msgid "%s: <mb_cur_max> must be greater than <mb_cur_min>\n"
 msgstr "%s: »<mb_cur_max>« muß größer als »<mb_cur_min>« sein\n"
 
-#: time/zic.c:1443
+#: time/zic.c:1456
 #, c-format
 msgid "%s: Can't create %s: %s\n"
 msgstr "%s: Kann »%s« nicht erzeugen: %s\n"
 
-#: time/zic.c:2150
+#: time/zic.c:2163
 #, c-format
 msgid "%s: Can't create directory %s: %s\n"
 msgstr "%s: Kann das Verzeichnis »%s« nicht erzeugen: %s\n"
 
-#: time/zic.c:608
+#: time/zic.c:610
 #, c-format
 msgid "%s: Can't link from %s to %s: %s\n"
 msgstr "%s: Kann nicht von »%s« nach »%s« linken: %s\n"
 
-#: time/zic.c:780
+#: time/zic.c:784
 #, c-format
 msgid "%s: Can't open %s: %s\n"
 msgstr "%s: Kann die Datei »%s« nicht öffnen: %s\n"
 
-#: time/zic.c:1433
+#: time/zic.c:1446
 #, c-format
 msgid "%s: Can't remove %s: %s\n"
 msgstr "%s: Kann »%s« nicht löschen: %s\n"
 
-#: time/zic.c:849
+#: time/zic.c:853
 #, c-format
 msgid "%s: Error closing %s: %s\n"
 msgstr "%s: Fehler beim Schliessen der Datei »%s«: %s\n"
 
-#: time/zic.c:842
+#: time/zic.c:846
 #, c-format
 msgid "%s: Error reading %s\n"
 msgstr "%s: Fehler beim Lesen der Datei »%s«\n"
 
-#: time/zic.c:1507
+#: time/zic.c:1520
 #, c-format
 msgid "%s: Error writing %s\n"
 msgstr "%s: Fehler beim Schreiben der Datei »%s«\n"
@@ -135,42 +136,42 @@ msgstr "%s: Fehler beim Schreiben der Datei 
 msgid "%s: Error writing standard output "
 msgstr "%s: Fehler beim Schreiben auf die Standardausgabe "
 
-#: time/zic.c:827
+#: time/zic.c:831
 #, c-format
 msgid "%s: Leap line in non leap seconds file %s\n"
-msgstr "%s: Schalt-Zeile in einer nicht-Schalt-Sekunden-Datei »%s«\n"
+msgstr "%s: Schalt-Zeile in einer nicht-Schaltsekunden-Datei »%s«\n"
 
-#: time/zic.c:357
+#: time/zic.c:359
 #, c-format
 msgid "%s: Memory exhausted: %s\n"
 msgstr "%s: Kein Hauptspeicher mehr verfügbar: %s\n"
 
-#: time/zic.c:522
+#: time/zic.c:524
 #, c-format
 msgid "%s: More than one -L option specified\n"
 msgstr "%s: Mehr als eine »-L« Option angegeben\n"
 
-#: time/zic.c:482
+#: time/zic.c:484
 #, c-format
 msgid "%s: More than one -d option specified\n"
 msgstr "%s: Mehr als eine »-d« Option angegeben\n"
 
-#: time/zic.c:492
+#: time/zic.c:494
 #, c-format
 msgid "%s: More than one -l option specified\n"
 msgstr "%s: Mehr als eine »-l« Option angegeben\n"
 
-#: time/zic.c:502
+#: time/zic.c:504
 #, c-format
 msgid "%s: More than one -p option specified\n"
 msgstr "%s: Mehr als eine »-p« Option angegeben\n"
 
-#: time/zic.c:512
+#: time/zic.c:514
 #, c-format
 msgid "%s: More than one -y option specified\n"
 msgstr "%s: Mehr als eine »-y« Option angegeben\n"
 
-#: time/zic.c:1872
+#: time/zic.c:1885
 #, c-format
 msgid "%s: command was '%s', result was %d\n"
 msgstr "%s: Das Kommando war »%s«, das Ergebnis war %d\n"
@@ -188,7 +189,7 @@ msgstr "%s: Die Option ist nicht erlaubt -- 
 #: posix/getopt.c:786
 #, c-format
 msgid "%s: invalid option -- %c\n"
-msgstr "%s: Ungültige option -- »%c«\n"
+msgstr "%s: Ungültige Option -- »%c«\n"
 
 #: posix/getopt.c:707
 #, c-format
@@ -225,7 +226,7 @@ msgstr "%s: Die Option 
 msgid "%s: option requires an argument -- %c\n"
 msgstr "%s: Diese Option benötigt ein Argument -- »%c«\n"
 
-#: time/zic.c:834 time/zic.c:1246 time/zic.c:1266
+#: time/zic.c:838 time/zic.c:1251 time/zic.c:1275
 #, c-format
 msgid "%s: panic: Invalid l_value %d\n"
 msgstr "%s: Panik: ungültiger »l_value« %d\n"
@@ -245,21 +246,21 @@ msgstr "%s: Unbekannte Option 
 msgid "%s: unrecognized option `--%s'\n"
 msgstr "%s: Unbekannte Option »--%s«\n"
 
-#: time/zic.c:441
+#: time/zic.c:443
 #, c-format
 msgid ""
 "%s: usage is %s [ -s ] [ -v ] [ -l localtime ] [ -p posixrules ] [ -d "
 "directory ]\n"
 "\t[ -L leapseconds ] [ -y yearistype ] [ filename ... ]\n"
 msgstr ""
-"%s: Syntax ist %s [ -s ] [ -v ] [ -l Lokale-Zeit ] [ -p Posix-Regeln ] [ -d "
+"%s: Syntax ist %s [ -s ] [ -v ] [ -l Ortszeit ] [ -p Posix-Regeln ] [ -d "
 "Verzeichnis ]\n"
 "\t[ -L Schaltsekunden ] [ -y Jahrestyp ] [ Dateiname ... ]\n"
 
 #: time/zdump.c:174
 #, c-format
 msgid "%s: usage is %s [ -v ] [ -c cutoff ] zonename ...\n"
-msgstr "%s: Syntax: %s [ -v ] [ -c cutoff ] Zonen-Name ...\n"
+msgstr "%s: Syntax: %s [ -v ] [ -c cutoff ] Zonenname ...\n"
 
 #: sunrpc/clnt_perr.c:125
 #, c-format
@@ -272,7 +273,7 @@ msgstr "Unbekanntes Signal"
 
 #: catgets/gencat.c:254
 msgid "*standard input*"
-msgstr "*Standard-Eingabe*"
+msgstr "*Standardeingabe*"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:766
 msgid ".lib section in a.out corrupted"
@@ -327,7 +328,7 @@ msgstr "Abgebrochen"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:762
 msgid "Accessing a corrupted shared library"
-msgstr "Zugriff auf eine fehlerhafte oder defekte Shared-Library"
+msgstr "Zugriff auf eine fehlerhafte oder defekte Shared Library"
 
 #. TRANS The requested socket address is already in use.  @xref{Socket Addresses}.
 #: stdio-common/../sysdeps/gnu/errlist.c:354
@@ -338,12 +339,14 @@ msgstr "Die Adresse wird bereits verwendet"
 #. TRANS inconsistent with the protocol being used on the socket.  @xref{Sockets}.
 #: stdio-common/../sysdeps/gnu/errlist.c:349
 msgid "Address family not supported by protocol"
-msgstr "Die Adress-Familie wird von der Protokoll-Familie nicht unterstützt"
+msgstr "Die Adreßfamilie wird von der Protokollfamilie nicht unterstützt"
 
 # Diese Übersetzung macht eigentlich keinen Sinn - jh
+# man -s 2 Intro auf Solaris2 laesst diese Übersetzung 
+# sinnvoller erscheinen - Klaus Espenlaub
 #: stdio-common/../sysdeps/gnu/errlist.c:730
 msgid "Advertise error"
-msgstr "Fehler bei der Werbung"
+msgstr "Konflikt mit Bekanntmachung"
 
 #: stdio-common/../sysdeps/unix/siglist.c:43
 #: sysdeps/unix/sysv/linux/siglist.h:33
@@ -360,7 +363,7 @@ msgstr "Die Argumentliste ist zu lang"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:770
 msgid "Attempting to link in too many shared libraries"
-msgstr "Versuch zu viele Shared-Libraries einzubinden"
+msgstr "Versuch zu viele Shared Libraries einzubinden"
 
 #: sunrpc/clnt_perr.c:276
 msgid "Authentication OK"
@@ -382,11 +385,11 @@ msgstr "Ung
 #. TRANS versa).
 #: stdio-common/../sysdeps/gnu/errlist.c:70
 msgid "Bad file descriptor"
-msgstr "Ungültiger Datei-Deskriptor"
+msgstr "Ungültiger Dateideskriptor"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:718
 msgid "Bad font file format"
-msgstr "Ungültiges Font-Datei-Format"
+msgstr "Ungültiges Font-Dateiformat"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:610
 msgid "Bad message"
@@ -420,6 +423,8 @@ msgid "Broken pipe"
 msgstr "Datenübergabe unterbrochen (broken pipe)"
 
 # Ungültige Adressierung? - jh
+# die Übersetzung scheint mir gut - SIGBUS kann jedoch 
+# vieles bedeuten - Klaus Espenlaub
 #: stdio-common/../sysdeps/unix/siglist.c:39
 #: sysdeps/unix/sysv/linux/siglist.h:30
 msgid "Bus error"
@@ -431,7 +436,7 @@ msgstr "Rechenzeitbegrenzung 
 
 #: stdio-common/../sysdeps/gnu/errlist.c:758
 msgid "Can not access a needed shared library"
-msgstr "Auf eine benötigte Shared-Library kann nicht zugegriffen werden"
+msgstr "Auf eine benötigte Shared Library kann nicht zugegriffen werden"
 
 #: nis/ypclnt.c:695
 msgid "Can't bind to server which serves this domain"
@@ -468,7 +473,7 @@ msgstr "Kann den Socket f
 
 #: stdio-common/../sysdeps/gnu/errlist.c:774
 msgid "Cannot exec a shared library directly"
-msgstr "Eine Shared-Library kann nicht direkt ausgeführt werden"
+msgstr "Eine Shared Library kann nicht direkt ausgeführt werden"
 
 #: sunrpc/pmap_rmt.c:350
 msgid "Cannot receive reply to broadcast"
@@ -549,7 +554,7 @@ msgstr "Die Wartezeit f
 msgid "Continued"
 msgstr "Fortgesetzt"
 
-#: catgets/gencat.c:169 db/makedb.c:120 locale/programs/locale.c:187
+#: catgets/gencat.c:169 db/makedb.c:120 locale/programs/locale.c:191
 #: locale/programs/localedef.c:180
 #, c-format
 msgid ""
@@ -596,7 +601,7 @@ msgstr "Das Ger
 #. TRANS mounted filesystem, you get this error.
 #: stdio-common/../sysdeps/gnu/errlist.c:116
 msgid "Device or resource busy"
-msgstr "Das Gerät oder die Resource ist belegt"
+msgstr "Das Gerät oder die Ressource ist belegt"
 
 #. TRANS Directory not empty, where an empty directory was expected.  Typically,
 #. TRANS this error occurs when you are trying to delete a directory.
@@ -633,7 +638,7 @@ msgstr "Fehler: Die Datei 
 
 #: stdio-common/../sysdeps/gnu/errlist.c:698
 msgid "Exchange full"
-msgstr "Umsteigebahnhof ist überfüllt"
+msgstr "Vermittlung ist überfüllt"
 
 #. TRANS Invalid executable file format.  This condition is detected by the
 #. TRANS @code{exec} functions; see @ref{Executing a File}.
@@ -773,7 +778,7 @@ msgstr "Informationsanfrage"
 #. TRANS Input/output error; usually used for physical read or write errors.
 #: stdio-common/../sysdeps/gnu/errlist.c:40
 msgid "Input/output error"
-msgstr "Eingabe-/Ausgabe-Fehler"
+msgstr "Eingabe-/Ausgabefehler"
 
 #: nis/ypclnt.c:701
 msgid "Internal NIS error"
@@ -809,11 +814,11 @@ msgstr "Der unterbrochene Betriebssystemaufruf sollte neu gestartet werden"
 msgid "Invalid argument"
 msgstr "Das Argument ist ungültig"
 
-#: posix/regex.c:960
+#: posix/regex.c:978
 msgid "Invalid back reference"
 msgstr "Ungültiger Verweis zurück"
 
-#: posix/regex.c:958
+#: posix/regex.c:976
 msgid "Invalid character class name"
 msgstr "Ungültiger Name für eine Zeichenklasse"
 
@@ -825,11 +830,11 @@ msgstr "Die Best
 msgid "Invalid client verifier"
 msgstr "Ungültige Überprüfung des Clients"
 
-#: posix/regex.c:957
+#: posix/regex.c:975
 msgid "Invalid collation character"
 msgstr "Ungültiges Sortierzeichen"
 
-#: posix/regex.c:964
+#: posix/regex.c:982
 msgid "Invalid content of \\{\\}"
 msgstr "Ungültiger Inhalt von »\\{\\}«"
 
@@ -850,15 +855,15 @@ msgstr "Ung
 msgid "Invalid or incomplete multibyte or wide character"
 msgstr "Ungültiges oder unvollständiges Multi-Byte oder Wide-Zeichen"
 
-#: posix/regex.c:967
+#: posix/regex.c:985
 msgid "Invalid preceding regular expression"
 msgstr "Der vorherige reguläre Ausdruck ist nicht korrekt."
 
-#: posix/regex.c:965
+#: posix/regex.c:983
 msgid "Invalid range end"
 msgstr "Das Ende des angegebenen Intervalls ist nicht gültig"
 
-#: posix/regex.c:956
+#: posix/regex.c:974
 msgid "Invalid regular expression"
 msgstr "Ungültiger regulärer Ausdruck"
 
@@ -923,13 +928,13 @@ msgstr "Der lokale Domain-Name ist nicht eingetragen"
 
 #: nis/ypclnt.c:703
 msgid "Local resource allocation failure"
-msgstr "Lokaler Fehler bei der Resourcen-Beschaffung"
+msgstr "Lokaler Fehler bei der Ressourcenreservierung"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:722
 msgid "Machine is not on the network"
 msgstr "Die Maschine ist nicht an das Netzwerk angeschlossen"
 
-#: posix/regex.c:966
+#: posix/regex.c:984
 msgid "Memory exhausted"
 msgstr "Kein Hauptspeicher mehr verfügbar"
 
@@ -1019,7 +1024,7 @@ msgstr "Keine Daten verf
 msgid "No locks available"
 msgstr "Keine Sperren verfügbar"
 
-#: posix/regex.c:955
+#: posix/regex.c:973
 msgid "No match"
 msgstr "Keine Übereinstimmung gefunden"
 
@@ -1031,7 +1036,7 @@ msgstr "Keine Nachricht des gew
 msgid "No more records in map database"
 msgstr "Keine weiteren Sätze in der Map-Datenbank"
 
-#: posix/regex.c:5324
+#: posix/regex.c:5434
 msgid "No previous regular expression"
 msgstr "Es wurde bisher noch kein regulärer Ausdruck definiert"
 
@@ -1042,7 +1047,7 @@ msgstr "Es sind keine entfernten Programme registriert.\n"
 #. TRANS The remote host for a requested network connection is not reachable.
 #: stdio-common/../sysdeps/gnu/errlist.c:462
 msgid "No route to host"
-msgstr "Keine Route zum entfernten Rechner"
+msgstr "Keine Route zum Zielrechner"
 
 #. TRANS No space left on device; write operation on a file failed because the
 #. TRANS disk is full.
@@ -1099,7 +1104,7 @@ msgstr "Das numerische Ergebnis ist au
 msgid "Object is remote"
 msgstr "Das Objekt ist remote"
 
-#: time/zic.c:1966
+#: time/zic.c:1979
 msgid "Odd number of quotation marks"
 msgstr "Ungerade Anzahl von Anführungszeichen"
 
@@ -1154,7 +1159,7 @@ msgstr "Die Operation w
 
 #: stdio-common/../sysdeps/gnu/errlist.c:634
 msgid "Out of streams resources"
-msgstr "Keine Stream-Resourcen mehr verfügbar"
+msgstr "Keine Stream-Ressourcen mehr verfügbar"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:726
 msgid "Package not installed"
@@ -1169,7 +1174,7 @@ msgstr "Keine Berechtigung"
 msgid "Power failure"
 msgstr "Fehler in der Stromversorgung"
 
-#: posix/regex.c:968
+#: posix/regex.c:986
 msgid "Premature end of regular expression"
 msgstr "Unerwartetes Ende des regulären Ausdruckes"
 
@@ -1184,7 +1189,7 @@ msgstr "Das Protokoll ist nicht verf
 
 #: stdio-common/../sysdeps/gnu/errlist.c:646
 msgid "Protocol error"
-msgstr "Protokoll-Fehler"
+msgstr "Protokollfehler"
 
 #. TRANS The socket communications protocol family you requested is not supported.
 #: stdio-common/../sysdeps/gnu/errlist.c:343
@@ -1235,7 +1240,7 @@ msgstr "RPC: Programm nicht verf
 #. TRANS ???
 #: stdio-common/../sysdeps/gnu/errlist.c:522
 msgid "RPC program version wrong"
-msgstr "RPC: Die Programm-Version ist falsch"
+msgstr "RPC: Die Programmversion ist falsch"
 
 #. TRANS ???
 #: stdio-common/../sysdeps/gnu/errlist.c:507
@@ -1332,13 +1337,13 @@ msgstr "
 msgid "Read-only file system"
 msgstr "Das Dateisystem ist nur lesbar"
 
-#: posix/regex.c:969
+#: posix/regex.c:987
 msgid "Regular expression too big"
 msgstr "Der reguläre Ausdruck ist zu groß"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:798
 msgid "Remote I/O error"
-msgstr "Ein-/Ausgabe-Fehler des entfernten Systems"
+msgstr "Ein-/Ausgabefehler des entfernten Systems"
 
 #: stdio-common/../sysdeps/gnu/errlist.c:754
 msgid "Remote address changed"
@@ -1348,10 +1353,12 @@ msgstr "Die entfernte Adresse hat sich ge
 msgid "Remove password or make file unreadable by others."
 msgstr "Das Paßwort löschen oder die Datei für andere nicht lesbar anlegen."
 
-#: catgets/gencat.c:224 db/makedb.c:227 locale/programs/locale.c:257
-#: locale/programs/localedef.c:412
+#: catgets/gencat.c:224 db/makedb.c:227 locale/programs/locale.c:262
+#: locale/programs/localedef.c:415
 msgid "Report bugs using the `glibcbug' script to <bugs@gnu.ai.mit.edu>.\n"
-msgstr "Fehler bitte mit dem »glibcbug«-Skript an <bug-glibc@prep.ai.mit.edu> melden.\n"
+msgstr ""
+"Fehler bitte mit dem »glibcbug«-Skript an <bug-glibc@prep.ai.mit.edu> "
+"melden.\n"
 
 #: nis/ypclnt.c:691
 msgid "Request arguments bad"
@@ -1371,11 +1378,11 @@ msgstr "Interner Fehler des Resolvers"
 #. TRANS noticed; it might just hang.  @xref{File Locks}, for an example.
 #: stdio-common/../sysdeps/gnu/errlist.c:85
 msgid "Resource deadlock avoided"
-msgstr "Verklemmung beim Zugriff auf eine Resource vermieden"
+msgstr "Verklemmung beim Zugriff auf eine Ressource vermieden"
 
 #: stdio-common/../sysdeps/unix/siglist.c:58
 msgid "Resource lost"
-msgstr "Die Resource ist verlorengegangen"
+msgstr "Die Ressource ist verlorengegangen"
 
 #. TRANS Resource temporarily unavailable; the call might work if you try again
 #. TRANS later.  The macro @code{EWOULDBLOCK} is another name for @code{EAGAIN};
@@ -1408,7 +1415,7 @@ msgstr "Die Resource ist verlorengegangen"
 #. TRANS @end itemize
 #: stdio-common/../sysdeps/gnu/errlist.c:267
 msgid "Resource temporarily unavailable"
-msgstr "Die Resource ist zur Zeit nicht verfügbar"
+msgstr "Die Ressource ist zur Zeit nicht verfügbar"
 
 #: stdio-common/../sysdeps/unix/siglist.c:40
 #: sysdeps/unix/sysv/linux/siglist.h:31
@@ -1448,7 +1455,7 @@ msgstr "
 
 #: sysdeps/unix/sysv/linux/siglist.h:59
 msgid "Stack fault"
-msgstr "Stack-Fehler"
+msgstr "Stackfehler"
 
 #. TRANS Stale NFS file handle.  This indicates an internal confusion in the NFS
 #. TRANS system which is due to file system rearrangements on the server host.
@@ -1486,14 +1493,14 @@ msgstr "Fehler in Stream-Pipe"
 msgid "Structure needs cleaning"
 msgstr "Die Struktur muß bereinigt werden"
 
-#: nis/ypclnt.c:689 nis/ypclnt.c:763 posix/regex.c:954
+#: nis/ypclnt.c:689 nis/ypclnt.c:763 posix/regex.c:972
 #: stdio-common/../sysdeps/gnu/errlist.c:7
 msgid "Success"
 msgstr "Erfolg"
 
 #: nis/ypclnt.c:769
 msgid "System resource allocation failure"
-msgstr "Fehler bei der Beschaffung einer System-Resource"
+msgstr "Fehler bei der Beschaffung einer Systemressource"
 
 #: stdio-common/../sysdeps/unix/siglist.c:44
 #: sysdeps/unix/sysv/linux/siglist.h:34
@@ -1570,7 +1577,7 @@ msgstr "Trace/BPT ausgel
 msgid "Trace/breakpoint trap"
 msgstr "Trace/Breakpoint ausgelöst"
 
-#: posix/regex.c:959
+#: posix/regex.c:977
 msgid "Trailing backslash"
 msgstr "Angehängter Backslash (»\\«)"
 
@@ -1595,8 +1602,8 @@ msgstr "Der Socket ist bereits verbunden"
 msgid "Transport endpoint is not connected"
 msgstr "Der Socket ist nicht verbunden"
 
-#: catgets/gencat.c:208 db/makedb.c:209 locale/programs/locale.c:241
-#: locale/programs/localedef.c:393
+#: catgets/gencat.c:208 db/makedb.c:209 locale/programs/locale.c:246
+#: locale/programs/localedef.c:396
 #, c-format
 msgid "Try `%s --help' for more information.\n"
 msgstr "»%s --help« gibt weitere Informationen.\n"
@@ -1645,19 +1652,19 @@ msgstr "Unbekannter Systemfehler"
 msgid "Unknown ypbind error"
 msgstr "Unbekannter Fehler im Â»ypbind«"
 
-#: posix/regex.c:962
+#: posix/regex.c:980
 msgid "Unmatched ( or \\("
 msgstr "»(« oder »\\(« ohne schließende Klammer"
 
-#: posix/regex.c:970
+#: posix/regex.c:988
 msgid "Unmatched ) or \\)"
 msgstr "»)« oder Â»\\)« ohne Ã¶ffnende Klammer"
 
-#: posix/regex.c:961
+#: posix/regex.c:979
 msgid "Unmatched [ or [^"
 msgstr "»[« oder Â»[^« ohne schließende Klammer"
 
-#: posix/regex.c:963
+#: posix/regex.c:981
 msgid "Unmatched \\{"
 msgstr "»\\{« ohne schließende Klammer"
 
@@ -1669,7 +1676,7 @@ msgstr "Unbekannte Variable 
 #: stdio-common/../sysdeps/unix/siglist.c:45
 #: sysdeps/unix/sysv/linux/siglist.h:35
 msgid "Urgent I/O condition"
-msgstr "Dringende Ein-/Ausgabe-Bedingung"
+msgstr "Dringende Ein-/Ausgabebedingung"
 
 #: catgets/gencat.c:212
 #, c-format
@@ -1677,7 +1684,8 @@ msgid ""
 "Usage: %s [OPTION]... -o OUTPUT-FILE [INPUT-FILE]...\n"
 "       %s [OPTION]... [OUTPUT-FILE [INPUT-FILE]...]\n"
 "Mandatory arguments to long options are mandatory for short options too.\n"
-"  -H, --header        create C header file containing symbol definitions\n"
+"  -H, --header=NAME   create C header file NAME containing symbol "
+"definitions\n"
 "  -h, --help          display this help and exit\n"
 "      --new           do not use existing catalog, force new output file\n"
 "  -o, --output=NAME   write output to file NAME\n"
@@ -1685,12 +1693,12 @@ msgid ""
 "If INPUT-FILE is -, input is read from standard input.  If OUTPUT-FILE\n"
 "is -, output is written to standard output.\n"
 msgstr ""
-"Syntax: %s [OPTION]... -o Ausgabe-Datei [Eingabe-Datei]...\n"
-"        %s [OPTION]... [Ausgabe-Datei [Eingabe-Datei]...]\n"
+"Syntax: %s [OPTION]... -o Ausgabedatei [Eingabedatei]...\n"
+"        %s [OPTION]... [Ausgabedatei [Eingabedatei]...]\n"
 "\n"
 "Notwendige Argumente für lange Optionen sind auch für kurze erforderlich. \n"
 "\n"
-"  -H, --header        Erzeuge C-Header-Dateien mit den Symbol-Definitionen\n"
+"  -H, --header=NAME   Erzeuge C-Header-Datei NAME mit den Symbol-Definitionen\n"
 "  -h, --help          Zeigt diese Hilfe an\n"
 "      --new           Verwendet keinen existierenden Katalog sondern \n"
 "                      erzwingt die Erzeugung eines neuen\n"
@@ -1733,7 +1741,7 @@ msgstr ""
 "Wenn als Name der Eingabe-Datei ein '-' angegeben ist, dann wird\n"
 "von der Standard-Eingabe gelesen.\n"
 
-#: locale/programs/localedef.c:397
+#: locale/programs/localedef.c:400
 #, c-format
 msgid ""
 "Usage: %s [OPTION]... name\n"
@@ -1771,7 +1779,7 @@ msgstr ""
 "System-Verzeichnis für Zeichensatzbeschreibungen: %s\n"
 "                       Lokale-Definitionen      : %s\n"
 
-#: locale/programs/locale.c:245
+#: locale/programs/locale.c:250
 #, c-format
 msgid ""
 "Usage: %s [OPTION]... name\n"
@@ -1802,13 +1810,13 @@ msgstr ""
 #: posix/getconf.c:200
 #, c-format
 msgid "Usage: %s variable_name [pathname]\n"
-msgstr "Syntax: %s Variablen-Name [Pfadname]\n"
+msgstr "Syntax: %s Variablenname [Pfadname]\n"
 
 #: sunrpc/rpcinfo.c:609
 msgid "Usage: rpcinfo [ -n portnum ] -u host prognum [ versnum ]\n"
 msgstr ""
-"Syntax: rpcinfo [ -n Portnummer ] -u Rechner Programm-Nummer "
-"[Versions-Nummer]\n"
+"Syntax: rpcinfo [ -n Portnummer ] -u Rechner Programmnummer [ Versionsnummer "
+"]\n"
 
 #: stdio-common/../sysdeps/unix/siglist.c:59
 #: sysdeps/unix/sysv/linux/siglist.h:48
@@ -1829,16 +1837,16 @@ msgstr "Der Wert ist zu gro
 msgid "Virtual timer expired"
 msgstr "Der virtuelle Zeitnehmer ist abgelaufen"
 
-#: time/zic.c:1871
+#: time/zic.c:1884
 msgid "Wild result from command execution"
-msgstr "Wildes Ergebnis aus der Kommando-Ausführung"
+msgstr "Wildes Ergebnis aus der Kommandoausführung"
 
 #: stdio-common/../sysdeps/unix/siglist.c:57
 #: sysdeps/unix/sysv/linux/siglist.h:47
 msgid "Window changed"
 msgstr "Die Fenstergröße wurde verändert"
 
-#: catgets/gencat.c:174 db/makedb.c:125 locale/programs/locale.c:192
+#: catgets/gencat.c:174 db/makedb.c:125 locale/programs/locale.c:196
 #: locale/programs/localedef.c:185
 #, c-format
 msgid "Written by %s.\n"
@@ -1855,7 +1863,7 @@ msgstr "YPBINDPROC_DOMAIN: Kein Server f
 
 #: nis/ypclnt.c:150
 msgid "YPBINDPROC_DOMAIN: Resource allocation failure\n"
-msgstr "YPBINDPROC_DOMAIN: Fehler bei der Resourcen-Beschaffung\n"
+msgstr "YPBINDPROC_DOMAIN: Fehler bei der Ressourcenbeschaffung\n"
 
 #: nis/ypclnt.c:154
 msgid "YPBINDPROC_DOMAIN: Unknown error\n"
@@ -1866,7 +1874,7 @@ msgstr "YPBINDPROC_DOMAIN: Unbekannter Fehler\n"
 msgid "You really blew it this time"
 msgstr "Diesmal hast Du es wirklich kaputtgemacht"
 
-#: time/zic.c:1048
+#: time/zic.c:1052
 msgid "Zone continuation line end time is not after end time of previous line"
 msgstr ""
 "Die Ende-Zeit der Fortsetzungszeile ist nicht später als die Ende-Zeit der "
@@ -1894,8 +1902,8 @@ msgstr "
 msgid ""
 "`from' string in collation element declaration contains unknown character"
 msgstr ""
-"Der »from«-String in der Deklaration des »collation element« enthält "
-"unbekannte Zeichen"
+"Der »from«-String in der Deklaration des Sortierelements enthält unbekannte "
+"Zeichen"
 
 #: locale/programs/charmap.c:267
 #, c-format
@@ -1919,11 +1927,11 @@ msgstr "Ung
 msgid "bad argument"
 msgstr "Das Argument ist ungültig"
 
-#: time/zic.c:1170
+#: time/zic.c:1174
 msgid "blank FROM field on Link line"
 msgstr "Leeres FROM-Feld in der Link-Zeile"
 
-#: time/zic.c:1174
+#: time/zic.c:1178
 msgid "blank TO field on Link line"
 msgstr "Leeres TO-Feld in der Link-Zeile"
 
@@ -1947,16 +1955,18 @@ msgstr "Broadcast: ioctl (Holen der Parameter der Schnittstelle)"
 msgid "cache_set: victim not found"
 msgstr "cache_set: Das Opfer wurde nicht gefunden"
 
-#: time/zic.c:1698
+#: time/zic.c:1711
 msgid "can't determine time zone abbreviation to use just after until time"
-msgstr "Kann die Abkürzung der Zeitzone zur Verwendung direkt nach der Ende-Zeit nicht bestimmen"
+msgstr ""
+"Kann die Abkürzung der Zeitzone zur Verwendung direkt nach der Ende-Zeit "
+"nicht bestimmen"
 
 #: sunrpc/svc_simple.c:64
 #, c-format
 msgid "can't reassign procedure number %d\n"
 msgstr "Kann die Prozedurnummer %d nicht erneut zuweisen\n"
 
-#: locale/programs/localedef.c:291
+#: locale/programs/localedef.c:294
 #, c-format
 msgid "cannot `stat' locale file `%s'"
 msgstr "Kann den Status der Lokale-Datei »%s« nicht bestimmen"
@@ -1968,12 +1978,12 @@ msgstr "Kann das Sortierelement 
 
 #: locale/programs/ld-collate.c:1496 locale/programs/ld-collate.c:1501
 msgid "cannot insert into result table"
-msgstr "Kann nicht in die Ergebnis-Tabelle einfügen"
+msgstr "Kann nicht in die Ergebnistabelle einfügen"
 
 #: locale/programs/ld-collate.c:1169 locale/programs/ld-collate.c:1211
 #, c-format
 msgid "cannot insert new collating symbol definition: %s"
-msgstr "Kann die neue Sortiersymbol-Definition nicht einfügen: %s"
+msgstr "Kann die neue Sortiersymboldefinition nicht einfügen: %s"
 
 #: db/makedb.c:161
 #, c-format
@@ -1995,7 +2005,7 @@ msgstr "Kann die Lokale-Definitionsdatei 
 msgid "cannot open output file `%s'"
 msgstr "Kann die Ausgabedatei »%s« nicht öffnen"
 
-#: locale/programs/locfile.c:1008
+#: locale/programs/locfile.c:1020
 #, c-format
 msgid "cannot open output file `%s' for category `%s'"
 msgstr "Kann die Ausgabedatei »%s« für die Kategorie »%s« nicht öffnen"
@@ -2004,28 +2014,28 @@ msgstr "Kann die Ausgabedatei 
 msgid "cannot process order specification"
 msgstr "Kann die Spezifikation der Sortierreihenfolge nicht verarbeiten"
 
-#: locale/programs/locale.c:304
+#: locale/programs/locale.c:444
 #, c-format
 msgid "cannot read character map directory `%s'"
 msgstr ""
 "Das Verzeichnis »%s« der Zeichensatz-Definitionen kann nicht gelesen werden"
 
-#: locale/programs/locale.c:279
+#: locale/programs/locale.c:301
 #, c-format
 msgid "cannot read locale directory `%s'"
 msgstr "Kann das Lokale-Verzeichnis »%s« nicht lesen"
 
-#: locale/programs/localedef.c:313
+#: locale/programs/localedef.c:316
 #, c-format
 msgid "cannot read locale file `%s'"
 msgstr "Kann die Lokale-Datei »%s« nicht lesen"
 
-#: locale/programs/localedef.c:338
+#: locale/programs/localedef.c:341
 #, c-format
 msgid "cannot write output files to `%s'"
 msgstr "Kann die Ausgabedatei »%s« nicht schreiben"
 
-#: locale/programs/localedef.c:381
+#: locale/programs/localedef.c:384
 msgid "category data requested more than once: should not happen"
 msgstr ""
 "Die Daten einer Kategorie sind mehrfach angefordert worden, das sollte nicht "
@@ -2073,7 +2083,7 @@ msgstr "Die Zeichensatzbeschreibung 
 #: locale/programs/charmap.c:76
 #, c-format
 msgid "character map file `%s' not found"
-msgstr "Die Zeichensatzbeschreibungs-Datei »%s« wurde nicht gefunden"
+msgstr "Die Zeichensatzbeschreibungsdatei »%s« wurde nicht gefunden"
 
 #: sunrpc/clnt_raw.c:106
 msgid "clnt_raw.c - Fatal header serialization error."
@@ -2158,18 +2168,18 @@ msgstr "Doppelter Schl
 msgid "duplicate set definition"
 msgstr "Doppelte »set«-Definition"
 
-#: time/zic.c:963
+#: time/zic.c:967
 #, c-format
 msgid "duplicate zone name %s (file \"%s\", line %d)"
-msgstr "Doppelter Zonen-Name »%s« (Datei »%s«, Zeile %d)"
+msgstr "Doppelter Zonenname »%s« (Datei »%s«, Zeile %d)"
 
 #: catgets/gencat.c:542
 msgid "duplicated message identifier"
-msgstr "Der Nachrichten-Bezeichner ist mehrfach vorhanden"
+msgstr "Der Nachrichtenbezeichner ist mehrfach vorhanden"
 
 #: catgets/gencat.c:515
 msgid "duplicated message number"
-msgstr "Die Nachrichten-Nummer ist mehrfach vorhanden"
+msgstr "Die Nachrichtennummer ist mehrfach vorhanden"
 
 #: locale/programs/ld-collate.c:1699
 msgid "empty weight name: line ignored"
@@ -2193,11 +2203,11 @@ msgstr "enablecache: Kann keinen FIFO-Cache anlegen"
 
 #: locale/programs/ld-collate.c:1422
 msgid "end point of ellipsis range is bigger then start"
-msgstr "Das Ende eines Auslassungs-Intervalls ist gößer als der Start"
+msgstr "Das Ende eines Auslassungsintervalls ist größer als der Start"
 
 #: locale/programs/ld-collate.c:1152
 msgid "error while inserting collation element into hash table"
-msgstr "Fehler beim Einfügen des Collation-Elementes in die Hash-Tabelle"
+msgstr "Fehler beim Einfügen des Sortierelementes in die Hash-Tabelle"
 
 #: locale/programs/ld-collate.c:1164
 msgid "error while inserting to hash table"
@@ -2207,11 +2217,11 @@ msgstr "Fehler beim Einf
 msgid "expect string argument for `copy'"
 msgstr "Für »copy« wird ein String-Argument erwartet"
 
-#: time/zic.c:854
+#: time/zic.c:858
 msgid "expected continuation line not found"
 msgstr "Die erwartete Fortsetzungszeile ist nicht vorhanden"
 
-#: locale/programs/locfile.c:1032
+#: locale/programs/locfile.c:1044
 #, c-format
 msgid "failure while writing data for category `%s'"
 msgstr "Fehler beim Schreiben der Daten für die Kategorie »%s«"
@@ -2236,7 +2246,7 @@ msgstr "Der 
 
 #: locale/programs/linereader.c:328
 msgid "garbage at end of character code specification"
-msgstr "Murks am Ende einer Zeichensatz-Spezifikation"
+msgstr "Murks am Ende einer Zeichensatzspezifikation"
 
 #: locale/programs/linereader.c:214
 msgid "garbage at end of number"
@@ -2270,21 +2280,21 @@ msgstr ""
 msgid "get_myaddress: ioctl (get interface configuration)"
 msgstr "get_myaddress: ioctl (Holen der Schnittstellen-Konfiguration)"
 
-#: time/zic.c:1147
+#: time/zic.c:1151
 msgid "illegal CORRECTION field on Leap line"
 msgstr "ungültiges »CORRECTION«-Feld in der »Leap«-Zeile"
 
-#: time/zic.c:1151
+#: time/zic.c:1155
 msgid "illegal Rolling/Stationary field on Leap line"
 msgstr "ungültiges Â»Rolling/Stationary«-Feld in der Â»Leap«-Zeile"
 
 #: locale/programs/ld-collate.c:1770
 msgid "illegal character constant in string"
-msgstr "Ungültige Zeichen-Konstante in der Zeichenkette"
+msgstr "Ungültige Zeichenkonstante in der Zeichenkette"
 
 #: locale/programs/ld-collate.c:1119
 msgid "illegal collation element"
-msgstr "Ungültiges »collation«-Element"
+msgstr "Ungültiges Sortierelement"
 
 #: locale/programs/charmap.c:196
 msgid "illegal definition"
@@ -2300,7 +2310,7 @@ msgstr "ung
 
 #: locale/programs/charset.c:101
 msgid "illegal names for character range"
-msgstr "ungültige Namen für den Zeichen-Bereich"
+msgstr "ungültige Namen für den Zeichenbereich"
 
 #: locale/programs/ld-time.c:176
 #, c-format
@@ -2343,21 +2353,21 @@ msgstr ""
 msgid "incorrectly formatted file"
 msgstr "ungültig formatierte Datei"
 
-#: time/zic.c:811
+#: time/zic.c:815
 msgid "input line of unknown type"
 msgstr "Die Eingabezeile ist von einem unbekannten Typ"
 
-#: time/zic.c:1760
+#: time/zic.c:1773
 msgid "internal error - addtype called with bad isdst"
 msgstr ""
 "Interner Fehler - »addtype« wurde mit einer ungültigen »isdst« aufgerufen"
 
-#: time/zic.c:1768
+#: time/zic.c:1781
 msgid "internal error - addtype called with bad ttisgmt"
 msgstr ""
 "Interner Fehler - »addtype« wurde mit einem ungültigen »ttisgmt« aufgerufen"
 
-#: time/zic.c:1764
+#: time/zic.c:1777
 msgid "internal error - addtype called with bad ttisstd"
 msgstr ""
 "Interner Fehler - »addtype« wurde mit einem ungültigen »ttisstd« aufgerufen"
@@ -2368,43 +2378,43 @@ msgid "internal error in %s, line %u"
 msgstr "Interner Fehler in der Datei »%s«, Zeile %u"
 
 # Sollte das nicht UTC sein? -- jh
-#: time/zic.c:1019
+#: time/zic.c:1023
 msgid "invalid GMT offset"
 msgstr "ungültiger GMT-Offset"
 
-#: time/zic.c:1022
+#: time/zic.c:1026
 msgid "invalid abbreviation format"
 msgstr "ungültiges Abkürzungsformat"
 
-#: time/zic.c:1112 time/zic.c:1313 time/zic.c:1327
+#: time/zic.c:1116 time/zic.c:1326 time/zic.c:1340
 msgid "invalid day of month"
 msgstr "Ungültiger Tag des Monats"
 
-#: time/zic.c:1270
+#: time/zic.c:1279
 msgid "invalid ending year"
 msgstr "Ungültiges Ende-Jahr"
 
-#: time/zic.c:1084
+#: time/zic.c:1088
 msgid "invalid leaping year"
 msgstr "Ungültiges Schaltjahr"
 
-#: time/zic.c:1099 time/zic.c:1202
+#: time/zic.c:1103 time/zic.c:1206
 msgid "invalid month name"
 msgstr "ungültiger Monatsname"
 
-#: time/zic.c:918
+#: time/zic.c:922
 msgid "invalid saved time"
 msgstr "Ungültige gespeicherte Zeit"
 
-#: time/zic.c:1250
+#: time/zic.c:1255
 msgid "invalid starting year"
 msgstr "Ungültiges Anfangsjahr"
 
-#: time/zic.c:1128 time/zic.c:1230
+#: time/zic.c:1132 time/zic.c:1235
 msgid "invalid time of day"
 msgstr "Ungültige Tageszeit"
 
-#: time/zic.c:1318
+#: time/zic.c:1331
 msgid "invalid weekday name"
 msgstr "ungültiger Name für einen Wochentag"
 
@@ -2419,11 +2429,11 @@ msgid "line before ellipsis does not contain definition for character constant"
 msgstr ""
 "Die Zeile vor einem Auslassungsintervall muß eine Zeichen-Konstante enthalten"
 
-#: time/zic.c:791
+#: time/zic.c:795
 msgid "line too long"
 msgstr "Die Zeile ist zu lang"
 
-#: locale/programs/localedef.c:285
+#: locale/programs/localedef.c:288
 #, c-format
 msgid "locale file `%s', used in `copy' statement, not found"
 msgstr ""
@@ -2444,12 +2454,12 @@ msgstr "Der Speicher nach dem Ende des allozierten Blockes wurde 
 
 #: locale/programs/ld-collate.c:167 locale/programs/ld-collate.c:173
 #: locale/programs/ld-collate.c:177 locale/programs/ld-collate.c:1442
-#: locale/programs/ld-collate.c:1471 locale/programs/locfile.c:962
+#: locale/programs/ld-collate.c:1471 locale/programs/locfile.c:974
 #: locale/programs/xmalloc.c:68 posix/getconf.c:250
 msgid "memory exhausted"
 msgstr "Kein Hauptspeicher mehr verfügbar"
 
-#: malloc/obstack.c:462
+#: malloc/obstack.c:466
 msgid "memory exhausted\n"
 msgstr "Kein Hauptspeicher mehr verfügbar\n"
 
@@ -2471,7 +2481,7 @@ msgstr ""
 "fehlender »era«-Name in der Zeichenkette %d im »era« Feld in der Kategorie "
 "»%s«"
 
-#: time/zic.c:913
+#: time/zic.c:917
 msgid "nameless rule"
 msgstr "Regel ohne Name"
 
@@ -2487,7 +2497,7 @@ msgstr ""
 "Kein gültiger regulärer Ausdruck für den Eintrag »%s« in der Kategorie »%s«: "
 "%s"
 
-#: time/zic.c:2086
+#: time/zic.c:2099
 msgid "no day in month matches rule"
 msgstr "Kein Tag des Monats paßt zur angegebenen Regel"
 
@@ -2501,9 +2511,9 @@ msgstr ""
 "Es dürfen keine anderen Schlüsselworte angegeben werden, wenn »copy« "
 "verwendet wird"
 
-#: locale/programs/localedef.c:344
+#: locale/programs/localedef.c:347
 msgid "no output file produced because warning were issued"
-msgstr "Es wurde keine Ausgabe-Datei erzeugt, weil Warnungen ausgegeben wurden"
+msgstr "Es wurde keine Ausgabedatei erzeugt, weil Warnungen ausgegeben wurden"
 
 #: locale/programs/charmap.c:315 locale/programs/charmap.c:466
 #: locale/programs/charmap.c:545
@@ -2548,7 +2558,7 @@ msgstr "Program %lu Version %lu ist bereit und wartet\n"
 #: inet/rcmd.c:172
 #, c-format
 msgid "rcmd: select (setting up stderr): %m\n"
-msgstr "rcmd: select (Vorbereiten der Standard-Fehlerausgabe): %m\n"
+msgstr "rcmd: select (Vorbereiten der Standardfehlerausgabe): %m\n"
 
 #: inet/rcmd.c:104
 msgid "rcmd: socket: All ports in use\n"
@@ -2557,13 +2567,13 @@ msgstr "rcmd: Socket: Alle Ports sind zur Zeit belegt\n"
 #: inet/rcmd.c:160
 #, c-format
 msgid "rcmd: write (setting up stderr): %m\n"
-msgstr "rcmd: write (Vorbereiten der Standard-Fehlerausgabe): %m\n"
+msgstr "rcmd: write (Vorbereiten der Standardfehlerausgabe): %m\n"
 
 #: sunrpc/svc_simple.c:83
 msgid "registerrpc: out of memory\n"
 msgstr "registerrpc: Hauptspeicher erschöpft\n"
 
-#: time/zic.c:1821
+#: time/zic.c:1834
 msgid "repeated leap second moment"
 msgstr "Wiederholung der »Leap«-Sekunde"
 
@@ -2597,17 +2607,17 @@ msgstr "rpcinfo: Kann den Portmapper nicht erreichen"
 msgid "rpcinfo: can't contact portmapper: "
 msgstr "rpcinfo: Kann den Portmapper nicht erreichen: "
 
-#: time/zic.c:704 time/zic.c:706
+#: time/zic.c:708 time/zic.c:710
 msgid "same rule name in multiple files"
 msgstr "Dieselbe Regel ist in mehreren Dateien enthalten"
 
 #: inet/rcmd.c:175
 msgid "select: protocol failure in circuit setup\n"
-msgstr "Select: Protokoll-Fehler im Kreislauf-Setup\n"
+msgstr "Select: Protokollfehler im Verbindungsaufbau\n"
 
 #: inet/rcmd.c:193
 msgid "socket: protocol failure in circuit setup\n"
-msgstr "socket: Protokoll-Fehler im Kreislauf-Setup\n"
+msgstr "socket: Protokollfehler im Verbindungsaufbau\n"
 
 #: locale/programs/locfile.c:622
 msgid "sorting order `forward' and `backward' are mutually exclusive"
@@ -2617,16 +2627,15 @@ msgstr "Die Anweisungen 
 msgid ""
 "specification of sorting weight for collation symbol does not make sense"
 msgstr ""
-"Die Angabe eines Sortiergewichtes für ein »Collation«-Symbol ist nicht "
-"sinnvoll"
+"Die Angabe eines Sortiergewichtes für ein Sortiersymbol ist nicht sinnvoll"
 
-#: time/zic.c:775
+#: time/zic.c:779
 msgid "standard input"
-msgstr "Standard-Eingabe"
+msgstr "Standardeingabe"
 
 #: time/zdump.c:268
 msgid "standard output"
-msgstr "Standard-Ausgabe"
+msgstr "Standardausgabe"
 
 #: locale/programs/ld-time.c:257
 #, c-format
@@ -2635,10 +2644,18 @@ msgstr ""
 "Das Start-Datum in der Zeichenkette %d im »era«-Feld der Kategorie »%s« ist "
 "ungültig"
 
-#: time/zic.c:1274
+#: time/zic.c:1287
 msgid "starting year greater than ending year"
 msgstr "Das Start-Jahr ist größer als das Ende-Jahr"
 
+#: time/zic.c:1261 time/zic.c:1285
+msgid "starting year too high to be represented"
+msgstr "Das Startjahr ist zu groß für die Darstellung"
+
+#: time/zic.c:1259 time/zic.c:1283
+msgid "starting year too low to be represented"
+msgstr "Das Startjahr ist zu klein für die Darstellung"
+
 #: locale/programs/ld-time.c:330
 #, c-format
 msgid "stopping date is illegal in string %d in `era' field in category `%s'"
@@ -2646,6 +2663,10 @@ msgstr ""
 "Das Ende-Datum in der Zeichenkette %d im »era«-Feld in der Kategorie »%s« "
 "ist ungültig"
 
+#: sunrpc/svc_run.c:81
+msgid "svc_run: - select failed"
+msgstr "svc_run - »select« ist fehlgeschlagen"
+
 #: sunrpc/svc_tcp.c:201 sunrpc/svc_tcp.c:206
 msgid "svc_tcp: makefd_xprt: out of memory\n"
 msgstr "svc_tcp: makefd_xprt: Hauptspeicher erschöpft\n"
@@ -2677,14 +2698,18 @@ msgstr "svcudp_create: Problem bei der Erstellung des Sockets"
 msgid ""
 "symbol for multicharacter collating element `%.*s' duplicates element "
 "definition"
-msgstr "Das Symbol für das Mehr-Zeichen Collating-Element »%.*s« wiederholt die Element-Definition"
+msgstr ""
+"Das Symbol für das Mehr-Zeichen Sortierelement »%.*s« wiederholt die "
+"Elementdefinition"
 
 #: locale/programs/ld-collate.c:1067
 #, c-format
 msgid ""
 "symbol for multicharacter collating element `%.*s' duplicates other element "
 "definition"
-msgstr "Das Symbol für das 'multicharacter collating'-Element »%.*s« wiederholt eine andere Element-Definition"
+msgstr ""
+"Das Symbol für das 'multicharacter' Sortierelement »%.*s« wiederholt eine "
+"andere Elementdefinition"
 
 #: locale/programs/ld-collate.c:1203
 #, c-format
@@ -2692,8 +2717,8 @@ msgid ""
 "symbol for multicharacter collating element `%.*s' duplicates other symbol "
 "definition"
 msgstr ""
-"Das Symbol für das 'multicharacter collating'-Element »%.*s« wiederholt eine "
-"andere Symbol-Definition"
+"Das Symbol für das 'multicharacter' Sortierelement »%.*s« wiederholt eine "
+"andere Symboldefinition"
 
 #: locale/programs/ld-collate.c:1076
 #, c-format
@@ -2701,8 +2726,8 @@ msgid ""
 "symbol for multicharacter collating element `%.*s' duplicates symbol "
 "definition"
 msgstr ""
-"Das Symbol für das 'multicharacter collating'-Element »%.*s« wiederholt eine "
-"Symbol-Definition"
+"Das Symbol für das 'multicharacter' Sortierelement »%.*s« wiederholt eine "
+"Symboldefinition"
 
 #: locale/programs/ld-collate.c:1058 locale/programs/ld-collate.c:1185
 #, c-format
@@ -2710,7 +2735,7 @@ msgid ""
 "symbol for multicharacter collating element `%.*s' duplicates symbolic name "
 "in charset"
 msgstr ""
-"Das Symbol für das 'multicharacter collating'-Element »%.*s« kollidiert mit "
+"Das Symbol für das 'multicharacter' Sortierelement »%.*s« kollidiert mit "
 "einem symbolischen Namen in der Zeichensatzbeschreibung"
 
 #: locale/programs/charmap.c:314 locale/programs/charmap.c:348
@@ -2727,11 +2752,11 @@ msgstr "Syntaxfehler in der 
 
 #: locale/programs/locfile.c:384
 msgid "syntax error in character class definition"
-msgstr "Syntaxfehler in der Zeichenklassen-Definitionsdatei"
+msgstr "Syntaxfehler in der Zeichenklassendefinition"
 
 #: locale/programs/locfile.c:442
 msgid "syntax error in character conversion definition"
-msgstr "Syntaxfehler in der Zeichensatz-Umwandlungs-Definition"
+msgstr "Syntaxfehler in der Zeichensatzumwandlungsdefinition"
 
 #: locale/programs/locfile.c:684
 msgid "syntax error in collating order definition"
@@ -2772,7 +2797,7 @@ msgstr "Syntaxfehler in der Lokale-Definitionsdatei, Abschnitt 
 #: locale/programs/charmap.c:195 locale/programs/charmap.c:211
 #, c-format
 msgid "syntax error in prolog: %s"
-msgstr "Syntax-Fehler im Prolog: %s"
+msgstr "Syntaxfehler im Prolog: %s"
 
 #: locale/programs/locfile.c:871
 msgid "syntax error in time locale definition"
@@ -2786,35 +2811,35 @@ msgstr "Syntaxfehler: nicht in einem Abschnitt der Lokale-Definition"
 msgid "this is the first definition"
 msgstr "Dies ist die erste Definition"
 
-#: time/zic.c:1117
+#: time/zic.c:1121
 msgid "time before zero"
 msgstr "Zeit vor Null"
 
-#: time/zic.c:1125 time/zic.c:1986 time/zic.c:2005
+#: time/zic.c:1129 time/zic.c:1999 time/zic.c:2018
 msgid "time overflow"
 msgstr "Überlauf der Zeit"
 
 #: locale/programs/charset.c:44
 msgid "too few bytes in character encoding"
-msgstr "Zu wenige Bytes in der Zeichen-Kodierung"
+msgstr "Zu wenige Bytes in der Zeichenkodierung"
 
 #: locale/programs/charset.c:46
 msgid "too many bytes in character encoding"
-msgstr "Zu viele Bytes in der Zeichen-Kodierung"
+msgstr "Zu viele Bytes in der Zeichenkodierung"
 
 #: locale/programs/locales.h:72
 msgid "too many character classes defined"
 msgstr "Zu viele Zeichenklassen definiert"
 
-#: time/zic.c:1815
+#: time/zic.c:1828
 msgid "too many leap seconds"
 msgstr "Zu viele Schaltsekunden"
 
-#: time/zic.c:1787
+#: time/zic.c:1800
 msgid "too many local time types"
-msgstr "Zu viele lokale Zeit-Typen"
+msgstr "Zu viele Ortszeittypen"
 
-#: time/zic.c:1741
+#: time/zic.c:1754
 msgid "too many transitions?!"
 msgstr "Zu viele Übergänge?!"
 
@@ -2822,7 +2847,7 @@ msgstr "Zu viele 
 msgid "too many weights"
 msgstr "Zu viele Sortiergewichte"
 
-#: time/zic.c:2109
+#: time/zic.c:2122
 msgid "too many, or too long, time zone abbreviations"
 msgstr "Zu viele oder zu lange Abkürzungen für Zeitzonen"
 
@@ -2839,7 +2864,7 @@ msgstr "Schwierigkeiten bei der Antwort an das Programm %d\n"
 msgid "two lines in a row containing `...' are not allowed"
 msgstr "Zwei aufeinanderfolgende Zeilen mit »...« sind nicht erlaubt"
 
-#: time/zic.c:1281
+#: time/zic.c:1294
 msgid "typed single year"
 msgstr "Ein einzelnes Jahr angegeben"
 
@@ -2879,7 +2904,7 @@ msgstr "Unbekanntes Set 
 msgid "unknown symbol `%.*s': line ignored"
 msgstr "Unbekanntes Symbol »%.*s«: Die Zeile wurde ignoriert"
 
-#: time/zic.c:747
+#: time/zic.c:751
 msgid "unruly zone"
 msgstr "Zeitzone ohne Regeln"
 
@@ -2897,13 +2922,13 @@ msgstr "Der symbolische Name wird nicht beendet"
 
 #: locale/programs/ld-collate.c:1688
 msgid "unterminated weight name"
-msgstr "Der Name des Sortier-Gewichtes wird nicht beendet"
+msgstr "Der Name des Sortiergewichtes wird nicht beendet"
 
 #: locale/programs/charset.c:119
 msgid "upper limit in range is not smaller then lower limit"
 msgstr "Das obere Ende des Intervalls ist nicht kleiner als das untere Ende"
 
-#: time/zic.c:2052
+#: time/zic.c:2065
 msgid "use of 2/29 in non leap-year"
 msgstr "Der 29. Februar wurde in einem nicht-Schaltjahr verwendet"
 
@@ -2954,13 +2979,17 @@ msgstr ""
 msgid "values for field `%s' in category `%s' must not be zero"
 msgstr "Der Eintrag im Feld »%s« in der Kategorie »%s« darf nicht Null sein"
 
-#: login/utmp_file.c:84
+#: login/utmp_file.c:76
 msgid "while opening UTMP file"
 msgstr "beim Öffnen der UTMP-Datei"
 
 #: catgets/gencat.c:989
 msgid "while opening old catalog file"
-msgstr "beim Öffnen der alten Katalog-Datei"
+msgstr "beim Öffnen der alten Katalogdatei"
+
+#: locale/programs/locale.c:341
+msgid "while preparing output"
+msgstr "beim Aufbereiten der Ausgabe"
 
 #: db/makedb.c:354
 msgid "while reading database"
@@ -2968,29 +2997,29 @@ msgstr "beim Lesen der Datenbank"
 
 #: db/makedb.c:316
 msgid "while writing data base file"
-msgstr "beim Schreiben der Datenbank-Datei"
+msgstr "beim Schreiben der Datenbankdatei"
 
 #: db/makedb.c:142
 msgid "wrong number of arguments"
 msgstr "Falsche Anzahl an Argumenten"
 
-#: time/zic.c:1075
+#: time/zic.c:1079
 msgid "wrong number of fields on Leap line"
 msgstr "Falsche Anzahl an Feldern in der Leap-Zeile"
 
-#: time/zic.c:1166
+#: time/zic.c:1170
 msgid "wrong number of fields on Link line"
 msgstr "Falsche Anzahl der Felder in der Link-Zeile"
 
-#: time/zic.c:909
+#: time/zic.c:913
 msgid "wrong number of fields on Rule line"
 msgstr "Falsche Anzahl der Felder in der Rule-Zeile"
 
-#: time/zic.c:979
+#: time/zic.c:983
 msgid "wrong number of fields on Zone continuation line"
 msgstr "Falsche Anzahl der Felder in der Zeitzonen-Fortsetzungszeile"
 
-#: time/zic.c:937
+#: time/zic.c:941
 msgid "wrong number of fields on Zone line"
 msgstr "Falsche Anzahl an Feldern in der Zone-Zeile"
 
@@ -3001,4 +3030,3 @@ msgstr "yp_update: Kann den Rechnername nicht in einen Netzname umwandeln\n"
 #: nis/ypclnt.c:823
 msgid "yp_update: cannot get server address\n"
 msgstr "yp_update: Kann die Adresse des Servers nicht finden\n"
-
index 8b296021f593b12389a20c574094efa22f9c59a8..ef9726c71ce599fd3f728d919f2f9ccf03f2c682 100644 (file)
--- a/po/sv.po
+++ b/po/sv.po
@@ -5,8 +5,8 @@
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: libc 2.0.3\n"
-"POT-Creation-Date: 1997-03-30 19:08+0200\n"
+"Project-Id-Version: libc 2.0.5\n"
+"POT-Creation-Date: 1997-08-21 04:13+0200\n"
 "PO-Revision-Date: $Date$\n"
 "Last-Translator: Jan Djärv <Jan.Djarv@mbox200.swipnet.se>\n"
 "Language-Team: Swedish <sv@li.org>\n"
@@ -34,7 +34,7 @@ msgstr "       rpcinfo [ -n portnr ] -t v
 msgid "   program vers proto   port\n"
 msgstr "   program vers proto   port\n"
 
-#: time/zic.c:419
+#: time/zic.c:421
 #, c-format
 msgid " (rule from \"%s\", line %d)"
 msgstr " (regel från \"%s\", rad %d)"
@@ -43,22 +43,22 @@ msgstr " (regel fr
 msgid " done\n"
 msgstr " klar\n"
 
-#: time/zic.c:416
+#: time/zic.c:418
 #, c-format
 msgid "\"%s\", line %d: %s"
 msgstr "\"%s\", rad %d: %s"
 
-#: time/zic.c:943
+#: time/zic.c:947
 #, c-format
 msgid "\"Zone %s\" line and -l option are mutually exclusive"
 msgstr "\"Zone %s\"-rad och flaggan -l är ömsesidigt uteslutande"
 
-#: time/zic.c:951
+#: time/zic.c:955
 #, c-format
 msgid "\"Zone %s\" line and -p option are mutually exclusive"
 msgstr "\"Zone %s\"-rad och flaggan -p är ömsesidigt uteslutande"
 
-#: time/zic.c:754
+#: time/zic.c:758
 #, c-format
 msgid "%s in ruleless zone"
 msgstr "%s i zon utan regel"
@@ -78,7 +78,7 @@ msgstr "%s%s%s:%u: %s%sOv
 msgid "%s%sUnknown signal %d\n"
 msgstr "%s%sOkänd signal %d\n"
 
-#: time/zic.c:2172
+#: time/zic.c:2185
 #, c-format
 msgid "%s: %d did not sign extend correctly\n"
 msgstr "%s: %d teckenexpanderades inte korrekt\n"
@@ -88,42 +88,42 @@ msgstr "%s: %d teckenexpanderades inte korrekt\n"
 msgid "%s: <mb_cur_max> must be greater than <mb_cur_min>\n"
 msgstr "%s: <mb_cur_max> måste vara större än <mb_cur_min>\n"
 
-#: time/zic.c:1443
+#: time/zic.c:1456
 #, c-format
 msgid "%s: Can't create %s: %s\n"
 msgstr "%s: Kan inte skapa %s: %s\n"
 
-#: time/zic.c:2150
+#: time/zic.c:2163
 #, c-format
 msgid "%s: Can't create directory %s: %s\n"
 msgstr "%s: Kan inte skapa katalog %s: %s\n"
 
-#: time/zic.c:608
+#: time/zic.c:610
 #, c-format
 msgid "%s: Can't link from %s to %s: %s\n"
 msgstr "%s: Kan inte länka från %s till %s: %s\n"
 
-#: time/zic.c:780
+#: time/zic.c:784
 #, c-format
 msgid "%s: Can't open %s: %s\n"
 msgstr "%s: Kan inte öppna %s: %s\n"
 
-#: time/zic.c:1433
+#: time/zic.c:1446
 #, c-format
 msgid "%s: Can't remove %s: %s\n"
 msgstr "%s: Kan inte ta bort %s: %s\n"
 
-#: time/zic.c:849
+#: time/zic.c:853
 #, c-format
 msgid "%s: Error closing %s: %s\n"
 msgstr "%s: Fel vid stängning av %s: %s\n"
 
-#: time/zic.c:842
+#: time/zic.c:846
 #, c-format
 msgid "%s: Error reading %s\n"
 msgstr "%s: Fel vid läsning från %s\n"
 
-#: time/zic.c:1507
+#: time/zic.c:1520
 #, c-format
 msgid "%s: Error writing %s\n"
 msgstr "%s: Fel vid skrivning till %s\n"
@@ -133,42 +133,42 @@ msgstr "%s: Fel vid skrivning till %s\n"
 msgid "%s: Error writing standard output "
 msgstr "%s: Fel vid skrivning till standard ut "
 
-#: time/zic.c:827
+#: time/zic.c:831
 #, c-format
 msgid "%s: Leap line in non leap seconds file %s\n"
 msgstr "%s: \"Leap\"-rad i fil %s som inte är skottsekundsfil\n"
 
-#: time/zic.c:357
+#: time/zic.c:359
 #, c-format
 msgid "%s: Memory exhausted: %s\n"
 msgstr "%s: Minnet slut: %s\n"
 
-#: time/zic.c:522
+#: time/zic.c:524
 #, c-format
 msgid "%s: More than one -L option specified\n"
 msgstr "%s: Flaggan -L given mer än en gång\n"
 
-#: time/zic.c:482
+#: time/zic.c:484
 #, c-format
 msgid "%s: More than one -d option specified\n"
 msgstr "%s: Flaggan -d given mer Ã¤n en gÃ¥ng\n"
 
-#: time/zic.c:492
+#: time/zic.c:494
 #, c-format
 msgid "%s: More than one -l option specified\n"
 msgstr "%s: Flaggan -l given mer Ã¤n en gÃ¥ng\n"
 
-#: time/zic.c:502
+#: time/zic.c:504
 #, c-format
 msgid "%s: More than one -p option specified\n"
 msgstr "%s: Flaggan -p given mer Ã¤n en gÃ¥ng\n"
 
-#: time/zic.c:512
+#: time/zic.c:514
 #, c-format
 msgid "%s: More than one -y option specified\n"
 msgstr "%s: Flaggan -y given mer Ã¤n en gÃ¥ng\n"
 
-#: time/zic.c:1872
+#: time/zic.c:1885
 #, c-format
 msgid "%s: command was '%s', result was %d\n"
 msgstr "%s: kommandot var \"%s\", resultatet blev %d\n"
@@ -223,7 +223,7 @@ msgstr "%s: flaggan \"-W %s\" 
 msgid "%s: option requires an argument -- %c\n"
 msgstr "%s: flaggan behöver ett argument -- %c\n"
 
-#: time/zic.c:834 time/zic.c:1246 time/zic.c:1266
+#: time/zic.c:838 time/zic.c:1251 time/zic.c:1275
 #, c-format
 msgid "%s: panic: Invalid l_value %d\n"
 msgstr "%s: panik: ogiltigt l_value %d\n"
@@ -243,7 +243,7 @@ msgstr "%s: ok
 msgid "%s: unrecognized option `--%s'\n"
 msgstr "%s: okänd flagga \"--%s\"\n"
 
-#: time/zic.c:441
+#: time/zic.c:443
 #, c-format
 msgid ""
 "%s: usage is %s [ -s ] [ -v ] [ -l localtime ] [ -p posixrules ] [ -d "
@@ -541,7 +541,7 @@ msgstr "F
 msgid "Continued"
 msgstr "Återupptagen"
 
-#: catgets/gencat.c:169 db/makedb.c:120 locale/programs/locale.c:187
+#: catgets/gencat.c:169 db/makedb.c:120 locale/programs/locale.c:191
 #: locale/programs/localedef.c:180
 #, c-format
 msgid ""
@@ -799,11 +799,11 @@ msgstr "Avbrutet systemanrop borde omstartas"
 msgid "Invalid argument"
 msgstr "Ogiltigt argument"
 
-#: posix/regex.c:960
+#: posix/regex.c:978
 msgid "Invalid back reference"
 msgstr "Ogiltig bakåtreferens"
 
-#: posix/regex.c:958
+#: posix/regex.c:976
 msgid "Invalid character class name"
 msgstr "Ogiltigt teckenklassnamn"
 
@@ -815,11 +815,11 @@ msgstr "Ogiltiga klientreferenser"
 msgid "Invalid client verifier"
 msgstr "Ogiltig klientverifierare"
 
-#: posix/regex.c:957
+#: posix/regex.c:975
 msgid "Invalid collation character"
 msgstr "Ogiltigt kollationeringstecken"
 
-#: posix/regex.c:964
+#: posix/regex.c:982
 msgid "Invalid content of \\{\\}"
 msgstr "Ogiltigt innehåll i \\{\\}"
 
@@ -840,15 +840,15 @@ msgstr "Ogiltig v
 msgid "Invalid or incomplete multibyte or wide character"
 msgstr "Ogiltig eller inte komplett flerbyte- eller brett tecken"
 
-#: posix/regex.c:967
+#: posix/regex.c:985
 msgid "Invalid preceding regular expression"
 msgstr "Ogiltigt föregÃ¥ende reguljärt uttryck"
 
-#: posix/regex.c:965
+#: posix/regex.c:983
 msgid "Invalid range end"
 msgstr "Ogiltigt intervallslut"
 
-#: posix/regex.c:956
+#: posix/regex.c:974
 msgid "Invalid regular expression"
 msgstr "Ogiltigt reguljärt uttryck"
 
@@ -919,7 +919,7 @@ msgstr "Allokeringsfel f
 msgid "Machine is not on the network"
 msgstr "Maskinen finns inte på nätverket"
 
-#: posix/regex.c:966
+#: posix/regex.c:984
 msgid "Memory exhausted"
 msgstr "Minnet slut"
 
@@ -1008,7 +1008,7 @@ msgstr "Inga data tillg
 msgid "No locks available"
 msgstr "Inga lås tillgängliga"
 
-#: posix/regex.c:955
+#: posix/regex.c:973
 msgid "No match"
 msgstr "Ingen träff"
 
@@ -1020,7 +1020,7 @@ msgstr "Inget meddelande av 
 msgid "No more records in map database"
 msgstr "Inga fler poster i tabelldatabasen"
 
-#: posix/regex.c:5324
+#: posix/regex.c:5434
 msgid "No previous regular expression"
 msgstr "Inget föregående reguljärt uttryck"
 
@@ -1088,7 +1088,7 @@ msgstr "Numeriskt resultat 
 msgid "Object is remote"
 msgstr "Är ett fjärrobjekt"
 
-#: time/zic.c:1966
+#: time/zic.c:1979
 msgid "Odd number of quotation marks"
 msgstr "Ojämnt antal citationstecken"
 
@@ -1158,7 +1158,7 @@ msgstr "
 msgid "Power failure"
 msgstr "Strömavbrott"
 
-#: posix/regex.c:968
+#: posix/regex.c:986
 msgid "Premature end of regular expression"
 msgstr "För tidigt slut på reguljärt uttryck"
 
@@ -1321,7 +1321,7 @@ msgstr "RTLD_NEXT anv
 msgid "Read-only file system"
 msgstr "Filsystemet endast läsbart"
 
-#: posix/regex.c:969
+#: posix/regex.c:987
 msgid "Regular expression too big"
 msgstr "Reguljärt uttryck för stort"
 
@@ -1337,11 +1337,12 @@ msgstr "Fj
 msgid "Remove password or make file unreadable by others."
 msgstr "Ta bort lösenord eller gör filen oläsbar för andra"
 
-#: catgets/gencat.c:224 db/makedb.c:227 locale/programs/locale.c:257
-#: locale/programs/localedef.c:412
+#: catgets/gencat.c:224 db/makedb.c:227 locale/programs/locale.c:262
+#: locale/programs/localedef.c:415
 msgid "Report bugs using the `glibcbug' script to <bugs@gnu.ai.mit.edu>.\n"
 msgstr ""
-"Rapportera fel med programmet \"glibcbug\" till <bug-glibc@prep.ai.mit.edu>.\n"
+"Rapportera fel med programmet \"glibcbug\" till "
+"<bug-glibc@prep.ai.mit.edu>.\n"
 "Rapportera fel på översättningen till <sv@li.org>.\n"
 
 #: nis/ypclnt.c:691
@@ -1477,7 +1478,7 @@ msgstr "Streams-r
 msgid "Structure needs cleaning"
 msgstr "Strukturen behöver städas"
 
-#: nis/ypclnt.c:689 nis/ypclnt.c:763 posix/regex.c:954
+#: nis/ypclnt.c:689 nis/ypclnt.c:763 posix/regex.c:972
 #: stdio-common/../sysdeps/gnu/errlist.c:7
 msgid "Success"
 msgstr "Lyckat"
@@ -1561,7 +1562,7 @@ msgstr "Sp
 msgid "Trace/breakpoint trap"
 msgstr "Spårningsfälla"
 
-#: posix/regex.c:959
+#: posix/regex.c:977
 msgid "Trailing backslash"
 msgstr "Avslutande omvänt snedstreck"
 
@@ -1586,8 +1587,8 @@ msgstr "Transportslutpunkten 
 msgid "Transport endpoint is not connected"
 msgstr "Transportslutpunkten är inte förbunden"
 
-#: catgets/gencat.c:208 db/makedb.c:209 locale/programs/locale.c:241
-#: locale/programs/localedef.c:393
+#: catgets/gencat.c:208 db/makedb.c:209 locale/programs/locale.c:246
+#: locale/programs/localedef.c:396
 #, c-format
 msgid "Try `%s --help' for more information.\n"
 msgstr "Försök med \"%s --help\" för mer information\n"
@@ -1636,19 +1637,19 @@ msgstr "Ok
 msgid "Unknown ypbind error"
 msgstr "Okänt ypbind-fel"
 
-#: posix/regex.c:962
+#: posix/regex.c:980
 msgid "Unmatched ( or \\("
 msgstr "Obalanserade ( eller \\("
 
-#: posix/regex.c:970
+#: posix/regex.c:988
 msgid "Unmatched ) or \\)"
 msgstr "Obalanserade ) eller \\)"
 
-#: posix/regex.c:961
+#: posix/regex.c:979
 msgid "Unmatched [ or [^"
 msgstr "Obalanserade [ eller [^"
 
-#: posix/regex.c:963
+#: posix/regex.c:981
 msgid "Unmatched \\{"
 msgstr "Obalanserad \\{"
 
@@ -1668,7 +1669,8 @@ msgid ""
 "Usage: %s [OPTION]... -o OUTPUT-FILE [INPUT-FILE]...\n"
 "       %s [OPTION]... [OUTPUT-FILE [INPUT-FILE]...]\n"
 "Mandatory arguments to long options are mandatory for short options too.\n"
-"  -H, --header        create C header file containing symbol definitions\n"
+"  -H, --header=NAME   create C header file NAME containing symbol "
+"definitions\n"
 "  -h, --help          display this help and exit\n"
 "      --new           do not use existing catalog, force new output file\n"
 "  -o, --output=NAME   write output to file NAME\n"
@@ -1678,9 +1680,8 @@ msgid ""
 msgstr ""
 "Användning: %s [FLAGGA]... -o UTFIL [INFIL]...\n"
 "            %s [FLAGGA]... [UTFIL [INFIL]...]\n"
-"Obligatoriska argument till långa flaggor är obligatoriska även för de "
-"korta.\n"
-"  -H, --header        skapa en C-deklarationsfil med symboldefinitioner\n"
+"Obligatoriska argument till långa flaggor är obligatoriska även för de korta.\n"
+"  -H, --header=NAMN   skapa en C-deklarationsfil NAMN med symboldefinitioner\n"
 "  -h, --help          visa denna hjälptext och avsluta\n"
 "      --new           använd inte existerande katalog, gör en ny utfil\n"
 "  -o, --output=NAMN   skriv resultatet till filen NAMN\n"
@@ -1716,7 +1717,7 @@ msgstr ""
 "  -V, --version       visa versionsinformation och avsluta\n"
 "Om INFIL är -, läs indata från standard in\n"
 
-#: locale/programs/localedef.c:397
+#: locale/programs/localedef.c:400
 #, c-format
 msgid ""
 "Usage: %s [OPTION]... name\n"
@@ -1735,7 +1736,8 @@ msgid ""
 "                       locale files  : %s\n"
 msgstr ""
 "Användning: %s [FLAGGA]... namn\n"
-"Obligatoriska argument till långa flaggor är obligatoriska även för de korta.\n"
+"Obligatoriska argument till långa flaggor är obligatoriska även för de "
+"korta.\n"
 "  -c, --force               skapa resultatfil även om varningar gavs\n"
 "  -h, --help                visa denna hjälptext och avsluta\n"
 "  -f, --charmap=FIL         symboliska teckennamn definieras i FIL\n"
@@ -1749,7 +1751,7 @@ msgstr ""
 "Systemets katalog för teckenuppsättningar: %s\n"
 "                               lokalfiler: %s\n"
 
-#: locale/programs/locale.c:245
+#: locale/programs/locale.c:250
 #, c-format
 msgid ""
 "Usage: %s [OPTION]... name\n"
@@ -1803,7 +1805,7 @@ msgstr "V
 msgid "Virtual timer expired"
 msgstr "Alarmklocka - virtuell tid"
 
-#: time/zic.c:1871
+#: time/zic.c:1884
 msgid "Wild result from command execution"
 msgstr "Vilt resultat från kommandokörning"
 
@@ -1812,7 +1814,7 @@ msgstr "Vilt resultat fr
 msgid "Window changed"
 msgstr "Ändrat fönster"
 
-#: catgets/gencat.c:174 db/makedb.c:125 locale/programs/locale.c:192
+#: catgets/gencat.c:174 db/makedb.c:125 locale/programs/locale.c:196
 #: locale/programs/localedef.c:185
 #, c-format
 msgid "Written by %s.\n"
@@ -1840,7 +1842,7 @@ msgstr "YPBINDPROC_DOMAIN: Ok
 msgid "You really blew it this time"
 msgstr "Du strulade till det den här gången"
 
-#: time/zic.c:1048
+#: time/zic.c:1052
 msgid "Zone continuation line end time is not after end time of previous line"
 msgstr ""
 "Zon-fortsättningsradens sluttid är inte efter sluttiden på föregående rad"
@@ -1893,11 +1895,11 @@ msgstr "ol
 msgid "bad argument"
 msgstr "dåligt argument"
 
-#: time/zic.c:1170
+#: time/zic.c:1174
 msgid "blank FROM field on Link line"
 msgstr "tomt \"FROM\"-fält på \"Link\"-rad"
 
-#: time/zic.c:1174
+#: time/zic.c:1178
 msgid "blank TO field on Link line"
 msgstr "tomt \"TO\"-fält på \"Link\"-rad"
 
@@ -1921,7 +1923,7 @@ msgstr "uts
 msgid "cache_set: victim not found"
 msgstr "cache_set: offer hittades ej"
 
-#: time/zic.c:1698
+#: time/zic.c:1711
 msgid "can't determine time zone abbreviation to use just after until time"
 msgstr ""
 "kan inte avgöra tidszonsförkortning att använda just efter \"until\"-tid"
@@ -1931,7 +1933,7 @@ msgstr ""
 msgid "can't reassign procedure number %d\n"
 msgstr "kan inte ändra procedurnummer %d\n"
 
-#: locale/programs/localedef.c:291
+#: locale/programs/localedef.c:294
 #, c-format
 msgid "cannot `stat' locale file `%s'"
 msgstr "kan inte ta status på lokalfil \"%s\""
@@ -1970,7 +1972,7 @@ msgstr "kan inte 
 msgid "cannot open output file `%s'"
 msgstr "kan inte Ã¶ppna utfil \"%s\""
 
-#: locale/programs/locfile.c:1008
+#: locale/programs/locfile.c:1020
 #, c-format
 msgid "cannot open output file `%s' for category `%s'"
 msgstr "kan inte öppna utfil \"%s\" för kategori \"%s\""
@@ -1979,27 +1981,27 @@ msgstr "kan inte 
 msgid "cannot process order specification"
 msgstr "kan inte bearbeta sorteringsspecifikation"
 
-#: locale/programs/locale.c:304
+#: locale/programs/locale.c:444
 #, c-format
 msgid "cannot read character map directory `%s'"
 msgstr "kan inte läsa teckenuppsättningskatalog \"%s\""
 
-#: locale/programs/locale.c:279
+#: locale/programs/locale.c:301
 #, c-format
 msgid "cannot read locale directory `%s'"
 msgstr "kan inte läsa lokalkatalog \"%s\""
 
-#: locale/programs/localedef.c:313
+#: locale/programs/localedef.c:316
 #, c-format
 msgid "cannot read locale file `%s'"
 msgstr "kan inte läsa lokalfil \"%s\""
 
-#: locale/programs/localedef.c:338
+#: locale/programs/localedef.c:341
 #, c-format
 msgid "cannot write output files to `%s'"
 msgstr "kan inte skriva utfiler till \"%s\""
 
-#: locale/programs/localedef.c:381
+#: locale/programs/localedef.c:384
 msgid "category data requested more than once: should not happen"
 msgstr "kategoridata begärd mer än en gång: borde inte inträffa"
 
@@ -2085,7 +2087,9 @@ msgstr "standardteckenupps
 msgid ""
 "direction flag in string %d in `era' field in category `%s' is not '+' nor "
 "'-'"
-msgstr "riktningsflagga i sträng %d i \"era\"-fält i kategori \"%s\" är varken \"+\" eller \"-\""
+msgstr ""
+"riktningsflagga i sträng %d i \"era\"-fält i kategori \"%s\" är varken \"+\" "
+"eller \"-\""
 
 #: locale/programs/ld-time.c:164
 #, c-format
@@ -2118,7 +2122,7 @@ msgstr "duplicerad nyckel"
 msgid "duplicate set definition"
 msgstr "duplicerad definition av mängd"
 
-#: time/zic.c:963
+#: time/zic.c:967
 #, c-format
 msgid "duplicate zone name %s (file \"%s\", line %d)"
 msgstr "duplicerat zonnamn %s (fil \"%s\", rad %d)"
@@ -2167,11 +2171,11 @@ msgstr "fel vid ins
 msgid "expect string argument for `copy'"
 msgstr "förväntar strängargument för \"copy\""
 
-#: time/zic.c:854
+#: time/zic.c:858
 msgid "expected continuation line not found"
 msgstr "förväntad fortsättningsrad ej funnen"
 
-#: locale/programs/locfile.c:1032
+#: locale/programs/locfile.c:1044
 #, c-format
 msgid "failure while writing data for category `%s'"
 msgstr "misslyckades skriva data för kategori \"%s\""
@@ -2227,11 +2231,11 @@ msgstr ""
 msgid "get_myaddress: ioctl (get interface configuration)"
 msgstr "get_myaddress: ioctl (hämta gränssnittskonfiguration)"
 
-#: time/zic.c:1147
+#: time/zic.c:1151
 msgid "illegal CORRECTION field on Leap line"
 msgstr "otillåtet \"CORRECTION\"-fält på \"Leap\"-rad"
 
-#: time/zic.c:1151
+#: time/zic.c:1155
 msgid "illegal Rolling/Stationary field on Leap line"
 msgstr "otillåtet \"Rolling/Stationary\"-fält på \"Leap\"-rad"
 
@@ -2294,19 +2298,19 @@ msgstr ""
 msgid "incorrectly formatted file"
 msgstr "felaktigt formaterad rad"
 
-#: time/zic.c:811
+#: time/zic.c:815
 msgid "input line of unknown type"
 msgstr "inrad av okänd typ"
 
-#: time/zic.c:1760
+#: time/zic.c:1773
 msgid "internal error - addtype called with bad isdst"
 msgstr "internt fel - addtype anropad med felaktig isdst"
 
-#: time/zic.c:1768
+#: time/zic.c:1781
 msgid "internal error - addtype called with bad ttisgmt"
 msgstr "internt fel - addtype anropad med felaktig ttisgmt"
 
-#: time/zic.c:1764
+#: time/zic.c:1777
 msgid "internal error - addtype called with bad ttisstd"
 msgstr "internt fel - addtype anropad med felaktig ttisstd"
 
@@ -2315,43 +2319,43 @@ msgstr "internt fel - addtype anropad med felaktig ttisstd"
 msgid "internal error in %s, line %u"
 msgstr "internt fel i %s, rad %u"
 
-#: time/zic.c:1019
+#: time/zic.c:1023
 msgid "invalid GMT offset"
 msgstr "ogiltigt GMT-tillägg"
 
-#: time/zic.c:1022
+#: time/zic.c:1026
 msgid "invalid abbreviation format"
 msgstr "ogiltigt förkortningsformat"
 
-#: time/zic.c:1112 time/zic.c:1313 time/zic.c:1327
+#: time/zic.c:1116 time/zic.c:1326 time/zic.c:1340
 msgid "invalid day of month"
 msgstr "ogiltig dag i månaden"
 
-#: time/zic.c:1270
+#: time/zic.c:1279
 msgid "invalid ending year"
 msgstr "ogiltigt slutår"
 
-#: time/zic.c:1084
+#: time/zic.c:1088
 msgid "invalid leaping year"
 msgstr "ogiltigt skottår"
 
-#: time/zic.c:1099 time/zic.c:1202
+#: time/zic.c:1103 time/zic.c:1206
 msgid "invalid month name"
 msgstr "ogiltigt månadsnamn"
 
-#: time/zic.c:918
+#: time/zic.c:922
 msgid "invalid saved time"
 msgstr "ogiltigt sparad tid"
 
-#: time/zic.c:1250
+#: time/zic.c:1255
 msgid "invalid starting year"
 msgstr "ogiltigt startår"
 
-#: time/zic.c:1128 time/zic.c:1230
+#: time/zic.c:1132 time/zic.c:1235
 msgid "invalid time of day"
 msgstr "ogiltig tid på dagen"
 
-#: time/zic.c:1318
+#: time/zic.c:1331
 msgid "invalid weekday name"
 msgstr "ogiltigt veckodagsnamn"
 
@@ -2363,11 +2367,11 @@ msgstr "rad efter ... m
 msgid "line before ellipsis does not contain definition for character constant"
 msgstr "rad före ... innehåller inte definition för teckenkonstant"
 
-#: time/zic.c:791
+#: time/zic.c:795
 msgid "line too long"
 msgstr "för lång rad"
 
-#: locale/programs/localedef.c:285
+#: locale/programs/localedef.c:288
 #, c-format
 msgid "locale file `%s', used in `copy' statement, not found"
 msgstr "lokalfil \"%s\", använd i \"copy\", inte funnen"
@@ -2386,12 +2390,12 @@ msgstr "minnet f
 
 #: locale/programs/ld-collate.c:167 locale/programs/ld-collate.c:173
 #: locale/programs/ld-collate.c:177 locale/programs/ld-collate.c:1442
-#: locale/programs/ld-collate.c:1471 locale/programs/locfile.c:962
+#: locale/programs/ld-collate.c:1471 locale/programs/locfile.c:974
 #: locale/programs/xmalloc.c:68 posix/getconf.c:250
 msgid "memory exhausted"
 msgstr "minne slut"
 
-#: malloc/obstack.c:462
+#: malloc/obstack.c:466
 msgid "memory exhausted\n"
 msgstr "minnet slut\n"
 
@@ -2409,7 +2413,7 @@ msgstr "eraformat i str
 msgid "missing era name in string %d in `era' field in category `%s'"
 msgstr "eranamn i sträng %d i \"era\"-fält i kategori \"%s\" saknas"
 
-#: time/zic.c:913
+#: time/zic.c:917
 msgid "nameless rule"
 msgstr "namnlös regel"
 
@@ -2423,7 +2427,7 @@ msgstr "aldrig registrerat prog %d\n"
 msgid "no correct regular expression for field `%s' in category `%s': %s"
 msgstr "felaktigt reguljärt uttryck för fält \"%s\" i kategori \"%s\": %s"
 
-#: time/zic.c:2086
+#: time/zic.c:2099
 msgid "no day in month matches rule"
 msgstr "ingen dag i månaden matchar regeln"
 
@@ -2435,7 +2439,7 @@ msgstr "ingen definition av \"UNDEFINED\""
 msgid "no other keyword shall be specified when `copy' is used"
 msgstr "inget annat nyckelord ska anges när \"copy\" används"
 
-#: locale/programs/localedef.c:344
+#: locale/programs/localedef.c:347
 msgid "no output file produced because warning were issued"
 msgstr "ingen utfil skapad på grund av varningar"
 
@@ -2497,7 +2501,7 @@ msgstr "rcmd: write: (s
 msgid "registerrpc: out of memory\n"
 msgstr "registerrpc: minnet slut\n"
 
-#: time/zic.c:1821
+#: time/zic.c:1834
 msgid "repeated leap second moment"
 msgstr "upprepat skottsekundstillfälle"
 
@@ -2529,7 +2533,7 @@ msgstr "rpcinfo: kan inte kontakta portmapper"
 msgid "rpcinfo: can't contact portmapper: "
 msgstr "rpcinfo: kan inte kontakta portmapper: "
 
-#: time/zic.c:704 time/zic.c:706
+#: time/zic.c:708 time/zic.c:710
 msgid "same rule name in multiple files"
 msgstr "samma regel i flera filer"
 
@@ -2552,7 +2556,7 @@ msgid ""
 msgstr ""
 "specifikation av sorteringsvikter för kollationssymbol verkar inte vettig"
 
-#: time/zic.c:775
+#: time/zic.c:779
 msgid "standard input"
 msgstr "standard in"
 
@@ -2565,15 +2569,27 @@ msgstr "standard ut"
 msgid "starting date is illegal in string %d in `era' field in category `%s'"
 msgstr "startdatum är otillåtet i sträng %d i \"era\"-fält i kategori \"%s\""
 
-#: time/zic.c:1274
+#: time/zic.c:1287
 msgid "starting year greater than ending year"
 msgstr "startår är större än slutår"
 
+#: time/zic.c:1261 time/zic.c:1285
+msgid "starting year too high to be represented"
+msgstr "startår för stort för att kunna representeras"
+
+#: time/zic.c:1259 time/zic.c:1283
+msgid "starting year too low to be represented"
+msgstr "startår för litet för att kunna representeras"
+
 #: locale/programs/ld-time.c:330
 #, c-format
 msgid "stopping date is illegal in string %d in `era' field in category `%s'"
 msgstr "slutdatum är otillåtet i sträng %d i \"era\"-fält i kategori \"%s\""
 
+#: sunrpc/svc_run.c:81
+msgid "svc_run: - select failed"
+msgstr "svc_run: - select misslyckades"
+
 #: sunrpc/svc_tcp.c:201 sunrpc/svc_tcp.c:206
 msgid "svc_tcp: makefd_xprt: out of memory\n"
 msgstr "svc_tcp: makefd_xprt: minnet slut\n"
@@ -2714,11 +2730,11 @@ msgstr "syntaxfel: inte inne i en lokaldefinition"
 msgid "this is the first definition"
 msgstr "detta är den första definitionen"
 
-#: time/zic.c:1117
+#: time/zic.c:1121
 msgid "time before zero"
 msgstr "tid före noll"
 
-#: time/zic.c:1125 time/zic.c:1986 time/zic.c:2005
+#: time/zic.c:1129 time/zic.c:1999 time/zic.c:2018
 msgid "time overflow"
 msgstr "för stort tidsvärde"
 
@@ -2734,15 +2750,15 @@ msgstr "f
 msgid "too many character classes defined"
 msgstr "för mÃ¥nga teckenklasser definierade"
 
-#: time/zic.c:1815
+#: time/zic.c:1828
 msgid "too many leap seconds"
 msgstr "för många skottsekunder"
 
-#: time/zic.c:1787
+#: time/zic.c:1800
 msgid "too many local time types"
 msgstr "för mÃ¥nga lokala tidstyper"
 
-#: time/zic.c:1741
+#: time/zic.c:1754
 msgid "too many transitions?!"
 msgstr "för många övergångar?!"
 
@@ -2750,7 +2766,7 @@ msgstr "f
 msgid "too many weights"
 msgstr "för många vikter"
 
-#: time/zic.c:2109
+#: time/zic.c:2122
 msgid "too many, or too long, time zone abbreviations"
 msgstr "för många eller för långa tidszonförkortningar"
 
@@ -2767,7 +2783,7 @@ msgstr "problem att svara till prog %d\n"
 msgid "two lines in a row containing `...' are not allowed"
 msgstr "två rader efter varann som har \"...\" är inte tillåtet"
 
-#: time/zic.c:1281
+#: time/zic.c:1294
 msgid "typed single year"
 msgstr "satte typ på endast ett år"
 
@@ -2807,7 +2823,7 @@ msgstr "ok
 msgid "unknown symbol `%.*s': line ignored"
 msgstr "okänd symbol \"%.*s\": rad ignorerad"
 
-#: time/zic.c:747
+#: time/zic.c:751
 msgid "unruly zone"
 msgstr "besvärlig zon"
 
@@ -2831,7 +2847,7 @@ msgstr "oavslutat viktnamn"
 msgid "upper limit in range is not smaller then lower limit"
 msgstr "övre gräns i intervall Ã¤r inte mindre Ã¤n undre gräns"
 
-#: time/zic.c:2052
+#: time/zic.c:2065
 msgid "use of 2/29 in non leap-year"
 msgstr "använder 29/2 i icke-skottår"
 
@@ -2878,7 +2894,7 @@ msgstr "v
 msgid "values for field `%s' in category `%s' must not be zero"
 msgstr "värden på fält \"%s\" i kategorin \"%s\" får inte vara noll"
 
-#: login/utmp_file.c:84
+#: login/utmp_file.c:76
 msgid "while opening UTMP file"
 msgstr "när UTMP-filen öppnades"
 
@@ -2886,6 +2902,10 @@ msgstr "n
 msgid "while opening old catalog file"
 msgstr "när gammal katalogfil öppnades"
 
+#: locale/programs/locale.c:341
+msgid "while preparing output"
+msgstr "när utdata förbereddes"
+
 #: db/makedb.c:354
 msgid "while reading database"
 msgstr "när databasen lästes"
@@ -2898,23 +2918,23 @@ msgstr "n
 msgid "wrong number of arguments"
 msgstr "fel antal argument"
 
-#: time/zic.c:1075
+#: time/zic.c:1079
 msgid "wrong number of fields on Leap line"
 msgstr "fel antal fält pÃ¥ \"Leap\"-rad"
 
-#: time/zic.c:1166
+#: time/zic.c:1170
 msgid "wrong number of fields on Link line"
 msgstr "fel antal fält pÃ¥ \"Link\"-rad"
 
-#: time/zic.c:909
+#: time/zic.c:913
 msgid "wrong number of fields on Rule line"
 msgstr "fel antal fält pÃ¥ \"Rule\"-rad"
 
-#: time/zic.c:979
+#: time/zic.c:983
 msgid "wrong number of fields on Zone continuation line"
 msgstr "fel antal fält pÃ¥ \"Zone\"-fortsättningsrad"
 
-#: time/zic.c:937
+#: time/zic.c:941
 msgid "wrong number of fields on Zone line"
 msgstr "fel antal fält pÃ¥ \"Zone\"-rad"
 
index f7c913a4a65ef2a90820882a2ffcdf7b4ad338b5..fe08fde2a77091a85bf55eac841d16a92564c37b 100644 (file)
@@ -73,9 +73,8 @@ alpha-.*-linux.*      libnss_nisplus=1.1
 alpha-.*-linux.*       libnsl=1.1
 .*-.*-.*               libnsl=1
 
-# We use libdb.so.2 for the interface in version 1.85 of the Berkeley DB code.
-alpha-.*-linux.*       libdb=2.1
-.*-.*-.*               libdb=2
+# We use libdb.so.3 for the interface in version 2.x of the Berkeley DB code.
+.*-.*-.*               libdb=3
 
 # This defines the shared library version numbers we will install.
 alpha-.*-linux.*       libcrypt=1.1
index 485994226e69038956f51968aae2b505b9d37016..0416d3b9b1941dfd777581d46b6ea90809f012c6 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 1992, 1996 Free Software Foundation, Inc.
+/* Copyright (C) 1992, 1996, 1997 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -35,11 +35,10 @@ extern __ptr_t __alloca __P ((size_t __size));
 extern __ptr_t alloca __P ((size_t __size));
 
 #ifdef __GNUC__
-#define        __alloca(size)  __builtin_alloca(size)
+# define __alloca(size)        __builtin_alloca (size)
+# define alloca(size)  __alloca (size)
 #endif /* GCC.  */
 
-#define        alloca(size)    __alloca(size)
-
 __END_DECLS
 
 #endif /* alloca.h */
index 71e94c9167a9a430b24beba66639bf8028dd411a..d5847cb425df8a8ceaa30c0896f684b1ee686c18 100644 (file)
@@ -154,4 +154,6 @@ argz_next (char *__argz, size_t __argz_len, __const char *__entry)
 }
 #endif /* optimizing GCC2 */
 
+__END_DECLS
+
 #endif /* argz.h */
index a434c3ab8134c7186dd06ade34d9fe2c398476c5..3cac382529d72e314b5eb62b13554aae8644e8a7 100644 (file)
@@ -181,15 +181,6 @@ extern size_t strlen __P ((__const char *__s));
 /* Find the length of STRING, but scan at most MAXLEN characters.
    If no '\0' terminator is found in that many characters, return MAXLEN.  */
 extern size_t strnlen __P ((__const char *__string, size_t __maxlen));
-
-# ifdef        __OPTIMIZE__
-extern __inline size_t
-strnlen (__const char *__string, size_t __maxlen)
-{
-  __const char *__end = (__const char *) memchr (__string, '\0', __maxlen);
-  return __end ? __end - __string : __maxlen;
-}
-# endif
 #endif
 
 
@@ -272,23 +263,34 @@ extern char *strfry __P ((char *__string));
 
 /* Frobnicate N bytes of S.  */
 extern __ptr_t memfrob __P ((__ptr_t __s, size_t __n));
-#endif
 
-#if defined __USE_MISC || !defined basename
+# ifndef basename
 /* Return the file name within directory of FILENAME.  We don't
    declare the function if the `basename' macro is available (defined
    in <libgen.h>) which makes the XPG version of this function
    available.  */
 extern char *basename __P ((__const char *__filename));
+# endif
 #endif
 
 
 /* Some functions might be implemented as optimized inline assembler
-   functions.  */
-#if !defined __NO_STRING_INLINES && defined __OPTIMIZE__
+   functions.  Only include this file if we really want them.  */
+#if defined __USE_STRING_INLINES && defined __OPTIMIZE__
 # include <bits/string.h>
 #endif
 
+
+/* Now provide some generic optimizations.  */
+#if defined __GNUC__ && __GNUC__ >= 2 && defined __OPTIMIZE__
+/* Inline version of strnlen: search the first __MAXLEN bytes of __STRING
+   for a NUL with memchr.  Return the offset of that NUL, or __MAXLEN when
+   memchr finds none.  */
+extern __inline size_t
+strnlen (__const char *__string, size_t __maxlen)
+{
+  __const char *__end = (__const char *) memchr (__string, '\0', __maxlen);
+  return __end ? __end - __string : __maxlen;
+}
+#endif
+
 __END_DECLS
 
 #endif /* string.h  */
index 2c6522d849e6691bddcbb885938b1a2b9ca2092b..c4bf67197d1fa363872234a076cc87674f7d463e 100644 (file)
@@ -132,7 +132,7 @@ struct XDR
        /* lets you reposition the stream */
        long *(*x_inline) __P ((XDR * __xdrs, int len));
        /* buf quick ptr to buffered data */
-       void (*x_destroy) __P ((__const XDR * __xdrs));
+       void (*x_destroy) __P ((XDR * __xdrs));
        /* free privates of this xdr_stream */
       }
      *x_ops;
@@ -283,7 +283,7 @@ extern bool_t xdr_vector __P ((XDR * __xdrs, char *__basep, u_int __nelem,
                               u_int __elemsize, xdrproc_t __xdr_elem));
 extern bool_t xdr_float __P ((XDR * __xdrs, float *__fp));
 extern bool_t xdr_double __P ((XDR * __xdrs, double *__dp));
-extern bool_t xdr_reference __P ((XDR * __xdrs, caddr_t * __pp, u_int __size,
+extern bool_t xdr_reference __P ((XDR * __xdrs, caddr_t * __xpp, u_int __size,
                                  xdrproc_t __proc));
 extern bool_t xdr_pointer __P ((XDR * __xdrs, char **__objpp,
                                u_int __obj_size, xdrproc_t __xdr_obj));
@@ -310,11 +310,11 @@ extern bool_t xdr_netobj __P ((XDR * __xdrs, struct netobj * __np));
 
 /* XDR using memory buffers */
 extern void xdrmem_create __P ((XDR * __xdrs, __const caddr_t __addr,
-                               u_int __size, enum xdr_op __op));
+                               u_int __size, enum xdr_op __xop));
 
 /* XDR using stdio library */
 extern void xdrstdio_create __P ((XDR * __xdrs, FILE * __file,
-                                 enum xdr_op __op));
+                                 enum xdr_op __xop));
 
 /* XDR pseudo records for tcp */
 extern void xdrrec_create __P ((XDR * __xdrs, u_int __sendsize,
index a67713e669a51a02b55294a059dda713163d07f4..028329c34eb5df00e29083a2630e8d4548eed1c7 100644 (file)
@@ -53,7 +53,7 @@ static bool_t xdrmem_putbytes (XDR *, const char *, u_int);
 static u_int xdrmem_getpos (const XDR *);
 static bool_t xdrmem_setpos (XDR *, u_int);
 static long *xdrmem_inline (XDR *, int);
-static void xdrmem_destroy (const XDR *);
+static void xdrmem_destroy (XDR *);
 
 static const struct xdr_ops xdrmem_ops =
 {
@@ -91,7 +91,7 @@ xdrmem_create (xdrs, addr, size, op)
  */
 
 static void
-xdrmem_destroy (const XDR *xdrs)
+xdrmem_destroy (XDR *xdrs)
 {
 }
 
index e39131c2c25d4e3efaa3cee5e9f6cce01ef35f3e..b8a4f511a7ef27da7f37d09868ef275228d76562 100644 (file)
@@ -60,7 +60,7 @@ static bool_t xdrrec_putbytes (XDR *, const char *, u_int);
 static u_int xdrrec_getpos (const XDR *);
 static bool_t xdrrec_setpos (XDR *, u_int);
 static long *xdrrec_inline (XDR *, int);
-static void xdrrec_destroy (const XDR *);
+static void xdrrec_destroy (XDR *);
 
 static const struct xdr_ops xdrrec_ops =
 {
@@ -404,7 +404,7 @@ xdrrec_inline (XDR *xdrs, int len)
 
 static void
 xdrrec_destroy (xdrs)
-     const XDR *xdrs;
+     XDR *xdrs;
 {
   RECSTREAM *rstrm = (RECSTREAM *) xdrs->x_private;
 
index 67d9db905375b9f939799672b8dc3821933c045e..75c563daa98321540664ef3620c34635a2627972 100644 (file)
@@ -52,7 +52,7 @@ static bool_t xdrstdio_putbytes (XDR *, const char *, u_int);
 static u_int xdrstdio_getpos (const XDR *);
 static bool_t xdrstdio_setpos (XDR *, u_int);
 static long *xdrstdio_inline (XDR *, int);
-static void xdrstdio_destroy (const XDR *);
+static void xdrstdio_destroy (XDR *);
 
 /*
  * Ops vector for stdio type XDR
@@ -94,7 +94,7 @@ xdrstdio_create (xdrs, file, op)
  */
 static void
 xdrstdio_destroy (xdrs)
-     const XDR *xdrs;
+     XDR *xdrs;
 {
   (void) fflush ((FILE *) xdrs->x_private);
   /* xx should we close the file ?? */
index 746cdd236b5d87dc2b2efc24073bb90bc36e187c..e0ba67b48bef125365c0bc00690d80d5697397c0 100644 (file)
@@ -243,7 +243,8 @@ _dl_start_user:
        /* Jump to the user's entry point.  */
        mov     $9, $27
        jmp     ($9)
-       .end _dl_start_user");
+       .end _dl_start_user
+.previous");
 
 /* Nonzero iff TYPE describes relocation of a PLT entry, so
    PLT entries should not be allowed to define the value.  */
index 05fa043fa506c2d94d0a5a7ffcd987a23f7ae6e4..7e6c72c7aec5e996d30e23487b507bb7e5adf2db 100644 (file)
@@ -23,6 +23,8 @@
 #include <memcopy.h>
 #include <pagecopy.h>
 
+#undef memcpy
+
 void *
 memcpy (dstpp, srcpp, len)
      void *dstpp;
index 7aeac0abec05d5ed58f27566c744454e50b50218..13696986525b2708d54ed2e58dc65dcb96aee8a2 100644 (file)
@@ -19,6 +19,8 @@
 #include <string.h>
 #include <memcopy.h>
 
+#undef memset
+
 void *
 memset (dstpp, c, len)
      void *dstpp;
index 35e0422bdb39347ad404eaeedd4db6d4f9875bfd..bb2d8f7b41254bb5da3a6c52de963d172bea6c81 100644 (file)
@@ -5,6 +5,10 @@ asm-CPPFLAGS := $(asm-CPPFLAGS) -DGAS_SYNTAX
 # The i386 `long double' is a distinct type we support.
 long-double-fcts = yes
 
+ifeq ($(subdir),db2)
+CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_X86_GCC=1
+endif
+
 ifeq ($(subdir),gmon)
 sysdep_routines += i386-mcount
 endif
index e72b6f0a840c4ebffec99fb2369641a024d328c8..8c9a5434b9a81824c193942e478c7aa0dc2b6abe 100644 (file)
 
 #if defined __GNUC__ && __GNUC__ >= 2
 
-#define __FD_ZERO(fdsetp) \
+# define __FD_ZERO(fdsetp) \
   __asm__ __volatile__ ("cld ; rep ; stosl"                                  \
                         : "=m" (*(__fd_set *) (fdsetp))                       \
                         : "a" (0), "c" (sizeof (__fd_set)                     \
                                         / sizeof (__fd_mask)),                \
                           "D" ((__fd_set *) (fdsetp))                         \
                         :"cx","di")
-#define __FD_SET(fd, fdsetp) \
+/* __FD_SET, __FD_CLR and __FD_ISSET operate on the single __fd_mask word
+   that holds FD: word index __FDELT (fd), bit number fd % __NFDBITS.  The
+   set must therefore be indexed as an array of __fd_mask; indexing it as
+   an array of __fd_set would scale the index by the size of a whole set
+   and address memory outside the set for every fd >= __NFDBITS.  */
+# define __FD_SET(fd, fdsetp) \
   __asm__ __volatile__ ("btsl %1,%0"                                         \
-                       : "=m" (*(__fd_set *) (fdsetp))                       \
-                       : "r" ((int) (fd)))
-#define __FD_CLR(fd, fdsetp) \
+                       : "=m" (((__fd_mask *) (fdsetp))[__FDELT (fd)])       \
+                       : "r" (((int) (fd)) % __NFDBITS)                      \
+                       : "cc")
+# define __FD_CLR(fd, fdsetp) \
   __asm__ __volatile__ ("btrl %1,%0"                                         \
-                       : "=m" (*(__fd_set *) (fdsetp))                       \
-                       : "r" ((int) (fd)))
-#define __FD_ISSET(fd, fdsetp) \
+                       : "=m" (((__fd_mask *) (fdsetp))[__FDELT (fd)])       \
+                       : "r" (((int) (fd)) % __NFDBITS)                      \
+                       : "cc")
+# define __FD_ISSET(fd, fdsetp) \
   (__extension__                                                             \
-   ({unsigned char __result;                                                 \
-     __asm__ __volatile__ ("btl %1,%2 ; setb %0"                             \
+   ({unsigned int __result;                                                  \
+     __asm__ __volatile__ ("btl %1,%2 ; setcb %b0; andl $1,%0"               \
                            : "=q" (__result)                                  \
-                          : "r" ((int) (fd)), "m" (*(__fd_set *) (fdsetp))); \
+                          : "r" (((int) (fd)) % __NFDBITS),                  \
+                            "m" (((__fd_mask *) (fdsetp))[__FDELT (fd)])     \
+                          : "cc");                                           \
      __result; }))
 
 #else  /* ! GNU CC */
 
 /* We don't use `memset' because this would require a prototype and
    the array isn't too big.  */
-#define __FD_ZERO(set)  \
+# define __FD_ZERO(set)  \
   do {                                                                       \
     unsigned int __i;                                                        \
     for (__i = 0; __i < sizeof (__fd_set) / sizeof (__fd_mask); ++__i)       \
-      ((__fd_mask *) set)[__i] = '\0';                                       \
+      ((__fd_mask *) set)[__i] = 0;                                          \
   } while (0)
-#define __FD_SET(d, set)       ((set)->fds_bits[__FDELT(d)] |= __FDMASK(d))
-#define __FD_CLR(d, set)       ((set)->fds_bits[__FDELT(d)] &= ~__FDMASK(d))
-#define __FD_ISSET(d, set)     ((set)->fds_bits[__FDELT(d)] & __FDMASK(d))
+# define __FD_SET(d, set)      ((set)->fds_bits[__FDELT (d)] |= __FDMASK (d))
+# define __FD_CLR(d, set)      ((set)->fds_bits[__FDELT (d)] &= ~__FDMASK (d))
+# define __FD_ISSET(d, set)    ((set)->fds_bits[__FDELT (d)] & __FDMASK (d))
 
 #endif /* GNU CC */
diff --git a/sysdeps/i386/bits/string.h b/sysdeps/i386/bits/string.h
new file mode 100644 (file)
index 0000000..254db3e
--- /dev/null
@@ -0,0 +1,745 @@
+/* Optimized, inlined string functions.  i386 version.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifndef _STRING_H
+#error "Never use <bits/string.h> directly; include <string.h> instead."
+#endif
+
+/* We only provide optimizations for the GNU CC.  */
+#if defined __GNUC__ && __GNUC__ >= 2
+
+#ifdef __cplusplus
+# define __STRING_INLINE inline
+#else
+# define __STRING_INLINE extern __inline
+#endif
+
+
+/* Copy N bytes of SRC to DEST.
+   When N is a compile-time constant the copy is expanded inline by
+   __memcpy_c; otherwise the ordinary memcpy function is called.  (A macro
+   is not re-expanded inside its own replacement list, so the inner
+   `memcpy' names the real function.)  */
+#define memcpy(dest, src, n) \
+  (__extension__ (__builtin_constant_p (n)                                   \
+                 ? __memcpy_c (dest, src, n)                                 \
+                 : memcpy (dest, src, n)))
+/* Copy __N bytes from __SRC to __DEST and return __DEST.  Meant to be
+   reached through the memcpy macro with a constant __N.
+   This looks horribly ugly, but the compiler can optimize it totally,
+   as the count is constant.  */
+__STRING_INLINE void *
+__memcpy_c (void *__dest, __const void *__src, size_t __n)
+{
+  /* A few small, common sizes are copied with plain loads and stores.  */
+  switch (__n)
+    {
+    case 0:
+      return __dest;
+    case 1:
+      *(unsigned char *) __dest = *(const unsigned char *) __src;
+      return __dest;
+    case 2:
+      *(unsigned short int *) __dest = *(const unsigned short int *) __src;
+      return __dest;
+    case 3:
+      *(unsigned short int *) __dest = *(const unsigned short int *) __src;
+      *(2 + (unsigned char *) __dest) = *(2 + (const unsigned char *) __src);
+      return __dest;
+    case 4:
+      *(unsigned long int *) __dest = *(const unsigned long int *) __src;
+      return __dest;
+    case 6:    /* for ethernet addresses */
+      *(unsigned long int *) __dest = *(const unsigned long int *) __src;
+      *(2 + (unsigned short int *) __dest) =
+       *(2 + (const unsigned short int *) __src);
+      return __dest;
+    case 8:
+      *(unsigned long int *) __dest = *(const unsigned long int *) __src;
+      *(1 + (unsigned long int *) __dest) =
+       *(1 + (const unsigned long int *) __src);
+      return __dest;
+    case 12:
+      *(unsigned long int *) __dest = *(const unsigned long int *) __src;
+      *(1 + (unsigned long int *) __dest) =
+       *(1 + (const unsigned long int *) __src);
+      *(2 + (unsigned long int *) __dest) =
+       *(2 + (const unsigned long int *) __src);
+      return __dest;
+    case 16:
+      *(unsigned long int *) __dest = *(const unsigned long int *) __src;
+      *(1 + (unsigned long int *) __dest) =
+       *(1 + (const unsigned long int *) __src);
+      *(2 + (unsigned long int *) __dest) =
+       *(2 + (const unsigned long int *) __src);
+      *(3 + (unsigned long int *) __dest) =
+       *(3 + (const unsigned long int *) __src);
+      return __dest;
+    case 20:
+      *(unsigned long int *) __dest = *(const unsigned long int *) __src;
+      *(1 + (unsigned long int *) __dest) =
+       *(1 + (const unsigned long int *) __src);
+      *(2 + (unsigned long int *) __dest) =
+       *(2 + (const unsigned long int *) __src);
+      *(3 + (unsigned long int *) __dest) =
+       *(3 + (const unsigned long int *) __src);
+      *(4 + (unsigned long int *) __dest) =
+       *(4 + (const unsigned long int *) __src);
+      return __dest;
+    }
+  /* Every other size: copy __n / 4 longwords with `rep movsl'; X appends
+     the string instructions that move the remaining __n % 4 bytes.  */
+#define __COMMON_CODE(x) \
+  __asm__ __volatile__                                                       \
+    ("cld\n\t"                                                               \
+     "rep; movsl"                                                            \
+     x                                                                       \
+     : /* no outputs */                                                      \
+     : "c" (__n / 4), "D" (__dest), "S" (__src)                              \
+     : "cx", "di", "si", "memory")
+
+  switch (__n % 4)
+    {
+    case 0:
+      __COMMON_CODE ("");
+      return __dest;
+    case 1:
+      __COMMON_CODE ("\n\tmovsb");
+      return __dest;
+    case 2:
+      __COMMON_CODE ("\n\tmovsw");
+      return __dest;
+    case 3:
+      __COMMON_CODE ("\n\tmovsw\n\tmovsb");
+      return __dest;
+    }
+#undef __COMMON_CODE
+}
+
+
+/* Copy N bytes of SRC to DEST, guaranteeing
+   correct behavior for overlapping strings.  Returns __DEST.
+   Both branches store through __DEST behind the compiler's back, so both
+   asm statements list "memory" as clobbered.  */
+__STRING_INLINE void *
+memmove (void *__dest, __const void *__src, size_t __n)
+{
+  if (__dest < __src)
+    /* Destination below source: copy forwards, lowest byte first.  */
+    __asm__ __volatile__
+      ("cld\n\t"
+       "rep\n\t"
+       "movsb"
+       : /* no output */
+       : "c" (__n), "S" (__src),"D" (__dest)
+       : "cx", "si", "di", "memory");
+  else
+    /* Otherwise copy backwards, starting at the last byte of each buffer;
+       the direction flag is cleared again afterwards.  */
+    __asm__ __volatile__
+      ("std\n\t"
+       "rep\n\t"
+       "movsb\n\t"
+       "cld"
+       : /* no output */
+       : "c" (__n), "S" (__n - 1 + (const char *) __src),
+        "D" (__n - 1 + (char *) __dest)
+       : "cx", "si", "di", "memory");
+  return __dest;
+}
+
+
+/* Set N bytes of S to C.
+   The implementation is chosen at compile time:
+     - C and N both constant: __memset_cc;
+     - only C constant:       __memset_cg;
+     - C not constant:        __memset_gg, with the arguments unchanged.
+   In the first two cases C is converted to unsigned char and multiplied
+   by 0x01010101UL, so the value passed on carries the fill byte in each
+   of its four low bytes.  */
+#define memset(s, c, n) \
+  (__extension__ (__builtin_constant_p (c)                                   \
+                 ? (__builtin_constant_p (n)                                 \
+                    ? __memset_cc (s, 0x01010101UL * (unsigned char) (c), n) \
+                    : __memset_cg (s, 0x01010101UL * (unsigned char) (c), n))\
+                 : __memset_gg (s, c, n)))
+
+/* Fill __N bytes at __S from __PATTERN and return __S.  The memset macro
+   calls this when both the fill byte and the count are constants, and
+   passes the fill byte replicated into every byte of __PATTERN.  */
+__STRING_INLINE void *
+__memset_cc (void *__s, unsigned long int __pattern, size_t __n)
+{
+  /* Sizes up to four bytes are handled with plain stores.  */
+  switch (__n)
+    {
+    case 0:
+      return __s;
+    case 1:
+      *(unsigned char *) __s = __pattern;
+      return __s;
+    case 2:
+      *(unsigned short int *) __s = __pattern;
+      return __s;
+    case 3:
+      *(unsigned short int *) __s = __pattern;
+      *(2 + (unsigned char *) __s) = __pattern;
+      return __s;
+    case 4:
+      *(unsigned long *) __s = __pattern;
+      return __s;
+    }
+  /* Larger sizes: store __n / 4 longwords with `rep stosl'; X appends the
+     string instructions that store the remaining __n % 4 bytes.  */
+#define __COMMON_CODE(x) \
+  __asm__ __volatile__                                                       \
+    ("cld\n\t"                                                               \
+     "rep; stosl"                                                            \
+     x                                                                       \
+     : /* no outputs */                                                      \
+     : "a" (__pattern),"c" (__n / 4), "D" (__s)                              \
+     : "cx", "di", "memory")
+
+  switch (__n % 4)
+    {
+    case 0:
+      __COMMON_CODE ("");
+      return __s;
+    case 1:
+      __COMMON_CODE ("\n\tstosb");
+      return __s;
+    case 2:
+      __COMMON_CODE ("\n\tstosw");
+      return __s;
+    case 3:
+      __COMMON_CODE ("\n\tstosw\n\tstosb");
+      return __s;
+    }
+#undef __COMMON_CODE
+}
+
+/* Fill __N bytes at __S and return __S; used by the memset macro when the
+   fill byte is constant but the count is not.  __C is the value the macro
+   passes: the fill byte multiplied by 0x01010101UL.
+   `rep stosl' stores __N / 4 longwords.  Operand 1 is a second copy of
+   __N in a byte-addressable register; its bits 1 and 0 decide whether a
+   trailing `stosw' and/or `stosb' is executed for the leftover bytes.  */
+__STRING_INLINE void *
+__memset_cg (void *__s, unsigned long __c, size_t __n)
+{
+  __asm__ __volatile__
+    ("cld\n\t"
+     "rep; stosl\n\t"
+     "testb    $2,%b1\n\t"
+     "je       1f\n\t"
+     "stosw\n"
+     "1:\n\t"
+     "testb    $1,%b1\n\t"
+     "je       2f\n\t"
+     "stosb\n"
+     "2:"
+     : /* no output */
+     : "a" (__c), "q" (__n), "c" (__n / 4), "D" (__s)
+     : "cx", "di", "memory");
+  return __s;
+}
+
+/* Fill __N bytes at __S with the byte __C, one byte at a time with
+   `rep stosb', and return __S.  The memset macro uses this when the fill
+   byte is not a compile-time constant.  */
+__STRING_INLINE void *
+__memset_gg (void *__s, char __c, size_t __n)
+{
+  __asm__ __volatile__
+    ("cld\n\t"
+     "rep\n\t"
+     "stosb"
+     : /* no output */
+     : "a" (__c),"D" (__s), "c" (__n)
+     : "cx", "di", "memory");
+  return __s;
+}
+
+
+
+
+/* Search N bytes of S for C.  Returns a pointer to the first byte equal
+   to the low byte of __C, or NULL when none of the first __N bytes
+   matches.  A zero __N is answered before the asm is entered, since
+   `repne scasb' would not compare anything and the `je' would then test
+   stale flags.  */
+__STRING_INLINE void *
+memchr (__const void *__s, int __c, size_t __n)
+{
+  register void *__res;
+  if (__n == 0)
+    return NULL;
+  /* After the scan EDI is one past the byte it stopped on.  When the scan
+     ended without a match EDI is set to 1 instead, so that the common
+     decrement yields either the match address or NULL.  */
+  __asm__ __volatile__
+    ("cld\n\t"
+     "repne; scasb\n\t"
+     "je       1f\n\t"
+     "movl     $1,%0\n"
+     "1:\n\t"
+     "decl     %0"
+     : "=D" (__res)
+     : "a" (__c), "0" (__s), "c" (__n)
+     : "cx", "cc");
+  return __res;
+}
+
+
+/* Return the length of S.
+   `repne scasb' looks for a zero byte (AL is 0) starting at __STR, with
+   the count register preloaded with 0xffffffff.  The count drops by one
+   for every byte examined, terminator included, so its complement
+   (`notl') is the number of bytes examined and one less than that is the
+   string length.  */
+__STRING_INLINE size_t
+strlen (__const char *__str)
+{
+  register size_t __res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "repne; scasb\n\t"
+     "notl %0"
+     : "=c" (__res)
+     : "D" (__str), "a" (0), "0" (0xffffffff)
+     : "di", "cc");
+  return __res - 1;
+}
+
+
+/* Copy SRC to DEST and return __DEST.
+   Each pass of the loop moves one byte from __SRC to __DEST through AL
+   (`lodsb' then `stosb'); the loop ends after the terminating NUL has
+   been stored.  */
+__STRING_INLINE char *
+strcpy (char *__dest, __const char *__src)
+{
+  __asm__ __volatile__
+    ("cld\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "stosb\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b"
+     : /* no output */
+     : "S" (__src), "D" (__dest)
+     : "si", "di", "ax", "memory", "cc");
+  return __dest;
+}
+
+
+/* Copy no more than N characters of SRC to DEST.  Returns __DEST.
+   The count (operand 2, in ECX) is decremented before each byte is
+   copied, and the loop is left through label 2 as soon as it goes
+   negative.  If the NUL of __SRC is copied first, `rep stosb' stores AL
+   (zero at that point) into the remaining count bytes of __DEST.  If the
+   count runs out first, nothing more is stored, so __DEST is not
+   NUL-terminated in that case.  */
+__STRING_INLINE char *
+strncpy (char *__dest, __const char *__src, size_t __n)
+{
+  __asm__ __volatile__
+    ("cld\n"
+     "1:\n\t"
+     "decl     %2\n\t"
+     "js       2f\n\t"
+     "lodsb\n\t"
+     "stosb\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b\n\t"
+     "rep; stosb\n"
+     "2:"
+     : /* no output */
+     : "S" (__src), "D" (__dest), "c" (__n)
+     : "si", "di", "ax", "cx", "memory", "cc");
+  return __dest;
+}
+
+
+/* Append SRC onto DEST.  Returns __DEST.
+   `repne scasb' (AL is 0, count 0xffffffff) finds the terminator of
+   __DEST and leaves EDI one past it; `decl %1' (operand 1 is __DEST in
+   EDI) steps back onto the NUL.  The `lodsb'/`stosb' loop then copies
+   __SRC from there, stopping after its NUL has been stored.  */
+__STRING_INLINE char *
+strcat (char *__dest, __const char *__src)
+{
+  __asm__ __volatile__
+    ("cld\n\t"
+     "repne; scasb\n\t"
+     "decl     %1\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "stosb\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b"
+     : /* no output */
+     : "S" (__src), "D" (__dest), "a" (0), "c" (0xffffffff)
+     : "si", "di", "ax", "cx", "memory", "cc");
+  return __dest;
+}
+
+
+/* Append no more than N characters from SRC onto DEST.  Returns __DEST.
+   The end of __DEST is located as in strcat (`repne scasb', then step
+   EDI back onto the NUL).  `movl %4,%3' then loads __N (operand 4) into
+   the count register (operand 3).  The copy loop decrements the count
+   before each byte and stops when it goes negative or once a NUL has
+   been copied.  At label 2 the accumulator (operand 2) is cleared and one
+   further zero byte is stored, so the result is terminated on both exit
+   paths.  */
+__STRING_INLINE char *
+strncat (char *__dest, __const char *__src, size_t __n)
+{
+  __asm__ __volatile__
+    ("cld\n\t"
+     "repne; scasb\n\t"
+     "decl     %1\n\t"
+     "movl     %4,%3\n"
+     "1:\n\t"
+     "decl     %3\n\t"
+     "js       2f\n\t"
+     "lodsb\n\t"
+     "stosb\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b\n"
+     "2:\n\t"
+     "xorl     %2,%2\n\t"
+     "stosb"
+     : /* no output */
+     : "S" (__src), "D" (__dest), "a" (0), "c" (0xffffffff), "g" (__n)
+     : "si", "di", "ax", "cx", "memory", "cc");
+  return __dest;
+}
+
+
+/* Compare S1 and S2.  Returns 0 when the strings are equal, otherwise
+   -1 or 1 according to the first differing byte, compared as unsigned.
+   Each pass loads a byte of __S1 into AL (`lodsb') and compares it with
+   the corresponding byte of __S2 (`scasb').  On a mismatch `sbbl' turns
+   the borrow of that comparison into 0 or -1 in EAX, and setting the low
+   bit of AL makes that 1 or -1.  */
+__STRING_INLINE int
+strcmp (__const char *__s1, __const char *__s2)
+{
+  register int __res;
+  __asm__ __volatile__
+    ("cld\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "scasb\n\t"
+     "jne      2f\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b\n\t"
+     "xorl     %%eax,%%eax\n\t"
+     "jmp      3f\n"
+     "2:\n\t"
+     "sbbl     %%eax,%%eax\n\t"
+     "orb      $1,%%al\n"
+     "3:"
+     : "=a" (__res)
+     : "S" (__s1), "D" (__s2)
+     : "si", "di", "cc");
+  return __res;
+}
+
+
+/* Compare N characters of S1 and S2.  Returns 0 when no difference is
+   found before N characters or a common NUL, -1 when the first
+   differing byte of S1 is below that of S2 (compared as unsigned
+   bytes) and 1 otherwise.  */
+__STRING_INLINE int
+strncmp (__const char *__s1, __const char *__s2, size_t __n)
+{
+  register int __res;
+  __asm__ __volatile__
+    ("cld\n"
+     "1:\n\t"
+     "decl     %3\n\t"
+     "js       2f\n\t"         /* count went negative: equal so far */
+     "lodsb\n\t"
+     "scasb\n\t"
+     "jne      3f\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b\n"
+     "2:\n\t"
+     "xorl     %%eax,%%eax\n\t"
+     "jmp      4f\n"
+     "3:\n\t"
+     "sbbl     %%eax,%%eax\n\t"        /* 0 or -1 from the carry of scasb */
+     "orb      $1,%%al\n"              /* turn that into 1 or -1 */
+     "4:"
+     : "=a" (__res)
+     : "S" (__s1), "D" (__s2), "c" (__n)
+     : "si", "di", "cx", "cc");
+  return __res;
+}
+
+
+/* Find the first occurrence of C in S.  A compile-time constant C is
+   masked to one byte and shifted into bits 8-15 before being handed to
+   __strchr_c, which compares against %ah; any other C goes through
+   __strchr_g.  */
+#define strchr(s, c) \
+  (__extension__ (__builtin_constant_p (c)                                   \
+                 ? __strchr_c (s, ((c) & 0xff) << 8)                         \
+                 : __strchr_g (s, c)))
+
+/* strchr for a C that is not a compile-time constant.  Returns a
+   pointer to the first byte of S equal to the low byte of C (the
+   terminating NUL counts as part of S), or a null pointer if there is
+   none.  */
+__STRING_INLINE char *
+__strchr_g (__const char *__s, int __c)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "movb     %%al,%%ah\n"    /* keep the wanted byte in %ah */
+     "1:\n\t"
+     "lodsb\n\t"
+     "cmpb     %%ah,%%al\n\t"
+     "je       2f\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b\n\t"
+     "movl     $1,%1\n"        /* not found: 1, so the -1 below gives 0 */
+     "2:\n\t"
+     "movl     %1,%0"
+     : "=a" (__res)
+     : "S" (__s), "0" (__c)
+     : "si", "cc");
+  /* lodsb left the pointer one past the byte it examined.  */
+  return __res - 1;
+}
+
+/* strchr for a constant C.  The byte to look for is compared from %ah,
+   i.e. it must already sit in bits 8-15 of __C.  Returns a pointer to
+   the first matching byte of S (the terminating NUL counts as part of
+   S), or a null pointer if there is none.  */
+__STRING_INLINE char *
+__strchr_c (__const char *__s, int __c)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "1:\n\t"
+     "lodsb\n\t"
+     "cmpb     %%ah,%%al\n\t"
+     "je       2f\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b\n\t"
+     "movl     $1,%1\n"        /* not found: 1, so the -1 below gives 0 */
+     "2:\n\t"
+     "movl     %1,%0"
+     : "=a" (__res)
+     : "S" (__s), "0" (__c)
+     : "si", "cc");
+  /* lodsb left the pointer one past the byte it examined.  */
+  return __res - 1;
+}
+
+
+/* Return the length of the initial segment of S which
+   consists entirely of characters not in REJECT.
+
+   Both variants first compute the length of REJECT and then, for each
+   byte of S, scan REJECT for it.  They differ only in the register
+   that keeps the length: the PIC variant saves and restores %ebx by
+   hand and uses it, the other variant uses %edx.  */
+#ifdef __PIC__
+/* NOTE(review): operand %4 has the "g" constraint.  If the compiler
+   chose a stack slot addressed relative to %esp for it, the pushl
+   below would shift that address by four bytes -- confirm.  */
+__STRING_INLINE size_t
+strcspn (__const char *__s, __const char *__reject)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("pushl    %%ebx\n\t"
+     "cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of REJECT */
+     "movl     %%ecx,%%ebx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S */
+     "movl     %4,%%edi\n\t"
+     "movl     %%ebx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "jne      1b\n"                   /* not in REJECT: keep going */
+     "2:\n\t"
+     "popl     %%ebx"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__reject)
+     : "ax", "cx", "di", "cc");
+  /* lodsb left the pointer one past the byte that stopped the loop.  */
+  return (__res - 1) - __s;
+}
+#else
+__STRING_INLINE size_t
+strcspn (__const char *__s, __const char *__reject)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of REJECT */
+     "movl     %%ecx,%%edx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S */
+     "movl     %4,%%edi\n\t"
+     "movl     %%edx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "jne      1b\n"                   /* not in REJECT: keep going */
+     "2:"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff),"0" (__s), "g" (__reject)
+     : "ax", "cx", "dx", "di", "cc");
+  /* lodsb left the pointer one past the byte that stopped the loop.  */
+  return (__res - 1) - __s;
+}
+#endif
+
+
+/* Return the length of the initial segment of S which
+   consists entirely of characters in ACCEPT.
+
+   Both variants first compute the length of ACCEPT and then, for each
+   byte of S, scan ACCEPT for it.  They differ only in the register
+   that keeps the length: the PIC variant saves and restores %ebx by
+   hand and uses it, the other variant uses %edx.  */
+#ifdef __PIC__
+/* NOTE(review): operand %4 has the "g" constraint.  If the compiler
+   chose a stack slot addressed relative to %esp for it, the pushl
+   below would shift that address by four bytes -- confirm.  */
+__STRING_INLINE size_t
+strspn (__const char *__s, __const char *__accept)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("pushl    %%ebx\n\t"
+     "cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of ACCEPT */
+     "movl     %%ecx,%%ebx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S */
+     "movl     %4,%%edi\n\t"
+     "movl     %%ebx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "je       1b\n"                   /* found in ACCEPT: keep going */
+     "2:\n\t"
+     "popl     %%ebx"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__accept)
+     : "ax", "cx", "di", "cc");
+  /* lodsb left the pointer one past the byte that stopped the loop.  */
+  return (__res - 1) - __s;
+}
+#else
+__STRING_INLINE size_t
+strspn (__const char *__s, __const char *__accept)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of ACCEPT */
+     "movl     %%ecx,%%edx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S */
+     "movl     %4,%%edi\n\t"
+     "movl     %%edx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "je       1b\n"                   /* found in ACCEPT: keep going */
+     "2:"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__accept)
+     : "ax", "cx", "dx", "di", "cc");
+  /* lodsb left the pointer one past the byte that stopped the loop.  */
+  return (__res - 1) - __s;
+}
+#endif
+
+
+/* Find the first occurrence in S of any character in ACCEPT.  Returns
+   a pointer to that character, or a null pointer when the end of S is
+   reached without a match.
+
+   Both variants first compute the length of ACCEPT and then, for each
+   byte of S, scan ACCEPT for it.  They differ only in the register
+   that keeps the length: the PIC variant saves and restores %ebx by
+   hand and uses it, the other variant uses %edx.  */
+#ifdef __PIC__
+/* NOTE(review): operand %4 has the "g" constraint.  If the compiler
+   chose a stack slot addressed relative to %esp for it, the pushl
+   below would shift that address by four bytes -- confirm.  */
+__STRING_INLINE char *
+strpbrk (__const char *__s, __const char *__accept)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("pushl    %%ebx\n\t"
+     "cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of ACCEPT */
+     "movl     %%ecx,%%ebx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S: no match */
+     "movl     %4,%%edi\n\t"
+     "movl     %%ebx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "jne      1b\n\t"
+     "decl     %0\n\t"                 /* step back onto the match */
+     "jmp      3f\n"
+     "2:\n\t"
+     "xorl     %0,%0\n"
+     "3:\n\t"
+     "popl     %%ebx"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__accept)
+     : "ax", "cx", "di", "cc");
+  return __res;
+}
+#else
+__STRING_INLINE char *
+strpbrk (__const char *__s, __const char *__accept)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of ACCEPT */
+     "movl     %%ecx,%%edx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S: no match */
+     "movl     %4,%%edi\n\t"
+     "movl     %%edx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "jne      1b\n\t"
+     "decl     %0\n\t"                 /* step back onto the match */
+     "jmp      3f\n"
+     "2:\n\t"
+     "xorl     %0,%0\n"
+     "3:"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__accept)
+     : "ax", "cx", "dx", "di", "cc");
+  return __res;
+}
+#endif
+
+
+/* Find the first occurrence of NEEDLE in HAYSTACK.  Returns a pointer
+   to the start of the match, HAYSTACK itself when NEEDLE is empty, or
+   a null pointer when there is no match.
+
+   Both variants compute the length of NEEDLE once and then compare
+   NEEDLE against every starting position of HAYSTACK in turn.  They
+   differ only in the register that keeps the length: the PIC variant
+   saves and restores %ebx by hand and uses it, the other variant uses
+   %edx.  */
+#ifdef __PIC__
+/* NOTE(review): operand %4 has the "g" constraint.  If the compiler
+   chose a stack slot addressed relative to %esp for it, the pushl
+   below would shift that address by four bytes -- confirm.  */
+__STRING_INLINE char *
+strstr (__const char *__haystack, __const char *__needle)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("pushl    %%ebx\n\t"
+     "cld\n\t" \
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"      /* NOTE! This also sets Z if searchstring='' */
+     "movl     %%ecx,%%ebx\n"
+     "1:\n\t"
+     "movl     %4,%%edi\n\t"
+     "movl     %%esi,%%eax\n\t"        /* remember this starting position */
+     "movl     %%ebx,%%ecx\n\t"
+     "repe; cmpsb\n\t"
+     "je       2f\n\t"         /* also works for empty string, see above */
+     "xchgl    %%eax,%%esi\n\t"
+     "incl     %%esi\n\t"              /* next starting position */
+     "cmpb     $0,-1(%%eax)\n\t"       /* did the mismatch hit the NUL? */
+     "jne      1b\n\t"
+     "xorl     %%eax,%%eax\n\t"
+     "2:\n\t"
+     "popl     %%ebx"
+     : "=a" (__res)
+     : "0" (0), "c" (0xffffffff), "S" (__haystack), "g" (__needle)
+     : "cx", "di", "si", "cc");
+  return __res;
+}
+#else
+__STRING_INLINE char *
+strstr (__const char *__haystack, __const char *__needle)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t" \
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"      /* NOTE! This also sets Z if searchstring='' */
+     "movl     %%ecx,%%edx\n"
+     "1:\n\t"
+     "movl     %4,%%edi\n\t"
+     "movl     %%esi,%%eax\n\t"        /* remember this starting position */
+     "movl     %%edx,%%ecx\n\t"
+     "repe; cmpsb\n\t"
+     "je       2f\n\t"         /* also works for empty string, see above */
+     "xchgl    %%eax,%%esi\n\t"
+     "incl     %%esi\n\t"              /* next starting position */
+     "cmpb     $0,-1(%%eax)\n\t"       /* did the mismatch hit the NUL? */
+     "jne      1b\n\t"
+     "xorl     %%eax,%%eax\n\t"
+     "2:"
+     : "=a" (__res)
+     : "0" (0), "c" (0xffffffff), "S" (__haystack), "g" (__needle)
+     : "cx", "dx", "di", "si", "cc");
+  return __res;
+}
+#endif
+
+
+#undef __STRING_INLINE
+
+#endif /* GNU CC */
index 213060ba8434c6239df0866cc38abf9f685f6d8d..f9913cf00c8523ed745ec6f9157d9addce634f88 100644 (file)
@@ -57,10 +57,9 @@ static inline Elf32_Addr __attribute__ ((unused))
 elf_machine_load_address (void)
 {
   Elf32_Addr addr;
-  asm ("       call 1f\n"
-       "1:     popl %0\n"
-       "       subl 1b@GOT(%%ebx), %0"
-       : "=r" (addr));
+  asm ("leal _dl_start@GOTOFF(%%ebx), %0\n"
+       "subl _dl_start@GOT(%%ebx), %0"
+       : "=r" (addr) : : "cc");
   return addr;
 }
 
@@ -250,6 +249,7 @@ _dl_start_user:\n\
        movl _dl_fini@GOT(%ebx), %edx\n\
        # Jump to the user's entry point.\n\
        jmp *%edi\n\
+.previous\n\
 ");
 
 /* Nonzero iff TYPE should not be allowed to resolve to one of
diff --git a/sysdeps/i386/i486/bits/string.h b/sysdeps/i386/i486/bits/string.h
new file mode 100644 (file)
index 0000000..f141bd5
--- /dev/null
@@ -0,0 +1,853 @@
+/* Optimized, inlined string functions.  i486 version.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifndef _STRING_H
+#error "Never use <bits/string.h> directly; include <string.h> instead."
+#endif
+
+/* We only provide optimizations for the GNU CC.  */
+#if defined __GNUC__ && __GNUC__ >= 2
+
+#ifdef __cplusplus
+# define __STRING_INLINE inline
+#else
+# define __STRING_INLINE extern __inline
+#endif
+
+
+/* Copy N bytes of SRC to DEST.  A compile-time constant N selects a
+   specialized copy loop through __memcpy_c; any other N uses
+   __memcpy_g.  */
+#define memcpy(dest, src, n) \
+  (__extension__ (__builtin_constant_p (n)                                   \
+                 ? __memcpy_c (dest, src, n)                                 \
+                 : __memcpy_g (dest, src, n)))
+/* Dispatch on a constant N.  N == 0 has to be handled here:
+   __memcpy_by4 copies one word before it tests its counter, so it must
+   never be entered with nothing to copy.  */
+#define __memcpy_c(dest, src, n) \
+  ((n) == 0                                                                  \
+   ? ((void) (src), (void *) (dest))                                         \
+   : (((n) % 4 == 0)                                                         \
+      ? __memcpy_by4 (dest, src, n)                                          \
+      : (((n) % 2 == 0)                                                      \
+         ? __memcpy_by2 (dest, src, n)                                       \
+         : __memcpy_g (dest, src, n))))
+
+/* Copy N bytes of SRC to DEST one 32-bit word at a time, N / 4 words
+   in all.  The loop stores a word before it tests the counter, so N
+   must be at least 4.  Returns DEST.  */
+__STRING_INLINE void *
+__memcpy_by4 (void *__dest, __const void *__src, size_t __n)
+{
+  register void *__tmp = __dest;
+  register int __dummy1, __dummy2;
+  __asm__ __volatile__
+    ("1:\n\t"
+     "movl     (%2),%0\n\t"
+     "leal     4(%2),%2\n\t"
+     "movl     %0,(%1)\n\t"
+     "leal     4(%1),%1\n\t"
+     "decl     %3\n\t"
+     "jnz      1b"
+     : "=r" (__dummy1), "=r" (__tmp), "=r" (__src), "=r" (__dummy2)
+     : "1" (__tmp), "2" (__src), "3" (__n / 4)
+     : "memory", "cc");
+  return __dest;
+}
+
+/* Copy N bytes of SRC to DEST as N / 4 32-bit words followed by one
+   16-bit word.  The trailing 16-bit copy is unconditional, so this
+   is only correct when N is two more than a multiple of four.
+   Returns DEST.  */
+__STRING_INLINE void *
+__memcpy_by2 (void *__dest, __const void *__src, size_t __n)
+{
+  register void *__tmp = __dest;
+  register int __dummy1, __dummy2;
+  __asm__ __volatile__
+    ("shrl     $1,%3\n\t"            /* N / 2 halfwords -> N / 4 words */
+     "jz       2f\n"                 /* only a word */
+     "1:\n\t"
+     "movl     (%2),%0\n\t"
+     "leal     4(%2),%2\n\t"
+     "movl     %0,(%1)\n\t"
+     "leal     4(%1),%1\n\t"
+     "decl     %3\n\t"
+     "jnz      1b\n"
+     "2:\n\t"
+     "movw     (%2),%w0\n\t"
+     "movw     %w0,(%1)"
+     : "=q" (__dummy1), "=r" (__tmp), "=r" (__src), "=r" (__dummy2)
+     : "1" (__tmp), "2" (__src), "3" (__n / 2)
+     : "memory", "cc");
+  return __dest;
+}
+
+/* Copy N bytes of SRC to DEST for any N.  The two low bits of N are
+   shifted out one after the other to copy an odd byte and an odd
+   16-bit word; what remains in %ecx is the number of 32-bit words.
+   Returns DEST.  */
+__STRING_INLINE void *
+__memcpy_g (void *__dest, __const void *__src, size_t __n)
+{
+  register void *__tmp = __dest;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "shrl     $1,%%ecx\n\t"
+     "jnc      1f\n\t"         /* bit 0 clear: no single byte */
+     "movsb\n"
+     "1:\n\t"
+     "shrl     $1,%%ecx\n\t"
+     "jnc      2f\n\t"         /* bit 1 clear: no single word */
+     "movsw\n"
+     "2:\n\t"
+     "rep; movsl"
+     : /* no output */
+     : "c" (__n), "D" (__tmp),"S" (__src)
+     : "cx", "di", "si", "memory", "cc");
+  return __dest;
+}
+
+
+/* Copy N bytes of SRC to DEST, guaranteeing
+   correct behavior for overlapping strings.  Returns DEST.  */
+__STRING_INLINE void *
+memmove (void *__dest, __const void *__src, size_t __n)
+{
+  register void *__tmp = __dest;
+  if (__dest < __src)
+    /* DEST lies below SRC: an ascending copy never overwrites source
+       bytes that are still to be read.  The asm stores to memory, so
+       it needs the "memory" clobber just like the descending copy.  */
+    __asm__ __volatile__
+      ("cld\n\t"
+       "rep; movsb"
+       : /* no output */
+       : "c" (__n), "S" (__src), "D" (__tmp)
+       : "cx", "si", "di", "memory");
+  else
+    /* Otherwise copy descending, starting from the last byte of each
+       area, and clear the direction flag again afterwards.  */
+    __asm__ __volatile__
+      ("std\n\t"
+       "rep; movsb\n\t"
+       "cld"
+       : /* no output */
+       : "c" (__n), "S" (__n - 1 + (__const char *) __src),
+        "D" (__n - 1 + (char *) __tmp)
+       : "cx", "si", "di", "memory");
+  return __dest;
+}
+
+
+/* Compare N bytes of S1 and S2.  Returns 0 when they are equal (always
+   the case for N == 0), -1 when the first differing byte of S1 is
+   below that of S2 (compared as unsigned bytes) and 1 otherwise.  */
+#ifndef __PIC__
+/* gcc has problems spilling registers when using PIC.  */
+__STRING_INLINE int
+memcmp (__const void *__s1, __const void *__s2, size_t __n)
+{
+  register int __res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     /* With a zero count `repe; cmpsb' executes nothing and leaves the
+        flags alone.  Set them from the count first so that the `je'
+        below sees ZF set for N == 0 instead of stale flags.  */
+     "testl    %%ecx,%%ecx\n\t"
+     "repe; cmpsb\n\t"
+     "je       1f\n\t"
+     "sbbl     %0,%0\n\t"      /* 0 or -1 from the carry of cmpsb */
+     "orb      $1,%b0\n"       /* turn that into 1 or -1 */
+     "1:"
+     : "=a" (__res)
+     : "0" (0), "S" (__s1), "D" (__s2), "c" (__n)
+     : "si", "di", "cx", "cc");
+  return __res;
+}
+#endif
+
+
+/* Set N bytes of S to C.  The helper is chosen by which of C and N
+   are compile-time constants: the first letter after `__memset_'
+   stands for C, the second for N (`c' constant, `g' general).  */
+#define memset(s, c, n) \
+  (__extension__ (__builtin_constant_p (c)                                   \
+                 ? (__builtin_constant_p (n)                                 \
+                    ? __memset_cc (s, c, n)                                  \
+                    : __memset_cg (s, c, n))                                 \
+                 : (__builtin_constant_p (n)                                 \
+                    ? __memset_gc (s, c, n)                                  \
+                    : __memset_gg (s, c, n))))
+/* Dispatch on a constant N.  N == 0 has to be handled here: the _by4
+   helpers store one word before they test their counter, so they must
+   never be entered with nothing to set.  */
+#define __memset_cc(s, c, n) \
+  ((n) == 0                                                                  \
+   ? ((void) (c), (void *) (s))                                              \
+   : (((n) % 4 == 0)                                                         \
+      ? __memset_cc_by4 (s, c, n)                                            \
+      : (((n) % 2 == 0)                                                      \
+         ? __memset_cc_by2 (s, c, n)                                         \
+         : __memset_cg (s, c, n))))
+#define __memset_gc(s, c, n) \
+  ((n) == 0                                                                  \
+   ? ((void) (c), (void *) (s))                                              \
+   : (((n) % 4 == 0)                                                         \
+      ? __memset_gc_by4 (s, c, n)                                            \
+      : (((n) % 2 == 0)                                                      \
+         ? __memset_gc_by2 (s, c, n)                                         \
+         : __memset_gg (s, c, n))))
+
+/* Set N bytes of S to the byte C, one 32-bit word at a time, N / 4
+   words in all.  The loop stores a word before it tests the counter,
+   so N must be at least 4.  Returns S.  */
+__STRING_INLINE void *
+__memset_cc_by4 (void *__s, int __c, size_t __n)
+{
+  register char *__tmp = __s;
+  register int __dummy;
+  __asm__ __volatile__
+    ("1:\n\t"
+     "movl     %2,(%0)\n\t"
+     "leal     4(%0),%0\n\t"
+     "decl     %1\n\t"
+     "jnz      1b"
+     : "=r" (__tmp), "=r" (__dummy)
+     /* Multiplying by 0x01010101 replicates the byte into all four
+        byte positions of the word.  */
+     : "q" (0x01010101UL * (unsigned char) __c), "0" (__tmp), "1" (__n / 4)
+     : "memory", "cc");
+  return __s;
+}
+
+/* Set N bytes of S to the byte C as N / 4 32-bit words followed by one
+   16-bit word.  The trailing 16-bit store is unconditional, so this
+   is only correct when N is two more than a multiple of four.
+   Returns S.  */
+__STRING_INLINE void *
+__memset_cc_by2 (void *__s, char __c, size_t __n)
+{
+  register void *__tmp = __s;
+  register int __dummy;
+  __asm__ __volatile__
+    ("shrl     $1,%1\n\t"      /* may be divisible also by 4 */
+     "jz       2f\n"
+     "1:\n\t"
+     "movl     %2,(%0)\n\t"
+     "leal     4(%0),%0\n\t"
+     "decl     %1\n\t"
+     "jnz      1b\n"
+     "2:\n\t"
+     "movw     %w2,(%0)"
+     : "=r" (__tmp), "=r" (__dummy)
+     /* Multiplying by 0x01010101 replicates the byte into all four
+        byte positions of the word.  */
+     : "q" (0x01010101UL * (unsigned char) __c), "0" (__tmp), "1" (__n / 2)
+     : "memory", "cc");
+  return __s;
+}
+
+/* Set N bytes of S to the byte C, one 32-bit word at a time, N / 4
+   words in all; C is not a compile-time constant and is replicated
+   into a full word at run time.  The loop stores a word before it
+   tests the counter, so N must be at least 4.  Returns S.  */
+__STRING_INLINE void *
+__memset_gc_by4 (void *__s, char __c, size_t __n)
+{
+  register void *__tmp = __s;
+  /* Operand 0 is used as a 32-bit register below, so it is bound to an
+     int scratch variable (as in __memset_gc_by2) rather than to the
+     char parameter.  */
+  register int __dummy1, __dummy2;
+  __asm__ __volatile__
+    ("movb     %b0,%h0\n"      /* low 16 bits = C C */
+     "pushw    %w0\n\t"
+     "shll     $16,%0\n\t"
+     "popw     %w0\n"          /* all 32 bits = C C C C */
+     "1:\n\t"
+     "movl     %0,(%1)\n\t"
+     "addl     $4,%1\n\t"
+     "decl     %2\n\t"
+     "jnz      1b\n"
+     : "=q" (__dummy1), "=r" (__tmp), "=r" (__dummy2)
+     : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4)
+     : "memory", "cc");
+  return __s;
+}
+
+/* Set N bytes of S to the byte C as N / 4 32-bit words followed by one
+   16-bit word; C is not a compile-time constant and is replicated at
+   run time.  The trailing 16-bit store is unconditional, so this is
+   only correct when N is two more than a multiple of four.
+   Returns S.  */
+__STRING_INLINE void *
+__memset_gc_by2 (void *__s, char __c, size_t __n)
+{
+  register void *__tmp = __s;
+  register int __dummy1, __dummy2;
+  __asm__ __volatile__
+    ("movb     %b0,%h0\n\t"    /* low 16 bits = C C */
+     "shrl     $1,%2\n\t"      /* may be divisible also by 4 */
+     "jz       2f\n\t"
+     "pushw    %w0\n\t"
+     "shll     $16,%0\n\t"
+     "popw     %w0\n"          /* all 32 bits = C C C C */
+     "1:\n\t"
+     "movl     %0,(%1)\n\t"
+     "leal     4(%1),%1\n\t"
+     "decl     %2\n\t"
+     "jnz      1b\n"
+     "2:\n\t"
+     "movw     %w0,(%1)"
+     : "=q" (__dummy1), "=r" (__tmp), "=r" (__dummy2)
+     : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 2)
+     : "memory", "cc");
+  return __s;
+}
+
+/* Set N bytes of S to the constant byte C for any N: N / 2 16-bit
+   stores followed by a single byte store when N is odd.  Returns S.  */
+__STRING_INLINE void *
+__memset_cg (void *__s, char __c, size_t __n)
+{
+  register void *__tmp = __s;
+  __asm__ __volatile__
+    /* Clear the direction flag explicitly, as the other routines in
+       this file that use string instructions do.  */
+    ("cld\n\t"
+     "shrl     $1,%%ecx\n\t"   /* carry = low bit of N */
+     "rep; stosw\n\t"          /* does not touch the carry */
+     "jnc      1f\n\t"
+     "movb     %%al,(%%edi)\n"
+     "1:"
+     : /* no output */
+     : "c" (__n),"D" (__tmp), "a" (0x0101U * (unsigned char) __c)
+     : "cx", "di", "memory", "cc");
+  return __s;
+}
+
+/* Set N bytes of S to the byte C when neither is a compile-time
+   constant: N / 2 16-bit stores followed by a single byte store when
+   N is odd.  Returns S.  */
+__STRING_INLINE void *
+__memset_gg (void *__s, char __c, size_t __n)
+{
+  register void *__tmp = __s;
+  __asm__ __volatile__
+    /* Clear the direction flag explicitly, as the other routines in
+       this file that use string instructions do.  */
+    ("cld\n\t"
+     "movb     %%al,%%ah\n\t"  /* %ax = C C */
+     "shrl     $1,%%ecx\n\t"   /* carry = low bit of N */
+     "rep; stosw\n\t"          /* does not touch the carry */
+     "jnc      1f\n\t"
+     "movb     %%al,(%%edi)\n"
+     "1:"
+     : /* no output */
+     : "c" (__n), "D" (__tmp), "a" (__c)
+     : "cx", "di", "memory", "cc");
+  return __s;
+}
+
+
+/* Search N bytes of S for C.  Returns a pointer to the first byte
+   equal to the low byte of C, or NULL when there is none or N is 0.  */
+__STRING_INLINE void *
+memchr (__const void *__s, int __c, size_t __n)
+{
+  register void *__res;
+  /* With a zero count the scan below would not set the flags that the
+     `je' tests, so that case is answered here.  */
+  if (__n == 0)
+    return NULL;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "repne; scasb\n\t"
+     "je       1f\n\t"
+     "movl     $1,%0\n"        /* not found: 1, decremented to 0 below */
+     "1:\n\t"
+     "decl     %0"             /* scasb stepped one past the match */
+     : "=D" (__res)
+     : "a" (__c), "D" (__s), "c" (__n)
+     : "cx", "cc");
+  return __res;
+}
+
+
+/* Return the length of S, not counting the terminating NUL.  */
+__STRING_INLINE size_t
+strlen (__const char *__str)
+{
+  register char __dummy;
+  register __const char *__tmp = __str;
+  __asm__ __volatile__
+    ("1:\n\t"
+     "movb     (%0),%1\n\t"
+     "leal     1(%0),%0\n\t"   /* advance without touching the flags */
+     "testb    %1,%1\n\t"
+     "jne      1b"
+     : "=r" (__tmp), "=q" (__dummy)
+     : "0" (__str)
+     : "memory", "cc" );
+  /* The pointer was advanced past the NUL as well, hence the -1.  */
+  return __tmp - __str - 1;
+}
+
+
+/* Copy SRC to DEST, including the terminating NUL.  Returns DEST.  */
+__STRING_INLINE char *
+strcpy (char *__dest, __const char *__src)
+{
+  register char *__tmp = __dest;
+  register char __dummy;
+  __asm__ __volatile__
+    (
+     "1:\n\t"
+     "movb     (%0),%2\n\t"
+     "incl     %0\n\t"
+     "movb     %2,(%1)\n\t"
+     "incl     %1\n\t"
+     "testb    %2,%2\n\t"      /* stop once the NUL has been stored */
+     "jne      1b"
+     : "=r" (__src), "=r" (__tmp), "=q" (__dummy)
+     : "0" (__src), "1" (__tmp)
+     : "memory", "cc");
+  return __dest;
+}
+
+
+/* Copy no more than N characters of SRC to DEST.  When SRC is shorter
+   than N the rest of DEST is filled with NUL bytes; when the count runs
+   out first nothing more is stored, so DEST is left unterminated.
+   Nothing is done for N == 0.  Returns DEST.  */
+__STRING_INLINE char *
+strncpy (char *__dest, __const char *__src, size_t __n)
+{
+  register char *__tmp = __dest;
+  register char __dummy;
+  if (__n > 0)
+    __asm__ __volatile__
+      ("1:\n\t"
+       "movb   (%0),%2\n\t"
+       "incl   %0\n\t"
+       "movb   %2,(%1)\n\t"
+       "incl   %1\n\t"
+       "decl   %3\n\t"
+       "je     3f\n\t"         /* N bytes written: done */
+       "testb  %2,%2\n\t"
+       "jne    1b\n\t"
+       "2:\n\t"                /* %2 is 0 here: pad what is left of N */
+       "movb   %2,(%1)\n\t"
+       "incl   %1\n\t"
+       "decl   %3\n\t"
+       "jne    2b\n\t"
+       "3:"
+       : "=r" (__src), "=r" (__tmp), "=q" (__dummy), "=r" (__n)
+       : "0" (__src), "1" (__tmp), "3" (__n)
+       : "memory", "cc");
+
+  return __dest;
+}
+
+
+/* Append SRC onto DEST, including the terminating NUL of SRC.
+   Returns DEST.  */
+__STRING_INLINE char *
+strcat (char *__dest, __const char *__src)
+{
+  /* Start one byte early because the search loop increments the
+     pointer before it looks at a byte.  */
+  register char *__tmp = __dest - 1;
+  register char __dummy;
+  __asm__ __volatile__
+    (
+     "1:\n\t"                  /* find the NUL that ends DEST */
+     "incl     %1\n\t"
+     "cmpb     $0,(%1)\n\t"
+     "jne      1b\n"
+     "2:\n\t"                  /* copy SRC, NUL included */
+     "movb     (%2),%b0\n\t"
+     "incl     %2\n\t"
+     "movb     %b0,(%1)\n\t"
+     "incl     %1\n\t"
+     "testb    %b0,%b0\n\t"
+     "jne      2b\n"
+     : "=q" (__dummy), "=r" (__tmp), "=r" (__src)
+     : "1"  (__tmp), "2"  (__src)
+     : "memory", "cc");
+  return __dest;
+}
+
+
+/* Append no more than N characters from SRC onto DEST and terminate
+   the result with exactly one NUL byte.  Returns DEST.  */
+__STRING_INLINE char *
+strncat (char *__dest, __const char *__src, size_t __n)
+{
+  /* Start one byte early because the search loop increments the
+     pointer before it looks at a byte.  */
+  register char *__tmp = __dest - 1;
+  register char __dummy;
+  __asm__ __volatile__
+    (
+     "1:\n\t"                  /* find the NUL that ends DEST */
+     "incl     %1\n\t"
+     "cmpb     $0,(%1)\n\t"
+     "jne      1b\n"
+     "2:\n\t"
+     "decl     %3\n\t"
+     "js       3f\n\t"         /* N characters copied: terminate */
+     "movb     (%2),%b0\n\t"
+     "leal     1(%2),%2\n\t"
+     "movb     %b0,(%1)\n\t"
+     "leal     1(%1),%1\n\t"
+     "testb    %b0,%b0\n\t"
+     "jne      2b\n\t"
+     /* The NUL of SRC has just been stored and the pointer advanced
+        past it.  Step back so that the store below rewrites that NUL
+        instead of writing one byte beyond the end of the result.  */
+     "decl     %1\n"
+     "3:\n\t"
+     "movb     $0,(%1)\n\t"
+     : "=q" (__dummy), "=r" (__tmp), "=r" (__src), "=r" (__n)
+     : "1" (__tmp), "2" (__src), "3" (__n)
+     : "memory", "cc");
+  return __dest;
+}
+
+
+/* Compare S1 and S2.  Returns 0 when the strings are equal, 1 when
+   the first differing byte of S1 is above that of S2 (compared as
+   unsigned bytes) and -1 otherwise.  */
+__STRING_INLINE int
+strcmp (__const char *__s1, __const char *__s2)
+{
+  register int __res;
+  __asm__ __volatile__
+    ("1:\n\t"
+     "movb     (%1),%b0\n\t"
+     "leal     1(%1),%1\n\t"
+     "cmpb     %b0,(%2)\n\t"   /* flags from S2's byte minus S1's byte */
+     "jne      2f\n\t"
+     "leal     1(%2),%2\n\t"
+     "testb    %b0,%b0\n\t"
+     "jne      1b\n\t"
+     "xorl     %0,%0\n\t"
+     "jmp      3f\n"
+     "2:\n\t"
+     "movl     $1,%0\n\t"      /* movl keeps the flags of the cmpb */
+     "jb       3f\n\t"         /* S2's byte is below S1's: result 1 */
+     "negl     %0\n"
+     "3:"
+     : "=q" (__res), "=r" (__s1), "=r" (__s2)
+     : "1" (__s1), "2" (__s2)
+     : "cc");
+  return __res;
+}
+
+
+/* Compare N characters of S1 and S2.  Returns 0 when no difference is
+   found before N characters or a common NUL, 1 when the first
+   differing byte of S1 is above that of S2 (compared as unsigned
+   bytes) and -1 otherwise.  */
+__STRING_INLINE int
+strncmp (__const char *__s1, __const char *__s2, size_t __n)
+{
+  register int __res;
+  __asm__ __volatile__
+    ("1:\n\t"
+     "decl     %3\n\t"
+     "js       2f\n\t"         /* count went negative: equal so far */
+     "movb     (%1),%b0\n\t"
+     "incl     %1\n\t"
+     "cmpb     %b0,(%2)\n\t"   /* flags from S2's byte minus S1's byte */
+     "jne      3f\n\t"
+     "incl     %2\n\t"
+     "testb    %b0,%b0\n\t"
+     "jne      1b\n"
+     "2:\n\t"
+     "xorl     %0,%0\n\t"
+     "jmp      4f\n"
+     "3:\n\t"
+     "movl     $1,%0\n\t"      /* movl keeps the flags of the cmpb */
+     "jb       4f\n\t"         /* S2's byte is below S1's: result 1 */
+     "negl     %0\n"
+     "4:"
+     : "=q" (__res), "=r" (__s1), "=r" (__s2), "=r" (__n)
+     : "1"  (__s1), "2"  (__s2),  "3" (__n)
+     : "cc");
+  return __res;
+}
+
+
+/* Find the first occurrence of C in S.  A compile-time constant C is
+   masked to one byte and shifted into bits 8-15 before being handed to
+   __strchr_c, which compares against %ah; any other C goes through
+   __strchr_g.  */
+#define strchr(s, c) \
+  (__extension__ (__builtin_constant_p (c)                                   \
+                 ? __strchr_c (s, ((c) & 0xff) << 8)                         \
+                 : __strchr_g (s, c)))
+
+/* strchr for a C that is not a compile-time constant.  Returns a
+   pointer to the first byte of S equal to the low byte of C (the
+   terminating NUL counts as part of S), or a null pointer if there is
+   none.  */
+__STRING_INLINE char *
+__strchr_g (__const char *__s, int __c)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("movb     %%al,%%ah\n"    /* keep the wanted byte in %ah */
+     "1:\n\t"
+     "movb     (%0),%%al\n\t"
+     "cmpb     %%ah,%%al\n\t"
+     "je       2f\n\t"         /* found: %0 still points at the byte */
+     "leal     1(%0),%0\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b\n\t"
+     "xorl     %0,%0\n"        /* reached the NUL without a match */
+     "2:"
+     : "=r" (__res)
+     : "a" (__c), "0" (__s)
+     : "ax", "cc");
+  return __res;
+}
+
+/* strchr for a constant C.  The byte to look for is compared from %ah,
+   i.e. it must already sit in bits 8-15 of __C.  Returns a pointer to
+   the first matching byte of S (the terminating NUL counts as part of
+   S), or a null pointer if there is none.  */
+__STRING_INLINE char *
+__strchr_c (__const char *__s, int __c)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("1:\n\t"
+     "movb     (%0),%%al\n\t"
+     "cmpb     %%ah,%%al\n\t"
+     "je       2f\n\t"         /* found: %0 still points at the byte */
+     "leal     1(%0),%0\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne      1b\n\t"
+     "xorl     %0,%0\n"        /* reached the NUL without a match */
+     "2:"
+     : "=r" (__res)
+     : "a" (__c), "0" (__s)
+     : "ax", "cc");
+  return __res;
+}
+
+
+/* Find the last occurrence of C in S.  A compile-time constant C is
+   masked to one byte and shifted into bits 8-15 before being handed to
+   __strrchr_c, which compares against %ah; any other C goes through
+   __strrchr_g.  */
+#define strrchr(s, c) \
+  (__extension__ (__builtin_constant_p (c)                                   \
+                 ? __strrchr_c (s, ((c) & 0xff) << 8)                        \
+                 : __strrchr_g (s, c)))
+
+/* strrchr for a C that is not a compile-time constant.  Walks all of
+   S, NUL included, remembering the address of the latest byte equal
+   to the low byte of C.  Returns that address, or a null pointer if
+   no byte matched.  */
+__STRING_INLINE char *
+__strrchr_g (__const char *__s, int __c)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "movb     %%al,%%ah\n"    /* keep the wanted byte in %ah */
+     "1:\n\t"
+     "lodsb\n\t"
+     "cmpb     %%ah,%%al\n\t"
+     "jne      2f\n\t"
+     "leal     -1(%%esi),%0\n" /* lodsb stepped one past the match */
+     "2:\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne 1b"
+     : "=d" (__res)
+     : "0" (0), "S" (__s),"a" (__c)
+     : "ax", "si", "cc");
+  return __res;
+}
+
+/* strrchr for a constant C.  The byte to look for is compared from
+   %ah, i.e. it must already sit in bits 8-15 of __C.  Walks all of S,
+   NUL included, remembering the address of the latest matching byte.
+   Returns that address, or a null pointer if no byte matched.  */
+__STRING_INLINE char *
+__strrchr_c (__const char *__s, int __c)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "1:\n\t"
+     "lodsb\n\t"
+     "cmpb     %%ah,%%al\n\t"
+     "jne      2f\n\t"
+     "leal     -1(%%esi),%0\n" /* lodsb stepped one past the match */
+     "2:\n\t"
+     "testb    %%al,%%al\n\t"
+     "jne 1b"
+     : "=d" (__res)
+     : "0" (0), "S" (__s),"a" (__c)
+     : "ax", "si", "cc");
+  return __res;
+}
+
+
+/* Return the length of the initial segment of S which
+   consists entirely of characters not in REJECT.
+
+   Both variants first compute the length of REJECT and then, for each
+   byte of S, scan REJECT for it.  They differ only in the register
+   that keeps the length: the PIC variant saves and restores %ebx by
+   hand and uses it, the other variant uses %edx.  */
+#ifdef __PIC__
+/* NOTE(review): operand %4 has the "g" constraint.  If the compiler
+   chose a stack slot addressed relative to %esp for it, the push
+   below would shift that address by four bytes -- confirm.  */
+__STRING_INLINE size_t
+strcspn (__const char *__s, __const char *__reject)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("push     %%ebx\n\t"
+     "cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of REJECT */
+     "movl     %%ecx,%%ebx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S */
+     "movl     %4,%%edi\n\t"
+     "movl     %%ebx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "jne      1b\n"                   /* not in REJECT: keep going */
+     "2:\n\t"
+     "popl     %%ebx"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__reject)
+     : "ax", "cx", "di", "cc");
+  /* lodsb left the pointer one past the byte that stopped the loop.  */
+  return (__res - 1) - __s;
+}
+#else
+__STRING_INLINE size_t
+strcspn (__const char *__s, __const char *__reject)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of REJECT */
+     "movl     %%ecx,%%edx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S */
+     "movl     %4,%%edi\n\t"
+     "movl     %%edx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "jne      1b\n"                   /* not in REJECT: keep going */
+     "2:"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__reject)
+     : "ax", "cx", "dx", "di", "cc");
+  /* lodsb left the pointer one past the byte that stopped the loop.  */
+  return (__res - 1) - __s;
+}
+#endif
+
+
+/* Return the length of the initial segment of S which
+   consists entirely of characters in ACCEPT.
+
+   Both variants first compute the length of ACCEPT and then, for each
+   byte of S, scan ACCEPT for it.  They differ only in the register
+   that keeps the length: the PIC variant saves and restores %ebx by
+   hand and uses it, the other variant uses %edx.  */
+#ifdef __PIC__
+/* NOTE(review): operand %4 has the "g" constraint.  If the compiler
+   chose a stack slot addressed relative to %esp for it, the pushl
+   below would shift that address by four bytes -- confirm.  */
+__STRING_INLINE size_t
+strspn (__const char *__s, __const char *__accept)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("pushl    %%ebx\n\t"
+     "cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of ACCEPT */
+     "movl     %%ecx,%%ebx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S */
+     "movl     %4,%%edi\n\t"
+     "movl     %%ebx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "je       1b\n"                   /* found in ACCEPT: keep going */
+     "2:\n\t"
+     "popl     %%ebx"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__accept)
+     : "ax", "cx", "di", "cc");
+  /* lodsb left the pointer one past the byte that stopped the loop.  */
+  return (__res - 1) - __s;
+}
+#else
+__STRING_INLINE size_t
+strspn (__const char *__s, __const char *__accept)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of ACCEPT */
+     "movl     %%ecx,%%edx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S */
+     "movl     %4,%%edi\n\t"
+     "movl     %%edx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "je       1b\n"                   /* found in ACCEPT: keep going */
+     "2:"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__accept)
+     : "ax", "cx", "dx", "di", "cc");
+  /* lodsb left the pointer one past the byte that stopped the loop.  */
+  return (__res - 1) - __s;
+}
+#endif
+
+
+/* Find the first occurrence in S of any character in ACCEPT.  Returns
+   a pointer to that character, or a null pointer when the end of S is
+   reached without a match.
+
+   Both variants first compute the length of ACCEPT and then, for each
+   byte of S, scan ACCEPT for it.  They differ only in the register
+   that keeps the length: the PIC variant saves and restores %ebx by
+   hand and uses it, the other variant uses %edx.  */
+#ifdef __PIC__
+/* NOTE(review): operand %4 has the "g" constraint.  If the compiler
+   chose a stack slot addressed relative to %esp for it, the pushl
+   below would shift that address by four bytes -- confirm.  */
+__STRING_INLINE char *
+strpbrk (__const char *__s, __const char *__accept)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("pushl    %%ebx\n\t"
+     "cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of ACCEPT */
+     "movl     %%ecx,%%ebx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S: no match */
+     "movl     %4,%%edi\n\t"
+     "movl     %%ebx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "jne      1b\n\t"
+     "decl     %0\n\t"                 /* step back onto the match */
+     "jmp      3f\n"
+     "2:\n\t"
+     "xorl     %0,%0\n"
+     "3:\n\t"
+     "popl     %%ebx"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__accept)
+     : "ax", "cx", "di", "cc");
+  return __res;
+}
+#else
+__STRING_INLINE char *
+strpbrk (__const char *__s, __const char *__accept)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t"
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"              /* %ecx = length of ACCEPT */
+     "movl     %%ecx,%%edx\n"
+     "1:\n\t"
+     "lodsb\n\t"
+     "testb    %%al,%%al\n\t"
+     "je       2f\n\t"                 /* end of S: no match */
+     "movl     %4,%%edi\n\t"
+     "movl     %%edx,%%ecx\n\t"
+     "repne; scasb\n\t"
+     "jne      1b\n\t"
+     "decl     %0\n\t"                 /* step back onto the match */
+     "jmp      3f\n"
+     "2:\n\t"
+     "xorl     %0,%0\n"
+     "3:"
+     : "=S" (__res)
+     : "a" (0), "c" (0xffffffff), "0" (__s), "g" (__accept)
+     : "ax", "cx", "dx", "di", "cc");
+  return __res;
+}
+#endif
+
+
+/* Find the first occurrence of NEEDLE in HAYSTACK.  Returns a pointer
+   to the start of the match, HAYSTACK itself when NEEDLE is empty, or
+   a null pointer when there is no match.
+
+   Both variants compute the length of NEEDLE once and then compare
+   NEEDLE against every starting position of HAYSTACK in turn.  They
+   differ only in the register that keeps the length: the PIC variant
+   saves and restores %ebx by hand and uses it, the other variant uses
+   %edx.  */
+#ifdef __PIC__
+/* NOTE(review): operand %4 has the "g" constraint.  If the compiler
+   chose a stack slot addressed relative to %esp for it, the pushl
+   below would shift that address by four bytes -- confirm.  */
+__STRING_INLINE char *
+strstr (__const char *__haystack, __const char *__needle)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("pushl    %%ebx\n\t"
+     "cld\n\t" \
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"      /* NOTE! This also sets Z if searchstring='' */
+     "movl     %%ecx,%%ebx\n"
+     "1:\n\t"
+     "movl     %4,%%edi\n\t"
+     "movl     %%esi,%%eax\n\t"        /* remember this starting position */
+     "movl     %%ebx,%%ecx\n\t"
+     "repe; cmpsb\n\t"
+     "je       2f\n\t"         /* also works for empty string, see above */
+     "xchgl    %%eax,%%esi\n\t"
+     "incl     %%esi\n\t"              /* next starting position */
+     "cmpb     $0,-1(%%eax)\n\t"       /* did the mismatch hit the NUL? */
+     "jne      1b\n\t"
+     "xorl     %%eax,%%eax\n\t"
+     "2:\n\t"
+     "popl     %%ebx"
+     : "=a" (__res)
+     : "0" (0), "c" (0xffffffff), "S" (__haystack), "g" (__needle)
+     : "cx", "di", "si", "cc");
+  return __res;
+}
+#else
+__STRING_INLINE char *
+strstr (__const char *__haystack, __const char *__needle)
+{
+  register char *__res;
+  __asm__ __volatile__
+    ("cld\n\t" \
+     "movl     %4,%%edi\n\t"
+     "repne; scasb\n\t"
+     "notl     %%ecx\n\t"
+     "decl     %%ecx\n\t"      /* NOTE! This also sets Z if searchstring='' */
+     "movl     %%ecx,%%edx\n"
+     "1:\n\t"
+     "movl     %4,%%edi\n\t"
+     "movl     %%esi,%%eax\n\t"        /* remember this starting position */
+     "movl     %%edx,%%ecx\n\t"
+     "repe; cmpsb\n\t"
+     "je       2f\n\t"         /* also works for empty string, see above */
+     "xchgl    %%eax,%%esi\n\t"
+     "incl     %%esi\n\t"              /* next starting position */
+     "cmpb     $0,-1(%%eax)\n\t"       /* did the mismatch hit the NUL? */
+     "jne      1b\n\t"
+     "xorl     %%eax,%%eax\n\t"
+     "2:"
+     : "=a" (__res)
+     : "0" (0), "c" (0xffffffff), "S" (__haystack), "g" (__needle)
+     : "cx", "dx", "di", "si", "cc");
+  return __res;
+}
+#endif
+
+
+#undef __STRING_INLINE
+
+#endif /* GNU CC */
index 01fc3399285601321a9ebcbdc700398c043deb90..75434cd7504a5fe28d7014693d6f86c0815d38fa 100644 (file)
@@ -56,21 +56,12 @@ static inline Elf32_Addr
 elf_machine_load_address (void)
 {
   Elf32_Addr addr;
-  asm (".Lhere:        lea .Lhere(%%pc), %0\n"
-       "       sub.l %#.Lhere, %0"
+  asm ("1: lea 1b(%%pc), %0\n"
+       "   sub.l 1b@GOTPC(%%pc), %0"
        : "=a" (addr));
   return addr;
 }
 
-/* The `subl' insn above will contain an R_68K_RELATIVE relocation
-   entry intended to insert the run-time address of the label `.Lhere'.
-   This will be the first relocation in the text of the dynamic
-   linker; we skip it to avoid trying to modify read-only text in this
-   early stage.  */
-#define ELF_MACHINE_BEFORE_RTLD_RELOC(dynamic_info) \
-  ((dynamic_info)[DT_RELA]->d_un.d_ptr += sizeof (Elf32_Rela), \
-   (dynamic_info)[DT_RELASZ]->d_un.d_val -= sizeof (Elf32_Rela))
-
 
 /* Set up the loaded object described by L so its unrelocated PLT
    entries will jump to the on-demand fixup code in dl-runtime.c.  */
@@ -157,13 +148,16 @@ asm (TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup) \
    its return value is the user program's entry point.  */
 
 #define RTLD_START asm ("\
-.text
-.globl _start
-.globl _dl_start_user
+       .text
+       .globl _start
+       .type _start,@function
 _start:
        move.l %sp, -(%sp)
        jbsr _dl_start
        addq.l #4, %sp
+
+       .globl _dl_start_user
+       .type _dl_start_user,@function
 _dl_start_user:
        | Save the user entry point address in %a4.
        move.l %d0, %a4
@@ -209,7 +203,9 @@ _dl_start_user:
        | Initialize %fp with the stack pointer.
        move.l %sp, %fp
        | Jump to the user's entry point.
-       jmp (%a4)");
+       jmp (%a4)
+       .size _dl_start_user, . - _dl_start_user
+       .previous");
 
 /* Nonzero iff TYPE describes a relocation that should
    skip the executable when looking up the symbol value.  */
diff --git a/sysdeps/m68k/m68020/Makefile b/sysdeps/m68k/m68020/Makefile
new file mode 100644 (file)
index 0000000..b176354
--- /dev/null
@@ -0,0 +1,3 @@
+ifeq ($(subdir),db2)
+CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_MC68020_GCC=1
+endif
diff --git a/sysdeps/mach/hurd/abi-tag.h b/sysdeps/mach/hurd/abi-tag.h
deleted file mode 100644 (file)
index 0498d6e..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#define ABI_HURD_TAG   1
-
-#define ABI_HURD_MAJOR 0
-#define ABI_HURD_MINOR 0
-#define ABI_HURD_PATCH 0
-
-/* Don't use `|' in this expression, it is a comment character in the
-   assembler.  */
-#define ABI_TAG ((ABI_HURD_TAG << 24) +                                              \
-                (ABI_HURD_MAJOR << 16) +                                     \
-                (ABI_HURD_MINOR << 8) +                                      \
-                (ABI_HURD_PATCH << 0))
index 6973f7612af21c4d54407ae545bb2e9c1e187e5b..7ee2e203436a7fea45f9c9db477044e69479effb 100644 (file)
@@ -423,6 +423,7 @@ _dl_runtime_resolve:\n                                                            \
        move    $25, $2\n                                                     \
        jr      $25\n                                                         \
        .end    _dl_runtime_resolve\n                                         \
+       .previous\n                                                           \
 ");
 
 /* Mask identifying addresses reserved for the user program,
@@ -519,6 +520,7 @@ _dl_start_user:\n\
        lw $7, 12($29)\n\
        jr $25\n"\
 _RTLD_EPILOGUE(ENTRY_POINT)\
+       "\n.previous"\
 );
 
 /* The MIPS never uses Elfxx_Rela relocations.  */
index e501a2520eb3a49793c20efa37bb9f7ff28039cd..76981a6a9bd6174d7a0049c5628580616d727bf6 100644 (file)
@@ -423,6 +423,7 @@ _dl_runtime_resolve:\n                                                            \
        move    $25, $2\n                                                     \
        jr      $25\n                                                         \
        .end    _dl_runtime_resolve\n                                         \
+       .previous\n                                                           \
 ");
 
 /* Mask identifying addresses reserved for the user program,
@@ -514,6 +515,7 @@ _dl_start_user:\n\
        ld $7, 3*8($29)\n\
        jr $25\n"\
 _RTLD_EPILOGUE(ENTRY_POINT) \
+       "\n.previous"\
 );
 
 
diff --git a/sysdeps/sparc/Makefile b/sysdeps/sparc/Makefile
new file mode 100644 (file)
index 0000000..e55e461
--- /dev/null
@@ -0,0 +1,3 @@
+ifeq ($(subdir),db2)
+CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_SPARC_GCC=1
+endif
index 7c547f24ba760044a1c215b616ac043b7bf00b3a..39bcab7df51bf63588c165f3dd395af2f6d2032a 100644 (file)
@@ -243,7 +243,8 @@ _dl_start_user:
   /* Jump to the user's entry point and deallocate the extra stack we got.  */
        jmp     %l0
         add    %sp, 6*4, %sp
-       .size   _dl_start_user,.-_dl_start_user");
+       .size   _dl_start_user,.-_dl_start_user
+.previous");
 
 #ifdef RESOLVE
 /* Perform the relocation specified by RELOC and SYM (which is fully resolved).
diff --git a/sysdeps/stub/abi-tag.h b/sysdeps/stub/abi-tag.h
deleted file mode 100644 (file)
index 2810552..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-#error must define an ABI tag for this port
-
-#define ABI_TAG 0xff123abc
index 93a73802f7b606b18aa2f5b45e7c1736eac80e03..f00ce5bf37ad5e9dc7f68b1ca08ec21ab006b219 100644 (file)
@@ -57,7 +57,7 @@ EOF
 
   # Emit a compilation rule for this syscall.
   echo "\
-\$(foreach o,\$(all-object-suffixes),\$(objpfx)$file\$o): \\
+\$(foreach o,\$(object-suffixes),\$(objpfx)$file\$o): \\
 \$(common-objpfx)s-proto.d
        (echo '#include <sysdep.h>'; \\
         echo 'PSEUDO ($strong, $syscall, $nargs)'; \\
index ea1ab49fd4423007bf1b0281824ef4c43ea181fa..0545f4426ae836f93acbb897e8244d0ee8e9606e 100644 (file)
@@ -4,6 +4,10 @@ CFLAGS-init-first.c = -fkeep-inline-functions
 sysdep_routines += errno-loc
 endif
 
+ifeq ($(subdir),db2)
+CPPFLAGS += -DHAVE_LLSEEK=1
+endif
+
 ifeq ($(subdir),misc)
 sysdep_routines += sysctl clone llseek
 
diff --git a/sysdeps/unix/sysv/linux/abi-tag.h b/sysdeps/unix/sysv/linux/abi-tag.h
deleted file mode 100644 (file)
index 6c71eec..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* Define the GNU ABI tag for the Linux kernel we need.
-   The is a 4-byte quantity in native byte order:
-   the high byte is 0 to indicate Linux;
-   the low three bytes are the LINUX_VERSION_CODE for the earliest
-   compatible Linux kernel.  */
-
-#define ABI_LINUX_TAG  0
-
-#define ABI_LINUX_MAJOR        2
-#define ABI_LINUX_MINOR        0
-#define ABI_LINUX_PATCH        0
-
-/* Don't use `|' in this expression, it is a comment character in the
-   assembler.  */
-#define ABI_TAG ((ABI_LINUX_TAG << 24) +                                     \
-                (ABI_LINUX_MAJOR << 16) +                                    \
-                (ABI_LINUX_MINOR << 8) +                                     \
-                (ABI_LINUX_PATCH << 0))
index 7d7fc027173cd5ea23f7669b3a8e3c16f6d0f3b3..c0ffbcedf6accd402712241025ca3744d2659f7e 100644 (file)
@@ -16,6 +16,7 @@
    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
+#include <errno.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
index 60e67c5149407a3d634618b86cba2852fd32ff98..49fa180a006d5ef1270f8e751dcf5f77bc9dfe15 100644 (file)
 #ifndef _NETINET_TCP_H
 #define _NETINET_TCP_H 1
 
-#include <sys/cdefs.h>
+#include <features.h>
 #include <sys/types.h>
 
 __BEGIN_DECLS
 
 #ifdef __FAVOR_BSD
-typedef        u_int32_t       tcp_seq;
+typedef        u_int32_t tcp_seq;
 /*
  * TCP header.
  * Per RFC 793, September, 1981.
  */
-struct tcphdr {
-       u_int16_t       th_sport;               /* source port */
-       u_int16_t       th_dport;               /* destination port */
-       tcp_seq th_seq;                 /* sequence number */
-       tcp_seq th_ack;                 /* acknowledgement number */
+struct tcphdr
+  {
+    u_int16_t th_sport;                /* source port */
+    u_int16_t th_dport;                /* destination port */
+    tcp_seq th_seq;            /* sequence number */
+    tcp_seq th_ack;            /* acknowledgement number */
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-       u_int8_t        th_x2:4,                /* (unused) */
-               th_off:4;               /* data offset */
+    u_int8_t th_x2:4;          /* (unused) */
+    u_int8_t th_off:4;         /* data offset */
 #endif
 #if __BYTE_ORDER == __BIG_ENDIAN
-       u_int8_t        th_off:4,               /* data offset */
-               th_x2:4;                /* (unused) */
+    u_int8_t th_off:4;         /* data offset */
+    u_int8_t th_x2:4;          /* (unused) */
 #endif
-       u_int8_t        th_flags;
+    u_int8_t th_flags;
 #define        TH_FIN  0x01
 #define        TH_SYN  0x02
 #define        TH_RST  0x04
 #define        TH_PUSH 0x08
 #define        TH_ACK  0x10
 #define        TH_URG  0x20
-       u_int16_t       th_win;                 /* window */
-       u_int16_t       th_sum;                 /* checksum */
-       u_int16_t       th_urp;                 /* urgent pointer */
+    u_int16_t th_win;          /* window */
+    u_int16_t th_sum;          /* checksum */
+    u_int16_t th_urp;          /* urgent pointer */
 };
 
 #else /* !__FAVOR_BSD */
-struct tcphdr {
-       u_int16_t       source;
-       u_int16_t       dest;
-       u_int32_t       seq;
-       u_int32_t       ack_seq;
+struct tcphdr
+  {
+    u_int16_t source;
+    u_int16_t dest;
+    u_int32_t seq;
+    u_int32_t ack_seq;
 #if __BYTE_ORDER == __LITTLE_ENDIAN
-       u_int16_t       res1:4,
-               doff:4,
-               fin:1,
-               syn:1,
-               rst:1,
-               psh:1,
-               ack:1,
-               urg:1,
-               res2:2;
+    u_int16_t res1:4;
+    u_int16_t doff:4;
+    u_int16_t fin:1;
+    u_int16_t syn:1;
+    u_int16_t rst:1;
+    u_int16_t psh:1;
+    u_int16_t ack:1;
+    u_int16_t urg:1;
+    u_int16_t res2:2;
 #elif __BYTE_ORDER == __BIG_ENDIAN
-       u_int16_t       doff:4,
-               res1:4,
-               res2:2,
-               urg:1,
-               ack:1,
-               psh:1,
-               rst:1,
-               syn:1,
-               fin:1;
+    u_int16_t doff:4;
+    u_int16_t res1:4;
+    u_int16_t res2:2;
+    u_int16_t urg:1;
+    u_int16_t ack:1;
+    u_int16_t psh:1;
+    u_int16_t rst:1;
+    u_int16_t syn:1;
+    u_int16_t fin:1;
 #else
 #error "Adjust your <bits/endian.h> defines"
-#endif 
-       u_int16_t       window;
-       u_int16_t       check;
-       u_int16_t       urg_ptr;
+#endif
+    u_int16_t window;
+    u_int16_t check;
+    u_int16_t urg_ptr;
 };
 #endif /* __FAVOR_BSD */
 
-enum {
+enum
+{
   TCP_ESTABLISHED = 1,
   TCP_SYN_SENT,
   TCP_SYN_RECV,
@@ -124,15 +127,15 @@ enum {
 #define        TCPOPT_EOL              0
 #define        TCPOPT_NOP              1
 #define        TCPOPT_MAXSEG           2
-#define    TCPOLEN_MAXSEG              4
+#define TCPOLEN_MAXSEG         4
 #define TCPOPT_WINDOW          3
-#define    TCPOLEN_WINDOW              3
+#define TCPOLEN_WINDOW         3
 #define TCPOPT_SACK_PERMITTED  4               /* Experimental */
-#define    TCPOLEN_SACK_PERMITTED      2
+#define TCPOLEN_SACK_PERMITTED 2
 #define TCPOPT_SACK            5               /* Experimental */
 #define TCPOPT_TIMESTAMP       8
-#define    TCPOLEN_TIMESTAMP           10
-#define    TCPOLEN_TSTAMP_APPA         (TCPOLEN_TIMESTAMP+2) /* appendix A */
+#define TCPOLEN_TIMESTAMP      10
+#define TCPOLEN_TSTAMP_APPA    (TCPOLEN_TIMESTAMP+2) /* appendix A */
 
 #define TCPOPT_TSTAMP_HDR      \
     (TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)
index 5c1e1f74419927411994efa1ba9bcf6ccfd0a69a..55486feb3ee80a189be880017a8b31f25b1e30a0 100644 (file)
 
 #define _SYS_SYSMACROS_H       1
 
-/* For compatibility we provide alternative names.  */
-#define major(dev) ((int)(((dev) >> 8) & 0xff))
-#define minor(dev) ((int)((dev) & 0xff))
-#define makedev(major, minor) (((major) << 8) | (minor))
+/* For compatibility we provide alternative names.
+
+   major yields bits 8-15 and minor bits 0-7 of a device number;
+   makedev shifts MAJOR left by eight and ORs in MINOR, without
+   masking either argument.
+
+   The problem here is that compilers other than GCC probably don't
+   have the `long long' type, so there `dev_t' is not an integer: the
+   macros read its `__val[0]' member instead, and makedev expands to a
+   brace-enclosed list that can only be used as an initializer.  */
+#if defined __GNUC__ && __GNUC__ >= 2
+# define major(dev) ((int)(((dev) >> 8) & 0xff))
+# define minor(dev) ((int)((dev) & 0xff))
+# define makedev(major, minor) (((major) << 8) | (minor))
+#else
+# define major(dev) (((dev).__val[0] >> 8) & 0xff)
+# define minor(dev) ((dev).__val[0] & 0xff)
+# define makedev(major, minor) { (((major) << 8) | (minor)), 0 }
+#endif
 
 #endif /* _SYS_SYSMACROS_H */