git.ipfire.org Git - thirdparty/openldap.git/commitdiff
Copied from back-bdb to start back-mdb
author Howard Chu <hyc@openldap.org>
Fri, 12 Aug 2011 12:16:53 +0000 (05:16 -0700)
committer Howard Chu <hyc@openldap.org>
Fri, 12 Aug 2011 12:16:53 +0000 (05:16 -0700)
33 files changed:
servers/slapd/back-mdb/Makefile.in [new file with mode: 0644]
servers/slapd/back-mdb/add.c [new file with mode: 0644]
servers/slapd/back-mdb/attr.c [new file with mode: 0644]
servers/slapd/back-mdb/back-mdb.h [new file with mode: 0644]
servers/slapd/back-mdb/bind.c [new file with mode: 0644]
servers/slapd/back-mdb/compare.c [new file with mode: 0644]
servers/slapd/back-mdb/config.c [new file with mode: 0644]
servers/slapd/back-mdb/dbcache.c [new file with mode: 0644]
servers/slapd/back-mdb/delete.c [new file with mode: 0644]
servers/slapd/back-mdb/dn2entry.c [new file with mode: 0644]
servers/slapd/back-mdb/dn2id.c [new file with mode: 0644]
servers/slapd/back-mdb/error.c [new file with mode: 0644]
servers/slapd/back-mdb/extended.c [new file with mode: 0644]
servers/slapd/back-mdb/filterindex.c [new file with mode: 0644]
servers/slapd/back-mdb/id2entry.c [new file with mode: 0644]
servers/slapd/back-mdb/idl.c [new file with mode: 0644]
servers/slapd/back-mdb/idl.h [new file with mode: 0644]
servers/slapd/back-mdb/index.c [new file with mode: 0644]
servers/slapd/back-mdb/init.c [new file with mode: 0644]
servers/slapd/back-mdb/key.c [new file with mode: 0644]
servers/slapd/back-mdb/mdb.c [new file with mode: 0644]
servers/slapd/back-mdb/mdb.h [new file with mode: 0644]
servers/slapd/back-mdb/midl.c [new file with mode: 0644]
servers/slapd/back-mdb/midl.h [new file with mode: 0644]
servers/slapd/back-mdb/modify.c [new file with mode: 0644]
servers/slapd/back-mdb/modrdn.c [new file with mode: 0644]
servers/slapd/back-mdb/monitor.c [new file with mode: 0644]
servers/slapd/back-mdb/nextid.c [new file with mode: 0644]
servers/slapd/back-mdb/operational.c [new file with mode: 0644]
servers/slapd/back-mdb/proto-mdb.h [new file with mode: 0644]
servers/slapd/back-mdb/referral.c [new file with mode: 0644]
servers/slapd/back-mdb/search.c [new file with mode: 0644]
servers/slapd/back-mdb/tools.c [new file with mode: 0644]

diff --git a/servers/slapd/back-mdb/Makefile.in b/servers/slapd/back-mdb/Makefile.in
new file mode 100644 (file)
index 0000000..a0c77a0
--- /dev/null
@@ -0,0 +1,53 @@
+# Makefile.in for back-mdb
+# $OpenLDAP$
+## This work is part of OpenLDAP Software <http://www.openldap.org/>.
+##
+## Copyright 2011 The OpenLDAP Foundation.
+## All rights reserved.
+##
+## Redistribution and use in source and binary forms, with or without
+## modification, are permitted only as authorized by the OpenLDAP
+## Public License.
+##
+## A copy of this license is available in the file LICENSE in the
+## top-level directory of the distribution or, alternatively, at
+## <http://www.OpenLDAP.org/license.html>.
+# C source files that make up the back-mdb backend.
+SRCS = init.c tools.c config.c \
+       add.c bind.c compare.c delete.c modify.c modrdn.c search.c \
+       extended.c referral.c operational.c \
+       attr.c index.c key.c dbcache.c filterindex.c \
+       dn2entry.c dn2id.c error.c id2entry.c idl.c \
+       nextid.c monitor.c mdb.c midl.c
+# .lo objects, one for each entry in SRCS (same base names, same order).
+OBJS = init.lo tools.lo config.lo \
+       add.lo bind.lo compare.lo delete.lo modify.lo modrdn.lo search.lo \
+       extended.lo referral.lo operational.lo \
+       attr.lo index.lo key.lo dbcache.lo filterindex.lo \
+       dn2entry.lo dn2id.lo error.lo id2entry.lo idl.lo \
+       nextid.lo monitor.lo mdb.lo midl.lo
+# Paths to the top-level include and libraries directories, relative to this one.
+LDAP_INCDIR= ../../../include       
+LDAP_LIBDIR= ../../../libraries
+# BUILD_MOD takes whatever value is substituted for @BUILD_MDB@.
+BUILD_OPT = "--enable-mdb"
+BUILD_MOD = @BUILD_MDB@
+# MOD_DEFS expands to <@BUILD_MDB@ value>_DEFS; mod_DEFS is the only such variable defined in this file.
+mod_DEFS = -DSLAPD_IMPORT
+MOD_DEFS = $(@BUILD_MDB@_DEFS)
+MOD_LIBS = $(MDB_LIBS)
+# Link libraries are chosen as <@BUILD_LIBS_DYNAMIC@ value>_LDAP_LIBS; only the shared_ variant is defined here.
+shared_LDAP_LIBS = $(LDAP_LIBLDAP_R_LA) $(LDAP_LIBLBER_LA)
+NT_LINK_LIBS = -L.. -lslapd $(@BUILD_LIBS_DYNAMIC@_LDAP_LIBS)
+UNIX_LINK_LIBS = $(@BUILD_LIBS_DYNAMIC@_LDAP_LIBS)
+# Base name of the library; the rule below depends on lib$(LIBBASE).a.
+LIBBASE = back_mdb
+
+XINCPATH = -I.. -I$(srcdir)/..
+XDEFS = $(MODULES_CPPFLAGS)
+# The local library target depends on the ../.backend stamp, which is touched once lib$(LIBBASE).a is up to date.
+all-local-lib: ../.backend
+
+../.backend: lib$(LIBBASE).a
+       @touch $@
+
diff --git a/servers/slapd/back-mdb/add.c b/servers/slapd/back-mdb/add.c
new file mode 100644 (file)
index 0000000..58f3b18
--- /dev/null
@@ -0,0 +1,479 @@
+/* add.c - ldap mdb back-end add routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+/*
+ * mdb_add - perform an LDAP Add of the entry op->ora_e in back-mdb.
+ *
+ * Steps, in order:
+ *   - (LDAP_X_TXN builds only) handle a request that carries a txn spec;
+ *   - schema check, operational attributes, optional assertion filter;
+ *   - begin the outer transaction and link it into op->o_extra;
+ *   - look up the new DN: it must not exist yet and the entry found in
+ *     its place must be exactly the parent DN;
+ *   - ACL checks (ACL_WADD) on the parent's "children", on the new
+ *     entry's "entry" attribute and on the attribute list;
+ *   - allocate an entry ID and, inside a nested transaction, write the
+ *     dn2id record, the attribute indexes and the id2entry record;
+ *   - build the post-read control if requested;
+ *   - abort the outer transaction for a no-op request, commit otherwise.
+ *
+ * The result is sent to the client from this function through
+ * send_ldap_result(); the function returns rs->sr_err.
+ */
+int
+mdb_add(Operation *op, SlapReply *rs )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       struct berval   pdn;
+       Entry           *p = NULL, *oe = op->ora_e;
+       EntryInfo       *ei;
+       char textbuf[SLAP_TEXT_BUFLEN];
+       size_t textlen = sizeof textbuf;
+       AttributeDescription *children = slap_schema.si_ad_children;
+       AttributeDescription *entry = slap_schema.si_ad_entry;
+       DB_TXN          *ltid = NULL, *lt2;
+       ID eid = NOID;
+       struct mdb_op_info opinfo = {{{ 0 }}};
+       int subentry;
+       DB_LOCK         lock;
+
+       int             num_retries = 0;        /* NOTE(review): not referenced again in this function */
+       int             success;
+       /* response controls; the only one filled in below is the post-read control */
+       LDAPControl **postread_ctrl = NULL;
+       LDAPControl *ctrls[SLAP_MAX_RESPONSE_CONTROLS];
+       int num_ctrls = 0;
+
+#ifdef LDAP_X_TXN
+       int settle = 0;
+#endif
+
+       Debug(LDAP_DEBUG_ARGS, "==> " LDAP_XSTRING(mdb_add) ": %s\n",
+               op->ora_e->e_name.bv_val, 0, 0);
+       /* Request carries a txn spec: check the connection's txn state under
+        * c_mutex.  Unless that state is CONN_TXN_SETTLE, the reply is sent
+        * right here and the add itself is not carried out by this call. */
+#ifdef LDAP_X_TXN
+       if( op->o_txnSpec ) {
+               /* acquire connection lock */
+               ldap_pvt_thread_mutex_lock( &op->o_conn->c_mutex );
+               if( op->o_conn->c_txn == CONN_TXN_INACTIVE ) {
+                       rs->sr_text = "invalid transaction identifier";
+                       rs->sr_err = LDAP_X_TXN_ID_INVALID;
+                       goto txnReturn;
+               } else if( op->o_conn->c_txn == CONN_TXN_SETTLE ) {
+                       settle=1;
+                       goto txnReturn;
+               }
+
+               if( op->o_conn->c_txn_backend == NULL ) {
+                       op->o_conn->c_txn_backend = op->o_bd;
+
+               } else if( op->o_conn->c_txn_backend != op->o_bd ) {
+                       rs->sr_text = "transaction cannot span multiple database contexts";
+                       rs->sr_err = LDAP_AFFECTS_MULTIPLE_DSAS;
+                       goto txnReturn;
+               }
+
+               /* insert operation into transaction */
+
+               rs->sr_text = "transaction specified";
+               rs->sr_err = LDAP_X_TXN_SPECIFY_OKAY;
+
+txnReturn:
+               /* release connection lock */
+               ldap_pvt_thread_mutex_unlock( &op->o_conn->c_mutex );
+
+               if( !settle ) {
+                       send_ldap_result( op, rs );
+                       return rs->sr_err;
+               }
+       }
+#endif
+       /* num_ctrls is 0 here: start with an empty, NULL-terminated control list */
+       ctrls[num_ctrls] = 0;
+
+       /* check entry's schema */
+       rs->sr_err = entry_schema_check( op, op->ora_e, NULL,
+               get_relax(op), 1, NULL, &rs->sr_text, textbuf, textlen );
+       if ( rs->sr_err != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": entry failed schema check: "
+                       "%s (%d)\n", rs->sr_text, rs->sr_err, 0 );
+               goto return_results;
+       }
+
+       /* add opattrs to shadow as well, only missing attrs will actually
+        * be added; helps compatibility with older OL versions */
+       rs->sr_err = slap_add_opattrs( op, &rs->sr_text, textbuf, textlen, 1 );
+       if ( rs->sr_err != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": entry failed op attrs add: "
+                       "%s (%d)\n", rs->sr_text, rs->sr_err, 0 );
+               goto return_results;
+       }
+       /* assertion control: the new entry itself must match the asserted filter */
+       if ( get_assert( op ) &&
+               ( test_filter( op, op->ora_e, get_assertion( op )) != LDAP_COMPARE_TRUE ))
+       {
+               rs->sr_err = LDAP_ASSERTION_FAILED;
+               goto return_results;
+       }
+
+       subentry = is_entry_subentry( op->ora_e );
+
+       /* begin transaction */
+       rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, NULL, &ltid, 
+               mdb->bi_db_opflags );
+       rs->sr_text = NULL;
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": txn_begin failed: %s (%d)\n",
+                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+       /* record the transaction in an OpExtra keyed by mdb and link it into
+        * op->o_extra; it is unlinked again before this function returns */
+       opinfo.boi_oe.oe_key = mdb;
+       opinfo.boi_txn = ltid;
+       opinfo.boi_err = 0;
+       opinfo.boi_acl_cache = op->o_do_not_cache;
+       LDAP_SLIST_INSERT_HEAD( &op->o_extra, &opinfo.boi_oe, oe_next );
+
+       /*
+        * Get the parent dn and see if the corresponding entry exists.
+        */
+       if ( be_issuffix( op->o_bd, &op->ora_e->e_nname ) ) {
+               pdn = slap_empty_bv;
+       } else {
+               dnParent( &op->ora_e->e_nname, &pdn );
+       }
+
+       /* get entry or parent: a return of 0 means the DN already exists;
+        * on DB_NOTFOUND the code below takes the candidate parent from
+        * ei->bei_e (presumably the closest existing ancestor - confirm
+        * against mdb_dn2entry) */
+       rs->sr_err = mdb_dn2entry( op, ltid, &op->ora_e->e_nname, &ei,
+               1, &lock );
+       switch( rs->sr_err ) {
+       case 0:
+               rs->sr_err = LDAP_ALREADY_EXISTS;
+               goto return_results;
+       case DB_NOTFOUND:
+               break;
+       case LDAP_BUSY:
+               rs->sr_text = "ldap server busy";
+               goto return_results;
+       default:
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+       /* a NULL bei_e is replaced by the root entry, slap_entry_root */
+       p = ei->bei_e;
+       if ( !p )
+               p = (Entry *)&slap_entry_root;
+       /* the entry found must be exactly the parent DN; otherwise the parent
+        * does not exist and the reply is a referral carrying the matched DN */
+       if ( !bvmatch( &pdn, &p->e_nname ) ) {
+               rs->sr_matched = ber_strdup_x( p->e_name.bv_val,
+                       op->o_tmpmemctx );
+               rs->sr_ref = is_entry_referral( p )
+                       ? get_entry_referrals( op, p )
+                       : NULL;
+               if ( p != (Entry *)&slap_entry_root )
+                       mdb_unlocked_cache_return_entry_r( mdb, p );
+               p = NULL;
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": parent "
+                       "does not exist\n", 0, 0, 0 );
+
+               rs->sr_err = LDAP_REFERRAL;
+               rs->sr_flags = REP_MATCHED_MUSTBEFREED | REP_REF_MUSTBEFREED;
+               goto return_results;
+       }
+       /* adding a child requires ACL_WADD on the parent's "children" attribute */
+       rs->sr_err = access_allowed( op, p,
+               children, NULL, ACL_WADD, NULL );
+
+       if ( ! rs->sr_err ) {
+               if ( p != (Entry *)&slap_entry_root )
+                       mdb_unlocked_cache_return_entry_r( mdb, p );
+               p = NULL;
+
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": no write access to parent\n",
+                       0, 0, 0 );
+               rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+               rs->sr_text = "no write access to parent";
+               goto return_results;;
+       }
+       /* a real (non-root) parent may not be a subentry, an alias or a referral */
+       if ( p != (Entry *)&slap_entry_root ) {
+               if ( is_entry_subentry( p ) ) {
+                       mdb_unlocked_cache_return_entry_r( mdb, p );
+                       p = NULL;
+                       /* parent is a subentry, don't allow add */
+                       Debug( LDAP_DEBUG_TRACE,
+                               LDAP_XSTRING(mdb_add) ": parent is subentry\n",
+                               0, 0, 0 );
+                       rs->sr_err = LDAP_OBJECT_CLASS_VIOLATION;
+                       rs->sr_text = "parent is a subentry";
+                       goto return_results;;
+               }
+
+               if ( is_entry_alias( p ) ) {
+                       mdb_unlocked_cache_return_entry_r( mdb, p );
+                       p = NULL;
+                       /* parent is an alias, don't allow add */
+                       Debug( LDAP_DEBUG_TRACE,
+                               LDAP_XSTRING(mdb_add) ": parent is alias\n",
+                               0, 0, 0 );
+                       rs->sr_err = LDAP_ALIAS_PROBLEM;
+                       rs->sr_text = "parent is an alias";
+                       goto return_results;;
+               }
+
+               if ( is_entry_referral( p ) ) {
+                       /* parent is a referral, don't allow add */
+                       rs->sr_matched = ber_strdup_x( p->e_name.bv_val,
+                               op->o_tmpmemctx );
+                       rs->sr_ref = get_entry_referrals( op, p );
+                       mdb_unlocked_cache_return_entry_r( mdb, p );
+                       p = NULL;
+                       Debug( LDAP_DEBUG_TRACE,
+                               LDAP_XSTRING(mdb_add) ": parent is referral\n",
+                               0, 0, 0 );
+
+                       rs->sr_err = LDAP_REFERRAL;
+                       rs->sr_flags = REP_MATCHED_MUSTBEFREED | REP_REF_MUSTBEFREED;
+                       goto return_results;
+               }
+
+       }
+
+       if ( subentry ) {
+               /* FIXME: */
+               /* parent must be an administrative point of the required kind */
+       }
+
+       /* free parent and reader lock */
+       if ( p != (Entry *)&slap_entry_root ) {
+               if ( p->e_nname.bv_len ) {
+                       struct berval ppdn;
+
+                       /* ITS#5326: use parent's DN if differs from provided one */
+                       dnParent( &op->ora_e->e_name, &ppdn );
+                       if ( !dn_match( &p->e_name, &ppdn ) ) {
+                               struct berval rdn;
+                               struct berval newdn;
+
+                               dnRdn( &op->ora_e->e_name, &rdn );
+
+                               build_new_dn( &newdn, &p->e_name, &rdn, NULL ); 
+                               if ( op->ora_e->e_name.bv_val != op->o_req_dn.bv_val )
+                                       ber_memfree( op->ora_e->e_name.bv_val );
+                               op->ora_e->e_name = newdn;
+
+                               /* FIXME: should check whether
+                                * dnNormalize(newdn) == e->e_nname ... */
+                       }
+               }
+
+               mdb_unlocked_cache_return_entry_r( mdb, p );
+       }
+       p = NULL;
+       /* ACL_WADD is also required on the "entry" attribute of the new entry */
+       rs->sr_err = access_allowed( op, op->ora_e,
+               entry, NULL, ACL_WADD, NULL );
+
+       if ( ! rs->sr_err ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": no write access to entry\n",
+                       0, 0, 0 );
+               rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+               rs->sr_text = "no write access to entry";
+               goto return_results;;
+       }
+
+       /* 
+        * Check ACL for attribute write access
+        */
+       if (!acl_check_modlist(op, oe, op->ora_modlist)) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": no write access to attribute\n",
+                       0, 0, 0 );
+               rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+               rs->sr_text = "no write access to attribute";
+               goto return_results;;
+       }
+       /* eid was initialised to NOID above, so a new entry ID is obtained here */
+       if ( eid == NOID ) {
+               rs->sr_err = mdb_next_id( op->o_bd, &eid );
+               if( rs->sr_err != 0 ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               LDAP_XSTRING(mdb_add) ": next_id failed (%d)\n",
+                               rs->sr_err, 0, 0 );
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "internal error";
+                       goto return_results;
+               }
+               op->ora_e->e_id = eid;
+       }
+
+       /* nested transaction: lt2 is begun with ltid passed in the parent
+        * position and is used for the three database writes below */
+       rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, ltid, &lt2, 
+               mdb->bi_db_opflags );
+       rs->sr_text = NULL;
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": txn_begin(2) failed: "
+                       "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       /* dn2id index */
+       rs->sr_err = mdb_dn2id_add( op, lt2, ei, op->ora_e );
+       if ( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": dn2id_add failed: %s (%d)\n",
+                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+
+               switch( rs->sr_err ) {
+               case DB_KEYEXIST:
+                       rs->sr_err = LDAP_ALREADY_EXISTS;
+                       break;
+               default:
+                       rs->sr_err = LDAP_OTHER;
+               }
+               goto return_results;
+       }
+
+       /* attribute indexes */
+       rs->sr_err = mdb_index_entry_add( op, lt2, op->ora_e );
+       if ( rs->sr_err != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": index_entry_add failed\n",
+                       0, 0, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "index generation failed";
+               goto return_results;
+       }
+
+       /* id2entry index */
+       rs->sr_err = mdb_id2entry_add( op->o_bd, lt2, op->ora_e );
+       if ( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": id2entry_add failed\n",
+                       0, 0, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "entry store failed";
+               goto return_results;
+       }
+
+       if ( TXN_COMMIT( lt2, 0 ) != 0 ) {
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "txn_commit(2) failed";
+               goto return_results;
+       }
+
+       /* post-read */
+       if( op->o_postread ) {
+               if( postread_ctrl == NULL ) {
+                       postread_ctrl = &ctrls[num_ctrls++];
+                       ctrls[num_ctrls] = NULL;
+               }
+               if ( slap_read_controls( op, rs, op->ora_e,
+                       &slap_post_read_bv, postread_ctrl ) )
+               {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<=- " LDAP_XSTRING(mdb_add) ": post-read "
+                               "failed!\n", 0, 0, 0 );
+                       if ( op->o_postread & SLAP_CONTROL_CRITICAL ) {
+                               /* FIXME: is it correct to abort
+                                * operation if control fails? */
+                               goto return_results;
+                       }
+               }
+       }
+       /* no-op request: abort the outer transaction instead of committing it */
+       if ( op->o_noop ) {
+               if (( rs->sr_err=TXN_ABORT( ltid )) != 0 ) {
+                       rs->sr_text = "txn_abort (no-op) failed";
+               } else {
+                       rs->sr_err = LDAP_X_NO_OPERATION;
+                       ltid = NULL;
+                       goto return_results;
+               }
+
+       } else {
+               struct berval nrdn;
+
+               /* pick the RDN if not suffix; otherwise pick the entire DN.
+                * NOTE(review): nrdn is assigned here but not read again in
+                * this function. */
+               if (pdn.bv_len) {
+                       nrdn.bv_val = op->ora_e->e_nname.bv_val;
+                       nrdn.bv_len = pdn.bv_val - op->ora_e->e_nname.bv_val - 1;
+               } else {
+                       nrdn = op->ora_e->e_nname;
+               }
+
+               if(( rs->sr_err=TXN_COMMIT( ltid, 0 )) != 0 ) {
+                       rs->sr_text = "txn_commit failed";
+               } else {
+                       rs->sr_err = LDAP_SUCCESS;
+               }
+       }
+       /* the outer transaction handle is not used past this point; unlink the OpExtra */
+       ltid = NULL;
+       LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+       opinfo.boi_oe.oe_key = NULL;
+       /* a non-zero sr_err here is the raw TXN_COMMIT/TXN_ABORT return value:
+        * log it through db_strerror() and report LDAP_OTHER */
+       if ( rs->sr_err != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_add) ": %s : %s (%d)\n",
+                       rs->sr_text, db_strerror(rs->sr_err), rs->sr_err );
+               rs->sr_err = LDAP_OTHER;
+               goto return_results;
+       }
+
+       Debug(LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_add) ": added%s id=%08lx dn=\"%s\"\n",
+               op->o_noop ? " (no-op)" : "",
+               op->ora_e->e_id, op->ora_e->e_dn );
+
+       rs->sr_text = NULL;
+       if( num_ctrls ) rs->sr_ctrls = ctrls;
+       /* common exit: sr_err is saved in success before the result is sent */
+return_results:
+       success = rs->sr_err;
+       send_ldap_result( op, rs );
+       /* an error path that left the outer transaction open aborts it here */
+       if( ltid != NULL ) {
+               TXN_ABORT( ltid );
+       }
+       if ( opinfo.boi_oe.oe_key ) {
+               LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+       }
+
+       if( success == LDAP_SUCCESS ) {
+               /* We own the entry now, and it can be purged at will
+                * Check to make sure it's the same entry we entered with.
+                * Possibly a callback may have mucked with it, although
+                * in general callbacks should treat the entry as read-only.
+                */
+               mdb_cache_deref( oe->e_private );
+               if ( op->ora_e == oe )
+                       op->ora_e = NULL;
+
+               if ( mdb->bi_txn_cp_kbyte ) {
+                       TXN_CHECKPOINT( mdb->bi_dbenv,
+                               mdb->bi_txn_cp_kbyte, mdb->bi_txn_cp_min, 0 );
+               }
+       }
+
+       slap_graduate_commit_csn( op );
+       /* free the post-read control and its value against op->o_tmpmemctx */
+       if( postread_ctrl != NULL && (*postread_ctrl) != NULL ) {
+               slap_sl_free( (*postread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+               slap_sl_free( *postread_ctrl, op->o_tmpmemctx );
+       }
+       return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/attr.c b/servers/slapd/back-mdb/attr.c
new file mode 100644 (file)
index 0000000..86cdd59
--- /dev/null
@@ -0,0 +1,441 @@
+/* attr.c - backend routines for dealing with attributes */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+
+#include <ac/socket.h>
+#include <ac/string.h>
+
+#include "slap.h"
+#include "back-mdb.h"
+#include "config.h"
+#include "lutil.h"
+
+/*
+ * mdb_attr_slot - binary-search the backend's AttrInfo array for ad.
+ *
+ * The array is ordered by SLAP_PTRCMP on each element's ai_desc.
+ *
+ *   mdb  backend state; bi_attrs is the array, bi_nattrs its length
+ *   ad   attribute description to look for
+ *   ins  if non-NULL and ad is not found, receives the index at which
+ *        ad would have to be inserted to keep the array ordered
+ *
+ * Returns the index of the matching element, or -1 if ad is not present.
+ * *ins is left untouched when a match is found.
+ */
+int
+mdb_attr_slot( struct mdb_info *mdb, AttributeDescription *ad, int *ins )
+{
+       unsigned base = 0, cursor = 0;
+       unsigned n = mdb->bi_nattrs;
+       int val = 0;
+
+       while ( 0 < n ) {
+               unsigned pivot = n >> 1;
+               cursor = base + pivot;
+
+               /* index the same array that bi_nattrs counts and that
+                * mdb_attr_mask() reads (bi_attrs) */
+               val = SLAP_PTRCMP( ad, mdb->bi_attrs[cursor]->ai_desc );
+               if ( val < 0 ) {
+                       /* continue in the lower half [base, cursor) */
+                       n = pivot;
+               } else if ( val > 0 ) {
+                       /* continue in the upper half (cursor, base+n) */
+                       base = cursor + 1;
+                       n -= pivot + 1;
+               } else {
+                       return cursor;
+               }
+       }
+       if ( ins ) {
+               /* the last element probed sorts before ad: insert after it */
+               if ( val > 0 )
+                       ++cursor;
+               *ins = cursor;
+       }
+       return -1;
+}
+
+/*
+ * ainfo_insert - add a to the backend's ordered AttrInfo array.
+ *
+ * The position is obtained from mdb_attr_slot(), so the array stays in
+ * the order that lookup expects.
+ *
+ * Returns 0 on success, or -1 if an element with the same ai_desc is
+ * already present (the array is not modified in that case).
+ */
+static int
+ainfo_insert( struct mdb_info *mdb, AttrInfo *a )
+{
+       int x;
+       int i = mdb_attr_slot( mdb, a->ai_desc, &x );
+
+       /* Is it a dup? */
+       if ( i >= 0 )
+               return -1;
+
+       /* grow the array by one pointer; use the bi_attrs/bi_nattrs pair
+        * that mdb_attr_slot() counts with and mdb_attr_mask() reads */
+       mdb->bi_attrs = ch_realloc( mdb->bi_attrs, ( mdb->bi_nattrs+1 ) *
+               sizeof( AttrInfo * ));
+       /* shift the tail up by one to open slot x.
+        * NOTE(review): source and destination overlap; this relies on
+        * AC_MEMCPY coping with overlapping regions - confirm. */
+       if ( x < mdb->bi_nattrs )
+               AC_MEMCPY( &mdb->bi_attrs[x+1], &mdb->bi_attrs[x],
+                       ( mdb->bi_nattrs - x ) * sizeof( AttrInfo *));
+       mdb->bi_attrs[x] = a;
+       mdb->bi_nattrs++;
+       return 0;
+}
+
+/*
+ * mdb_attr_mask - return the AttrInfo registered for desc.
+ *
+ * Looks desc up with mdb_attr_slot(); returns the matching element of
+ * mdb->bi_attrs, or NULL when desc has no entry.
+ */
+AttrInfo *
+mdb_attr_mask(
+       struct mdb_info *mdb,
+       AttributeDescription *desc )
+{
+       int slot = mdb_attr_slot( mdb, desc, NULL );
+
+       if ( slot < 0 ) {
+               return NULL;
+       }
+       return mdb->bi_attrs[slot];
+}
+
+int
+mdb_attr_index_config(
+       struct mdb_info *mdb,
+       const char              *fname,
+       int                     lineno,
+       int                     argc,
+       char            **argv,
+       struct          config_reply_s *c_reply)
+{
+       int rc = 0;
+       int     i;
+       slap_mask_t mask;
+       char **attrs;
+       char **indexes = NULL;
+
+       attrs = ldap_str2charray( argv[0], "," );
+
+       if( attrs == NULL ) {
+               fprintf( stderr, "%s: line %d: "
+                       "no attributes specified: %s\n",
+                       fname, lineno, argv[0] );
+               return LDAP_PARAM_ERROR;
+       }
+
+       if ( argc > 1 ) {
+               indexes = ldap_str2charray( argv[1], "," );
+
+               if( indexes == NULL ) {
+                       fprintf( stderr, "%s: line %d: "
+                               "no indexes specified: %s\n",
+                               fname, lineno, argv[1] );
+                       rc = LDAP_PARAM_ERROR;
+                       goto done;
+               }
+       }
+
+       if( indexes == NULL ) {
+               mask = mdb->bi_defaultmask;
+
+       } else {
+               mask = 0;
+
+               for ( i = 0; indexes[i] != NULL; i++ ) {
+                       slap_mask_t index;
+                       rc = slap_str2index( indexes[i], &index );
+
+                       if( rc != LDAP_SUCCESS ) {
+                               if ( c_reply )
+                               {
+                                       snprintf(c_reply->msg, sizeof(c_reply->msg),
+                                               "index type \"%s\" undefined", indexes[i] );
+
+                                       fprintf( stderr, "%s: line %d: %s\n",
+                                               fname, lineno, c_reply->msg );
+                               }
+                               rc = LDAP_PARAM_ERROR;
+                               goto done;
+                       }
+
+                       mask |= index;
+               }
+       }
+
+       if( !mask ) {
+               if ( c_reply )
+               {
+                       snprintf(c_reply->msg, sizeof(c_reply->msg),
+                               "no indexes selected" );
+                       fprintf( stderr, "%s: line %d: %s\n",
+                               fname, lineno, c_reply->msg );
+               }
+               rc = LDAP_PARAM_ERROR;
+               goto done;
+       }
+
+       for ( i = 0; attrs[i] != NULL; i++ ) {
+               AttrInfo        *a;
+               AttributeDescription *ad;
+               const char *text;
+#ifdef LDAP_COMP_MATCH
+               ComponentReference* cr = NULL;
+               AttrInfo *a_cr = NULL;
+#endif
+
+               if( strcasecmp( attrs[i], "default" ) == 0 ) {
+                       mdb->bi_defaultmask |= mask;
+                       continue;
+               }
+
+#ifdef LDAP_COMP_MATCH
+               if ( is_component_reference( attrs[i] ) ) {
+                       rc = extract_component_reference( attrs[i], &cr );
+                       if ( rc != LDAP_SUCCESS ) {
+                               if ( c_reply )
+                               {
+                                       snprintf(c_reply->msg, sizeof(c_reply->msg),
+                                               "index component reference\"%s\" undefined",
+                                               attrs[i] );
+                                       fprintf( stderr, "%s: line %d: %s\n",
+                                               fname, lineno, c_reply->msg );
+                               }
+                               goto done;
+                       }
+                       cr->cr_indexmask = mask;
+                       /*
+                        * After extracting a component reference
+                        * only the name of a attribute will be remaining
+                        */
+               } else {
+                       cr = NULL;
+               }
+#endif
+               ad = NULL;
+               rc = slap_str2ad( attrs[i], &ad, &text );
+
+               if( rc != LDAP_SUCCESS ) {
+                       if ( c_reply )
+                       {
+                               snprintf(c_reply->msg, sizeof(c_reply->msg),
+                                       "index attribute \"%s\" undefined",
+                                       attrs[i] );
+
+                               fprintf( stderr, "%s: line %d: %s\n",
+                                       fname, lineno, c_reply->msg );
+                       }
+                       goto done;
+               }
+
+               if( ad == slap_schema.si_ad_entryDN || slap_ad_is_binary( ad ) ) {
+                       if (c_reply) {
+                               snprintf(c_reply->msg, sizeof(c_reply->msg),
+                                       "index of attribute \"%s\" disallowed", attrs[i] );
+                               fprintf( stderr, "%s: line %d: %s\n",
+                                       fname, lineno, c_reply->msg );
+                       }
+                       rc = LDAP_UNWILLING_TO_PERFORM;
+                       goto done;
+               }
+
+               if( IS_SLAP_INDEX( mask, SLAP_INDEX_APPROX ) && !(
+                       ad->ad_type->sat_approx
+                               && ad->ad_type->sat_approx->smr_indexer
+                               && ad->ad_type->sat_approx->smr_filter ) )
+               {
+                       if (c_reply) {
+                               snprintf(c_reply->msg, sizeof(c_reply->msg),
+                                       "approx index of attribute \"%s\" disallowed", attrs[i] );
+                               fprintf( stderr, "%s: line %d: %s\n",
+                                       fname, lineno, c_reply->msg );
+                       }
+                       rc = LDAP_INAPPROPRIATE_MATCHING;
+                       goto done;
+               }
+
+               if( IS_SLAP_INDEX( mask, SLAP_INDEX_EQUALITY ) && !(
+                       ad->ad_type->sat_equality
+                               && ad->ad_type->sat_equality->smr_indexer
+                               && ad->ad_type->sat_equality->smr_filter ) )
+               {
+                       if (c_reply) {
+                               snprintf(c_reply->msg, sizeof(c_reply->msg),
+                                       "equality index of attribute \"%s\" disallowed", attrs[i] );
+                               fprintf( stderr, "%s: line %d: %s\n",
+                                       fname, lineno, c_reply->msg );
+                       }
+                       rc = LDAP_INAPPROPRIATE_MATCHING;
+                       goto done;
+               }
+
+               if( IS_SLAP_INDEX( mask, SLAP_INDEX_SUBSTR ) && !(
+                       ad->ad_type->sat_substr
+                               && ad->ad_type->sat_substr->smr_indexer
+                               && ad->ad_type->sat_substr->smr_filter ) )
+               {
+                       if (c_reply) {
+                               snprintf(c_reply->msg, sizeof(c_reply->msg),
+                                       "substr index of attribute \"%s\" disallowed", attrs[i] );
+                               fprintf( stderr, "%s: line %d: %s\n",
+                                       fname, lineno, c_reply->msg );
+                       }
+                       rc = LDAP_INAPPROPRIATE_MATCHING;
+                       goto done;
+               }
+
+               Debug( LDAP_DEBUG_CONFIG, "index %s 0x%04lx\n",
+                       ad->ad_cname.bv_val, mask, 0 ); 
+
+               a = (AttrInfo *) ch_malloc( sizeof(AttrInfo) );
+
+#ifdef LDAP_COMP_MATCH
+               a->ai_cr = NULL;
+#endif
+               a->ai_desc = ad;
+
+               if ( mdb->bi_flags & MDB_IS_OPEN ) {
+                       a->ai_indexmask = 0;
+                       a->ai_newmask = mask;
+               } else {
+                       a->ai_indexmask = mask;
+                       a->ai_newmask = 0;
+               }
+
+#ifdef LDAP_COMP_MATCH
+               if ( cr ) {
+                       a_cr = mdb_attr_mask( mdb, ad );
+                       if ( a_cr ) {
+                               /*
+                                * AttrInfo is already in AVL
+                                * just add the extracted component reference
+                                * in the AttrInfo
+                                */
+                               rc = insert_component_reference( cr, &a_cr->ai_cr );
+                               if ( rc != LDAP_SUCCESS) {
+                                       fprintf( stderr, " error during inserting component reference in %s ", attrs[i]);
+                                       rc = LDAP_PARAM_ERROR;
+                                       goto done;
+                               }
+                               continue;
+                       } else {
+                               rc = insert_component_reference( cr, &a->ai_cr );
+                               if ( rc != LDAP_SUCCESS) {
+                                       fprintf( stderr, " error during inserting component reference in %s ", attrs[i]);
+                                       rc = LDAP_PARAM_ERROR;
+                                       goto done;
+                               }
+                       }
+               }
+#endif
+               rc = ainfo_insert( mdb, a );
+               if( rc ) {
+                       if ( mdb->bi_flags & MDB_IS_OPEN ) {
+                               AttrInfo *b = mdb_attr_mask( mdb, ad );
+                               /* If there is already an index defined for this attribute
+                                * it must be replaced. Otherwise we end up with multiple 
+                                * olcIndex values for the same attribute */
+                               if ( b->ai_indexmask & MDB_INDEX_DELETING ) {
+                                       /* If we were editing this attr, reset it */
+                                       b->ai_indexmask &= ~MDB_INDEX_DELETING;
+                                       /* If this is leftover from a previous add, commit it */
+                                       if ( b->ai_newmask )
+                                               b->ai_indexmask = b->ai_newmask;
+                                       b->ai_newmask = a->ai_newmask;
+                                       ch_free( a );
+                                       rc = 0;
+                                       continue;
+                               }
+                       }
+                       if (c_reply) {
+                               snprintf(c_reply->msg, sizeof(c_reply->msg),
+                                       "duplicate index definition for attr \"%s\"",
+                                       attrs[i] );
+                               fprintf( stderr, "%s: line %d: %s\n",
+                                       fname, lineno, c_reply->msg );
+                       }
+
+                       rc = LDAP_PARAM_ERROR;
+                       goto done;
+               }
+       }
+
+done:
+       ldap_charray_free( attrs );
+       if ( indexes != NULL ) ldap_charray_free( indexes );
+
+       return rc;
+}
+
+/*
+ * Render one AttrInfo as an "<attribute name> <index keywords>" value and
+ * append it to the BerVarray passed in v2.  v1 is the AttrInfo to render.
+ * Nothing is appended when slap_index2bvlen() reports a zero length for
+ * the attribute's index mask.  Always returns 0.
+ */
+static int
+mdb_attr_index_unparser( void *v1, void *v2 )
+{
+       AttrInfo *info = v1;
+       BerVarray *out = v2;
+       AttributeDescription *desc = info->ai_desc;
+       struct berval line;
+       char *buf;
+
+       /* length of the keyword part only; zero means there is nothing to emit */
+       slap_index2bvlen( info->ai_indexmask, &line );
+       if ( line.bv_len == 0 )
+               return 0;
+
+       /* room for the name, one separating blank, the keywords and a NUL */
+       line.bv_len += desc->ad_cname.bv_len + 1;
+       buf = ch_malloc( line.bv_len + 1 );
+
+       /* copy the name, add the blank, and let slap_index2bv() continue there */
+       line.bv_val = lutil_strcopy( buf, desc->ad_cname.bv_val );
+       *line.bv_val = ' ';
+       line.bv_val++;
+       slap_index2bv( info->ai_indexmask, &line );
+
+       /* point back at the start of the buffer before handing the value over */
+       line.bv_val = buf;
+       ber_bvarray_add( out, &line );
+
+       return 0;
+}
+
+/* Placeholder description (named "default") and AttrInfo pointing at it.
+ * mdb_attr_index_unparse() stores the default index mask in
+ * aidef.ai_indexmask so it can be rendered by mdb_attr_index_unparser()
+ * exactly like a real attribute. */
+static AttributeDescription addef = { NULL, NULL, BER_BVC("default") };
+static AttrInfo aidef = { &addef };
+
+/*
+ * Append the textual form of every configured index to *bva.
+ *
+ * If a default index mask is set it is rendered first, under the name
+ * "default" (via the static aidef placeholder), followed by one value per
+ * entry of the mi_attrs array in array order.
+ *
+ * The member names follow struct mdb_info as declared in back-mdb.h
+ * (mi_defaultmask, mi_attrs, mi_nattrs).
+ */
+void
+mdb_attr_index_unparse( struct mdb_info *mdb, BerVarray *bva )
+{
+       int i;
+
+       if ( mdb->mi_defaultmask ) {
+               aidef.ai_indexmask = mdb->mi_defaultmask;
+               mdb_attr_index_unparser( &aidef, bva );
+       }
+       for ( i=0; i<mdb->mi_nattrs; i++ )
+               mdb_attr_index_unparser( mdb->mi_attrs[i], bva );
+}
+
+/*
+ * Release a single AttrInfo.  When built with LDAP_COMP_MATCH the pointer
+ * held in ai_cr is released first, then the AttrInfo itself.
+ */
+void
+mdb_attr_info_free( AttrInfo *ai )
+{
+#ifdef LDAP_COMP_MATCH
+       ComponentReference *cref = ai->ai_cr;
+
+       free( cref );
+#endif
+       free( ai );
+}
+
+/*
+ * Free every AttrInfo held in mdb->mi_attrs and then the array itself.
+ *
+ * The array pointer and the element count are reset afterwards so that a
+ * later walk over the (now empty) table, or a second call to this
+ * function, does not touch freed memory.
+ *
+ * The member names follow struct mdb_info as declared in back-mdb.h
+ * (mi_attrs, mi_nattrs).
+ */
+void
+mdb_attr_index_destroy( struct mdb_info *mdb )
+{
+       int i;
+
+       for ( i=0; i<mdb->mi_nattrs; i++ )
+               mdb_attr_info_free( mdb->mi_attrs[i] );
+
+       free( mdb->mi_attrs );
+       mdb->mi_attrs = NULL;
+       mdb->mi_nattrs = 0;
+}
+
+/*
+ * Remove the AttrInfo for attribute description "ad" from mdb->mi_attrs.
+ *
+ * The slot is looked up with mdb_attr_slot(); a negative result is treated
+ * as "not present" and nothing happens.  Otherwise the AttrInfo is freed
+ * and the following entries are moved down by one to close the gap.
+ *
+ * The member names follow struct mdb_info as declared in back-mdb.h
+ * (mi_attrs, mi_nattrs).
+ */
+void mdb_attr_index_free( struct mdb_info *mdb, AttributeDescription *ad )
+{
+       int i;
+
+       i = mdb_attr_slot( mdb, ad, NULL );
+       if ( i >= 0 ) {
+               mdb_attr_info_free( mdb->mi_attrs[i] );
+               mdb->mi_nattrs--;
+               /* shift the tail of the array over the freed slot */
+               for (; i<mdb->mi_nattrs; i++)
+                       mdb->mi_attrs[i] = mdb->mi_attrs[i+1];
+       }
+}
+
+/*
+ * Drop every AttrInfo whose ai_indexmask carries MDB_INDEX_DELETING.
+ *
+ * The array is compacted in a single pass: surviving entries are copied
+ * down to the next free position, in their original order, and the element
+ * count is updated once at the end.  The result is the same as removing the
+ * flagged entries one at a time, without re-shifting the tail for each one.
+ *
+ * The member names follow struct mdb_info as declared in back-mdb.h
+ * (mi_attrs, mi_nattrs).
+ */
+void mdb_attr_flush( struct mdb_info *mdb )
+{
+       int i;
+       int kept = 0;
+
+       for ( i=0; i<mdb->mi_nattrs; i++ ) {
+               AttrInfo *ai = mdb->mi_attrs[i];
+
+               if ( ai->ai_indexmask & MDB_INDEX_DELETING ) {
+                       mdb_attr_info_free( ai );
+               } else {
+                       mdb->mi_attrs[kept++] = ai;
+               }
+       }
+       mdb->mi_nattrs = kept;
+}
diff --git a/servers/slapd/back-mdb/back-mdb.h b/servers/slapd/back-mdb/back-mdb.h
new file mode 100644 (file)
index 0000000..9e728da
--- /dev/null
@@ -0,0 +1,163 @@
+/* back-mdb.h - mdb back-end header file */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#ifndef _BACK_MDB_H_
+#define _BACK_MDB_H_
+
+#include <portable.h>
+#include "slap.h"
+#include "mdb.h"
+
+LDAP_BEGIN_DECL
+
+/* Prefix characters for the base / one-level / subtree flavours of DN keys
+ * -- meaning inferred from the names; TODO confirm against dn2id.c */
+#define DN_BASE_PREFIX         SLAP_INDEX_EQUALITY_PREFIX
+#define DN_ONE_PREFIX          '%'
+#define DN_SUBTREE_PREFIX      '@'
+
+/* Slot numbers in mdb_info.mi_databases[] (used by the mi_ad2id, mi_dn2id
+ * and mi_id2entry accessor macros); MDB_NDB is one past the last slot */
+#define MDB_AD2ID              0
+#define MDB_DN2ID              1
+#define MDB_ID2ENTRY   2
+#define MDB_NDB                        3
+
+/* The default search IDL stack cache depth */
+#define DEFAULT_SEARCH_STACK_DEPTH     16
+
+/* The minimum we can function with */
+#define MINIMUM_SEARCH_STACK_DEPTH     8
+
+/* NOTE(review): presumably an upper bound on the number of index databases;
+ * no user is visible here -- confirm */
+#define MDB_INDICES            128
+
+/* Element type of mdb_info.mi_databases[]: a database name together with
+ * its MDB_dbi handle */
+struct mdb_db_info {
+       struct berval   mdi_name;
+       MDB_dbi mdi_dbi;
+};
+
+/* MDB_MONITOR_IDX is only defined for LDAP_DEVEL builds; it adds the
+ * mi_idx_mutex and mi_idx members to struct mdb_info */
+#ifdef LDAP_DEVEL
+#define MDB_MONITOR_IDX
+#endif /* LDAP_DEVEL */
+
+/* Monitoring state, embedded in struct mdb_info as mi_monitor.
+ * NOTE(review): mdm_cb is an opaque pointer and mdm_ndn a DN value; their
+ * owner is presumably monitor.c -- confirm */
+typedef struct mdb_monitor_t {
+       void            *mdm_cb;
+       struct berval   mdm_ndn;
+} mdb_monitor_t;
+
+/* From ldap_rq.h */
+struct re_s;
+
+/* Per-database private state of the mdb backend; bind.c, compare.c and
+ * config.c obtain it from the BackendDB's be_private pointer.
+ * NOTE(review): several .c files in this commit still use the bi_ member
+ * prefix from back-bdb, while this struct declares mi_ names -- confirm the
+ * rename is completed. */
+struct mdb_info {
+       MDB_env         *mi_dbenv;
+
+       /* DB_ENV parameters */
+       /* The DB_ENV can be tuned via DB_CONFIG -- NOTE(review): wording looks inherited from back-bdb; confirm it applies to MDB_env */
+       char            *mi_dbenv_home;
+       u_int32_t       mi_dbenv_flags;
+       int                     mi_dbenv_mode;
+
+       size_t          mi_mapsize;
+
+       int                     mi_ndatabases;
+       int                     mi_db_opflags;  /* db-specific flags */
+       struct mdb_db_info **mi_databases;      /* indexed by MDB_AD2ID / MDB_DN2ID / MDB_ID2ENTRY via the accessors below */
+       ldap_pvt_thread_mutex_t mi_database_mutex;
+
+       slap_mask_t     mi_defaultmask; /* index mask reported under the name "default" */
+       struct mdb_attrinfo             **mi_attrs;     /* array of AttrInfo pointers */
+       int                     mi_nattrs;      /* number of entries in mi_attrs */
+       void            *mi_search_stack;
+       int                     mi_search_stack_depth;
+
+       int                     mi_txn_cp;
+       u_int32_t       mi_txn_cp_min;
+       u_int32_t       mi_txn_cp_kbyte;
+       struct re_s             *mi_txn_cp_task;
+       struct re_s             *mi_index_task;
+
+       mdb_monitor_t   mi_monitor;
+
+#ifdef MDB_MONITOR_IDX
+       ldap_pvt_thread_mutex_t mi_idx_mutex;
+       Avlnode         *mi_idx;
+#endif /* MDB_MONITOR_IDX */
+
+       int             mi_flags;       /* bit set built from the MDB_IS_OPEN / MDB_DEL_INDEX values below */
+#define        MDB_IS_OPEN             0x01
+#define        MDB_DEL_INDEX   0x08
+};
+
+/* Shorthand for the fixed slots of mi_databases[]; usable as mdb->mi_id2entry etc. */
+#define mi_id2entry    mi_databases[MDB_ID2ENTRY]
+#define mi_dn2id       mi_databases[MDB_DN2ID]
+#define mi_ad2id       mi_databases[MDB_AD2ID]
+
+/* Per-operation backend state: an OpExtra header followed by the MDB
+ * transaction handle, an error value and two small flag bytes.
+ * NOTE(review): moi_oe is presumably first so the struct can be linked
+ * into an operation's extra-data list, and MOI_DONTFREE presumably a value
+ * for moi_flag -- confirm against the users of this struct. */
+struct mdb_op_info {
+       OpExtra         moi_oe;
+       MDB_txn*        moi_txn;
+       u_int32_t       moi_err;
+       char            moi_acl_cache;
+       char            moi_flag;
+};
+#define MOI_DONTFREE   1
+
+/* Copy an ID "src" to pointer "dst" in big-endian byte order.
+ * Exactly sizeof(ID) bytes are written at dst, most significant byte first.
+ * The expansion declares locals named i0, tmp and _p, so the arguments must
+ * not refer to variables with those names. */
+#define MDB_ID2DISK( src, dst )        \
+       do { int i0; ID tmp; unsigned char *_p; \
+               tmp = (src); _p = (unsigned char *)(dst);       \
+               for ( i0=sizeof(ID)-1; i0>=0; i0-- ) {  \
+                       _p[i0] = tmp & 0xff; tmp >>= 8; \
+               } \
+       } while(0)
+
+/* Read sizeof(ID) big-endian bytes starting at pointer "src" and store the
+ * resulting ID, in native order, through pointer "dst".
+ * The expansion declares locals named i0, tmp and _p, so the arguments must
+ * not refer to variables with those names. */
+#define MDB_DISK2ID( src, dst ) \
+       do { unsigned i0; ID tmp = 0; unsigned char *_p;        \
+               _p = (unsigned char *)(src);    \
+               for ( i0=0; i0<sizeof(ID); i0++ ) {     \
+                       tmp <<= 8; tmp |= *_p++;        \
+               } *(dst) = tmp; \
+       } while (0)
+
+LDAP_END_DECL
+
+/* for the cache of attribute information (which are indexed, etc.)
+ * One AttrInfo exists per indexed attribute; pointers to them are kept in
+ * mdb_info.mi_attrs.  While the backend is open, a newly configured mask is
+ * placed in ai_newmask and ai_indexmask starts at 0; otherwise the mask
+ * goes straight into ai_indexmask (see the index configuration code in
+ * attr.c). */
+typedef struct mdb_attrinfo {
+       AttributeDescription *ai_desc; /* attribute description cn;lang-en */
+       slap_mask_t ai_indexmask;       /* how the attr is indexed      */
+       slap_mask_t ai_newmask; /* new settings to replace old mask */
+#ifdef LDAP_COMP_MATCH
+       ComponentReference* ai_cr; /*component indexing*/
+#endif
+       int ai_idx;     /* position in AI array */
+} AttrInfo;
+
+/* These flags must not clash with SLAP_INDEX flags or ops in slap.h!
+ * MDB_INDEX_DELETING is stored in AttrInfo.ai_indexmask; mdb_attr_flush()
+ * frees every AttrInfo that still carries it. */
+#define        MDB_INDEX_DELETING      0x8000U /* index is being modified */
+#define        MDB_INDEX_UPDATE_OP     0x03    /* performing an index update */
+
+/* For slapindex to record which attrs in an entry belong to which
+ * index database.
+ * AttrList is a singly linked list of Attribute pointers; IndexRec pairs
+ * one AttrInfo with the head of such a list.
+ */
+typedef struct AttrList {
+       struct AttrList *next;
+       Attribute *attr;
+} AttrList;
+
+typedef struct IndexRec {
+       AttrInfo *ai;
+       AttrList *attrs;
+} IndexRec;
+
+#include "proto-mdb.h"
+
+#endif /* _BACK_MDB_H_ */
diff --git a/servers/slapd/back-mdb/bind.c b/servers/slapd/back-mdb/bind.c
new file mode 100644 (file)
index 0000000..78a6b09
--- /dev/null
@@ -0,0 +1,166 @@
+/* bind.c - mdb backend bind routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+#include <ac/unistd.h>
+
+#include "back-mdb.h"
+
+/*
+ * Backend bind handler.
+ *
+ * Flow:
+ *   1. be_rootdn_bind() gets the first chance; on LDAP_SUCCESS the function
+ *      returns at once and the frontend sends the result.
+ *   2. Otherwise the entry named by op->o_req_ndn is fetched.  A missing
+ *      entry, a subentry, an alias or a referral all yield
+ *      LDAP_INVALID_CREDENTIALS.
+ *   3. For LDAP_AUTH_SIMPLE the userPassword attribute is checked with
+ *      slap_passwd_check().
+ *
+ * Returns rs->sr_err.  On success (0) nothing is sent here; the frontend
+ * sends the result.  On any failure the result is sent by this function.
+ *
+ * NOTE(review): bi_dbenv, DB_TXN, DB_LOCK, EntryInfo and the DB_* return
+ * codes look inherited from back-bdb; struct mdb_info in back-mdb.h
+ * declares mi_dbenv instead -- confirm during the port.
+ */
+int
+mdb_bind( Operation *op, SlapReply *rs )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       Entry           *e;
+       Attribute       *a;
+       EntryInfo       *ei;
+
+       AttributeDescription *password = slap_schema.si_ad_userPassword;
+
+       DB_TXN          *rtxn;
+       DB_LOCK         lock;
+
+       Debug( LDAP_DEBUG_ARGS,
+               "==> " LDAP_XSTRING(mdb_bind) ": dn: %s\n",
+               op->o_req_dn.bv_val, 0, 0);
+
+       /* allow noauth binds */
+       switch ( be_rootdn_bind( op, NULL ) ) {
+       case LDAP_SUCCESS:
+               /* frontend will send result */
+               return rs->sr_err = LDAP_SUCCESS;
+
+       default:
+               /* give the database a chance */
+               /* NOTE: this behavior departs from that of other backends,
+                * since the others, in case of password checking failure
+                * do not give the database a chance.  If an entry with
+                * rootdn's name does not exist in the database the result
+                * will be the same.  See ITS#4962 for discussion. */
+               break;
+       }
+
+       rs->sr_err = mdb_reader_get(op, mdb->bi_dbenv, &rtxn);
+       if ( rs->sr_err != 0 ) {
+               /* Report LDAP_OTHER, as every other internal-error path in this
+                * function does, instead of sending the raw backend return
+                * code as the LDAP result code. */
+               send_ldap_error( op, rs, LDAP_OTHER, "internal error" );
+               return rs->sr_err;
+       }
+
+dn2entry_retry:
+       /* get entry with reader lock */
+       rs->sr_err = mdb_dn2entry( op, rtxn, &op->o_req_ndn, &ei, 1,
+               &lock );
+
+       switch(rs->sr_err) {
+       case DB_NOTFOUND:
+       case 0:
+               break;
+       case LDAP_BUSY:
+               send_ldap_error( op, rs, LDAP_BUSY, "ldap_server_busy" );
+               return LDAP_BUSY;
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               goto dn2entry_retry;
+       default:
+               send_ldap_error( op, rs, LDAP_OTHER, "internal error" );
+               return rs->sr_err;
+       }
+
+       e = ei->bei_e;
+       if ( rs->sr_err == DB_NOTFOUND ) {
+               /* an entry may still have been handed back here; release it */
+               if( e != NULL ) {
+                       mdb_cache_return_entry_r( mdb, e, &lock );
+                       e = NULL;
+               }
+
+               /* do not reveal whether the DN exists */
+               rs->sr_err = LDAP_INVALID_CREDENTIALS;
+               send_ldap_result( op, rs );
+
+               return rs->sr_err;
+       }
+
+       ber_dupbv( &op->oq_bind.rb_edn, &e->e_name );
+
+       /* subentries, aliases and referrals may not be bound to */
+       if ( is_entry_subentry( e ) ) {
+               /* entry is a subentry, don't allow bind */
+               Debug( LDAP_DEBUG_TRACE, "entry is subentry\n", 0,
+                       0, 0 );
+               rs->sr_err = LDAP_INVALID_CREDENTIALS;
+               goto done;
+       }
+
+       if ( is_entry_alias( e ) ) {
+               /* entry is an alias, don't allow bind */
+               Debug( LDAP_DEBUG_TRACE, "entry is alias\n", 0, 0, 0 );
+               rs->sr_err = LDAP_INVALID_CREDENTIALS;
+               goto done;
+       }
+
+       if ( is_entry_referral( e ) ) {
+               Debug( LDAP_DEBUG_TRACE, "entry is referral\n", 0,
+                       0, 0 );
+               rs->sr_err = LDAP_INVALID_CREDENTIALS;
+               goto done;
+       }
+
+       switch ( op->oq_bind.rb_method ) {
+       case LDAP_AUTH_SIMPLE:
+               a = attr_find( e->e_attrs, password );
+               if ( a == NULL ) {
+                       rs->sr_err = LDAP_INVALID_CREDENTIALS;
+                       goto done;
+               }
+
+               if ( slap_passwd_check( op, e, a, &op->oq_bind.rb_cred,
+                                       &rs->sr_text ) != 0 )
+               {
+                       /* failure; stop front end from sending result */
+                       rs->sr_err = LDAP_INVALID_CREDENTIALS;
+                       goto done;
+               }
+
+               rs->sr_err = 0;
+               break;
+
+       default:
+               assert( 0 ); /* should not be reachable */
+               rs->sr_err = LDAP_STRONG_AUTH_NOT_SUPPORTED;
+               rs->sr_text = "authentication method not supported";
+       }
+
+done:
+       /* free entry and reader lock */
+       if( e != NULL ) {
+               mdb_cache_return_entry_r( mdb, e, &lock );
+       }
+
+       if ( rs->sr_err ) {
+               send_ldap_result( op, rs );
+               if ( rs->sr_ref ) {
+                       ber_bvarray_free( rs->sr_ref );
+                       rs->sr_ref = NULL;
+               }
+       }
+       /* front end will send result on success (rs->sr_err==0) */
+       return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/compare.c b/servers/slapd/back-mdb/compare.c
new file mode 100644 (file)
index 0000000..55c1041
--- /dev/null
@@ -0,0 +1,143 @@
+/* compare.c - mdb backend compare routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+
+/*
+ * Backend compare handler.
+ *
+ * Looks up the entry named by op->o_req_ndn and evaluates op->orc_ava
+ * against it with slap_compare_entry().  The result is always sent from
+ * within this function.  The return value is rs->sr_err, except that
+ * LDAP_COMPARE_TRUE / LDAP_COMPARE_FALSE are turned into LDAP_SUCCESS after
+ * the result has been sent.
+ *
+ * NOTE(review): bi_dbenv, DB_TXN, DB_LOCK, EntryInfo and the DB_* return
+ * codes look inherited from back-bdb; struct mdb_info in back-mdb.h
+ * declares mi_dbenv instead -- confirm during the port.
+ */
+int
+mdb_compare( Operation *op, SlapReply *rs )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       Entry           *e = NULL;
+       EntryInfo       *ei;
+       int             manageDSAit = get_manageDSAit( op );
+
+       DB_TXN          *rtxn;
+       DB_LOCK         lock;
+
+       rs->sr_err = mdb_reader_get(op, mdb->bi_dbenv, &rtxn);
+       switch(rs->sr_err) {
+       case 0:
+               break;
+       default:
+               send_ldap_error( op, rs, LDAP_OTHER, "internal error" );
+               return rs->sr_err;
+       }
+
+dn2entry_retry:
+       /* get entry */
+       rs->sr_err = mdb_dn2entry( op, rtxn, &op->o_req_ndn, &ei, 1,
+               &lock );
+
+       switch( rs->sr_err ) {
+       case DB_NOTFOUND:
+       case 0:
+               break;
+       case LDAP_BUSY:
+               rs->sr_text = "ldap server busy";
+               goto return_results;
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               /* lock conflict: retry the lookup */
+               goto dn2entry_retry;
+       default:
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       e = ei->bei_e;
+       if ( rs->sr_err == DB_NOTFOUND ) {
+               /* target not found; e, if set, is the entry that was returned instead */
+               if ( e != NULL ) {
+                       /* return referral only if "disclose" is granted on the object */
+                       if ( ! access_allowed( op, e, slap_schema.si_ad_entry,
+                               NULL, ACL_DISCLOSE, NULL ) )
+                       {
+                               rs->sr_err = LDAP_NO_SUCH_OBJECT;
+
+                       } else {
+                               /* sr_matched is a private copy here; it is freed after sending */
+                               rs->sr_matched = ch_strdup( e->e_dn );
+                               rs->sr_ref = is_entry_referral( e )
+                                       ? get_entry_referrals( op, e )
+                                       : NULL;
+                               rs->sr_err = LDAP_REFERRAL;
+                       }
+
+                       mdb_cache_return_entry_r( mdb, e, &lock );
+                       e = NULL;
+
+               } else {
+                       /* nothing returned at all: fall back to the default referral, if any */
+                       rs->sr_ref = referral_rewrite( default_referral,
+                               NULL, &op->o_req_dn, LDAP_SCOPE_DEFAULT );
+                       rs->sr_err = rs->sr_ref ? LDAP_REFERRAL : LDAP_NO_SUCH_OBJECT;
+               }
+
+               send_ldap_result( op, rs );
+
+               ber_bvarray_free( rs->sr_ref );
+               free( (char *)rs->sr_matched );
+               rs->sr_ref = NULL;
+               rs->sr_matched = NULL;
+
+               goto done;
+       }
+
+       if (!manageDSAit && is_entry_referral( e ) ) {
+               /* return referral only if "disclose" is granted on the object */
+               if ( !access_allowed( op, e, slap_schema.si_ad_entry,
+                       NULL, ACL_DISCLOSE, NULL ) )
+               {
+                       rs->sr_err = LDAP_NO_SUCH_OBJECT;
+               } else {
+                       /* entry is a referral, don't allow compare */
+                       rs->sr_ref = get_entry_referrals( op, e );
+                       rs->sr_err = LDAP_REFERRAL;
+                       /* borrowed from the entry, so it is only cleared below, not freed */
+                       rs->sr_matched = e->e_name.bv_val;
+               }
+
+               Debug( LDAP_DEBUG_TRACE, "entry is referral\n", 0, 0, 0 );
+
+               send_ldap_result( op, rs );
+
+               ber_bvarray_free( rs->sr_ref );
+               rs->sr_ref = NULL;
+               rs->sr_matched = NULL;
+               goto done;
+       }
+
+       rs->sr_err = slap_compare_entry( op, e, op->orc_ava );
+
+return_results:
+       send_ldap_result( op, rs );
+
+       /* the compare outcome has been sent; report success to the caller */
+       switch ( rs->sr_err ) {
+       case LDAP_COMPARE_FALSE:
+       case LDAP_COMPARE_TRUE:
+               rs->sr_err = LDAP_SUCCESS;
+               break;
+       }
+
+done:
+       /* free entry */
+       if ( e != NULL ) {
+               mdb_cache_return_entry_r( mdb, e, &lock );
+       }
+
+       return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/config.c b/servers/slapd/back-mdb/config.c
new file mode 100644 (file)
index 0000000..5b26bec
--- /dev/null
@@ -0,0 +1,942 @@
+/* config.c - mdb backend configuration file routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/ctype.h>
+#include <ac/string.h>
+#include <ac/errno.h>
+
+#include "back-mdb.h"
+
+#include "config.h"
+
+#include "lutil.h"
+#include "ldap_rq.h"
+
+#ifdef DB_DIRTY_READ
+#      define  SLAP_MDB_ALLOW_DIRTY_READ
+#endif
+
+/* File-local names are routed through MDB_SYMBOL().
+ * NOTE(review): MDB_SYMBOL is not defined in back-mdb.h itself; presumably
+ * it comes from proto-mdb.h -- confirm. */
+#define mdb_cf_gen             MDB_SYMBOL(cf_gen)
+#define        mdb_cf_cleanup          MDB_SYMBOL(cf_cleanup)
+#define mdb_checkpoint         MDB_SYMBOL(checkpoint)
+#define mdb_online_index       MDB_SYMBOL(online_index)
+
+/* handler referenced by the ARG_MAGIC entries of mdbcfg[] */
+static ConfigDriver mdb_cf_gen;
+
+/* Keyword identifiers, starting at 1.  Each is OR-ed into the argument
+ * type of an ARG_MAGIC entry in mdbcfg[] (e.g. ARG_MAGIC|MDB_DIRECTORY),
+ * presumably so mdb_cf_gen can tell which keyword it was invoked for. */
+enum {
+       MDB_CHKPT = 1,
+       MDB_CONFIG,
+       MDB_CRYPTFILE,
+       MDB_CRYPTKEY,
+       MDB_DIRECTORY,
+       MDB_NOSYNC,
+       MDB_DIRTYR,
+       MDB_INDEX,
+       MDB_LOCKD,
+       MDB_SSTACK,
+       MDB_MODE,
+       MDB_PGSIZE,
+       MDB_CHECKSUM
+};
+
+/* Configuration keywords of this backend.  Entries flagged ARG_MAGIC are
+ * handled by mdb_cf_gen; entries flagged ARG_OFFSET store directly into a
+ * member of struct mdb_info.  The table ends with an all-NULL entry.
+ * NOTE(review): the ARG_OFFSET entries name members (bi_cache.*,
+ * bi_idl_cache_max_size, bi_linear_index, bi_shm_key) that struct mdb_info
+ * in back-mdb.h does not declare -- they look inherited from back-bdb;
+ * confirm which keywords are meant to survive. */
+static ConfigTable mdbcfg[] = {
+       { "directory", "dir", 2, 2, 0, ARG_STRING|ARG_MAGIC|MDB_DIRECTORY,
+               mdb_cf_gen, "( OLcfgDbAt:0.1 NAME 'olcDbDirectory' "
+                       "DESC 'Directory for database content' "
+                       "EQUALITY caseIgnoreMatch "
+                       "SYNTAX OMsDirectoryString SINGLE-VALUE )", NULL, NULL },
+       { "cachefree", "size", 2, 2, 0, ARG_ULONG|ARG_OFFSET,
+               (void *)offsetof(struct mdb_info, bi_cache.c_minfree),
+               "( OLcfgDbAt:1.11 NAME 'olcDbCacheFree' "
+                       "DESC 'Number of extra entries to free when max is reached' "
+                       "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL },
+       { "cachesize", "size", 2, 2, 0, ARG_ULONG|ARG_OFFSET,
+               (void *)offsetof(struct mdb_info, bi_cache.c_maxsize),
+               "( OLcfgDbAt:1.1 NAME 'olcDbCacheSize' "
+                       "DESC 'Entry cache size in entries' "
+                       "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL },
+       { "checkpoint", "kbyte> <min", 3, 3, 0, ARG_MAGIC|MDB_CHKPT,
+               mdb_cf_gen, "( OLcfgDbAt:1.2 NAME 'olcDbCheckpoint' "
+                       "DESC 'Database checkpoint interval in kbytes and minutes' "
+                       "SYNTAX OMsDirectoryString SINGLE-VALUE )",NULL, NULL },
+       { "checksum", NULL, 1, 2, 0, ARG_ON_OFF|ARG_MAGIC|MDB_CHECKSUM,
+               mdb_cf_gen, "( OLcfgDbAt:1.16 NAME 'olcDbChecksum' "
+                       "DESC 'Enable database checksum validation' "
+                       "SYNTAX OMsBoolean SINGLE-VALUE )", NULL, NULL },
+       { "cryptfile", "file", 2, 2, 0, ARG_STRING|ARG_MAGIC|MDB_CRYPTFILE,
+               mdb_cf_gen, "( OLcfgDbAt:1.13 NAME 'olcDbCryptFile' "
+                       "DESC 'Pathname of file containing the DB encryption key' "
+                       "SYNTAX OMsDirectoryString SINGLE-VALUE )",NULL, NULL },
+       { "cryptkey", "key", 2, 2, 0, ARG_BERVAL|ARG_MAGIC|MDB_CRYPTKEY,
+               mdb_cf_gen, "( OLcfgDbAt:1.14 NAME 'olcDbCryptKey' "
+                       "DESC 'DB encryption key' "
+                       "SYNTAX OMsOctetString SINGLE-VALUE )",NULL, NULL },
+       { "dbconfig", "DB_CONFIG setting", 1, 0, 0, ARG_MAGIC|MDB_CONFIG,
+               mdb_cf_gen, "( OLcfgDbAt:1.3 NAME 'olcDbConfig' "
+                       "DESC 'BerkeleyDB DB_CONFIG configuration directives' "
+                       "SYNTAX OMsIA5String X-ORDERED 'VALUES' )", NULL, NULL },
+       { "dbnosync", NULL, 1, 2, 0, ARG_ON_OFF|ARG_MAGIC|MDB_NOSYNC,
+               mdb_cf_gen, "( OLcfgDbAt:1.4 NAME 'olcDbNoSync' "
+                       "DESC 'Disable synchronous database writes' "
+                       "SYNTAX OMsBoolean SINGLE-VALUE )", NULL, NULL },
+       { "dbpagesize", "db> <size", 3, 3, 0, ARG_MAGIC|MDB_PGSIZE,
+               mdb_cf_gen, "( OLcfgDbAt:1.15 NAME 'olcDbPageSize' "
+                       "DESC 'Page size of specified DB, in Kbytes' "
+                       "EQUALITY caseExactMatch "
+                       "SYNTAX OMsDirectoryString )", NULL, NULL },
+       { "dirtyread", NULL, 1, 2, 0,
+#ifdef SLAP_MDB_ALLOW_DIRTY_READ
+               ARG_ON_OFF|ARG_MAGIC|MDB_DIRTYR, mdb_cf_gen,
+#else
+               ARG_IGNORED, NULL,
+#endif
+               "( OLcfgDbAt:1.5 NAME 'olcDbDirtyRead' "
+               "DESC 'Allow reads of uncommitted data' "
+               "SYNTAX OMsBoolean SINGLE-VALUE )", NULL, NULL },
+       { "dncachesize", "size", 2, 2, 0, ARG_ULONG|ARG_OFFSET,
+               (void *)offsetof(struct mdb_info, bi_cache.c_eimax),
+               "( OLcfgDbAt:1.12 NAME 'olcDbDNcacheSize' "
+                       "DESC 'DN cache size' "
+                       "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL },
+       { "idlcachesize", "size", 2, 2, 0, ARG_ULONG|ARG_OFFSET,
+               (void *)offsetof(struct mdb_info, bi_idl_cache_max_size),
+               "( OLcfgDbAt:1.6 NAME 'olcDbIDLcacheSize' "
+               "DESC 'IDL cache size in IDLs' "
+               "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL },
+       { "index", "attr> <[pres,eq,approx,sub]", 2, 3, 0, ARG_MAGIC|MDB_INDEX,
+               mdb_cf_gen, "( OLcfgDbAt:0.2 NAME 'olcDbIndex' "
+               "DESC 'Attribute index parameters' "
+               "EQUALITY caseIgnoreMatch "
+               "SYNTAX OMsDirectoryString )", NULL, NULL },
+       { "linearindex", NULL, 1, 2, 0, ARG_ON_OFF|ARG_OFFSET,
+               (void *)offsetof(struct mdb_info, bi_linear_index), 
+               "( OLcfgDbAt:1.7 NAME 'olcDbLinearIndex' "
+               "DESC 'Index attributes one at a time' "
+               "SYNTAX OMsBoolean SINGLE-VALUE )", NULL, NULL },
+       { "lockdetect", "policy", 2, 2, 0, ARG_MAGIC|MDB_LOCKD,
+               mdb_cf_gen, "( OLcfgDbAt:1.8 NAME 'olcDbLockDetect' "
+               "DESC 'Deadlock detection algorithm' "
+               "SYNTAX OMsDirectoryString SINGLE-VALUE )", NULL, NULL },
+       { "mode", "mode", 2, 2, 0, ARG_MAGIC|MDB_MODE,
+               mdb_cf_gen, "( OLcfgDbAt:0.3 NAME 'olcDbMode' "
+               "DESC 'Unix permissions of database files' "
+               "SYNTAX OMsDirectoryString SINGLE-VALUE )", NULL, NULL },
+       { "searchstack", "depth", 2, 2, 0, ARG_INT|ARG_MAGIC|MDB_SSTACK,
+               mdb_cf_gen, "( OLcfgDbAt:1.9 NAME 'olcDbSearchStack' "
+               "DESC 'Depth of search stack in IDLs' "
+               "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL },
+       { "shm_key", "key", 2, 2, 0, ARG_LONG|ARG_OFFSET,
+               (void *)offsetof(struct mdb_info, bi_shm_key), 
+               "( OLcfgDbAt:1.10 NAME 'olcDbShmKey' "
+               "DESC 'Key for shared memory region' "
+               "SYNTAX OMsInteger SINGLE-VALUE )", NULL, NULL },
+       { NULL, NULL, 0, 0, 0, ARG_IGNORED,
+               NULL, NULL, NULL, NULL }
+};
+
+/* Object class describing this backend's configuration entry; it is tied
+ * to the mdbcfg[] table and the Cft_Database entry type.
+ * NOTE(review): without MDB_HIER the class is still named 'olcBdbConfig'
+ * although its DESC says MDB -- this looks like an unfinished rename from
+ * back-bdb; confirm the name and OID cannot collide with back-bdb.
+ * NOTE(review): mdbcfg[] defines olcDbChecksum, but the MAY list below does
+ * not mention it -- confirm whether that is intended. */
+static ConfigOCs mdbocs[] = {
+       {
+#ifdef MDB_HIER
+               "( OLcfgDbOc:1.2 "
+               "NAME 'olcHdbConfig' "
+               "DESC 'HDB backend configuration' "
+#else
+               "( OLcfgDbOc:1.1 "
+               "NAME 'olcBdbConfig' "
+               "DESC 'MDB backend configuration' "
+#endif
+               "SUP olcDatabaseConfig "
+               "MUST olcDbDirectory "
+               "MAY ( olcDbCacheSize $ olcDbCheckpoint $ olcDbConfig $ "
+               "olcDbCryptFile $ olcDbCryptKey $ "
+               "olcDbNoSync $ olcDbDirtyRead $ olcDbIDLcacheSize $ "
+               "olcDbIndex $ olcDbLinearIndex $ olcDbLockDetect $ "
+               "olcDbMode $ olcDbSearchStack $ olcDbShmKey $ "
+               "olcDbCacheFree $ olcDbDNcacheSize $ olcDbPageSize ) )",
+                       Cft_Database, mdbcfg },
+       { NULL, 0, NULL }
+};
+
+/* Keyword-to-constant table, presumably for the "lockdetect" keyword
+ * (MDB_LOCKD); terminated by a BER_BVNULL entry.
+ * NOTE(review): the DB_LOCK_* constants look inherited from back-bdb --
+ * confirm they are still meaningful for this backend. */
+static slap_verbmasks mdb_lockd[] = {
+       { BER_BVC("default"), DB_LOCK_DEFAULT },
+       { BER_BVC("oldest"), DB_LOCK_OLDEST },
+       { BER_BVC("random"), DB_LOCK_RANDOM },
+       { BER_BVC("youngest"), DB_LOCK_YOUNGEST },
+       { BER_BVC("fewest"), DB_LOCK_MINLOCKS },
+       { BER_BVNULL, 0 }
+};
+
+/*
+ * Task body for periodic checkpoints.
+ *
+ * arg is the struct re_s describing this task; its own arg member carries
+ * the struct mdb_info to checkpoint.  After TXN_CHECKPOINT() has been
+ * invoked, the task is marked stopped on slapd_rq while holding the queue
+ * mutex.  ctx is unused.  Always returns NULL.
+ */
+static void *
+mdb_checkpoint( void *ctx, void *arg )
+{
+       struct re_s *task = arg;
+       struct mdb_info *info = task->arg;
+
+       TXN_CHECKPOINT( info->bi_dbenv, info->bi_txn_cp_kbyte,
+               info->bi_txn_cp_min, 0 );
+
+       /* the run queue is only modified with its mutex held */
+       ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
+       ldap_pvt_runqueue_stoptask( &slapd_rq, task );
+       ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
+
+       return NULL;
+}
+
+/* Run-queue task body that reindexes entries on the fly.
+ *
+ * arg is the struct re_s describing this task; its own arg field carries the
+ * BackendDB being reindexed.  Starting at ID 1, the loop looks up the next
+ * existing ID in id2entry with a DB_SET_RANGE cursor get, fetches the entry
+ * through mdb_cache_find_id() and feeds it to mdb_index_entry().  Each entry
+ * is handled in its own transaction; a deadlock aborts the transaction and
+ * retries, any other error ends the walk.  The walk also ends when the cursor
+ * reports DB_NOTFOUND or when slapd_shutdown is set.
+ *
+ * After the loop, every attribute that is not being deleted and has a pending
+ * ai_newmask gets that mask promoted to ai_indexmask.  Finally the task is
+ * stopped and removed from the run queue.  Always returns NULL.
+ */
+static void *
+mdb_online_index( void *ctx, void *arg )
+{
+       struct re_s *rtask = arg;
+       BackendDB *be = rtask->arg;
+       struct mdb_info *mdb = be->be_private;
+
+       Connection conn = {0};
+       OperationBuffer opbuf;
+       Operation *op;
+
+       DBC *curs;
+       DBT key, data;
+       DB_TXN *txn;
+       DB_LOCK lock;
+       ID id, nid;
+       EntryInfo *ei;
+       int rc, getnext = 1;
+       int i;
+
+       connection_fake_init( &conn, &opbuf, ctx );
+       op = &opbuf.ob_op;
+
+       op->o_bd = be;
+
+       DBTzero( &key );
+       DBTzero( &data );
+
+       /* the key buffer is the on-disk form of the ID we are looking for */
+       id = 1;
+       key.data = &nid;
+       key.size = key.ulen = sizeof(ID);
+       key.flags = DB_DBT_USERMEM;
+
+       /* zero-length partial read: only the key is wanted from the cursor */
+       data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+       data.dlen = data.ulen = 0;
+
+       while ( 1 ) {
+               if ( slapd_shutdown )
+                       break;
+
+               rc = TXN_BEGIN( mdb->bi_dbenv, NULL, &txn, mdb->bi_db_opflags );
+               if ( rc )
+                       break;
+               if ( getnext ) {
+                       getnext = 0;
+                       MDB_ID2DISK( id, &nid );
+                       rc = mdb->bi_id2entry->bdi_db->cursor(
+                               mdb->bi_id2entry->bdi_db, txn, &curs, mdb->bi_db_opflags );
+                       if ( rc ) {
+                               TXN_ABORT( txn );
+                               break;
+                       }
+                       /* position on the smallest key >= id */
+                       rc = curs->c_get( curs, &key, &data, DB_SET_RANGE );
+                       curs->c_close( curs );
+                       if ( rc ) {
+                               TXN_ABORT( txn );
+                               /* no more entries: normal end of the walk */
+                               if ( rc == DB_NOTFOUND )
+                                       rc = 0;
+                               if ( rc == DB_LOCK_DEADLOCK ) {
+                                       ldap_pvt_thread_yield();
+                                       continue;
+                               }
+                               break;
+                       }
+                       MDB_DISK2ID( &nid, &id );
+               }
+
+               ei = NULL;
+               /* NOTE(review): &lock is handed to the cache lookup but nothing
+                * in this function releases it afterwards - confirm whether the
+                * entry needs an explicit return/unlock here.
+                */
+               rc = mdb_cache_find_id( op, txn, id, &ei, 0, &lock );
+               if ( rc ) {
+                       TXN_ABORT( txn );
+                       if ( rc == DB_LOCK_DEADLOCK ) {
+                               ldap_pvt_thread_yield();
+                               continue;
+                       }
+                       if ( rc == DB_NOTFOUND ) {
+                               id++;
+                               getnext = 1;
+                               continue;
+                       }
+                       break;
+               }
+               if ( ei->bei_e ) {
+                       rc = mdb_index_entry( op, txn, MDB_INDEX_UPDATE_OP, ei->bei_e );
+                       if ( rc == DB_LOCK_DEADLOCK ) {
+                               TXN_ABORT( txn );
+                               ldap_pvt_thread_yield();
+                               continue;
+                       }
+                       if ( rc == 0 ) {
+                               rc = TXN_COMMIT( txn, 0 );
+                       } else {
+                               /* indexing failed: do not leave the transaction open */
+                               TXN_ABORT( txn );
+                       }
+                       txn = NULL;
+                       if ( rc )
+                               break;
+               } else {
+                       /* nothing to index for this ID; release the transaction
+                        * before the next iteration begins a new one
+                        */
+                       TXN_ABORT( txn );
+                       txn = NULL;
+               }
+               id++;
+               getnext = 1;
+       }
+
+       /* promote pending index masks, skipping attributes being deleted */
+       for ( i = 0; i < mdb->bi_nattrs; i++ ) {
+               if ( mdb->bi_attrs[ i ]->ai_indexmask & MDB_INDEX_DELETING
+                       || mdb->bi_attrs[ i ]->ai_newmask == 0 )
+               {
+                       continue;
+               }
+               mdb->bi_attrs[ i ]->ai_indexmask = mdb->bi_attrs[ i ]->ai_newmask;
+               mdb->bi_attrs[ i ]->ai_newmask = 0;
+       }
+
+       /* one-shot task: stop it, forget it and take it off the run queue */
+       ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
+       ldap_pvt_runqueue_stoptask( &slapd_rq, rtask );
+       mdb->bi_index_task = NULL;
+       ldap_pvt_runqueue_remove( &slapd_rq, rtask );
+       ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
+
+       return NULL;
+}
+
+/* Deferred work executed once a config Modify has completed.
+ *
+ * Acts on the pending-work bits in bi_flags, clearing each one it handles:
+ *   MDB_UPD_CONFIG - rewrite the DB_CONFIG file from bi_db_config, or remove
+ *                    the file when no lines are left;
+ *   MDB_DEL_INDEX  - flush attribute index state via mdb_attr_flush();
+ *   MDB_RE_OPEN    - close and reopen the database.
+ *
+ * Returns 0 on success.  If the reopen fails, slapd_shutdown is set to 2, the
+ * reason is written to c->cr_msg and LDAP_OTHER is returned.
+ */
+static int
+mdb_cf_cleanup( ConfigArgs *c )
+{
+       struct mdb_info *info = c->be->be_private;
+       int result;
+
+       if ( info->bi_flags & MDB_UPD_CONFIG ) {
+               if ( !info->bi_db_config ) {
+                       unlink( info->bi_db_config_path );
+               } else {
+                       FILE *out = fopen( info->bi_db_config_path, "w" );
+                       if ( out ) {
+                               int n = 0;
+                               while ( info->bi_db_config[n].bv_val ) {
+                                       fprintf( out, "%s\n", info->bi_db_config[n].bv_val );
+                                       n++;
+                               }
+                               fclose( out );
+                       }
+               }
+               info->bi_flags ^= MDB_UPD_CONFIG;
+       }
+
+       if ( info->bi_flags & MDB_DEL_INDEX ) {
+               mdb_attr_flush( info );
+               info->bi_flags ^= MDB_DEL_INDEX;
+       }
+
+       /* nothing more to do unless a reopen was requested */
+       if ( !( info->bi_flags & MDB_RE_OPEN ) )
+               return 0;
+
+       info->bi_flags ^= MDB_RE_OPEN;
+       result = c->be->bd_info->bi_db_close( c->be, &c->reply );
+       if ( result == 0 )
+               result = c->be->bd_info->bi_db_open( c->be, &c->reply );
+       if ( result == 0 )
+               return 0;
+
+       /* If this fails, we need to restart */
+       slapd_shutdown = 2;
+       snprintf( c->cr_msg, sizeof( c->cr_msg ),
+               "failed to reopen database, rc=%d", result );
+       Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_cf_cleanup)
+               ": %s\n", c->cr_msg, 0, 0 );
+       return LDAP_OTHER;
+}
+
+static int
+mdb_cf_gen( ConfigArgs *c )
+{
+       struct mdb_info *mdb = c->be->be_private;
+       int rc;
+
+       if ( c->op == SLAP_CONFIG_EMIT ) {
+               rc = 0;
+               switch( c->type ) {
+               case MDB_MODE: {
+                       char buf[64];
+                       struct berval bv;
+                       bv.bv_len = snprintf( buf, sizeof(buf), "0%o", mdb->bi_dbenv_mode );
+                       if ( bv.bv_len > 0 && bv.bv_len < sizeof(buf) ) {
+                               bv.bv_val = buf;
+                               value_add_one( &c->rvalue_vals, &bv );
+                       } else {
+                               rc = 1;
+                       }
+                       } break;
+
+               case MDB_CHKPT:
+                       if ( mdb->bi_txn_cp ) {
+                               char buf[64];
+                               struct berval bv;
+                               bv.bv_len = snprintf( buf, sizeof(buf), "%ld %ld",
+                                       (long) mdb->bi_txn_cp_kbyte, (long) mdb->bi_txn_cp_min );
+                               if ( bv.bv_len > 0 && bv.bv_len < sizeof(buf) ) {
+                                       bv.bv_val = buf;
+                                       value_add_one( &c->rvalue_vals, &bv );
+                               } else {
+                                       rc = 1;
+                               }
+                       } else {
+                               rc = 1;
+                       }
+                       break;
+
+               case MDB_CRYPTFILE:
+                       if ( mdb->bi_db_crypt_file ) {
+                               c->value_string = ch_strdup( mdb->bi_db_crypt_file );
+                       } else {
+                               rc = 1;
+                       }
+                       break;
+
+               /* If a crypt file has been set, its contents are copied here.
+                * But we don't want the key to be incorporated here.
+                */
+               case MDB_CRYPTKEY:
+                       if ( !mdb->bi_db_crypt_file && !BER_BVISNULL( &mdb->bi_db_crypt_key )) {
+                               value_add_one( &c->rvalue_vals, &mdb->bi_db_crypt_key );
+                       } else {
+                               rc = 1;
+                       }
+                       break;
+
+               case MDB_DIRECTORY:
+                       if ( mdb->bi_dbenv_home ) {
+                               c->value_string = ch_strdup( mdb->bi_dbenv_home );
+                       } else {
+                               rc = 1;
+                       }
+                       break;
+
+               case MDB_CONFIG:
+                       if ( !( mdb->bi_flags & MDB_IS_OPEN )
+                               && !mdb->bi_db_config )
+                       {
+                               char    buf[SLAP_TEXT_BUFLEN];
+                               FILE *f = fopen( mdb->bi_db_config_path, "r" );
+                               struct berval bv;
+
+                               if ( f ) {
+                                       mdb->bi_flags |= MDB_HAS_CONFIG;
+                                       while ( fgets( buf, sizeof(buf), f )) {
+                                               ber_str2bv( buf, 0, 1, &bv );
+                                               if ( bv.bv_len > 0 && bv.bv_val[bv.bv_len-1] == '\n' ) {
+                                                       bv.bv_len--;
+                                                       bv.bv_val[bv.bv_len] = '\0';
+                                               }
+                                               /* shouldn't need this, but ... */
+                                               if ( bv.bv_len > 0 && bv.bv_val[bv.bv_len-1] == '\r' ) {
+                                                       bv.bv_len--;
+                                                       bv.bv_val[bv.bv_len] = '\0';
+                                               }
+                                               ber_bvarray_add( &mdb->bi_db_config, &bv );
+                                       }
+                                       fclose( f );
+                               }
+                       }
+                       if ( mdb->bi_db_config ) {
+                               int i;
+                               struct berval bv;
+
+                               bv.bv_val = c->log;
+                               for (i=0; !BER_BVISNULL(&mdb->bi_db_config[i]); i++) {
+                                       bv.bv_len = sprintf( bv.bv_val, "{%d}%s", i,
+                                               mdb->bi_db_config[i].bv_val );
+                                       value_add_one( &c->rvalue_vals, &bv );
+                               }
+                       }
+                       if ( !c->rvalue_vals ) rc = 1;
+                       break;
+
+               case MDB_NOSYNC:
+                       if ( mdb->bi_dbenv_xflags & DB_TXN_NOSYNC )
+                               c->value_int = 1;
+                       break;
+                       
+               case MDB_CHECKSUM:
+                       if ( mdb->bi_flags & MDB_CHKSUM )
+                               c->value_int = 1;
+                       break;
+
+               case MDB_INDEX:
+                       mdb_attr_index_unparse( mdb, &c->rvalue_vals );
+                       if ( !c->rvalue_vals ) rc = 1;
+                       break;
+
+               case MDB_LOCKD:
+                       rc = 1;
+                       if ( mdb->bi_lock_detect != DB_LOCK_DEFAULT ) {
+                               int i;
+                               for (i=0; !BER_BVISNULL(&mdb_lockd[i].word); i++) {
+                                       if ( mdb->bi_lock_detect == (u_int32_t)mdb_lockd[i].mask ) {
+                                               value_add_one( &c->rvalue_vals, &mdb_lockd[i].word );
+                                               rc = 0;
+                                               break;
+                                       }
+                               }
+                       }
+                       break;
+
+               case MDB_SSTACK:
+                       c->value_int = mdb->bi_search_stack_depth;
+                       break;
+
+               case MDB_PGSIZE: {
+                               struct mdb_db_pgsize *ps;
+                               char buf[SLAP_TEXT_BUFLEN];
+                               struct berval bv;
+                               int rc = 1;
+
+                               bv.bv_val = buf;
+                               for ( ps = mdb->bi_pagesizes; ps; ps = ps->bdp_next ) {
+                                       bv.bv_len = sprintf( buf, "%s %d", ps->bdp_name.bv_val,
+                                               ps->bdp_size / 1024 );
+                                       value_add_one( &c->rvalue_vals, &bv );
+                                       rc = 0;
+
+                               }
+                               break;
+                       }
+               }
+               return rc;
+       } else if ( c->op == LDAP_MOD_DELETE ) {
+               rc = 0;
+               switch( c->type ) {
+               case MDB_MODE:
+#if 0
+                       /* FIXME: does it make any sense to change the mode,
+                        * if we don't exec a chmod()? */
+                       mdb->bi_dbenv_mode = SLAPD_DEFAULT_DB_MODE;
+                       break;
+#endif
+
+               /* single-valued no-ops */
+               case MDB_LOCKD:
+               case MDB_SSTACK:
+                       break;
+
+               case MDB_CHKPT:
+                       if ( mdb->bi_txn_cp_task ) {
+                               struct re_s *re = mdb->bi_txn_cp_task;
+                               mdb->bi_txn_cp_task = NULL;
+                               ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
+                               if ( ldap_pvt_runqueue_isrunning( &slapd_rq, re ) )
+                                       ldap_pvt_runqueue_stoptask( &slapd_rq, re );
+                               ldap_pvt_runqueue_remove( &slapd_rq, re );
+                               ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
+                       }
+                       mdb->bi_txn_cp = 0;
+                       break;
+               case MDB_CONFIG:
+                       if ( c->valx < 0 ) {
+                               ber_bvarray_free( mdb->bi_db_config );
+                               mdb->bi_db_config = NULL;
+                       } else {
+                               int i = c->valx;
+                               ch_free( mdb->bi_db_config[i].bv_val );
+                               for (; mdb->bi_db_config[i].bv_val; i++)
+                                       mdb->bi_db_config[i] = mdb->bi_db_config[i+1];
+                       }
+                       mdb->bi_flags |= MDB_UPD_CONFIG;
+                       c->cleanup = mdb_cf_cleanup;
+                       break;
+               /* Doesn't really make sense to change these on the fly;
+                * the entire DB must be dumped and reloaded
+                */
+               case MDB_CRYPTFILE:
+                       if ( mdb->bi_db_crypt_file ) {
+                               ch_free( mdb->bi_db_crypt_file );
+                               mdb->bi_db_crypt_file = NULL;
+                       }
+                       /* FALLTHRU */
+               case MDB_CRYPTKEY:
+                       if ( !BER_BVISNULL( &mdb->bi_db_crypt_key )) {
+                               ch_free( mdb->bi_db_crypt_key.bv_val );
+                               BER_BVZERO( &mdb->bi_db_crypt_key );
+                       }
+                       break;
+               case MDB_DIRECTORY:
+                       mdb->bi_flags |= MDB_RE_OPEN;
+                       mdb->bi_flags ^= MDB_HAS_CONFIG;
+                       ch_free( mdb->bi_dbenv_home );
+                       mdb->bi_dbenv_home = NULL;
+                       ch_free( mdb->bi_db_config_path );
+                       mdb->bi_db_config_path = NULL;
+                       c->cleanup = mdb_cf_cleanup;
+                       ldap_pvt_thread_pool_purgekey( mdb->bi_dbenv );
+                       break;
+               case MDB_NOSYNC:
+                       mdb->bi_dbenv->set_flags( mdb->bi_dbenv, DB_TXN_NOSYNC, 0 );
+                       break;
+               case MDB_CHECKSUM:
+                       mdb->bi_flags &= ~MDB_CHKSUM;
+                       break;
+               case MDB_INDEX:
+                       if ( c->valx == -1 ) {
+                               int i;
+
+                               /* delete all (FIXME) */
+                               for ( i = 0; i < mdb->bi_nattrs; i++ ) {
+                                       mdb->bi_attrs[i]->ai_indexmask |= MDB_INDEX_DELETING;
+                               }
+                               mdb->bi_flags |= MDB_DEL_INDEX;
+                               c->cleanup = mdb_cf_cleanup;
+
+                       } else {
+                               struct berval bv, def = BER_BVC("default");
+                               char *ptr;
+
+                               for (ptr = c->line; !isspace( (unsigned char) *ptr ); ptr++);
+
+                               bv.bv_val = c->line;
+                               bv.bv_len = ptr - bv.bv_val;
+                               if ( bvmatch( &bv, &def )) {
+                                       mdb->bi_defaultmask = 0;
+
+                               } else {
+                                       int i;
+                                       char **attrs;
+                                       char sep;
+
+                                       sep = bv.bv_val[ bv.bv_len ];
+                                       bv.bv_val[ bv.bv_len ] = '\0';
+                                       attrs = ldap_str2charray( bv.bv_val, "," );
+
+                                       for ( i = 0; attrs[ i ]; i++ ) {
+                                               AttributeDescription *ad = NULL;
+                                               const char *text;
+                                               AttrInfo *ai;
+
+                                               slap_str2ad( attrs[ i ], &ad, &text );
+                                               /* if we got here... */
+                                               assert( ad != NULL );
+
+                                               ai = mdb_attr_mask( mdb, ad );
+                                               /* if we got here... */
+                                               assert( ai != NULL );
+
+                                               ai->ai_indexmask |= MDB_INDEX_DELETING;
+                                               mdb->bi_flags |= MDB_DEL_INDEX;
+                                               c->cleanup = mdb_cf_cleanup;
+                                       }
+
+                                       bv.bv_val[ bv.bv_len ] = sep;
+                                       ldap_charray_free( attrs );
+                               }
+                       }
+                       break;
+               /* doesn't make sense on the fly; the DB file must be
+                * recreated
+                */
+               case MDB_PGSIZE: {
+                               struct mdb_db_pgsize *ps, **prev;
+                               int i;
+
+                               for ( i = 0, prev = &mdb->bi_pagesizes, ps = *prev; ps;
+                                       prev = &ps->bdp_next, ps = ps->bdp_next, i++ ) {
+                                       if ( c->valx == -1 || i == c->valx ) {
+                                               *prev = ps->bdp_next;
+                                               ch_free( ps );
+                                               ps = *prev;
+                                               if ( i == c->valx ) break;
+                                       }
+                               }
+                       }
+                       break;
+               }
+               return rc;
+       }
+
+       switch( c->type ) {
+       case MDB_MODE:
+               if ( ASCII_DIGIT( c->argv[1][0] ) ) {
+                       long mode;
+                       char *next;
+                       errno = 0;
+                       mode = strtol( c->argv[1], &next, 0 );
+                       if ( errno != 0 || next == c->argv[1] || next[0] != '\0' ) {
+                               fprintf( stderr, "%s: "
+                                       "unable to parse mode=\"%s\".\n",
+                                       c->log, c->argv[1] );
+                               return 1;
+                       }
+                       mdb->bi_dbenv_mode = mode;
+
+               } else {
+                       char *m = c->argv[1];
+                       int who, what, mode = 0;
+
+                       if ( strlen( m ) != STRLENOF("-rwxrwxrwx") ) {
+                               return 1;
+                       }
+
+                       if ( m[0] != '-' ) {
+                               return 1;
+                       }
+
+                       m++;
+                       for ( who = 0; who < 3; who++ ) {
+                               for ( what = 0; what < 3; what++, m++ ) {
+                                       if ( m[0] == '-' ) {
+                                               continue;
+                                       } else if ( m[0] != "rwx"[what] ) {
+                                               return 1;
+                                       }
+                                       mode += ((1 << (2 - what)) << 3*(2 - who));
+                               }
+                       }
+                       mdb->bi_dbenv_mode = mode;
+               }
+               break;
+       case MDB_CHKPT: {
+               long    l;
+               mdb->bi_txn_cp = 1;
+               if ( lutil_atolx( &l, c->argv[1], 0 ) != 0 ) {
+                       fprintf( stderr, "%s: "
+                               "invalid kbyte \"%s\" in \"checkpoint\".\n",
+                               c->log, c->argv[1] );
+                       return 1;
+               }
+               mdb->bi_txn_cp_kbyte = l;
+               if ( lutil_atolx( &l, c->argv[2], 0 ) != 0 ) {
+                       fprintf( stderr, "%s: "
+                               "invalid minutes \"%s\" in \"checkpoint\".\n",
+                               c->log, c->argv[2] );
+                       return 1;
+               }
+               mdb->bi_txn_cp_min = l;
+               /* If we're in server mode and time-based checkpointing is enabled,
+                * submit a task to perform periodic checkpoints.
+                */
+               if ((slapMode & SLAP_SERVER_MODE) && mdb->bi_txn_cp_min ) {
+                       struct re_s *re = mdb->bi_txn_cp_task;
+                       if ( re ) {
+                               re->interval.tv_sec = mdb->bi_txn_cp_min * 60;
+                       } else {
+                               if ( c->be->be_suffix == NULL || BER_BVISNULL( &c->be->be_suffix[0] ) ) {
+                                       fprintf( stderr, "%s: "
+                                               "\"checkpoint\" must occur after \"suffix\".\n",
+                                               c->log );
+                                       return 1;
+                               }
+                               ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
+                               mdb->bi_txn_cp_task = ldap_pvt_runqueue_insert( &slapd_rq,
+                                       mdb->bi_txn_cp_min * 60, mdb_checkpoint, mdb,
+                                       LDAP_XSTRING(mdb_checkpoint), c->be->be_suffix[0].bv_val );
+                               ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
+                       }
+               }
+               } break;
+
+       case MDB_CONFIG: {
+               char *ptr = c->line;
+               struct berval bv;
+
+               if ( c->op == SLAP_CONFIG_ADD ) {
+                       ptr += STRLENOF("dbconfig");
+                       while (!isspace((unsigned char)*ptr)) ptr++;
+                       while (isspace((unsigned char)*ptr)) ptr++;
+               }
+
+               if ( mdb->bi_flags & MDB_IS_OPEN ) {
+                       mdb->bi_flags |= MDB_UPD_CONFIG;
+                       c->cleanup = mdb_cf_cleanup;
+               } else {
+               /* If we're just starting up...
+                */
+                       FILE *f;
+                       /* If a DB_CONFIG file exists, or we don't know the path
+                        * to the DB_CONFIG file, ignore these directives
+                        */
+                       if (( mdb->bi_flags & MDB_HAS_CONFIG ) || !mdb->bi_db_config_path )
+                               break;
+                       f = fopen( mdb->bi_db_config_path, "a" );
+                       if ( f ) {
+                               /* FIXME: EBCDIC probably needs special handling */
+                               fprintf( f, "%s\n", ptr );
+                               fclose( f );
+                       }
+               }
+               ber_str2bv( ptr, 0, 1, &bv );
+               ber_bvarray_add( &mdb->bi_db_config, &bv );
+               }
+               break;
+
+       case MDB_CRYPTFILE:
+               rc = lutil_get_filed_password( c->value_string, &mdb->bi_db_crypt_key );
+               if ( rc == 0 ) {
+                       mdb->bi_db_crypt_file = c->value_string;
+               }
+               break;
+
+       /* Cannot set key if file was already set */
+       case MDB_CRYPTKEY:
+               if ( mdb->bi_db_crypt_file ) {
+                       rc = 1;
+               } else {
+                       mdb->bi_db_crypt_key = c->value_bv;
+               }
+               break;
+
+       case MDB_DIRECTORY: {
+               FILE *f;
+               char *ptr, *testpath;
+               int len;
+
+               len = strlen( c->value_string );
+               testpath = ch_malloc( len + STRLENOF(LDAP_DIRSEP) + STRLENOF("DUMMY") + 1 );
+               ptr = lutil_strcopy( testpath, c->value_string );
+               *ptr++ = LDAP_DIRSEP[0];
+               strcpy( ptr, "DUMMY" );
+               f = fopen( testpath, "w" );
+               if ( f ) {
+                       fclose( f );
+                       unlink( testpath );
+               }
+               ch_free( testpath );
+               if ( !f ) {
+                       snprintf( c->cr_msg, sizeof( c->cr_msg ), "%s: invalid path: %s",
+                               c->log, strerror( errno ));
+                       Debug( LDAP_DEBUG_ANY, "%s\n", c->cr_msg, 0, 0 );
+                       return -1;
+               }
+
+               if ( mdb->bi_dbenv_home )
+                       ch_free( mdb->bi_dbenv_home );
+               mdb->bi_dbenv_home = c->value_string;
+
+               /* See if a DB_CONFIG file already exists here */
+               if ( mdb->bi_db_config_path )
+                       ch_free( mdb->bi_db_config_path );
+               mdb->bi_db_config_path = ch_malloc( len +
+                       STRLENOF(LDAP_DIRSEP) + STRLENOF("DB_CONFIG") + 1 );
+               ptr = lutil_strcopy( mdb->bi_db_config_path, mdb->bi_dbenv_home );
+               *ptr++ = LDAP_DIRSEP[0];
+               strcpy( ptr, "DB_CONFIG" );
+
+               f = fopen( mdb->bi_db_config_path, "r" );
+               if ( f ) {
+                       mdb->bi_flags |= MDB_HAS_CONFIG;
+                       fclose(f);
+               }
+               }
+               break;
+
+       case MDB_NOSYNC:
+               if ( c->value_int )
+                       mdb->bi_dbenv_xflags |= DB_TXN_NOSYNC;
+               else
+                       mdb->bi_dbenv_xflags &= ~DB_TXN_NOSYNC;
+               if ( mdb->bi_flags & MDB_IS_OPEN ) {
+                       mdb->bi_dbenv->set_flags( mdb->bi_dbenv, DB_TXN_NOSYNC,
+                               c->value_int );
+               }
+               break;
+
+       case MDB_CHECKSUM:
+               if ( c->value_int )
+                       mdb->bi_flags |= MDB_CHKSUM;
+               else
+                       mdb->bi_flags &= ~MDB_CHKSUM;
+               break;
+
+       case MDB_INDEX:
+               rc = mdb_attr_index_config( mdb, c->fname, c->lineno,
+                       c->argc - 1, &c->argv[1], &c->reply);
+
+               if( rc != LDAP_SUCCESS ) return 1;
+               if (( mdb->bi_flags & MDB_IS_OPEN ) && !mdb->bi_index_task ) {
+                       /* Start the task as soon as we finish here. Set a long
+                        * interval (10 hours) so that it only gets scheduled once.
+                        */
+                       if ( c->be->be_suffix == NULL || BER_BVISNULL( &c->be->be_suffix[0] ) ) {
+                               fprintf( stderr, "%s: "
+                                       "\"index\" must occur after \"suffix\".\n",
+                                       c->log );
+                               return 1;
+                       }
+                       ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
+                       mdb->bi_index_task = ldap_pvt_runqueue_insert( &slapd_rq, 36000,
+                               mdb_online_index, c->be,
+                               LDAP_XSTRING(mdb_online_index), c->be->be_suffix[0].bv_val );
+                       ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
+               }
+               break;
+
+       case MDB_LOCKD:
+               rc = verb_to_mask( c->argv[1], mdb_lockd );
+               if ( BER_BVISNULL(&mdb_lockd[rc].word) ) {
+                       fprintf( stderr, "%s: "
+                               "bad policy (%s) in \"lockDetect <policy>\" line\n",
+                               c->log, c->argv[1] );
+                       return 1;
+               }
+               mdb->bi_lock_detect = (u_int32_t)rc;
+               break;
+
+       case MDB_SSTACK:
+               if ( c->value_int < MINIMUM_SEARCH_STACK_DEPTH ) {
+                       fprintf( stderr,
+               "%s: depth %d too small, using %d\n",
+                       c->log, c->value_int, MINIMUM_SEARCH_STACK_DEPTH );
+                       c->value_int = MINIMUM_SEARCH_STACK_DEPTH;
+               }
+               mdb->bi_search_stack_depth = c->value_int;
+               break;
+
+       case MDB_PGSIZE: {
+               struct mdb_db_pgsize *ps, **prev;
+               int i, s;
+               
+               s = atoi(c->argv[2]);
+               if ( s < 1 || s > 64 ) {
+                       snprintf( c->cr_msg, sizeof( c->cr_msg ),
+                               "%s: size must be > 0 and <= 64: %d",
+                               c->log, s );
+                       Debug( LDAP_DEBUG_ANY, "%s\n", c->cr_msg, 0, 0 );
+                       return -1;
+               }
+               i = strlen(c->argv[1]);
+               ps = ch_malloc( sizeof(struct mdb_db_pgsize) + i + 1 );
+               ps->bdp_next = NULL;
+               ps->bdp_name.bv_len = i;
+               ps->bdp_name.bv_val = (char *)(ps+1);
+               strcpy( ps->bdp_name.bv_val, c->argv[1] );
+               ps->bdp_size = s * 1024;
+               for ( prev = &mdb->bi_pagesizes; *prev; prev = &(*prev)->bdp_next )
+                       ;
+               *prev = ps;
+               }
+               break;
+       }
+       return 0;
+}
+
+/* Hook the MDB configuration into the backend: publish the object class
+ * table on bi and register the config table together with it.  Returns
+ * whatever config_register_schema() reports (0 on success).
+ */
+int mdb_back_init_cf( BackendInfo *bi )
+{
+       bi->bi_cf_ocs = mdbocs;
+
+       return config_register_schema( mdbcfg, mdbocs );
+}
diff --git a/servers/slapd/back-mdb/dbcache.c b/servers/slapd/back-mdb/dbcache.c
new file mode 100644 (file)
index 0000000..fb7a0e1
--- /dev/null
@@ -0,0 +1,119 @@
+/* dbcache.c - manage cache of open databases */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+
+#include <ac/errno.h>
+#include <ac/socket.h>
+#include <ac/string.h>
+#include <ac/time.h>
+#include <sys/stat.h>
+
+#include "slap.h"
+#include "back-mdb.h"
+
+int
+mdb_db_cache(
+       Backend *be,
+       struct berval *name,
+       DB **dbout )
+{
+       int i, flags;
+       int rc;
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       struct mdb_db_info *db;
+       char *file;
+
+       *dbout = NULL;
+
+       for( i=MDB_NDB; i < mdb->bi_ndatabases; i++ ) {
+               if( !ber_bvcmp( &mdb->bi_databases[i]->bdi_name, name) ) {
+                       *dbout = mdb->bi_databases[i]->bdi_db;
+                       return 0;
+               }
+       }
+
+       ldap_pvt_thread_mutex_lock( &mdb->bi_database_mutex );
+
+       /* check again! may have been added by another thread */
+       for( i=MDB_NDB; i < mdb->bi_ndatabases; i++ ) {
+               if( !ber_bvcmp( &mdb->bi_databases[i]->bdi_name, name) ) {
+                       *dbout = mdb->bi_databases[i]->bdi_db;
+                       ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex );
+                       return 0;
+               }
+       }
+
+       if( i >= MDB_INDICES ) {
+               ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex );
+               return -1;
+       }
+
+       db = (struct mdb_db_info *) ch_calloc(1, sizeof(struct mdb_db_info));
+
+       ber_dupbv( &db->bdi_name, name );
+
+       rc = db_create( &db->bdi_db, mdb->bi_dbenv, 0 );
+       if( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "mdb_db_cache: db_create(%s) failed: %s (%d)\n",
+                       mdb->bi_dbenv_home, db_strerror(rc), rc );
+               ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex );
+               ch_free( db );
+               return rc;
+       }
+
+       file = ch_malloc( db->bdi_name.bv_len + sizeof(MDB_SUFFIX) );
+       strcpy( file, db->bdi_name.bv_val );
+       strcpy( file+db->bdi_name.bv_len, MDB_SUFFIX );
+
+#ifdef HAVE_EBCDIC
+       __atoe( file );
+#endif
+       flags = DB_CREATE | DB_THREAD;
+#ifdef DB_AUTO_COMMIT
+       if ( !( slapMode & SLAP_TOOL_QUICK ))
+               flags |= DB_AUTO_COMMIT;
+#endif
+       /* Cannot Truncate when Transactions are in use */
+       if ( (slapMode & (SLAP_TOOL_QUICK|SLAP_TRUNCATE_MODE)) ==
+               (SLAP_TOOL_QUICK|SLAP_TRUNCATE_MODE))
+                       flags |= DB_TRUNCATE;
+
+       rc = DB_OPEN( db->bdi_db,
+               file, NULL /* name */,
+               MDB_INDEXTYPE, mdb->bi_db_opflags | flags, mdb->bi_dbenv_mode );
+
+       ch_free( file );
+
+       if( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "mdb_db_cache: db_open(%s) failed: %s (%d)\n",
+                       name->bv_val, db_strerror(rc), rc );
+               ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex );
+               return rc;
+       }
+
+       mdb->bi_databases[i] = db;
+       mdb->bi_ndatabases = i+1;
+
+       *dbout = db->bdi_db;
+
+       ldap_pvt_thread_mutex_unlock( &mdb->bi_database_mutex );
+       return 0;
+}
diff --git a/servers/slapd/back-mdb/delete.c b/servers/slapd/back-mdb/delete.c
new file mode 100644 (file)
index 0000000..99a9fd4
--- /dev/null
@@ -0,0 +1,601 @@
+/* delete.c - mdb backend delete routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "lutil.h"
+#include "back-mdb.h"
+/*
+ * mdb_delete - backend handler for the LDAP delete operation.
+ *
+ * Looks up op->o_req_ndn inside a transaction, answers with a referral
+ * when the entry is missing (or is glue / a referral and manageDSAit is
+ * not set), checks ACL_WDEL access on the parent's "children" and on the
+ * target's "entry" pseudo-attributes, refuses non-leaf entries, and then
+ * removes the entry from dn2id, the attribute indices and id2entry in a
+ * nested transaction before committing.
+ *
+ * DB_LOCK_DEADLOCK / DB_LOCK_NOTGRANTED from any step jump to the
+ * "retry" label, which aborts the transaction and starts over.
+ *
+ * Every return path sends the result with send_ldap_result(); the
+ * function returns rs->sr_err.
+ */
+int
+mdb_delete( Operation *op, SlapReply *rs )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       Entry   *matched = NULL;        /* set when the target was not found */
+       struct berval   pdn = {0, NULL};        /* parent DN; left empty for the suffix */
+       Entry   *e = NULL;      /* entry being deleted */
+       Entry   *p = NULL;      /* parent entry */
+       EntryInfo       *ei = NULL, *eip = NULL;
+       int             manageDSAit = get_manageDSAit( op );
+       AttributeDescription *children = slap_schema.si_ad_children;
+       AttributeDescription *entry = slap_schema.si_ad_entry;
+       DB_TXN          *ltid = NULL, *lt2;     /* outer and nested transactions */
+       struct mdb_op_info opinfo = {{{ 0 }}};
+       ID      eid;
+
+       DB_LOCK         lock, plock;
+
+       int             num_retries = 0;
+
+       int     rc;
+
+       LDAPControl **preread_ctrl = NULL;
+       LDAPControl *ctrls[SLAP_MAX_RESPONSE_CONTROLS];
+       int num_ctrls = 0;
+
+       int     parent_is_glue = 0;
+       int parent_is_leaf = 0;
+
+#ifdef LDAP_X_TXN
+       int settle = 0;
+#endif
+
+       Debug( LDAP_DEBUG_ARGS, "==> " LDAP_XSTRING(mdb_delete) ": %s\n",
+               op->o_req_dn.bv_val, 0, 0 );
+       /* LDAP transaction support: when the request carries a transaction
+        * specification, the connection's transaction state is checked under
+        * c_mutex.  Unless that state is CONN_TXN_SETTLE, a result is sent
+        * and the function returns without touching the database.
+        */
+#ifdef LDAP_X_TXN
+       if( op->o_txnSpec ) {
+               /* acquire connection lock */
+               ldap_pvt_thread_mutex_lock( &op->o_conn->c_mutex );
+               if( op->o_conn->c_txn == CONN_TXN_INACTIVE ) {
+                       rs->sr_text = "invalid transaction identifier";
+                       rs->sr_err = LDAP_X_TXN_ID_INVALID;
+                       goto txnReturn;
+               } else if( op->o_conn->c_txn == CONN_TXN_SETTLE ) {
+                       settle=1;
+                       goto txnReturn;
+               }
+
+               if( op->o_conn->c_txn_backend == NULL ) {
+                       op->o_conn->c_txn_backend = op->o_bd;
+
+               } else if( op->o_conn->c_txn_backend != op->o_bd ) {
+                       rs->sr_text = "transaction cannot span multiple database contexts";
+                       rs->sr_err = LDAP_AFFECTS_MULTIPLE_DSAS;
+                       goto txnReturn;
+               }
+
+               /* insert operation into transaction */
+
+               rs->sr_text = "transaction specified";
+               rs->sr_err = LDAP_X_TXN_SPECIFY_OKAY;
+
+txnReturn:
+               /* release connection lock */
+               ldap_pvt_thread_mutex_unlock( &op->o_conn->c_mutex );
+
+               if( !settle ) {
+                       send_ldap_result( op, rs );
+                       return rs->sr_err;
+               }
+       }
+#endif
+       /* the response control list starts out empty */
+       ctrls[num_ctrls] = 0;
+
+       /* allocate CSN, but only if the operation does not carry one yet */
+       if ( BER_BVISNULL( &op->o_csn ) ) {
+               struct berval csn;
+               char csnbuf[LDAP_PVT_CSNSTR_BUFSIZE];
+
+               csn.bv_val = csnbuf;
+               csn.bv_len = sizeof(csnbuf);
+               slap_get_csn( op, &csn, 1 );
+       }
+       /* The block below is never entered from above (if(0)); it is reached
+        * only through "goto retry".  It releases the entries held by the
+        * failed attempt, aborts its transaction, unlinks opinfo from
+        * op->o_extra and restores op->o_do_not_cache, then falls through
+        * to begin a fresh transaction after a backoff.
+        */
+       if( 0 ) {
+retry: /* transaction retry */
+               if( e != NULL ) {
+                       mdb_unlocked_cache_return_entry_w(&mdb->bi_cache, e);
+                       e = NULL;
+               }
+               if( p != NULL ) {
+                       mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+                       p = NULL;
+               }
+               Debug( LDAP_DEBUG_TRACE,
+                       "==> " LDAP_XSTRING(mdb_delete) ": retrying...\n",
+                       0, 0, 0 );
+               rs->sr_err = TXN_ABORT( ltid );
+               ltid = NULL;
+               LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+               opinfo.boi_oe.oe_key = NULL;
+               op->o_do_not_cache = opinfo.boi_acl_cache;
+               if( rs->sr_err != 0 ) {
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "internal error";
+                       goto return_results;
+               }
+               if ( op->o_abandon ) {
+                       rs->sr_err = SLAPD_ABANDON;
+                       goto return_results;
+               }
+               parent_is_glue = 0;
+               parent_is_leaf = 0;
+               mdb_trans_backoff( ++num_retries );
+       }
+
+       /* begin transaction */
+       rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, NULL, &ltid, 
+               mdb->bi_db_opflags );
+       rs->sr_text = NULL;
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_delete) ": txn_begin failed: "
+                       "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+       /* link opinfo (keyed by mdb, carrying the transaction) into
+        * op->o_extra; boi_acl_cache saves op->o_do_not_cache so that the
+        * retry path can restore it
+        */
+       opinfo.boi_oe.oe_key = mdb;
+       opinfo.boi_txn = ltid;
+       opinfo.boi_err = 0;
+       opinfo.boi_acl_cache = op->o_do_not_cache;
+       LDAP_SLIST_INSERT_HEAD( &op->o_extra, &opinfo.boi_oe, oe_next );
+       /* pdn is only filled in when the target is not the database suffix */
+       if ( !be_issuffix( op->o_bd, &op->o_req_ndn ) ) {
+               dnParent( &op->o_req_ndn, &pdn );
+       }
+
+       /* get entry; matched=1 asks mdb_dn2entry() for ancestor info when
+        * the DN itself is not found */
+       rs->sr_err = mdb_dn2entry( op, ltid, &op->o_req_ndn, &ei, 1,
+               &lock );
+
+       switch( rs->sr_err ) {
+       case 0:
+       case DB_NOTFOUND:
+               break;
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               goto retry;
+       case LDAP_BUSY:
+               rs->sr_text = "ldap server busy";
+               goto return_results;
+       default:
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       if ( rs->sr_err == 0 ) {
+               e = ei->bei_e;
+               eip = ei->bei_parent;
+       } else {        /* NOTE(review): ei is dereferenced unchecked here; assumes mdb_dn2entry() never leaves it NULL on DB_NOTFOUND - confirm */
+               matched = ei->bei_e;
+       }
+
+       /* FIXME : dn2entry() should return non-glue entry */
+       if ( e == NULL || ( !manageDSAit && is_entry_glue( e ))) {
+               Debug( LDAP_DEBUG_ARGS,
+                       "<=- " LDAP_XSTRING(mdb_delete) ": no such object %s\n",
+                       op->o_req_dn.bv_val, 0, 0);
+
+               if ( matched != NULL ) {
+                       rs->sr_matched = ch_strdup( matched->e_dn );
+                       rs->sr_ref = is_entry_referral( matched )
+                               ? get_entry_referrals( op, matched )
+                               : NULL;
+                       mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, matched);
+                       matched = NULL;
+
+               } else {
+                       rs->sr_ref = referral_rewrite( default_referral, NULL,
+                                       &op->o_req_dn, LDAP_SCOPE_DEFAULT );
+               }
+
+               rs->sr_err = LDAP_REFERRAL;
+               rs->sr_flags = REP_MATCHED_MUSTBEFREED | REP_REF_MUSTBEFREED;
+               goto return_results;
+       }
+       /* look the parent up by ID through the target's parent EntryInfo;
+        * p becomes its Entry when eip is still set afterwards */
+       rc = mdb_cache_find_id( op, ltid, eip->bei_id, &eip, 0, &plock );
+       switch( rc ) {
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               goto retry;
+       case 0:
+       case DB_NOTFOUND:
+               break;
+       default:
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+       if ( eip ) p = eip->bei_e;
+
+       if ( pdn.bv_len != 0 ) {
+               if( p == NULL || !bvmatch( &pdn, &p->e_nname )) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<=- " LDAP_XSTRING(mdb_delete) ": parent "
+                               "does not exist\n", 0, 0, 0 );
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "could not locate parent of entry";
+                       goto return_results;
+               }
+
+               /* check parent for "children" acl */
+               rs->sr_err = access_allowed( op, p,
+                       children, NULL, ACL_WDEL, NULL );
+
+               if ( !rs->sr_err  ) {
+                       /* a lock failure recorded in opinfo during the ACL
+                        * check means retry, not access denied */
+                       switch( opinfo.boi_err ) {
+                       case DB_LOCK_DEADLOCK:
+                       case DB_LOCK_NOTGRANTED:
+                               goto retry;
+                       }
+
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<=- " LDAP_XSTRING(mdb_delete) ": no write "
+                               "access to parent\n", 0, 0, 0 );
+                       rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+                       rs->sr_text = "no write access to parent";
+                       goto return_results;
+               }
+
+       } else {
+               /* no parent, must be root to delete */
+               if( ! be_isroot( op ) ) {
+                       if ( be_issuffix( op->o_bd, (struct berval *)&slap_empty_bv )
+                               || be_shadow_update( op ) ) {
+                               p = (Entry *)&slap_entry_root;
+
+                               /* check parent for "children" acl */
+                               rs->sr_err = access_allowed( op, p,
+                                       children, NULL, ACL_WDEL, NULL );
+
+                               p = NULL;
+
+                               if ( !rs->sr_err  ) {
+                                       switch( opinfo.boi_err ) {
+                                       case DB_LOCK_DEADLOCK:
+                                       case DB_LOCK_NOTGRANTED:
+                                               goto retry;
+                                       }
+
+                                       Debug( LDAP_DEBUG_TRACE,
+                                               "<=- " LDAP_XSTRING(mdb_delete)
+                                               ": no access to parent\n",
+                                               0, 0, 0 );
+                                       rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+                                       rs->sr_text = "no write access to parent";
+                                       goto return_results;
+                               }
+
+                       } else {
+                               Debug( LDAP_DEBUG_TRACE,
+                                       "<=- " LDAP_XSTRING(mdb_delete)
+                                       ": no parent and not root\n", 0, 0, 0 );
+                               rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+                               goto return_results;
+                       }
+               }
+       }
+       /* assertion control: fail unless the entry matches the filter */
+       if ( get_assert( op ) &&
+               ( test_filter( op, e, get_assertion( op )) != LDAP_COMPARE_TRUE ))
+       {
+               rs->sr_err = LDAP_ASSERTION_FAILED;
+               goto return_results;
+       }
+       /* check the target itself for delete access on "entry" */
+       rs->sr_err = access_allowed( op, e,
+               entry, NULL, ACL_WDEL, NULL );
+
+       if ( !rs->sr_err  ) {
+               switch( opinfo.boi_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+
+               Debug( LDAP_DEBUG_TRACE,
+                       "<=- " LDAP_XSTRING(mdb_delete) ": no write access "
+                       "to entry\n", 0, 0, 0 );
+               rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+               rs->sr_text = "no write access to entry";
+               goto return_results;
+       }
+
+       if ( !manageDSAit && is_entry_referral( e ) ) {
+               /* entry is a referral, don't allow delete */
+               rs->sr_ref = get_entry_referrals( op, e );
+
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_delete) ": entry is referral\n",
+                       0, 0, 0 );
+
+               rs->sr_err = LDAP_REFERRAL;
+               rs->sr_matched = ch_strdup( e->e_name.bv_val );
+               rs->sr_flags = REP_MATCHED_MUSTBEFREED | REP_REF_MUSTBEFREED;
+               goto return_results;
+       }
+
+       /* pre-read: reserve a slot in ctrls[] on the first pass (the slot
+        * is reused after a retry) and build the control from the entry as
+        * it is before deletion */
+       if( op->o_preread ) {
+               if( preread_ctrl == NULL ) {
+                       preread_ctrl = &ctrls[num_ctrls++];
+                       ctrls[num_ctrls] = NULL;
+               }
+               if( slap_read_controls( op, rs, e,
+                       &slap_pre_read_bv, preread_ctrl ) )
+               {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<=- " LDAP_XSTRING(mdb_delete) ": pre-read "
+                               "failed!\n", 0, 0, 0 );
+                       if ( op->o_preread & SLAP_CONTROL_CRITICAL ) {
+                               /* FIXME: is it correct to abort
+                                * operation if control fails? */
+                               goto return_results;
+                       }
+               }
+       }
+
+       /* nested transaction: lt2 is begun with ltid passed as its parent
+        * argument and carries the actual database updates */
+       rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, ltid, &lt2, 
+               mdb->bi_db_opflags );
+       rs->sr_text = NULL;
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_delete) ": txn_begin(2) failed: "
+                       "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       MDB_LOG_PRINTF( mdb->bi_dbenv, lt2, "slapd Starting delete %s(%d)",
+               e->e_nname.bv_val, e->e_id );
+
+       /* Can't do it if we have kids: only DB_NOTFOUND lets the delete
+        * proceed, a return of 0 is reported as a non-leaf error */
+       rs->sr_err = mdb_cache_children( op, lt2, e );
+       if( rs->sr_err != DB_NOTFOUND ) {
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               case 0:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "<=- " LDAP_XSTRING(mdb_delete)
+                               ": non-leaf %s\n",
+                               op->o_req_dn.bv_val, 0, 0);
+                       rs->sr_err = LDAP_NOT_ALLOWED_ON_NONLEAF;
+                       rs->sr_text = "subordinate objects must be deleted first";
+                       break;
+               default:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "<=- " LDAP_XSTRING(mdb_delete)
+                               ": has_children failed: %s (%d)\n",
+                               db_strerror(rs->sr_err), rs->sr_err, 0 );
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "internal error";
+               }
+               goto return_results;
+       }
+
+       /* delete from dn2id */
+       rs->sr_err = mdb_dn2id_delete( op, lt2, eip, e );
+       if ( rs->sr_err != 0 ) {
+               Debug(LDAP_DEBUG_TRACE,
+                       "<=- " LDAP_XSTRING(mdb_delete) ": dn2id failed: "
+                       "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               rs->sr_text = "DN index delete failed";
+               rs->sr_err = LDAP_OTHER;
+               goto return_results;
+       }
+
+       /* delete indices for old attributes */
+       rs->sr_err = mdb_index_entry_del( op, lt2, e );
+       if ( rs->sr_err != LDAP_SUCCESS ) {
+               Debug(LDAP_DEBUG_TRACE,
+                       "<=- " LDAP_XSTRING(mdb_delete) ": index failed: "
+                       "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               rs->sr_text = "entry index delete failed";
+               rs->sr_err = LDAP_OTHER;
+               goto return_results;
+       }
+
+       /* fixup delete CSN: unless the database is a shadow, add the
+        * operation's CSN to the entryCSN index (SLAP_INDEX_ADD_OP) */
+       if ( !SLAP_SHADOW( op->o_bd )) {
+               struct berval vals[2];
+
+               assert( !BER_BVISNULL( &op->o_csn ) );
+               vals[0] = op->o_csn;
+               BER_BVZERO( &vals[1] );
+               rs->sr_err = mdb_index_values( op, lt2, slap_schema.si_ad_entryCSN,
+                       vals, 0, SLAP_INDEX_ADD_OP );
+       if ( rs->sr_err != LDAP_SUCCESS ) {
+                       switch( rs->sr_err ) {
+                       case DB_LOCK_DEADLOCK:
+                       case DB_LOCK_NOTGRANTED:
+                               goto retry;
+                       }
+                       rs->sr_text = "entryCSN index update failed";
+                       rs->sr_err = LDAP_OTHER;
+                       goto return_results;
+               }
+       }
+
+       /* delete from id2entry */
+       rs->sr_err = mdb_id2entry_delete( op->o_bd, lt2, e );
+       if ( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<=- " LDAP_XSTRING(mdb_delete) ": id2entry failed: "
+                       "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               rs->sr_text = "entry delete failed";
+               rs->sr_err = LDAP_OTHER;
+               goto return_results;
+       }
+       /* Record the parent's glue/leaf status; return_results combines
+        * both flags to set op->o_delete_glue_parent.
+        * NOTE(review): parent_is_leaf is set when mdb_cache_children()
+        * returns 0, the same value treated as "has children" for the
+        * target entry above - confirm the intended semantics.
+        */
+       if ( pdn.bv_len != 0 ) {
+               parent_is_glue = is_entry_glue(p);
+               rs->sr_err = mdb_cache_children( op, lt2, p );
+               if ( rs->sr_err != DB_NOTFOUND ) {
+                       switch( rs->sr_err ) {
+                       case DB_LOCK_DEADLOCK:
+                       case DB_LOCK_NOTGRANTED:
+                               goto retry;
+                       case 0:
+                               break;
+                       default:
+                               Debug(LDAP_DEBUG_ARGS,
+                                       "<=- " LDAP_XSTRING(mdb_delete)
+                                       ": has_children failed: %s (%d)\n",
+                                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+                               rs->sr_err = LDAP_OTHER;
+                               rs->sr_text = "internal error";
+                               goto return_results;
+                       }
+                       parent_is_leaf = 1;
+               }
+               mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+               p = NULL;
+       }
+
+       MDB_LOG_PRINTF( mdb->bi_dbenv, lt2, "slapd Commit1 delete %s(%d)",
+               e->e_nname.bv_val, e->e_id );
+       /* commit the nested transaction; the outer ltid is still open */
+       if ( TXN_COMMIT( lt2, 0 ) != 0 ) {
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "txn_commit(2) failed";
+               goto return_results;
+       }
+       /* keep the ID for the trace message printed after the commit */
+       eid = e->e_id;
+
+#if 0  /* Do we want to reclaim deleted IDs? */
+       ldap_pvt_thread_mutex_lock( &mdb->bi_lastid_mutex );
+       if ( e->e_id == mdb->bi_lastid ) {
+               mdb_last_id( op->o_bd, ltid );
+       }
+       ldap_pvt_thread_mutex_unlock( &mdb->bi_lastid_mutex );
+#endif
+       /* no-op request: abort the outer transaction instead of committing
+        * and report LDAP_X_NO_OPERATION; otherwise remove the entry from
+        * the cache and commit
+        */
+       if( op->o_noop ) {
+               if ( ( rs->sr_err = TXN_ABORT( ltid ) ) != 0 ) {
+                       rs->sr_text = "txn_abort (no-op) failed";
+               } else {
+                       rs->sr_err = LDAP_X_NO_OPERATION;
+                       ltid = NULL;
+                       goto return_results;
+               }
+       } else {
+
+               MDB_LOG_PRINTF( mdb->bi_dbenv, ltid, "slapd Cache delete %s(%d)",
+                       e->e_nname.bv_val, e->e_id );
+
+               rc = mdb_cache_delete( mdb, e, ltid, &lock );
+               switch( rc ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+
+               rs->sr_err = TXN_COMMIT( ltid, 0 );
+       }
+       ltid = NULL;
+       LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+       opinfo.boi_oe.oe_key = NULL;
+
+       MDB_LOG_PRINTF( mdb->bi_dbenv, NULL, "slapd Committed delete %s(%d)",
+               e->e_nname.bv_val, e->e_id );
+
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_delete) ": txn_%s failed: %s (%d)\n",
+                       op->o_noop ? "abort (no-op)" : "commit",
+                       db_strerror(rs->sr_err), rs->sr_err );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "commit failed";
+
+               goto return_results;
+       }
+
+       Debug( LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_delete) ": deleted%s id=%08lx dn=\"%s\"\n",
+               op->o_noop ? " (no-op)" : "",
+               eid, op->o_req_dn.bv_val );
+       rs->sr_err = LDAP_SUCCESS;
+       rs->sr_text = NULL;
+       if( num_ctrls ) rs->sr_ctrls = ctrls;
+       /* common exit for success and failure: release the entries still
+        * held, abort a transaction that is still open, unlink opinfo and
+        * send the result
+        */
+return_results:
+       if ( rs->sr_err == LDAP_SUCCESS && parent_is_glue && parent_is_leaf ) {
+               op->o_delete_glue_parent = 1;
+       }
+
+       if ( p )
+               mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+
+       /* free entry */
+       if( e != NULL ) {
+               if ( rs->sr_err == LDAP_SUCCESS ) {
+                       /* Free the EntryInfo and the Entry */
+                       mdb_cache_entryinfo_lock( BEI(e) );
+                       mdb_cache_delete_cleanup( &mdb->bi_cache, BEI(e) );
+               } else {
+                       mdb_unlocked_cache_return_entry_w(&mdb->bi_cache, e);
+               }
+       }
+
+       if( ltid != NULL ) {
+               TXN_ABORT( ltid );
+       }
+       if ( opinfo.boi_oe.oe_key ) {
+               LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+       }
+
+       send_ldap_result( op, rs );
+       slap_graduate_commit_csn( op );
+       /* release the pre-read control and its value through
+        * op->o_tmpmemctx once the result has been sent */
+       if( preread_ctrl != NULL && (*preread_ctrl) != NULL ) {
+               slap_sl_free( (*preread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+               slap_sl_free( *preread_ctrl, op->o_tmpmemctx );
+       }
+       /* after a successful delete, run a checkpoint when
+        * bi_txn_cp_kbyte is non-zero */
+       if( rs->sr_err == LDAP_SUCCESS && mdb->bi_txn_cp_kbyte ) {
+               TXN_CHECKPOINT( mdb->bi_dbenv,
+                       mdb->bi_txn_cp_kbyte, mdb->bi_txn_cp_min, 0 );
+       }
+       return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/dn2entry.c b/servers/slapd/back-mdb/dn2entry.c
new file mode 100644 (file)
index 0000000..fb9e18c
--- /dev/null
@@ -0,0 +1,84 @@
+/* dn2entry.c - routines to deal with the dn2id / id2entry glue */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+
+/*
+ * mdb_dn2entry - resolve a DN to its EntryInfo through the cache/indexes.
+ *
+ * On success (return 0) *e is the EntryInfo for dn.  When the DN cannot
+ * be found (DB_NOTFOUND) and matched is non-zero, *e instead receives the
+ * info for the closest ancestor that the lookup reached (possibly NULL).
+ * In all other cases *e is left NULL.  The return value is the status of
+ * the last lookup that failed, or 0.
+ */
+
+int
+mdb_dn2entry(
+       Operation *op,
+       DB_TXN *tid,
+       struct berval *dn,
+       EntryInfo **e,
+       int matched,
+       DB_LOCK *lock )
+{
+       EntryInfo *info = NULL;
+       int status, sub;
+
+       Debug(LDAP_DEBUG_TRACE, "mdb_dn2entry(\"%s\")\n",
+               dn->bv_val, 0, 0 );
+
+       *e = NULL;
+
+       status = mdb_cache_find_ndn( op, tid, dn, &info );
+
+       if ( status == 0 ) {
+               /* the DN resolved to an ID: load that entry */
+               status = mdb_cache_find_id( op, tid, info->bei_id, &info,
+                       ID_LOCKED, lock );
+               if ( status == 0 ) {
+                       *e = info;
+                       return 0;
+               }
+               if ( !matched || status != DB_NOTFOUND ) {
+                       return status;
+               }
+
+               /* always return EntryInfo: step up to the parent if any */
+               if ( info->bei_parent != NULL ) {
+                       info = info->bei_parent;
+                       sub = mdb_cache_find_id( op, tid, info->bei_id, &info,
+                               0, lock );
+                       if ( sub != 0 ) {
+                               status = sub;
+                       }
+               }
+               *e = info;
+               return status;
+       }
+
+       /* the DN lookup itself failed */
+       if ( !matched || status != DB_NOTFOUND ) {
+               if ( info != NULL ) {
+                       mdb_cache_entryinfo_unlock( info );
+               }
+               return status;
+       }
+
+       /* Set the return value, whether we have its entry or not. */
+       *e = info;
+       if ( info == NULL ) {
+               return status;
+       }
+
+       if ( info->bei_id ) {
+               sub = mdb_cache_find_id( op, tid, info->bei_id, &info,
+                       ID_LOCKED, lock );
+               if ( sub != 0 ) {
+                       status = sub;
+               }
+       } else {
+               /* ID 0: nothing to load, hand back an empty lock */
+               mdb_cache_entryinfo_unlock( info );
+               memset( lock, 0, sizeof( *lock ));
+               lock->mode = DB_LOCK_NG;
+       }
+
+       return status;
+}
diff --git a/servers/slapd/back-mdb/dn2id.c b/servers/slapd/back-mdb/dn2id.c
new file mode 100644 (file)
index 0000000..1e9ca02
--- /dev/null
@@ -0,0 +1,844 @@
+/* dn2id.c - routines to deal with the dn2id index */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+#include "idl.h"
+#include "lutil.h"
+
+/* Management routines for a hierarchically structured database.
+ *
+ * Instead of a ldbm-style dn2id database, we use a hierarchical one. Each
+ * entry in this database is a struct diskNode, keyed by entryID and with
+ * the data containing the RDN and entryID of the node's children. We use
+ * a B-Tree with sorted duplicates to store all the children of a node under
+ * the same key. Also, the first item under the key contains the entry's own
+ * rdn and the ID of the node's parent, to allow bottom-up tree traversal as
+ * well as top-down. To keep this info first in the list, the high bit of all
+ * subsequent nrdnlen's is always set. This means we can only accommodate
+ * RDNs up to length 32767, but that's fine since full DNs are already
+ * restricted to 8192.
+ *
+ * The diskNode is a variable length structure. This definition is not
+ * directly usable for in-memory manipulation.
+ */
+typedef struct diskNode {
+       /* Length of nrdn, stored high byte first: [0] holds the upper bits
+        * and [1] the low 8 bits. Bit 0x80 of [0] is set on the records
+        * stored under the parent's key and clear on the node's own
+        * (first) record.
+        */
+       unsigned char nrdnlen[2];
+       /* Normalized RDN, written NUL-terminated; its real length is
+        * given by nrdnlen, so the members below do not sit at their
+        * declared offsets.
+        */
+       char nrdn[1];
+       /* RDN as written NUL-terminated right after nrdn */
+       char rdn[1];                        /* variable placement */
+       /* ID stored after the two strings in on-disk form (MDB_ID2DISK) */
+       unsigned char entryID[sizeof(ID)];  /* variable placement */
+} diskNode;
+
+/* Duplicate-data comparator for the items stored under one dn2id key.
+ * Items are ordered by their two nrdnlen bytes first (high byte, then
+ * low byte); items with equal length bytes are ordered by strcmp() of
+ * the normalized RDN. The db handle is not used.
+ */
+int
+mdb_dup_compare(
+       DB *db,
+       const DBT *usrkey,
+       const DBT *curkey
+)
+{
+       const diskNode *lhs = (const diskNode *)usrkey->data;
+       const diskNode *rhs = (const diskNode *)curkey->data;
+       int i;
+
+       /* the record data is not aligned, so the length is compared
+        * one byte at a time rather than as a single integer
+        */
+       for ( i = 0; i < 2; i++ ) {
+               int diff = lhs->nrdnlen[i] - rhs->nrdnlen[i];
+               if ( diff ) return diff;
+       }
+
+       return strcmp( lhs->nrdn, rhs->nrdn );
+}
+
+/* Build the full DN strings (e_name and e_nname) of an entry by joining
+ * the RDNs found along its EntryInfo chain, from BEI(e) up through the
+ * bei_parent links until a node with a zero bei_id (or no node) is met.
+ *
+ * checkit selects what is done:
+ *   0      - always build the strings.
+ *   1      - only test for a stale DN: return 1 if some node on the
+ *            chain has a larger bei_modrdns than the entry itself,
+ *            0 otherwise. Nothing is modified.
+ *   other  - (2 in practice) if the DN is stale, free the old strings
+ *            and rebuild them; if it is not stale, do nothing.
+ *
+ * Returns 0 except for the checkit == 1 mismatch case described above.
+ * An entry with e_id == 0 is left untouched.
+ */
+int mdb_fix_dn(
+       Entry *e,
+       int checkit )
+{
+       EntryInfo *node;
+       char *out, *nout;
+       int dnlen = 0, ndnlen = 0;
+       int newest = 0;
+
+       if ( e->e_id == 0 )
+               return 0;
+
+       /* total up the room needed: each RDN plus one separator byte,
+        * and remember the highest modrdn counter seen on the way up
+        */
+       node = BEI(e);
+       while ( node && node->bei_id ) {
+               dnlen += node->bei_rdn.bv_len + 1;
+               ndnlen += node->bei_nrdn.bv_len + 1;
+               if ( node->bei_modrdns > newest )
+                       newest = node->bei_modrdns;
+               node = node->bei_parent;
+       }
+
+       if ( checkit ) {
+               /* entry is at least as recent as every ancestor: DN is good */
+               if ( BEI(e)->bei_modrdns >= newest )
+                       return 0;
+
+               /* stale; in test-only mode just report it */
+               if ( checkit == 1 )
+                       return 1;
+
+               /* otherwise discard the outdated strings before rebuilding */
+               free( e->e_name.bv_val );
+               free( e->e_nname.bv_val );
+       }
+
+       e->e_name.bv_len = dnlen - 1;
+       e->e_nname.bv_len = ndnlen - 1;
+       e->e_name.bv_val = ch_malloc(dnlen);
+       e->e_nname.bv_val = ch_malloc(ndnlen);
+
+       out = e->e_name.bv_val;
+       nout = e->e_nname.bv_val;
+       node = BEI(e);
+       while ( node && node->bei_id ) {
+               out = lutil_strcopy(out, node->bei_rdn.bv_val);
+               nout = lutil_strcopy(nout, node->bei_nrdn.bv_val);
+               if ( node->bei_parent != NULL ) {
+                       *out++ = ',';
+                       *nout++ = ',';
+               }
+               node = node->bei_parent;
+       }
+       BEI(e)->bei_modrdns = newest;
+
+       /* the byte before the write position becomes the terminator */
+       if ( out > e->e_name.bv_val ) out[-1] = '\0';
+       if ( nout > e->e_nname.bv_val ) nout[-1] = '\0';
+
+       return 0;
+}
+
+/* We add two elements to the DN2ID database - a data item under the parent's
+ * entryID containing the child's RDN and entryID, and an item under the
+ * child's entryID containing the parent's entryID.
+ *
+ *   op  - current operation; supplies the backend handle and the
+ *         temporary allocator used for the record buffer
+ *   txn - transaction handed to every db->put
+ *   eip - EntryInfo of the parent of the entry being added
+ *   e   - the entry being added
+ *
+ * Returns 0 on success, otherwise the error from the db->put that failed.
+ * The result of the dummy root node put is deliberately ignored.
+ */
+int
+mdb_dn2id_add(
+       Operation       *op,
+       DB_TXN *txn,
+       EntryInfo       *eip,
+       Entry           *e )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB *db = mdb->bi_dn2id->bdi_db;
+       DBT             key, data;
+       ID              nid;
+       int             rc, rlen, nrlen;
+       diskNode *d;
+       char *ptr;
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_dn2id_add 0x%lx: \"%s\"\n",
+               e->e_id, e->e_ndn, 0 );
+
+       /* A zero result from dn_rdnlen() is handled by treating the whole
+        * DN as the RDN.
+        */
+       nrlen = dn_rdnlen( op->o_bd, &e->e_nname );
+       if (nrlen) {
+               rlen = dn_rdnlen( op->o_bd, &e->e_name );
+       } else {
+               nrlen = e->e_nname.bv_len;
+               rlen = e->e_name.bv_len;
+       }
+
+       /* Build the record: length bytes, nrdn + NUL, rdn + NUL, then an ID.
+        * sizeof(diskNode) already accounts for the length bytes, the two
+        * NULs and the ID.
+        */
+       d = op->o_tmpalloc(sizeof(diskNode) + rlen + nrlen, op->o_tmpmemctx);
+       d->nrdnlen[1] = nrlen & 0xff;
+       /* high bit marks this as a child record under the parent's key */
+       d->nrdnlen[0] = (nrlen >> 8) | 0x80;
+       ptr = lutil_strncopy( d->nrdn, e->e_nname.bv_val, nrlen );
+       *ptr++ = '\0';
+       ptr = lutil_strncopy( ptr, e->e_name.bv_val, rlen );
+       *ptr++ = '\0';
+       /* ptr now addresses the trailing ID slot; it is reused further down */
+       MDB_ID2DISK( e->e_id, ptr );
+
+       /* First record is keyed by the parent's ID */
+       DBTzero(&key);
+       DBTzero(&data);
+       key.size = sizeof(ID);
+       key.flags = DB_DBT_USERMEM;
+       MDB_ID2DISK( eip->bei_id, &nid );
+
+       key.data = &nid;
+
+       /* Need to make dummy root node once. Subsequent attempts
+        * will fail harmlessly.
+        */
+       if ( eip->bei_id == 0 ) {
+               diskNode dummy = {{0, 0}, "", "", ""};
+               data.data = &dummy;
+               data.size = sizeof(diskNode);
+               data.flags = DB_DBT_USERMEM;
+
+               db->put( db, txn, &key, &data, DB_NODUPDATA );
+       }
+
+       data.data = d;
+       data.size = sizeof(diskNode) + rlen + nrlen;
+       data.flags = DB_DBT_USERMEM;
+
+       rc = db->put( db, txn, &key, &data, DB_NODUPDATA );
+
+       if (rc == 0) {
+               /* Reuse the same buffer for the entry's own record: the key
+                * (still pointing at nid) becomes the entry's ID, the
+                * trailing ID becomes the parent's, and the child flag bit
+                * is cleared.
+                */
+               MDB_ID2DISK( e->e_id, &nid );
+               MDB_ID2DISK( eip->bei_id, ptr );
+               d->nrdnlen[0] ^= 0x80;
+
+               rc = db->put( db, txn, &key, &data, DB_NODUPDATA );
+       }
+
+       /* Update all parents' IDL cache entries */
+       if ( rc == 0 && mdb->bi_idl_cache_size ) {
+               /* Cache keys are one prefix byte followed by an ID. tmp[1]
+                * holds the ID and the prefix is written into the last byte
+                * of tmp[0], so the two are contiguous.
+                */
+               ID tmp[2];
+               char *ptr = ((char *)&tmp[1])-1;
+               key.data = ptr;
+               key.size = sizeof(ID)+1;
+               tmp[1] = eip->bei_id;
+               *ptr = DN_ONE_PREFIX;
+               mdb_idl_cache_add_id( mdb, db, &key, e->e_id );
+               if ( eip->bei_parent ) {
+                       *ptr = DN_SUBTREE_PREFIX;
+                       /* walk upward while the current node's parent has a
+                        * non-zero ID
+                        */
+                       for (; eip && eip->bei_parent->bei_id; eip = eip->bei_parent) {
+                               tmp[1] = eip->bei_id;
+                               mdb_idl_cache_add_id( mdb, db, &key, e->e_id );
+                       }
+                       /* Handle DB with empty suffix */
+                       if ( !op->o_bd->be_suffix[0].bv_len && eip ) {
+                               tmp[1] = eip->bei_id;
+                               mdb_idl_cache_add_id( mdb, db, &key, e->e_id );
+                       }
+               }
+       }
+
+       op->o_tmpfree( d, op->o_tmpmemctx );
+       Debug( LDAP_DEBUG_TRACE, "<= mdb_dn2id_add 0x%lx: %d\n", e->e_id, rc, 0 );
+
+       return rc;
+}
+
+/* Remove an entry from the DN2ID database: first the child record stored
+ * under the parent's ID, then the first record stored under the entry's
+ * own ID. On success the entry is also removed from the IDL cache.
+ *
+ *   op  - current operation; supplies the backend handle and the
+ *         temporary allocator
+ *   txn - transaction used to open the cursor
+ *   eip - EntryInfo of the parent of the entry being removed
+ *   e   - the entry being removed; its normalized RDN is taken from
+ *         BEI(e)->bei_nrdn
+ *
+ * Returns 0 on success, DB_NOTFOUND when the record found under the
+ * parent does not match the entry's nrdn, or the error from the failing
+ * cursor call.
+ */
+int
+mdb_dn2id_delete(
+       Operation       *op,
+       DB_TXN *txn,
+       EntryInfo       *eip,
+       Entry   *e )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB *db = mdb->bi_dn2id->bdi_db;
+       DBT             key, data;
+       DBC     *cursor;
+       diskNode *d;
+       int rc;
+       ID      nid;
+       unsigned char dlen[2];
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_dn2id_delete 0x%lx: \"%s\"\n",
+               e->e_id, e->e_ndn, 0 );
+
+       /* key starts out as the parent's ID */
+       DBTzero(&key);
+       key.size = sizeof(ID);
+       key.ulen = key.size;
+       key.flags = DB_DBT_USERMEM;
+       MDB_ID2DISK( eip->bei_id, &nid );
+
+       /* The search record holds only the length bytes, the nrdn and its
+        * NUL; the rdn and ID parts of a diskNode are left out.
+        */
+       DBTzero(&data);
+       data.size = sizeof(diskNode) + BEI(e)->bei_nrdn.bv_len - sizeof(ID) - 1;
+       data.ulen = data.size;
+       data.dlen = data.size;
+       data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+       key.data = &nid;
+
+       d = op->o_tmpalloc( data.size, op->o_tmpmemctx );
+       d->nrdnlen[1] = BEI(e)->bei_nrdn.bv_len & 0xff;
+       d->nrdnlen[0] = (BEI(e)->bei_nrdn.bv_len >> 8) | 0x80;
+       /* keep a copy of the length bytes: the get below may overwrite d */
+       dlen[0] = d->nrdnlen[0];
+       dlen[1] = d->nrdnlen[1];
+       memcpy( d->nrdn, BEI(e)->bei_nrdn.bv_val, BEI(e)->bei_nrdn.bv_len+1 );
+       data.data = d;
+
+       rc = db->cursor( db, txn, &cursor, mdb->bi_db_opflags );
+       /* no cursor to close on this path, hence the jump past c_close */
+       if ( rc ) goto func_leave;
+
+       /* Delete our ID from the parent's list */
+       rc = cursor->c_get( cursor, &key, &data, DB_GET_BOTH_RANGE );
+       if ( rc == 0 ) {
+               /* a range get can land on a different record, so verify
+                * that both the length bytes and the nrdn really match
+                */
+               if ( dlen[1] == d->nrdnlen[1] && dlen[0] == d->nrdnlen[0] &&
+                       !strcmp( d->nrdn, BEI(e)->bei_nrdn.bv_val ))
+                       rc = cursor->c_del( cursor, 0 );
+               else
+                       rc = DB_NOTFOUND;
+       }
+
+       /* Delete our ID from the tree. With sorted duplicates, this
+        * will leave any child nodes still hanging around. This is OK
+        * for modrdn, which will add our info back in later.
+        */
+       if ( rc == 0 ) {
+               /* key.data still points at nid, so this re-keys the lookup
+                * to the entry's own ID
+                */
+               MDB_ID2DISK( e->e_id, &nid );
+               rc = cursor->c_get( cursor, &key, &data, DB_SET );
+               if ( rc == 0 )
+                       rc = cursor->c_del( cursor, 0 );
+       }
+
+       cursor->c_close( cursor );
+func_leave:
+       op->o_tmpfree( d, op->o_tmpmemctx );
+
+       /* Delete IDL cache entries */
+       if ( rc == 0 && mdb->bi_idl_cache_size ) {
+               /* Cache keys are one prefix byte followed by an ID. tmp[1]
+                * holds the ID and the prefix is written into the last byte
+                * of tmp[0], so the two are contiguous.
+                */
+               ID tmp[2];
+               char *ptr = ((char *)&tmp[1])-1;
+               key.data = ptr;
+               key.size = sizeof(ID)+1;
+               tmp[1] = eip->bei_id;
+               *ptr = DN_ONE_PREFIX;
+               mdb_idl_cache_del_id( mdb, db, &key, e->e_id );
+               if ( eip ->bei_parent ) {
+                       *ptr = DN_SUBTREE_PREFIX;
+                       /* walk upward while the current node's parent has a
+                        * non-zero ID
+                        */
+                       for (; eip && eip->bei_parent->bei_id; eip = eip->bei_parent) {
+                               tmp[1] = eip->bei_id;
+                               mdb_idl_cache_del_id( mdb, db, &key, e->e_id );
+                       }
+                       /* Handle DB with empty suffix */
+                       if ( !op->o_bd->be_suffix[0].bv_len && eip ) {
+                               tmp[1] = eip->bei_id;
+                               mdb_idl_cache_del_id( mdb, db, &key, e->e_id );
+                       }
+               }
+       }
+       Debug( LDAP_DEBUG_TRACE, "<= mdb_dn2id_delete 0x%lx: %d\n", e->e_id, rc, 0 );
+       return rc;
+}
+
+
+/* Look up one RDN among the children of a known parent.
+ *
+ *   op     - current operation; supplies the backend handle and the
+ *            temporary allocator
+ *   in     - normalized DN whose leading RDN is searched for (the whole
+ *            value is used when dn_rdnlen() returns zero)
+ *   ei     - EntryInfo to fill in; ei->bei_parent identifies the parent
+ *            (parent ID 0 is used when bei_parent is NULL)
+ *   txn    - transaction used to open the cursor
+ *   cursor - receives the cursor opened here; this function never
+ *            closes it, so that is left to the caller
+ *
+ * On success ei->bei_id and ei->bei_rdn are set, and the parent's
+ * bei_dkids is filled in if it was still zero. Returns 0 on success,
+ * DB_NOTFOUND when no matching record exists, or the error from the
+ * cursor call that failed.
+ */
+int
+mdb_dn2id(
+       Operation       *op,
+       struct berval   *in,
+       EntryInfo       *ei,
+       DB_TXN *txn,
+       DBC **cursor )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB *db = mdb->bi_dn2id->bdi_db;
+       DBT             key, data;
+       int             rc = 0, nrlen;
+       diskNode *d;
+       char    *ptr;
+       unsigned char dlen[2];
+       ID idp, parentID;
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_dn2id(\"%s\")\n", in->bv_val, 0, 0 );
+
+       nrlen = dn_rdnlen( op->o_bd, in );
+       if (!nrlen) nrlen = in->bv_len;
+
+       /* key is the parent's ID in on-disk form */
+       DBTzero(&key);
+       key.size = sizeof(ID);
+       key.data = &idp;
+       key.ulen = sizeof(ID);
+       key.flags = DB_DBT_USERMEM;
+       parentID = ( ei->bei_parent != NULL ) ? ei->bei_parent->bei_id : 0;
+       MDB_ID2DISK( parentID, &idp );
+
+       /* The search record is just length bytes + nrdn + NUL, but the
+        * buffer is three times that size so the returned record (which
+        * also carries the rdn and the ID) has room.
+        * NOTE(review): a record longer than data.ulen would presumably be
+        * cut short by the partial get - confirm against the DB docs.
+        */
+       DBTzero(&data);
+       data.size = sizeof(diskNode) + nrlen - sizeof(ID) - 1;
+       data.ulen = data.size * 3;
+       data.dlen = data.ulen;
+       data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+       rc = db->cursor( db, txn, cursor, mdb->bi_db_opflags );
+       if ( rc ) return rc;
+
+       d = op->o_tmpalloc( data.size * 3, op->o_tmpmemctx );
+       d->nrdnlen[1] = nrlen & 0xff;
+       d->nrdnlen[0] = (nrlen >> 8) | 0x80;
+       /* keep a copy of the length bytes: the get below overwrites d */
+       dlen[0] = d->nrdnlen[0];
+       dlen[1] = d->nrdnlen[1];
+       ptr = lutil_strncopy( d->nrdn, in->bv_val, nrlen );
+       *ptr = '\0';
+       data.data = d;
+
+       rc = (*cursor)->c_get( *cursor, &key, &data, DB_GET_BOTH_RANGE );
+       /* a range get can land on a different record, so verify the
+        * length bytes and the nrdn before accepting it
+        */
+       if ( rc == 0 && (dlen[1] != d->nrdnlen[1] || dlen[0] != d->nrdnlen[0] ||
+               strncmp( d->nrdn, in->bv_val, nrlen ))) {
+               rc = DB_NOTFOUND;
+       }
+       if ( rc == 0 ) {
+               /* the entry's ID is the last sizeof(ID) bytes of the record */
+               ptr = (char *) data.data + data.size - sizeof(ID);
+               MDB_DISK2ID( ptr, &ei->bei_id );
+               /* what remains after the fixed parts and the nrdn is the rdn */
+               ei->bei_rdn.bv_len = data.size - sizeof(diskNode) - nrlen;
+               ptr = d->nrdn + nrlen + 1;
+               /* presumably the third argument (1) makes ber_str2bv copy the
+                * string, which would be required since d is freed below
+                */
+               ber_str2bv( ptr, ei->bei_rdn.bv_len, 1, &ei->bei_rdn );
+               if ( ei->bei_parent != NULL && !ei->bei_parent->bei_dkids ) {
+                       db_recno_t dkids;
+                       /* How many children does the parent have? */
+                       /* FIXME: do we need to lock the parent
+                        * entryinfo? Seems safe...
+                        */
+                       (*cursor)->c_count( *cursor, &dkids, 0 );
+                       ei->bei_parent->bei_dkids = dkids;
+               }
+       }
+
+       op->o_tmpfree( d, op->o_tmpmemctx );
+       if( rc != 0 ) {
+               Debug( LDAP_DEBUG_TRACE, "<= mdb_dn2id: get failed: %s (%d)\n",
+                       db_strerror( rc ), rc, 0 );
+       } else {
+               Debug( LDAP_DEBUG_TRACE, "<= mdb_dn2id: got id=0x%lx\n",
+                       ei->bei_id, 0, 0 );
+       }
+
+       return rc;
+}
+
+/* Read the first record stored under ei->bei_id, which carries the
+ * entry's own RDNs and the ID of its parent.
+ *
+ *   op  - current operation; supplies the backend handle and the
+ *         temporary allocator
+ *   txn - transaction used to open the cursor
+ *   ei  - EntryInfo whose bei_id is looked up; on success its bei_nrdn,
+ *         bei_rdn and bei_dkids are filled in
+ *   idp - receives the parent's ID
+ *
+ * Returns 0 on success, LDAP_OTHER when the first record under the key
+ * has the child flag (0x80) set, or the error from the cursor call that
+ * failed.
+ */
+int
+mdb_dn2id_parent(
+       Operation *op,
+       DB_TXN *txn,
+       EntryInfo *ei,
+       ID *idp )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB *db = mdb->bi_dn2id->bdi_db;
+       DBT             key, data;
+       DBC     *cursor;
+       int             rc = 0;
+       diskNode *d;
+       char    *ptr;
+       ID      nid;
+
+       DBTzero(&key);
+       key.size = sizeof(ID);
+       key.data = &nid;
+       key.ulen = sizeof(ID);
+       key.flags = DB_DBT_USERMEM;
+       MDB_ID2DISK( ei->bei_id, &nid );
+
+       DBTzero(&data);
+       data.flags = DB_DBT_USERMEM;
+
+       rc = db->cursor( db, txn, &cursor, mdb->bi_db_opflags );
+       if ( rc ) return rc;
+
+       /* room for a whole record: fixed parts plus two maximum-length DNs */
+       data.ulen = sizeof(diskNode) + (SLAP_LDAPDN_MAXLEN * 2);
+       d = op->o_tmpalloc( data.ulen, op->o_tmpmemctx );
+       data.data = d;
+
+       rc = cursor->c_get( cursor, &key, &data, DB_SET );
+       if ( rc == 0 ) {
+               /* the node's own record must sort first and has the flag
+                * bit clear; a set bit means a child record was found instead
+                */
+               if (d->nrdnlen[0] & 0x80) {
+                       rc = LDAP_OTHER;
+               } else {
+                       db_recno_t dkids;
+                       /* parent ID is the last sizeof(ID) bytes of the record */
+                       ptr = (char *) data.data + data.size - sizeof(ID);
+                       MDB_DISK2ID( ptr, idp );
+                       ei->bei_nrdn.bv_len = (d->nrdnlen[0] << 8) | d->nrdnlen[1];
+                       ber_str2bv( d->nrdn, ei->bei_nrdn.bv_len, 1, &ei->bei_nrdn );
+                       /* what remains after the fixed parts and the nrdn
+                        * is the rdn
+                        */
+                       ei->bei_rdn.bv_len = data.size - sizeof(diskNode) -
+                               ei->bei_nrdn.bv_len;
+                       ptr = d->nrdn + ei->bei_nrdn.bv_len + 1;
+                       ber_str2bv( ptr, ei->bei_rdn.bv_len, 1, &ei->bei_rdn );
+                       /* How many children does this node have? */
+                       cursor->c_count( cursor, &dkids, 0 );
+                       ei->bei_dkids = dkids;
+               }
+       }
+       cursor->c_close( cursor );
+       op->o_tmpfree( d, op->o_tmpmemctx );
+       return rc;
+}
+
+/* Determine whether entry e has children.
+ *
+ * If the IDL cache is enabled it is asked first, using the entry ID in
+ * host byte order as the key; any answer other than
+ * LDAP_NO_SUCH_OBJECT is returned as-is. Otherwise the records stored
+ * under the entry's on-disk ID are counted and the count is saved in
+ * BEI(e)->bei_dkids.
+ *
+ * On the database path the result is 0 when at least two records exist
+ * under the key, DB_NOTFOUND when fewer do, or the error from the
+ * cursor call that failed.
+ */
+int
+mdb_dn2id_children(
+       Operation *op,
+       DB_TXN *txn,
+       Entry *e )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB *db = mdb->bi_dn2id->bdi_db;
+       DBT             key, data;
+       DBC             *curs;
+       db_recno_t      count;
+       diskNode        rec;
+       ID              diskid;
+       int             rc;
+
+       MDB_ID2DISK( e->e_id, &diskid );
+
+       DBTzero(&key);
+       key.flags = DB_DBT_USERMEM;
+       key.size = sizeof(ID);
+
+       /* IDL cache is in host byte order */
+       if ( mdb->bi_idl_cache_size ) {
+               key.data = &e->e_id;
+               rc = mdb_idl_cache_get( mdb, db, &key, NULL );
+               if ( rc != LDAP_NO_SUCH_OBJECT )
+                       return rc;
+       }
+
+       /* the database itself is keyed by the on-disk form of the ID */
+       key.data = &diskid;
+
+       /* only the start of the first record is fetched; it is not examined */
+       DBTzero(&data);
+       data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+       data.data = &rec;
+       data.ulen = sizeof(rec);
+       data.dlen = sizeof(rec);
+
+       rc = db->cursor( db, txn, &curs, mdb->bi_db_opflags );
+       if ( rc )
+               return rc;
+
+       rc = curs->c_get( curs, &key, &data, DB_SET );
+       if ( rc == 0 )
+               rc = curs->c_count( curs, &count, 0 );
+       if ( rc == 0 ) {
+               BEI(e)->bei_dkids = count;
+               if ( count < 2 )
+                       rc = DB_NOTFOUND;
+       }
+
+       curs->c_close( curs );
+       return rc;
+}
+
+/* mdb_dn2idl:
+ * We can't just use mdb_idl_fetch_key because
+ * 1 - our data items are longer than just an entry ID
+ * 2 - our data items are sorted alphabetically by nrdn, not by ID.
+ *
+ * We descend the tree recursively, so we define this cookie
+ * to hold our necessary state information. The mdb_dn2idl_internal
+ * function uses this cookie when calling itself.
+ */
+
+/* State shared by mdb_dn2idl and the recursive mdb_dn2idl_internal.
+ *
+ * NOTE: the order of dbuf and id matters. The IDL cache key is built as
+ * ((char *)&id)-1, i.e. the prefix byte is written into the byte
+ * directly in front of id, which with this layout is the last byte of
+ * dbuf.
+ */
+struct dn2id_cookie {
+       struct mdb_info *mdb;   /* backend private data */
+       Operation *op;          /* current operation (allocator, suffix) */
+       DB_TXN *txn;            /* transaction passed to mdb_cache_find_id */
+       EntryInfo *ei;          /* node whose children are being collected */
+       ID *ids;                /* result list handed in by the caller */
+       ID *tmp;                /* scratch list for one node's children */
+       ID *buf;                /* bulk-read buffer, also sort scratch space */
+       DB *db;                 /* the dn2id database */
+       DBC *dbc;               /* cursor used while loading children */
+       DBT key;
+       DBT data;
+       ID dbuf;                /* target of the partial read of the first item */
+       ID id;                  /* ID of the current node, host byte order */
+       ID nid;                 /* same ID in on-disk form; used as the DB key */
+       int rc;                 /* status of the most recent step */
+       int depth;              /* recursion depth; 0 for the search base */
+       char need_sort;         /* set once IDs were appended to ids */
+       char prefix;            /* DN_ONE_PREFIX or DN_SUBTREE_PREFIX */
+};
+
+/* avl_apply() callback used when walking a node's cached children:
+ * appends the bei_id of the visited EntryInfo (data) to the ID list
+ * passed in arg. Always returns 0.
+ */
+static int
+apply_func(
+       void *data,
+       void *arg )
+{
+       EntryInfo *kid = data;
+
+       mdb_idl_append_one( (ID *)arg, kid->bei_id );
+       return 0;
+}
+
+/* Recursive worker for mdb_dn2idl.
+ *
+ * Collects the IDs of the direct children of the node described by
+ * cx->ei / cx->id / cx->nid into cx->tmp, taking them from the IDL
+ * cache, from the in-memory EntryInfo tree, or from the database.
+ * For DN_ONE_PREFIX the list is copied into cx->ids. For
+ * DN_SUBTREE_PREFIX it is appended to cx->ids and the function calls
+ * itself for every child, with cx->depth raised for the duration.
+ *
+ * cx->ei, cx->id, cx->nid and cx->tmp are overwritten during recursion.
+ * Returns cx->rc.
+ */
+static int
+mdb_dn2idl_internal(
+       struct dn2id_cookie *cx
+)
+{
+       MDB_IDL_ZERO( cx->tmp );
+
+       if ( cx->mdb->bi_idl_cache_size ) {
+               /* cache key = prefix byte + ID; the prefix goes into the
+                * byte directly in front of cx->id
+                */
+               char *ptr = ((char *)&cx->id)-1;
+
+               cx->key.data = ptr;
+               cx->key.size = sizeof(ID)+1;
+               if ( cx->prefix == DN_SUBTREE_PREFIX ) {
+                       /* at the top level a cached subtree list is the final
+                        * answer; deeper down it is fetched into tmp and
+                        * appended to the result
+                        */
+                       ID *ids = cx->depth ? cx->tmp : cx->ids;
+                       *ptr = cx->prefix;
+                       cx->rc = mdb_idl_cache_get(cx->mdb, cx->db, &cx->key, ids);
+                       if ( cx->rc == LDAP_SUCCESS ) {
+                               if ( cx->depth ) {
+                                       mdb_idl_append( cx->ids, cx->tmp );
+                                       cx->need_sort = 1;
+                               }
+                               return cx->rc;
+                       }
+               }
+               /* no subtree hit: try the cached list of direct children */
+               *ptr = DN_ONE_PREFIX;
+               cx->rc = mdb_idl_cache_get(cx->mdb, cx->db, &cx->key, cx->tmp);
+               if ( cx->rc == LDAP_SUCCESS ) {
+                       goto gotit;
+               }
+               /* presumably a cached "no children" answer - TODO confirm
+                * against mdb_idl_cache_get
+                */
+               if ( cx->rc == DB_NOTFOUND ) {
+                       return cx->rc;
+               }
+       }
+
+       mdb_cache_entryinfo_lock( cx->ei );
+
+       /* If number of kids in the cache differs from on-disk, load
+        * up all the kids from the database
+        */
+       if ( cx->ei->bei_ckids+1 != cx->ei->bei_dkids ) {
+               /* stack EntryInfo used as a template for each child loaded */
+               EntryInfo ei;
+               db_recno_t dkids = cx->ei->bei_dkids;
+               ei.bei_parent = cx->ei;
+
+               /* Only one thread should load the cache */
+               while ( cx->ei->bei_state & CACHE_ENTRY_ONELEVEL ) {
+                       /* another thread holds the flag: drop the lock, yield,
+                        * and re-check whether it finished the job for us
+                        */
+                       mdb_cache_entryinfo_unlock( cx->ei );
+                       ldap_pvt_thread_yield();
+                       mdb_cache_entryinfo_lock( cx->ei );
+                       if ( cx->ei->bei_ckids+1 == cx->ei->bei_dkids ) {
+                               goto synced;
+                       }
+               }
+
+               cx->ei->bei_state |= CACHE_ENTRY_ONELEVEL;
+
+               mdb_cache_entryinfo_unlock( cx->ei );
+
+               /* note: the cursor is opened without a transaction */
+               cx->rc = cx->db->cursor( cx->db, NULL, &cx->dbc,
+                       cx->mdb->bi_db_opflags );
+               if ( cx->rc )
+                       goto done_one;
+
+               /* partial read: only sizeof(ID) bytes of the first item
+                * are fetched, just to position the cursor
+                */
+               cx->data.data = &cx->dbuf;
+               cx->data.ulen = sizeof(ID);
+               cx->data.dlen = sizeof(ID);
+               cx->data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+               /* The first item holds the parent ID. Ignore it. */
+               cx->key.data = &cx->nid;
+               cx->key.size = sizeof(ID);
+               cx->rc = cx->dbc->c_get( cx->dbc, &cx->key, &cx->data, DB_SET );
+               if ( cx->rc ) {
+                       cx->dbc->c_close( cx->dbc );
+                       goto done_one;
+               }
+
+               /* If the on-disk count is zero we've never checked it.
+                * Count it now.
+                */
+               if ( !dkids ) {
+                       cx->dbc->c_count( cx->dbc, &dkids, 0 );
+                       cx->ei->bei_dkids = dkids;
+               }
+
+               /* switch to whole-record bulk reads into cx->buf */
+               cx->data.data = cx->buf;
+               cx->data.ulen = MDB_IDL_UM_SIZE * sizeof(ID);
+               cx->data.flags = DB_DBT_USERMEM;
+
+               /* a count of 1 is just the node's own record: no children */
+               if ( dkids > 1 ) {
+                       /* Fetch the rest of the IDs in a loop... */
+                       while ( (cx->rc = cx->dbc->c_get( cx->dbc, &cx->key, &cx->data,
+                               DB_MULTIPLE | DB_NEXT_DUP )) == 0 ) {
+                               u_int8_t *j;
+                               size_t len;
+                               void *ptr;
+                               DB_MULTIPLE_INIT( ptr, &cx->data );
+                               while (ptr) {
+                                       DB_MULTIPLE_NEXT( ptr, &cx->data, j, len );
+                                       if (j) {
+                                               EntryInfo *ei2;
+                                               diskNode *d = (diskNode *)j;
+                                               short nrlen;
+
+                                               /* child's ID is the last sizeof(ID)
+                                                * bytes of its record
+                                                */
+                                               MDB_DISK2ID( j + len - sizeof(ID), &ei.bei_id );
+                                               /* strip the child flag from the high byte */
+                                               nrlen = ((d->nrdnlen[0] ^ 0x80) << 8) | d->nrdnlen[1];
+                                               ei.bei_nrdn.bv_len = nrlen;
+                                               /* nrdn/rdn are set in-place.
+                                                * mdb_cache_load will copy them as needed
+                                                */
+                                               ei.bei_nrdn.bv_val = d->nrdn;
+                                               ei.bei_rdn.bv_len = len - sizeof(diskNode)
+                                                       - ei.bei_nrdn.bv_len;
+                                               ei.bei_rdn.bv_val = d->nrdn + ei.bei_nrdn.bv_len + 1;
+                                               mdb_idl_append_one( cx->tmp, ei.bei_id );
+                                               mdb_cache_load( cx->mdb, &ei, &ei2 );
+                                       }
+                               }
+                       }
+               }
+
+               /* the loop's terminating status is replaced by the result of
+                * closing the cursor
+                */
+               cx->rc = cx->dbc->c_close( cx->dbc );
+done_one:
+               /* release the "loading" flag on every exit from this branch */
+               mdb_cache_entryinfo_lock( cx->ei );
+               cx->ei->bei_state &= ~CACHE_ENTRY_ONELEVEL;
+               mdb_cache_entryinfo_unlock( cx->ei );
+               if ( cx->rc )
+                       return cx->rc;
+
+       } else {
+               /* The in-memory cache is in sync with the on-disk data.
+                * do we have any kids?
+                */
+synced:
+               /* reached with cx->ei locked, either directly or via the
+                * goto in the wait loop above
+                */
+               cx->rc = 0;
+               if ( cx->ei->bei_ckids > 0 ) {
+                       /* Walk the kids tree; order is irrelevant since mdb_idl_sort
+                        * will sort it later.
+                        */
+                       avl_apply( cx->ei->bei_kids, apply_func,
+                               cx->tmp, -1, AVL_POSTORDER );
+               }
+               mdb_cache_entryinfo_unlock( cx->ei );
+       }
+
+       /* sort the freshly built child list and remember it in the cache */
+       if ( !MDB_IDL_IS_RANGE( cx->tmp ) && cx->tmp[0] > 3 )
+               mdb_idl_sort( cx->tmp, cx->buf );
+       if ( cx->mdb->bi_idl_cache_max_size && !MDB_IDL_IS_ZERO( cx->tmp )) {
+               char *ptr = ((char *)&cx->id)-1;
+               cx->key.data = ptr;
+               cx->key.size = sizeof(ID)+1;
+               *ptr = DN_ONE_PREFIX;
+               mdb_idl_cache_put( cx->mdb, cx->db, &cx->key, cx->tmp, cx->rc );
+       }
+
+gotit:
+       /* cx->tmp now holds this node's direct children, if any */
+       if ( !MDB_IDL_IS_ZERO( cx->tmp )) {
+               if ( cx->prefix == DN_SUBTREE_PREFIX ) {
+                       mdb_idl_append( cx->ids, cx->tmp );
+                       cx->need_sort = 1;
+                       if ( !(cx->ei->bei_state & CACHE_ENTRY_NO_GRANDKIDS)) {
+                               ID *save, idcurs;
+                               EntryInfo *ei = cx->ei;
+                               int nokids = 1;
+                               /* the recursion reuses cx->tmp, so iterate over
+                                * a private copy of the child list
+                                */
+                               save = cx->op->o_tmpalloc( MDB_IDL_SIZEOF( cx->tmp ),
+                                       cx->op->o_tmpmemctx );
+                               MDB_IDL_CPY( save, cx->tmp );
+
+                               idcurs = 0;
+                               cx->depth++;
+                               for ( cx->id = mdb_idl_first( save, &idcurs );
+                                       cx->id != NOID;
+                                       cx->id = mdb_idl_next( save, &idcurs )) {
+                                       EntryInfo *ei2;
+                                       cx->ei = NULL;
+                                       /* children that cannot be found are skipped */
+                                       if ( mdb_cache_find_id( cx->op, cx->txn, cx->id, &cx->ei,
+                                               ID_NOENTRY, NULL ))
+                                               continue;
+                                       if ( cx->ei ) {
+                                               /* keep the pointer: the recursive
+                                                * call changes cx->ei
+                                                */
+                                               ei2 = cx->ei;
+                                               if ( !( ei2->bei_state & CACHE_ENTRY_NO_KIDS )) {
+                                                       MDB_ID2DISK( cx->id, &cx->nid );
+                                                       mdb_dn2idl_internal( cx );
+                                                       if ( !MDB_IDL_IS_ZERO( cx->tmp ))
+                                                               nokids = 0;
+                                               }
+                                               /* presumably balances a bei_finders
+                                                * increment made by mdb_cache_find_id
+                                                * - TODO confirm
+                                                */
+                                               mdb_cache_entryinfo_lock( ei2 );
+                                               ei2->bei_finders--;
+                                               mdb_cache_entryinfo_unlock( ei2 );
+                                       }
+                               }
+                               cx->depth--;
+                               cx->op->o_tmpfree( save, cx->op->o_tmpmemctx );
+                               /* no child had children of its own: record that
+                                * on the node so later searches skip the descent
+                                */
+                               if ( nokids ) {
+                                       mdb_cache_entryinfo_lock( ei );
+                                       ei->bei_state |= CACHE_ENTRY_NO_GRANDKIDS;
+                                       mdb_cache_entryinfo_unlock( ei );
+                               }
+                       }
+                       /* Make sure caller knows it had kids! */
+                       cx->tmp[0]=1;
+
+                       cx->rc = 0;
+               } else {
+                       MDB_IDL_CPY( cx->ids, cx->tmp );
+               }
+       }
+       return cx->rc;
+}
+
+/* Build the candidate ID list for a one-level or subtree search.
+ *
+ *   op    - current operation; op->ors_scope selects one-level
+ *           (DN_ONE_PREFIX) or anything else (DN_SUBTREE_PREFIX)
+ *   txn   - transaction passed down to the cache lookups
+ *   ndn   - normalized base DN; used only for the trace message
+ *   ei    - EntryInfo of the search base
+ *   ids   - receives the resulting ID list
+ *   stack - scratch space; the first MDB_IDL_UM_SIZE IDs are used as the
+ *           temporary list and the area after that as a buffer
+ *
+ * For subtree scope the base's own ID is placed in ids first. Returns
+ * LDAP_SUCCESS when the base is flagged as having no children, otherwise
+ * the status left by mdb_dn2idl_internal with DB_NOTFOUND mapped to
+ * LDAP_SUCCESS.
+ */
+int
+mdb_dn2idl(
+       Operation       *op,
+       DB_TXN *txn,
+       struct berval *ndn,
+       EntryInfo       *ei,
+       ID *ids,
+       ID *stack )
+{
+       struct mdb_info *mdb = (struct mdb_info *)op->o_bd->be_private;
+       struct dn2id_cookie cx;
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_dn2idl(\"%s\")\n",
+               ndn->bv_val, 0, 0 );
+
+#ifndef MDB_MULTIPLE_SUFFIXES
+       /* Shortcut: a non-one-level search based at the root node, or at a
+        * child of the root when the suffix is non-empty, covers the whole
+        * database, so return the "all IDs" list.
+        */
+       if ( op->ors_scope != LDAP_SCOPE_ONELEVEL && 
+               ( ei->bei_id == 0 ||
+               ( ei->bei_parent->bei_id == 0 && op->o_bd->be_suffix[0].bv_len )))
+       {
+               MDB_IDL_ALL( mdb, ids );
+               return 0;
+       }
+#endif
+
+       cx.id = ei->bei_id;
+       MDB_ID2DISK( cx.id, &cx.nid );
+       cx.ei = ei;
+       cx.mdb = mdb;
+       cx.db = cx.mdb->bi_dn2id->bdi_db;
+       cx.prefix = (op->ors_scope == LDAP_SCOPE_ONELEVEL) ?
+               DN_ONE_PREFIX : DN_SUBTREE_PREFIX;
+       cx.ids = ids;
+       cx.tmp = stack;
+       cx.buf = stack + MDB_IDL_UM_SIZE;
+       cx.op = op;
+       cx.txn = txn;
+       cx.need_sort = 0;
+       cx.depth = 0;
+
+       if ( cx.prefix == DN_SUBTREE_PREFIX ) {
+               /* a subtree result always includes the base entry itself */
+               ids[0] = 1;
+               ids[1] = cx.id;
+       } else {
+               MDB_IDL_ZERO( ids );
+       }
+       /* returns before cx.rc has been assigned, hence the literal status */
+       if ( cx.ei->bei_state & CACHE_ENTRY_NO_KIDS )
+               return LDAP_SUCCESS;
+
+       DBTzero(&cx.key);
+       cx.key.ulen = sizeof(ID);
+       cx.key.size = sizeof(ID);
+       cx.key.flags = DB_DBT_USERMEM;
+
+       DBTzero(&cx.data);
+
+       mdb_dn2idl_internal(&cx);
+       if ( cx.need_sort ) {
+               /* cache key = prefix byte + ID; the prefix goes into the
+                * byte directly in front of cx.id
+                */
+               char *ptr = ((char *)&cx.id)-1;
+               if ( !MDB_IDL_IS_RANGE( cx.ids ) && cx.ids[0] > 3 ) 
+                       mdb_idl_sort( cx.ids, cx.tmp );
+               cx.key.data = ptr;
+               cx.key.size = sizeof(ID)+1;
+               *ptr = cx.prefix;
+               /* the recursion changed cx.id; restore the base's ID */
+               cx.id = ei->bei_id;
+               if ( cx.mdb->bi_idl_cache_max_size )
+                       mdb_idl_cache_put( cx.mdb, cx.db, &cx.key, cx.ids, cx.rc );
+       }
+
+       if ( cx.rc == DB_NOTFOUND )
+               cx.rc = LDAP_SUCCESS;
+
+       return cx.rc;
+}
diff --git a/servers/slapd/back-mdb/error.c b/servers/slapd/back-mdb/error.c
new file mode 100644 (file)
index 0000000..14a0672
--- /dev/null
@@ -0,0 +1,62 @@
+/* error.c - MDB errcall routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "slap.h"
+#include "back-mdb.h"
+
+/* Error callback: writes the prefix and message to the debug log at
+ * LDAP_DEBUG_ANY. The signature is selected by DB_VERSION_FULL; the
+ * newer form also receives the DB_ENV, which is not used here.
+ */
+#if DB_VERSION_FULL < 0x04030000
+void mdb_errcall( const char *pfx, char * msg )
+#else
+void mdb_errcall( const DB_ENV *env, const char *pfx, const char * msg )
+#endif
+{
+#ifdef HAVE_EBCDIC
+       /* presumably converts an EBCDIC message to ASCII in place when its
+        * first byte is above 0x7f - TODO confirm __etoa semantics
+        */
+       if ( msg[0] > 0x7f )
+               __etoa( msg );
+#endif
+       Debug( LDAP_DEBUG_ANY, "mdb(%s): %s\n", pfx, msg, 0 );
+}
+
+#if DB_VERSION_FULL >= 0x04030000
+/* Informational message callback, only compiled when DB_VERSION_FULL is
+ * at least 0x04030000: writes the message to the debug log at
+ * LDAP_DEBUG_TRACE. The DB_ENV argument is not used.
+ */
+void mdb_msgcall( const DB_ENV *env, const char *msg )
+{
+#ifdef HAVE_EBCDIC
+       /* presumably converts an EBCDIC message to ASCII in place when its
+        * first byte is above 0x7f - TODO confirm __etoa semantics
+        */
+       if ( msg[0] > 0x7f )
+               __etoa( msg );
+#endif
+       Debug( LDAP_DEBUG_TRACE, "mdb: %s\n", msg, 0, 0 );
+}
+#endif
+
+#ifdef HAVE_EBCDIC
+
+/* drop any macro definition of db_strerror so the call below reaches
+ * the real function
+ */
+#undef db_strerror
+
+/* Return the db_strerror() text for rc after passing it through
+ * __etoa(). The result lives in a single static 1024-byte buffer that
+ * is overwritten by every call, and the copy into it is unbounded
+ * (strcpy).
+ */
+/* Not re-entrant! */
+char *ebcdic_dberror( int rc )
+{
+       static char msg[1024];
+
+       strcpy( msg, db_strerror( rc ) );
+       __etoa( msg );
+       return msg;
+}
+#endif
diff --git a/servers/slapd/back-mdb/extended.c b/servers/slapd/back-mdb/extended.c
new file mode 100644 (file)
index 0000000..57a0571
--- /dev/null
@@ -0,0 +1,54 @@
+/* extended.c - mdb backend extended routines */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+#include "lber_pvt.h"
+
+/* Extended operations handled by this backend: each entry pairs a request
+ * OID with its handler.  The entry whose handler is NULL terminates the
+ * table; no operations are registered at present.
+ */
+static struct exop {
+       struct berval *oid;
+       BI_op_extended  *extended;
+} exop_table[] = {
+       { NULL, NULL }
+};
+
+/* Extended-operation entry point.
+ *
+ * Walks exop_table up to its NULL-handler terminator and hands the request
+ * to the first entry whose OID equals op->oq_extended.rs_reqoid, returning
+ * that handler's result.  When nothing matches, rs->sr_text and rs->sr_err
+ * are filled in and LDAP_UNWILLING_TO_PERFORM is returned.
+ */
+int
+mdb_extended( Operation *op, SlapReply *rs )
+{
+       struct exop *handler;
+
+       for ( handler = exop_table; handler->extended != NULL; handler++ ) {
+               if ( ber_bvcmp( handler->oid, &op->oq_extended.rs_reqoid ) != 0 )
+                       continue;
+
+               return (handler->extended)( op, rs );
+       }
+
+       rs->sr_text = "not supported within naming context";
+       rs->sr_err = LDAP_UNWILLING_TO_PERFORM;
+       return rs->sr_err;
+}
+
diff --git a/servers/slapd/back-mdb/filterindex.c b/servers/slapd/back-mdb/filterindex.c
new file mode 100644 (file)
index 0000000..747a6d0
--- /dev/null
@@ -0,0 +1,1179 @@
+/* filterindex.c - generate the list of candidate entries from a filter */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+#include "idl.h"
+#ifdef LDAP_COMP_MATCH
+#include <component.h>
+#endif
+
+static int presence_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeDescription *desc,
+       ID *ids );
+
+static int equality_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeAssertion *ava,
+       ID *ids,
+       ID *tmp );
+static int inequality_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeAssertion *ava,
+       ID *ids,
+       ID *tmp,
+       int gtorlt );
+static int approx_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeAssertion *ava,
+       ID *ids,
+       ID *tmp );
+static int substring_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       SubstringsAssertion *sub,
+       ID *ids,
+       ID *tmp );
+
+static int list_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       Filter *flist,
+       int ftype,
+       ID *ids,
+       ID *tmp,
+       ID *stack );
+
+static int
+ext_candidates(
+        Operation *op,
+               DB_TXN *rtxn,
+        MatchingRuleAssertion *mra,
+        ID *ids,
+        ID *tmp,
+        ID *stack);
+
+#ifdef LDAP_COMP_MATCH
+static int
+comp_candidates (
+       Operation *op,
+       DB_TXN *rtxn,
+       MatchingRuleAssertion *mra,
+       ComponentFilter *f,
+       ID *ids,
+       ID *tmp,
+       ID *stack);
+
+static int
+ava_comp_candidates (
+               Operation *op,
+               DB_TXN *rtxn,
+               AttributeAssertion *ava,
+               AttributeAliasing *aa,
+               ID *ids,
+               ID *tmp,
+               ID *stack);
+#endif
+
+/* Compute the candidate ID list for filter f into ids.
+ *
+ * Dispatches on f->f_choice to the per-filter-type helpers in this file;
+ * tmp and stack are scratch ID buffers that are handed on to them.
+ * Filter types with no index support (NOT, unknown choices) yield the
+ * "all IDs" list so that nothing is excluded before the real filter test.
+ *
+ * Returns the helper's result code, or 0 for the cases handled inline.
+ */
+int
+mdb_filter_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       Filter  *f,
+       ID *ids,
+       ID *tmp,
+       ID *stack )
+{
+       int rc = 0;
+#ifdef LDAP_COMP_MATCH
+       AttributeAliasing *aa;
+#endif
+       Debug( LDAP_DEBUG_FILTER, "=> mdb_filter_candidates\n", 0, 0, 0 );
+
+       /* a filter flagged undefined yields an empty candidate list */
+       if ( f->f_choice & SLAPD_FILTER_UNDEFINED ) {
+               MDB_IDL_ZERO( ids );
+               goto out;
+       }
+
+       switch ( f->f_choice ) {
+       case SLAPD_FILTER_COMPUTED:
+               /* any f_result not listed below leaves ids untouched */
+               switch( f->f_result ) {
+               case SLAPD_COMPARE_UNDEFINED:
+               /* This technically is not the same as FALSE, but it
+                * certainly will produce no matches.
+                */
+               /* FALL THRU */
+               case LDAP_COMPARE_FALSE:
+                       MDB_IDL_ZERO( ids );
+                       break;
+               case LDAP_COMPARE_TRUE: {
+                       struct mdb_info *mdb = (struct mdb_info *)op->o_bd->be_private;
+                       MDB_IDL_ALL( mdb, ids );
+                       } break;
+               case LDAP_SUCCESS:
+                       /* this is a pre-computed scope, leave it alone */
+                       break;
+               }
+               break;
+       case LDAP_FILTER_PRESENT:
+               Debug( LDAP_DEBUG_FILTER, "\tPRESENT\n", 0, 0, 0 );
+               rc = presence_candidates( op, rtxn, f->f_desc, ids );
+               break;
+
+       case LDAP_FILTER_EQUALITY:
+               Debug( LDAP_DEBUG_FILTER, "\tEQUALITY\n", 0, 0, 0 );
+#ifdef LDAP_COMP_MATCH
+               /* is_aliased_attribute is tested for NULL before it is called */
+               if ( is_aliased_attribute && ( aa = is_aliased_attribute ( f->f_ava->aa_desc ) ) ) {
+                       rc = ava_comp_candidates ( op, rtxn, f->f_ava, aa, ids, tmp, stack );
+               }
+               else
+#endif
+               {
+                       rc = equality_candidates( op, rtxn, f->f_ava, ids, tmp );
+               }
+               break;
+
+       case LDAP_FILTER_APPROX:
+               Debug( LDAP_DEBUG_FILTER, "\tAPPROX\n", 0, 0, 0 );
+               rc = approx_candidates( op, rtxn, f->f_ava, ids, tmp );
+               break;
+
+       case LDAP_FILTER_SUBSTRINGS:
+               Debug( LDAP_DEBUG_FILTER, "\tSUBSTRINGS\n", 0, 0, 0 );
+               rc = substring_candidates( op, rtxn, f->f_sub, ids, tmp );
+               break;
+
+       case LDAP_FILTER_GE:
+               /* if no GE index, use pres */
+               Debug( LDAP_DEBUG_FILTER, "\tGE\n", 0, 0, 0 );
+               if( f->f_ava->aa_desc->ad_type->sat_ordering &&
+                       ( f->f_ava->aa_desc->ad_type->sat_ordering->smr_usage & SLAP_MR_ORDERED_INDEX ) )
+                       rc = inequality_candidates( op, rtxn, f->f_ava, ids, tmp, LDAP_FILTER_GE );
+               else
+                       rc = presence_candidates( op, rtxn, f->f_ava->aa_desc, ids );
+               break;
+
+       case LDAP_FILTER_LE:
+               /* if no LE index, use pres */
+               Debug( LDAP_DEBUG_FILTER, "\tLE\n", 0, 0, 0 );
+               if( f->f_ava->aa_desc->ad_type->sat_ordering &&
+                       ( f->f_ava->aa_desc->ad_type->sat_ordering->smr_usage & SLAP_MR_ORDERED_INDEX ) )
+                       rc = inequality_candidates( op, rtxn, f->f_ava, ids, tmp, LDAP_FILTER_LE );
+               else
+                       rc = presence_candidates( op, rtxn, f->f_ava->aa_desc, ids );
+               break;
+
+       case LDAP_FILTER_NOT:
+               /* no indexing to support NOT filters */
+               Debug( LDAP_DEBUG_FILTER, "\tNOT\n", 0, 0, 0 );
+               { struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+               MDB_IDL_ALL( mdb, ids );
+               }
+               break;
+
+       case LDAP_FILTER_AND:
+               Debug( LDAP_DEBUG_FILTER, "\tAND\n", 0, 0, 0 );
+               rc = list_candidates( op, rtxn, 
+                       f->f_and, LDAP_FILTER_AND, ids, tmp, stack );
+               break;
+
+       case LDAP_FILTER_OR:
+               Debug( LDAP_DEBUG_FILTER, "\tOR\n", 0, 0, 0 );
+               rc = list_candidates( op, rtxn,
+                       f->f_or, LDAP_FILTER_OR, ids, tmp, stack );
+               break;
+       case LDAP_FILTER_EXT:
+                Debug( LDAP_DEBUG_FILTER, "\tEXT\n", 0, 0, 0 );
+                rc = ext_candidates( op, rtxn, f->f_mra, ids, tmp, stack );
+                break;
+       default:
+               Debug( LDAP_DEBUG_FILTER, "\tUNKNOWN %lu\n",
+                       (unsigned long) f->f_choice, 0, 0 );
+               /* Must not return NULL, otherwise extended filters break */
+               { struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+               MDB_IDL_ALL( mdb, ids );
+               }
+       }
+
+out:
+       Debug( LDAP_DEBUG_FILTER,
+               "<= mdb_filter_candidates: id=%ld first=%ld last=%ld\n",
+               (long) ids[0],
+               (long) MDB_IDL_FIRST( ids ),
+               (long) MDB_IDL_LAST( ids ) );
+
+       return rc;
+}
+
+#ifdef LDAP_COMP_MATCH
+/* Combine the candidate lists of every component filter in flist into ids.
+ *
+ * ftype selects how: LDAP_COMP_FILTER_AND intersects the per-element
+ * lists, any other value unions them.  Each element is evaluated into
+ * save, with save+MDB_IDL_UM_SIZE passed down as that call's own stack
+ * area, and tmp as scratch.
+ *
+ * Returns 0, or the non-zero result that stopped the walk.
+ */
+static int
+comp_list_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       MatchingRuleAssertion* mra,
+       ComponentFilter *flist,
+       int     ftype,
+       ID *ids,
+       ID *tmp,
+       ID *save )
+{
+       int rc = 0;
+       ComponentFilter *f;
+
+       Debug( LDAP_DEBUG_FILTER, "=> comp_list_candidates 0x%x\n", ftype, 0, 0 );
+       for ( f = flist; f != NULL; f = f->cf_next ) {
+               /* ignore precomputed scopes */
+               if ( f->cf_choice == SLAPD_FILTER_COMPUTED &&
+                    f->cf_result == LDAP_SUCCESS ) {
+                       continue;
+               }
+               MDB_IDL_ZERO( save );
+               rc = comp_candidates( op, rtxn, mra, f, save, tmp, save+MDB_IDL_UM_SIZE );
+
+               if ( rc != 0 ) {
+                       /* in an AND a failed element is just left out of the
+                        * intersection; for any other ftype it ends the walk
+                        */
+                       if ( ftype == LDAP_COMP_FILTER_AND ) {
+                               rc = 0;
+                               continue;
+                       }
+                       break;
+               }
+               
+               /* NOTE(review): ids is seeded only when f is the list head; if
+                * the head was skipped above, ids keeps whatever the caller
+                * passed in -- confirm callers rely on that.
+                */
+               if ( ftype == LDAP_COMP_FILTER_AND ) {
+                       if ( f == flist ) {
+                               MDB_IDL_CPY( ids, save );
+                       } else {
+                               mdb_idl_intersection( ids, save );
+                       }
+                       /* an empty intersection cannot grow again */
+                       if( MDB_IDL_IS_ZERO( ids ) )
+                               break;
+               } else {
+                       if ( f == flist ) {
+                               MDB_IDL_CPY( ids, save );
+                       } else {
+                               mdb_idl_union( ids, save );
+                       }
+               }
+       }
+
+       if( rc == LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_FILTER,
+                       "<= comp_list_candidates: id=%ld first=%ld last=%ld\n",
+                       (long) ids[0],
+                       (long) MDB_IDL_FIRST(ids),
+                       (long) MDB_IDL_LAST(ids) );
+
+       } else {
+               Debug( LDAP_DEBUG_FILTER,
+                       "<= comp_list_candidates: undefined rc=%d\n",
+                       rc, 0, 0 );
+       }
+
+       return rc;
+}
+
+/* Candidate ID list for one component assertion (ca) of a component
+ * filter on attribute mra->ma_desc.
+ *
+ * ids starts out as "all IDs" and is only narrowed when every
+ * precondition holds: the assertion carries a component reference and a
+ * matching rule with a key generator, the attribute has index info whose
+ * component-reference list contains the same reference string, and an
+ * equality index is configured.  Any missing precondition returns 0 with
+ * ids left at "all IDs".
+ *
+ * When keys are generated, their ID lists are read and intersected; a
+ * missing key (DB_NOTFOUND) or an empty list empties ids.
+ *
+ * Returns 0, or the mdb_key_read() error that stopped the key loop.
+ * stack is accepted for signature symmetry and is not used.
+ */
+static int
+comp_equality_candidates (
+       Operation *op,
+       DB_TXN *rtxn,
+       MatchingRuleAssertion *mra,
+       ComponentAssertion *ca,
+       ID *ids,
+       ID *tmp,
+       ID *stack)
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB      *db;
+       int i;
+       int rc;
+       slap_mask_t mask;
+       struct berval prefix = {0, NULL};
+       struct berval *keys = NULL;
+       MatchingRule *mr = mra->ma_rule;
+       MatchingRule *ca_mr;
+       Syntax *sat_syntax;
+       ComponentReference* cr_list, *cr;
+       AttrInfo *ai;
+
+       MDB_IDL_ALL( mdb, ids );
+
+       if ( !ca->ca_comp_ref )
+               return 0;
+
+       /* The key generator called below belongs to the assertion's own
+        * matching rule, so that is the rule that must be validated before
+        * it is dereferenced or invoked.
+        */
+       ca_mr = ca->ca_ma_rule;
+       if ( !ca_mr || !ca_mr->smr_filter )
+               return 0;
+
+       ai = mdb_attr_mask( op->o_bd->be_private, mra->ma_desc );
+       if ( !ai )
+               return 0;
+       cr_list = ai->ai_cr;
+
+       /* find a component reference to be indexed */
+       sat_syntax = ca_mr->smr_syntax;
+       for ( cr = cr_list ; cr ; cr = cr->cr_next ) {
+               if ( cr->cr_string.bv_len == ca->ca_comp_ref->cr_string.bv_len &&
+                       strncmp( cr->cr_string.bv_val, ca->ca_comp_ref->cr_string.bv_val,cr->cr_string.bv_len ) == 0 )
+                       break;
+       }
+
+       if ( !cr )
+               return 0;
+
+       rc = mdb_index_param( op->o_bd, mra->ma_desc, LDAP_FILTER_EQUALITY,
+               &db, &mask, &prefix );
+
+       if( rc != LDAP_SUCCESS ) {
+               return 0;
+       }
+
+       /* original preconditions on the enclosing assertion's rule are kept */
+       if( !mr ) {
+               return 0;
+       }
+
+       if( !mr->smr_filter ) {
+               return 0;
+       }
+
+       rc = (ca_mr->smr_filter)(
+               LDAP_FILTER_EQUALITY,
+               cr->cr_indexmask,
+               sat_syntax,
+               ca_mr,
+               &prefix,
+               &ca->ca_ma_value,
+               &keys, op->o_tmpmemctx );
+
+       if( rc != LDAP_SUCCESS ) {
+               return 0;
+       }
+
+       if( keys == NULL ) {
+               return 0;
+       }
+
+       for ( i= 0; keys[i].bv_val != NULL; i++ ) {
+               rc = mdb_key_read( op->o_bd, db, rtxn, &keys[i], tmp, NULL, 0 );
+
+               if( rc == DB_NOTFOUND ) {
+                       /* a key with no entries means nothing can match */
+                       MDB_IDL_ZERO( ids );
+                       rc = 0;
+                       break;
+               } else if( rc != LDAP_SUCCESS ) {
+                       break;
+               }
+
+               if( MDB_IDL_IS_ZERO( tmp ) ) {
+                       MDB_IDL_ZERO( ids );
+                       break;
+               }
+
+               if ( i == 0 ) {
+                       MDB_IDL_CPY( ids, tmp );
+               } else {
+                       mdb_idl_intersection( ids, tmp );
+               }
+
+               if( MDB_IDL_IS_ZERO( ids ) )
+                       break;
+       }
+       ber_bvarray_free_x( keys, op->o_tmpmemctx );
+
+       Debug( LDAP_DEBUG_TRACE,
+               "<= comp_equality_candidates: id=%ld, first=%ld, last=%ld\n",
+               (long) ids[0],
+               (long) MDB_IDL_FIRST(ids),
+               (long) MDB_IDL_LAST(ids) );
+       return( rc );
+}
+
+/* Equality filter on an aliased attribute.
+ *
+ * Builds a temporary matching-rule assertion from the attribute's equality
+ * rule and the aliased descriptor in aa, then evaluates the assertion's
+ * component filter (ava->aa_cf) through comp_candidates().  When the
+ * attribute type has no equality rule, ids is set to "all IDs" and 0 is
+ * returned.
+ */
+static int
+ava_comp_candidates (
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeAssertion *ava,
+       AttributeAliasing *aa,
+       ID *ids,
+       ID *tmp,
+       ID *stack )
+{
+       MatchingRuleAssertion assertion;
+
+       assertion.ma_rule = ava->aa_desc->ad_type->sat_equality;
+       if ( assertion.ma_rule == NULL ) {
+               struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+
+               MDB_IDL_ALL( mdb, ids );
+               return 0;
+       }
+
+       /* only the rule and the descriptor are filled in */
+       assertion.ma_desc = aa->aa_aliased_ad;
+
+       return comp_candidates ( op, rtxn, &assertion, ava->aa_cf, ids, tmp, stack );
+}
+
+/* Candidate ID list for a single component filter node.
+ *
+ * A NULL filter is a protocol error.  Computed nodes return their stored
+ * result, AND/OR nodes go to comp_list_candidates(), item nodes go to
+ * comp_equality_candidates().  NOT nodes and unrecognised choices have no
+ * index support: ids becomes "all IDs" and LDAP_PROTOCOL_ERROR is
+ * returned.
+ */
+static int
+comp_candidates (
+       Operation *op,
+       DB_TXN *rtxn,
+       MatchingRuleAssertion *mra,
+       ComponentFilter *f,
+       ID *ids,
+       ID *tmp,
+       ID *stack)
+{
+       if ( f == NULL )
+               return LDAP_PROTOCOL_ERROR;
+
+       Debug( LDAP_DEBUG_FILTER, "comp_candidates\n", 0, 0, 0 );
+
+       switch ( f->cf_choice ) {
+       case SLAPD_FILTER_COMPUTED:
+               return f->cf_result;
+
+       case LDAP_COMP_FILTER_AND:
+               return comp_list_candidates( op, rtxn, mra, f->cf_and,
+                       LDAP_COMP_FILTER_AND, ids, tmp, stack );
+
+       case LDAP_COMP_FILTER_OR:
+               return comp_list_candidates( op, rtxn, mra, f->cf_or,
+                       LDAP_COMP_FILTER_OR, ids, tmp, stack );
+
+       case LDAP_COMP_FILTER_ITEM:
+               return comp_equality_candidates( op, rtxn, mra, f->cf_ca,
+                       ids, tmp, stack );
+
+       case LDAP_COMP_FILTER_NOT:
+               /* No component indexing supported for NOT filter */
+               Debug( LDAP_DEBUG_FILTER, "\tComponent NOT\n", 0, 0, 0 );
+               break;
+
+       default:
+               break;
+       }
+
+       /* shared tail for NOT and unknown choices */
+       {
+               struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+               MDB_IDL_ALL( mdb, ids );
+       }
+       return LDAP_PROTOCOL_ERROR;
+}
+#endif
+
+/* Candidate ID list for an extensible-match filter.
+ *
+ * With LDAP_COMP_MATCH, an assertion carrying a component filter
+ * (mra->ma_cf) is handed to comp_candidates() and its result returned.
+ * Otherwise only assertions on entryDN are narrowed:
+ *   - distinguishedNameMatch: the one entry named by ma_value, if found;
+ *   - dnSuperiorMatch: the IDs reached by following bei_parent from the
+ *     parent of ma_value;
+ *   - subtree / one-level / subordinate match: mdb_dn2idl() run with
+ *     op->ors_scope temporarily replaced.
+ * The relative matches are only attempted when ma_value lies under the
+ * backend's suffix.  Every other case yields "all IDs".
+ *
+ * All non-component paths return 0; lookup failures just leave ids empty.
+ */
+static int
+ext_candidates(
+        Operation *op,
+               DB_TXN *rtxn,
+        MatchingRuleAssertion *mra,
+        ID *ids,
+        ID *tmp,
+        ID *stack)
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+
+#ifdef LDAP_COMP_MATCH
+       /*
+        * Currently only component indexing for componentFilterMatch is
+        * supported; indexing for other extensible filters is not supported yet.
+        */
+       if ( mra->ma_cf ) {
+               return comp_candidates ( op, rtxn, mra, mra->ma_cf, ids, tmp, stack);
+       }
+#endif
+       if ( mra->ma_desc == slap_schema.si_ad_entryDN ) {
+               int rc;
+               EntryInfo *ei;
+
+               MDB_IDL_ZERO( ids );
+               if ( mra->ma_rule == slap_schema.si_mr_distinguishedNameMatch ) {
+                       ei = NULL;
+                       rc = mdb_cache_find_ndn( op, rtxn, &mra->ma_value, &ei );
+                       if ( rc == LDAP_SUCCESS )
+                               mdb_idl_insert( ids, ei->bei_id );
+                       /* ei may be set even when the lookup did not succeed */
+                       if ( ei )
+                               mdb_cache_entryinfo_unlock( ei );
+                       return 0;
+               } else if ( mra->ma_rule && mra->ma_rule->smr_match ==
+                       dnRelativeMatch && dnIsSuffix( &mra->ma_value,
+                               op->o_bd->be_nsuffix )) {
+                       int scope;
+                       if ( mra->ma_rule == slap_schema.si_mr_dnSuperiorMatch ) {
+                               struct berval pdn;
+                               ei = NULL;
+                               dnParent( &mra->ma_value, &pdn );
+                               mdb_cache_find_ndn( op, rtxn, &pdn, &ei );
+                               if ( ei ) {
+                                       /* NOTE(review): ei is unlocked before its
+                                        * parent chain is walked -- confirm this
+                                        * is safe against concurrent changes.
+                                        */
+                                       mdb_cache_entryinfo_unlock( ei );
+                                       while ( ei && ei->bei_id ) {
+                                               mdb_idl_insert( ids, ei->bei_id );
+                                               ei = ei->bei_parent;
+                                       }
+                               }
+                               return 0;
+                       }
+                       if ( mra->ma_rule == slap_schema.si_mr_dnSubtreeMatch )
+                               scope = LDAP_SCOPE_SUBTREE;
+                       else if ( mra->ma_rule == slap_schema.si_mr_dnOneLevelMatch )
+                               scope = LDAP_SCOPE_ONELEVEL;
+                       else if ( mra->ma_rule == slap_schema.si_mr_dnSubordinateMatch )
+                               scope = LDAP_SCOPE_SUBORDINATE;
+                       else
+                               scope = LDAP_SCOPE_BASE;
+                       if ( scope > LDAP_SCOPE_BASE ) {
+                               ei = NULL;
+                               rc = mdb_cache_find_ndn( op, rtxn, &mra->ma_value, &ei );
+                               if ( ei )
+                                       mdb_cache_entryinfo_unlock( ei );
+                               if ( rc == LDAP_SUCCESS ) {
+                                       /* run the scope lookup with the rule's
+                                        * scope, then restore the search's own
+                                        */
+                                       int sc = op->ors_scope;
+                                       op->ors_scope = scope;
+                                       /* the result of mdb_dn2idl() is not
+                                        * propagated; 0 is returned regardless
+                                        */
+                                       rc = mdb_dn2idl( op, rtxn, &mra->ma_value, ei, ids,
+                                               stack );
+                                       op->ors_scope = sc;
+                               }
+                               return 0;
+                       }
+               }
+       }
+
+       MDB_IDL_ALL( mdb, ids );
+       return 0;
+}
+
+/* Combine the candidate lists of every filter in flist into ids.
+ *
+ * ftype selects how: LDAP_FILTER_AND intersects the per-element lists,
+ * any other value unions them.  Each element is evaluated by
+ * mdb_filter_candidates() into save, with save+MDB_IDL_UM_SIZE passed
+ * down as that call's own stack area, and tmp as scratch.
+ *
+ * Returns 0, DB_LOCK_DEADLOCK as soon as an element reports it, or the
+ * non-zero result that stopped the walk.
+ */
+static int
+list_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       Filter  *flist,
+       int             ftype,
+       ID *ids,
+       ID *tmp,
+       ID *save )
+{
+       int rc = 0;
+       Filter  *f;
+
+       Debug( LDAP_DEBUG_FILTER, "=> mdb_list_candidates 0x%x\n", ftype, 0, 0 );
+       for ( f = flist; f != NULL; f = f->f_next ) {
+               /* ignore precomputed scopes */
+               if ( f->f_choice == SLAPD_FILTER_COMPUTED &&
+                    f->f_result == LDAP_SUCCESS ) {
+                       continue;
+               }
+               MDB_IDL_ZERO( save );
+               rc = mdb_filter_candidates( op, rtxn, f, save, tmp,
+                       save+MDB_IDL_UM_SIZE );
+
+               if ( rc != 0 ) {
+                       /* a deadlock is passed straight up, skipping the
+                        * trailing debug output
+                        */
+                       if ( rc == DB_LOCK_DEADLOCK )
+                               return rc;
+
+                       /* in an AND a failed element is just left out of the
+                        * intersection; for any other ftype it ends the walk
+                        */
+                       if ( ftype == LDAP_FILTER_AND ) {
+                               rc = 0;
+                               continue;
+                       }
+                       break;
+               }
+
+               
+               /* NOTE(review): ids is seeded only when f is the list head; if
+                * the head was skipped above, ids keeps whatever the caller
+                * passed in -- confirm callers rely on that.
+                */
+               if ( ftype == LDAP_FILTER_AND ) {
+                       if ( f == flist ) {
+                               MDB_IDL_CPY( ids, save );
+                       } else {
+                               mdb_idl_intersection( ids, save );
+                       }
+                       /* an empty intersection cannot grow again */
+                       if( MDB_IDL_IS_ZERO( ids ) )
+                               break;
+               } else {
+                       if ( f == flist ) {
+                               MDB_IDL_CPY( ids, save );
+                       } else {
+                               mdb_idl_union( ids, save );
+                       }
+               }
+       }
+
+       if( rc == LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_FILTER,
+                       "<= mdb_list_candidates: id=%ld first=%ld last=%ld\n",
+                       (long) ids[0],
+                       (long) MDB_IDL_FIRST(ids),
+                       (long) MDB_IDL_LAST(ids) );
+
+       } else {
+               Debug( LDAP_DEBUG_FILTER,
+                       "<= mdb_list_candidates: undefined rc=%d\n",
+                       rc, 0, 0 );
+       }
+
+       return rc;
+}
+
+/* Candidate ID list for a presence filter on attribute desc.
+ *
+ * ids starts out as "all IDs" and stays that way, with 0 returned, when
+ * desc is objectClass or when mdb_index_param() reports no usable
+ * presence index.  Otherwise the index entry stored under the prefix key
+ * is read straight into ids; a missing key (DB_NOTFOUND) yields an empty
+ * list.
+ *
+ * Returns 0 on success, -1 when the index has no prefix, or the
+ * mdb_key_read() error.
+ */
+static int
+presence_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeDescription *desc,
+       ID *ids )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB *db;
+       int rc;
+       slap_mask_t mask;
+       struct berval prefix = {0, NULL};
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_presence_candidates (%s)\n",
+                       desc->ad_cname.bv_val, 0, 0 );
+
+       MDB_IDL_ALL( mdb, ids );
+
+       /* objectClass presence is answered with "all IDs" without an
+        * index lookup
+        */
+       if( desc == slap_schema.si_ad_objectClass ) {
+               return 0;
+       }
+
+       rc = mdb_index_param( op->o_bd, desc, LDAP_FILTER_PRESENT,
+               &db, &mask, &prefix );
+
+       if( rc == LDAP_INAPPROPRIATE_MATCHING ) {
+               /* not indexed */
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_presence_candidates: (%s) not indexed\n",
+                       desc->ad_cname.bv_val, 0, 0 );
+               return 0;
+       }
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_presence_candidates: (%s) index_param "
+                       "returned=%d\n",
+                       desc->ad_cname.bv_val, rc, 0 );
+               return 0;
+       }
+
+       if( prefix.bv_val == NULL ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_presence_candidates: (%s) no prefix\n",
+                       desc->ad_cname.bv_val, 0, 0 );
+               return -1;
+       }
+
+       rc = mdb_key_read( op->o_bd, db, rtxn, &prefix, ids, NULL, 0 );
+
+       if( rc == DB_NOTFOUND ) {
+               /* no index entry: no entry carries the attribute */
+               MDB_IDL_ZERO( ids );
+               rc = 0;
+       } else if( rc != LDAP_SUCCESS ) {
+               /* function name in this message was misspelled, which hid
+                * it from log searches for mdb_presence_candidates
+                */
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_presence_candidates: (%s) "
+                       "key read failed (%d)\n",
+                       desc->ad_cname.bv_val, rc, 0 );
+               goto done;
+       }
+
+       Debug(LDAP_DEBUG_TRACE,
+               "<= mdb_presence_candidates: id=%ld first=%ld last=%ld\n",
+               (long) ids[0],
+               (long) MDB_IDL_FIRST(ids),
+               (long) MDB_IDL_LAST(ids) );
+
+done:
+       return rc;
+}
+
+/* Candidate ID list for an equality filter (ava).
+ *
+ * entryDN assertions are answered directly from the DN cache: a found DN
+ * gives a one-element list, a DN that does not exist gives an empty list.
+ * For any other attribute, ids starts out as "all IDs" and stays that way,
+ * with 0 returned, when there is no usable equality index, no equality
+ * matching rule, no key generator, or no keys.  Otherwise the ID lists of
+ * all generated keys are read and intersected; a missing key
+ * (DB_NOTFOUND) or an empty list empties ids.
+ *
+ * Returns 0, or the lookup / mdb_key_read() error that ended processing.
+ */
+static int
+equality_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeAssertion *ava,
+       ID *ids,
+       ID *tmp )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB      *db;
+       int i;
+       int rc;
+       slap_mask_t mask;
+       struct berval prefix = {0, NULL};
+       struct berval *keys = NULL;
+       MatchingRule *mr;
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_equality_candidates (%s)\n",
+                       ava->aa_desc->ad_cname.bv_val, 0, 0 );
+
+       if ( ava->aa_desc == slap_schema.si_ad_entryDN ) {
+               EntryInfo *ei = NULL;
+               rc = mdb_cache_find_ndn( op, rtxn, &ava->aa_value, &ei );
+               if ( rc == LDAP_SUCCESS ) {
+                       /* exactly one ID can match */
+                       ids[0] = 1;
+                       ids[1] = ei->bei_id;
+               }
+               if ( ei ) {
+                       mdb_cache_entryinfo_unlock( ei );
+               }
+               /* A DN that does not exist is an empty result, not a failure;
+                * this matches how DB_NOTFOUND is treated in the key loop
+                * below.  Other errors are still reported to the caller.
+                */
+               if ( rc == DB_NOTFOUND ) {
+                       MDB_IDL_ZERO( ids );
+                       rc = 0;
+               }
+               return rc;
+       }
+
+       MDB_IDL_ALL( mdb, ids );
+
+       rc = mdb_index_param( op->o_bd, ava->aa_desc, LDAP_FILTER_EQUALITY,
+               &db, &mask, &prefix );
+
+       if ( rc == LDAP_INAPPROPRIATE_MATCHING ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "<= mdb_equality_candidates: (%s) not indexed\n",
+                       ava->aa_desc->ad_cname.bv_val, 0, 0 );
+               return 0;
+       }
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "<= mdb_equality_candidates: (%s) "
+                       "index_param failed (%d)\n",
+                       ava->aa_desc->ad_cname.bv_val, rc, 0 );
+               return 0;
+       }
+
+       mr = ava->aa_desc->ad_type->sat_equality;
+       if( !mr ) {
+               return 0;
+       }
+
+       if( !mr->smr_filter ) {
+               return 0;
+       }
+
+       /* turn the asserted value into index keys */
+       rc = (mr->smr_filter)(
+               LDAP_FILTER_EQUALITY,
+               mask,
+               ava->aa_desc->ad_type->sat_syntax,
+               mr,
+               &prefix,
+               &ava->aa_value,
+               &keys, op->o_tmpmemctx );
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_equality_candidates: (%s, %s) "
+                       "MR filter failed (%d)\n",
+                       prefix.bv_val, ava->aa_desc->ad_cname.bv_val, rc );
+               return 0;
+       }
+
+       if( keys == NULL ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_equality_candidates: (%s) no keys\n",
+                       ava->aa_desc->ad_cname.bv_val, 0, 0 );
+               return 0;
+       }
+
+       for ( i= 0; keys[i].bv_val != NULL; i++ ) {
+               rc = mdb_key_read( op->o_bd, db, rtxn, &keys[i], tmp, NULL, 0 );
+
+               if( rc == DB_NOTFOUND ) {
+                       MDB_IDL_ZERO( ids );
+                       rc = 0;
+                       break;
+               } else if( rc != LDAP_SUCCESS ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<= mdb_equality_candidates: (%s) "
+                               "key read failed (%d)\n",
+                               ava->aa_desc->ad_cname.bv_val, rc, 0 );
+                       break;
+               }
+
+               if( MDB_IDL_IS_ZERO( tmp ) ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<= mdb_equality_candidates: (%s) NULL\n",
+                               ava->aa_desc->ad_cname.bv_val, 0, 0 );
+                       MDB_IDL_ZERO( ids );
+                       break;
+               }
+
+               /* the first key seeds ids, later keys narrow it */
+               if ( i == 0 ) {
+                       MDB_IDL_CPY( ids, tmp );
+               } else {
+                       mdb_idl_intersection( ids, tmp );
+               }
+
+               if( MDB_IDL_IS_ZERO( ids ) )
+                       break;
+       }
+
+       ber_bvarray_free_x( keys, op->o_tmpmemctx );
+
+       Debug( LDAP_DEBUG_TRACE,
+               "<= mdb_equality_candidates: id=%ld, first=%ld, last=%ld\n",
+               (long) ids[0],
+               (long) MDB_IDL_FIRST(ids),
+               (long) MDB_IDL_LAST(ids) );
+       return( rc );
+}
+
+
+/* Candidate ID list for an approximate-match filter (ava).
+ *
+ * ids starts out as "all IDs" and stays that way, with 0 returned, when
+ * there is no usable approx index, no matching rule with a key generator,
+ * or no keys.  Otherwise the ID lists of all generated keys are read and
+ * intersected; a missing key (DB_NOTFOUND) or an empty list empties ids.
+ *
+ * Returns 0, or the mdb_key_read() error that stopped the key loop.
+ */
+static int
+approx_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeAssertion *ava,
+       ID *ids,
+       ID *tmp )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       DB      *db;
+       int i;
+       int rc;
+       slap_mask_t mask;
+       struct berval prefix = {0, NULL};
+       struct berval *keys = NULL;
+       MatchingRule *mr;
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_approx_candidates (%s)\n",
+                       ava->aa_desc->ad_cname.bv_val, 0, 0 );
+
+       MDB_IDL_ALL( mdb, ids );
+
+       rc = mdb_index_param( op->o_bd, ava->aa_desc, LDAP_FILTER_APPROX,
+               &db, &mask, &prefix );
+
+       if ( rc == LDAP_INAPPROPRIATE_MATCHING ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "<= mdb_approx_candidates: (%s) not indexed\n",
+                       ava->aa_desc->ad_cname.bv_val, 0, 0 );
+               return 0;
+       }
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "<= mdb_approx_candidates: (%s) "
+                       "index_param failed (%d)\n",
+                       ava->aa_desc->ad_cname.bv_val, rc, 0 );
+               return 0;
+       }
+
+       mr = ava->aa_desc->ad_type->sat_approx;
+       if( !mr ) {
+               /* no approx matching rule, try equality matching rule */
+               mr = ava->aa_desc->ad_type->sat_equality;
+       }
+
+       if( !mr ) {
+               return 0;
+       }
+
+       if( !mr->smr_filter ) {
+               return 0;
+       }
+
+       /* turn the asserted value into index keys */
+       rc = (mr->smr_filter)(
+               LDAP_FILTER_APPROX,
+               mask,
+               ava->aa_desc->ad_type->sat_syntax,
+               mr,
+               &prefix,
+               &ava->aa_value,
+               &keys, op->o_tmpmemctx );
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_approx_candidates: (%s, %s) "
+                       "MR filter failed (%d)\n",
+                       prefix.bv_val, ava->aa_desc->ad_cname.bv_val, rc );
+               return 0;
+       }
+
+       if( keys == NULL ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_approx_candidates: (%s) no keys (%s)\n",
+                       prefix.bv_val, ava->aa_desc->ad_cname.bv_val, 0 );
+               return 0;
+       }
+
+       for ( i= 0; keys[i].bv_val != NULL; i++ ) {
+               rc = mdb_key_read( op->o_bd, db, rtxn, &keys[i], tmp, NULL, 0 );
+
+               if( rc == DB_NOTFOUND ) {
+                       /* a key with no entries means nothing can match */
+                       MDB_IDL_ZERO( ids );
+                       rc = 0;
+                       break;
+               } else if( rc != LDAP_SUCCESS ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<= mdb_approx_candidates: (%s) "
+                               "key read failed (%d)\n",
+                               ava->aa_desc->ad_cname.bv_val, rc, 0 );
+                       break;
+               }
+
+               if( MDB_IDL_IS_ZERO( tmp ) ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<= mdb_approx_candidates: (%s) NULL\n",
+                               ava->aa_desc->ad_cname.bv_val, 0, 0 );
+                       MDB_IDL_ZERO( ids );
+                       break;
+               }
+
+               /* the first key seeds ids, later keys narrow it */
+               if ( i == 0 ) {
+                       MDB_IDL_CPY( ids, tmp );
+               } else {
+                       mdb_idl_intersection( ids, tmp );
+               }
+
+               if( MDB_IDL_IS_ZERO( ids ) )
+                       break;
+       }
+
+       ber_bvarray_free_x( keys, op->o_tmpmemctx );
+
+       Debug( LDAP_DEBUG_TRACE, "<= mdb_approx_candidates %ld, first=%ld, last=%ld\n",
+               (long) ids[0],
+               (long) MDB_IDL_FIRST(ids),
+               (long) MDB_IDL_LAST(ids) );
+       return( rc );
+}
+
+/*
+ * Compute the candidate IDs for a substrings assertion.
+ *
+ * ids is first initialised with MDB_IDL_ALL(). When the attribute has a
+ * substrings index and a matching rule with a filter function, the IDL
+ * of every generated key is read into tmp and intersected into ids; a
+ * key that is not found, or whose IDL is empty, zeroes ids. On every
+ * path where the index cannot be consulted the function returns 0 and
+ * leaves ids as MDB_IDL_ALL() set it.
+ *
+ * Returns 0 on the early exits, otherwise the status of the last
+ * mdb_key_read() with DB_NOTFOUND mapped to 0.
+ */
+static int
+substring_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       SubstringsAssertion     *sub,
+       ID *ids,
+       ID *tmp )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       char *attr = sub->sa_desc->ad_cname.bv_val;
+       MatchingRule *rule;
+       struct berval *keys = NULL;
+       struct berval *kp;
+       struct berval prefix = {0, NULL};
+       slap_mask_t mask;
+       DB      *db;
+       int rc;
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_substring_candidates (%s)\n",
+                       attr, 0, 0 );
+
+       MDB_IDL_ALL( mdb, ids );
+
+       rc = mdb_index_param( op->o_bd, sub->sa_desc, LDAP_FILTER_SUBSTRINGS,
+               &db, &mask, &prefix );
+
+       if ( rc == LDAP_INAPPROPRIATE_MATCHING ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "<= mdb_substring_candidates: (%s) not indexed\n",
+                       attr, 0, 0 );
+               return 0;
+       }
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "<= mdb_substring_candidates: (%s) "
+                       "index_param failed (%d)\n",
+                       attr, rc, 0 );
+               return 0;
+       }
+
+       /* without a substrings rule that can build keys the index is useless */
+       rule = sub->sa_desc->ad_type->sat_substr;
+       if( rule == NULL || rule->smr_filter == NULL ) {
+               return 0;
+       }
+
+       rc = (rule->smr_filter)(
+               LDAP_FILTER_SUBSTRINGS,
+               mask,
+               sub->sa_desc->ad_type->sat_syntax,
+               rule,
+               &prefix,
+               sub,
+               &keys, op->o_tmpmemctx );
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_substring_candidates: (%s) "
+                       "MR filter failed (%d)\n",
+                       attr, rc, 0 );
+               return 0;
+       }
+
+       if( keys == NULL ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_substring_candidates: (0x%04lx) no keys (%s)\n",
+                       mask, attr, 0 );
+               return 0;
+       }
+
+       /* the key array is terminated by an element with a NULL bv_val */
+       for ( kp = keys; kp->bv_val != NULL; kp++ ) {
+               rc = mdb_key_read( op->o_bd, db, rtxn, kp, tmp, NULL, 0 );
+
+               if( rc == DB_NOTFOUND ) {
+                       /* nothing stored under this key: no candidates at all */
+                       MDB_IDL_ZERO( ids );
+                       rc = 0;
+                       break;
+               }
+
+               if( rc != LDAP_SUCCESS ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<= mdb_substring_candidates: (%s) "
+                               "key read failed (%d)\n",
+                               attr, rc, 0 );
+                       break;
+               }
+
+               if( MDB_IDL_IS_ZERO( tmp ) ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<= mdb_substring_candidates: (%s) NULL\n",
+                               attr, 0, 0 );
+                       MDB_IDL_ZERO( ids );
+                       break;
+               }
+
+               /* the first key seeds the result, later keys narrow it */
+               if ( kp == keys ) {
+                       MDB_IDL_CPY( ids, tmp );
+               } else {
+                       mdb_idl_intersection( ids, tmp );
+               }
+
+               if( MDB_IDL_IS_ZERO( ids ) ) {
+                       break;
+               }
+       }
+
+       ber_bvarray_free_x( keys, op->o_tmpmemctx );
+
+       Debug( LDAP_DEBUG_TRACE, "<= mdb_substring_candidates: %ld, first=%ld, last=%ld\n",
+               (long) ids[0],
+               (long) MDB_IDL_FIRST(ids),
+               (long) MDB_IDL_LAST(ids) );
+       return( rc );
+}
+
+/*
+ * Compute the candidate IDs for an inequality assertion using the
+ * attribute's equality index.
+ *
+ * ids is first initialised with MDB_IDL_ALL(); on every path where the
+ * index cannot be consulted the function returns 0 and leaves it that
+ * way. Otherwise ids is zeroed and mdb_key_read() is called repeatedly
+ * with a saved cursor, each resulting IDL being unioned into ids, until
+ * the read reports DB_NOTFOUND, fails, yields an empty IDL, or the
+ * operation's unchecked limit is reached.
+ *
+ * gtorlt is handed through to mdb_key_read() unchanged.
+ * NOTE(review): presumably it selects the scan direction - confirm.
+ *
+ * Returns 0 on the early exits, otherwise the status of the last
+ * mdb_key_read() with DB_NOTFOUND mapped to 0.
+ */
+static int
+inequality_candidates(
+       Operation *op,
+       DB_TXN *rtxn,
+       AttributeAssertion *ava,
+       ID *ids,
+       ID *tmp,
+       int gtorlt )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       char *attr = ava->aa_desc->ad_cname.bv_val;
+       MatchingRule *rule;
+       struct berval *keys = NULL;
+       struct berval prefix = {0, NULL};
+       slap_mask_t mask;
+       DBC * cursor = NULL;
+       DB      *db;
+       int rc;
+
+       Debug( LDAP_DEBUG_TRACE, "=> mdb_inequality_candidates (%s)\n",
+                       attr, 0, 0 );
+
+       MDB_IDL_ALL( mdb, ids );
+
+       rc = mdb_index_param( op->o_bd, ava->aa_desc, LDAP_FILTER_EQUALITY,
+               &db, &mask, &prefix );
+
+       if ( rc == LDAP_INAPPROPRIATE_MATCHING ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "<= mdb_inequality_candidates: (%s) not indexed\n",
+                       attr, 0, 0 );
+               return 0;
+       }
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "<= mdb_inequality_candidates: (%s) "
+                       "index_param failed (%d)\n",
+                       attr, rc, 0 );
+               return 0;
+       }
+
+       /* the equality rule must be able to turn the value into a key */
+       rule = ava->aa_desc->ad_type->sat_equality;
+       if( rule == NULL || rule->smr_filter == NULL ) {
+               return 0;
+       }
+
+       rc = (rule->smr_filter)(
+               LDAP_FILTER_EQUALITY,
+               mask,
+               ava->aa_desc->ad_type->sat_syntax,
+               rule,
+               &prefix,
+               &ava->aa_value,
+               &keys, op->o_tmpmemctx );
+
+       if( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_inequality_candidates: (%s, %s) "
+                       "MR filter failed (%d)\n",
+                       prefix.bv_val, attr, rc );
+               return 0;
+       }
+
+       if( keys == NULL ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       "<= mdb_inequality_candidates: (%s) no keys\n",
+                       attr, 0, 0 );
+               return 0;
+       }
+
+       /* from here on the result is built up by union, so start empty */
+       MDB_IDL_ZERO( ids );
+       for (;;) {
+               rc = mdb_key_read( op->o_bd, db, rtxn, &keys[0], tmp, &cursor, gtorlt );
+
+               if( rc == DB_NOTFOUND ) {
+                       rc = 0;
+                       break;
+               }
+
+               if( rc != LDAP_SUCCESS ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                              "<= mdb_inequality_candidates: (%s) "
+                              "key read failed (%d)\n",
+                              attr, rc, 0 );
+                       break;
+               }
+
+               if( MDB_IDL_IS_ZERO( tmp ) ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                              "<= mdb_inequality_candidates: (%s) NULL\n",
+                              attr, 0, 0 );
+                       break;
+               }
+
+               mdb_idl_union( ids, tmp );
+
+               /* stop early once the unchecked limit is reached; the
+                * cursor is closed here on this exit path
+                */
+               if( op->ors_limit && op->ors_limit->lms_s_unchecked != -1 &&
+                       MDB_IDL_N( ids ) >= (unsigned) op->ors_limit->lms_s_unchecked ) {
+                       cursor->c_close( cursor );
+                       break;
+               }
+       }
+       ber_bvarray_free_x( keys, op->o_tmpmemctx );
+
+       Debug( LDAP_DEBUG_TRACE,
+               "<= mdb_inequality_candidates: id=%ld, first=%ld, last=%ld\n",
+               (long) ids[0],
+               (long) MDB_IDL_FIRST(ids),
+               (long) MDB_IDL_LAST(ids) );
+       return( rc );
+}
diff --git a/servers/slapd/back-mdb/id2entry.c b/servers/slapd/back-mdb/id2entry.c
new file mode 100644 (file)
index 0000000..951cb0b
--- /dev/null
@@ -0,0 +1,440 @@
+/* id2entry.c - routines to deal with the id2entry database */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+#include <ac/errno.h>
+
+#include "back-mdb.h"
+
+static int mdb_id2entry_put(
+       BackendDB *be,
+       DB_TXN *tid,
+       Entry *e,
+       int flag )
+{
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       DB *db = mdb->bi_id2entry->bdi_db;
+       DBT key, data;
+       struct berval bv;
+       int rc;
+       ID nid;
+#ifdef MDB_HIER
+       struct berval odn, ondn;
+
+       /* We only store rdns, and they go in the dn2id database. */
+
+       odn = e->e_name; ondn = e->e_nname;
+
+       e->e_name = slap_empty_bv;
+       e->e_nname = slap_empty_bv;
+#endif
+       DBTzero( &key );
+
+       /* Store ID in BigEndian format */
+       key.data = &nid;
+       key.size = sizeof(ID);
+       MDB_ID2DISK( e->e_id, &nid );
+
+       rc = entry_encode( e, &bv );
+#ifdef MDB_HIER
+       e->e_name = odn; e->e_nname = ondn;
+#endif
+       if( rc != LDAP_SUCCESS ) {
+               return -1;
+       }
+
+       DBTzero( &data );
+       bv2DBT( &bv, &data );
+
+       rc = db->put( db, tid, &key, &data, flag );
+
+       free( bv.bv_val );
+       return rc;
+}
+
+/*
+ * This routine adds an entry on disk; the put is issued with
+ * DB_NOOVERWRITE.
+ * The cache should already be updated.
+ */
+int mdb_id2entry_add(
+       BackendDB *be,
+       DB_TXN *tid,
+       Entry *e )
+{
+       const int put_flag = DB_NOOVERWRITE;
+
+       return mdb_id2entry_put( be, tid, e, put_flag );
+}
+
+/*
+ * Write entry e to the id2entry database with no put flags set.
+ */
+int mdb_id2entry_update(
+       BackendDB *be,
+       DB_TXN *tid,
+       Entry *e )
+{
+       const int put_flag = 0;
+
+       return mdb_id2entry_put( be, tid, e, put_flag );
+}
+
+int mdb_id2entry(
+       BackendDB *be,
+       DB_TXN *tid,
+       ID id,
+       Entry **e )
+{
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       DB *db = mdb->bi_id2entry->bdi_db;
+       DBT key, data;
+       DBC *cursor;
+       EntryHeader eh;
+       char buf[16];
+       int rc = 0, off;
+       ID nid;
+
+       *e = NULL;
+
+       DBTzero( &key );
+       key.data = &nid;
+       key.size = sizeof(ID);
+       MDB_ID2DISK( id, &nid );
+
+       DBTzero( &data );
+       data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+       /* fetch it */
+       rc = db->cursor( db, tid, &cursor, mdb->bi_db_opflags );
+       if ( rc ) return rc;
+
+       /* Get the nattrs / nvals counts first */
+       data.ulen = data.dlen = sizeof(buf);
+       data.data = buf;
+       rc = cursor->c_get( cursor, &key, &data, DB_SET );
+       if ( rc ) goto finish;
+
+
+       eh.bv.bv_val = buf;
+       eh.bv.bv_len = data.size;
+       rc = entry_header( &eh );
+       if ( rc ) goto finish;
+
+       /* Get the size */
+       data.flags ^= DB_DBT_PARTIAL;
+       data.ulen = 0;
+       rc = cursor->c_get( cursor, &key, &data, DB_CURRENT );
+       if ( rc != DB_BUFFER_SMALL ) goto finish;
+
+       /* Allocate a block and retrieve the data */
+       off = eh.data - eh.bv.bv_val;
+       eh.bv.bv_len = eh.nvals * sizeof( struct berval ) + data.size;
+       eh.bv.bv_val = ch_malloc( eh.bv.bv_len );
+       eh.data = eh.bv.bv_val + eh.nvals * sizeof( struct berval );
+       data.data = eh.data;
+       data.ulen = data.size;
+
+       /* skip past already parsed nattr/nvals */
+       eh.data += off;
+
+       rc = cursor->c_get( cursor, &key, &data, DB_CURRENT );
+
+finish:
+       cursor->c_close( cursor );
+
+       if( rc != 0 ) {
+               return rc;
+       }
+
+#ifdef SLAP_ZONE_ALLOC
+       rc = entry_decode(&eh, e, mdb->bi_cache.c_zctx);
+#else
+       rc = entry_decode(&eh, e);
+#endif
+
+       if( rc == 0 ) {
+               (*e)->e_id = id;
+       } else {
+               /* only free on error. On success, the entry was
+                * decoded in place.
+                */
+#ifndef SLAP_ZONE_ALLOC
+               ch_free(eh.bv.bv_val);
+#endif
+       }
+#ifdef SLAP_ZONE_ALLOC
+       ch_free(eh.bv.bv_val);
+#endif
+
+       return rc;
+}
+
+/*
+ * Remove the record of entry e from the id2entry database.
+ * Returns the result of the database delete call.
+ */
+int mdb_id2entry_delete(
+       BackendDB *be,
+       DB_TXN *tid,
+       Entry *e )
+{
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       DB *db = mdb->bi_id2entry->bdi_db;
+       DBT key;
+       ID disk_id;
+
+       /* the key is the entry ID as converted by MDB_ID2DISK */
+       MDB_ID2DISK( e->e_id, &disk_id );
+
+       DBTzero( &key );
+       key.size = sizeof(ID);
+       key.data = &disk_id;
+
+       /* delete from database */
+       return db->del( db, tid, &key, 0 );
+}
+
+/*
+ * Free an entry that was produced by this backend. Always returns 0.
+ *
+ * Our entries are allocated in two blocks; the data comes from
+ * the db itself and the Entry structure and associated pointers
+ * are allocated in entry_decode. The db data pointer is saved
+ * in e_bv.
+ */
+int mdb_entry_return(
+       Entry *e
+)
+{
+       char *blk = e->e_bv.bv_val;
+
+       if ( blk != NULL ) {
+               char *nname = e->e_nname.bv_val;
+               char *blk_end = blk + e->e_bv.bv_len;
+
+               /* See if the DNs were changed by modrdn: if the normalized
+                * DN no longer points into the db block, both DNs are freed
+                * separately.
+                */
+               if( nname < blk || nname > blk_end ) {
+                       ch_free(e->e_name.bv_val);
+                       ch_free(e->e_nname.bv_val);
+               }
+               /* clear the DN pointers before entry_free() sees the entry */
+               e->e_nname.bv_val = NULL;
+               e->e_name.bv_val = NULL;
+
+               /* In tool mode the e_bv buffer is realloc'd, leave it alone */
+               if( (slapMode & SLAP_TOOL_MODE) == 0 ) {
+                       free( blk );
+               }
+               BER_BVZERO( &e->e_bv );
+       }
+
+       entry_free( e );
+       return 0;
+}
+
+int mdb_entry_release(
+       Operation *op,
+       Entry *e,
+       int rw )
+{      /* Give back an entry previously handed out by this backend.
+        *
+        * In server mode an entry without e_private is simply freed with
+        * mdb_entry_return(); otherwise it is returned to the cache, and
+        * when the operation carries no transaction the lock recorded for
+        * the entry's ID in the per-operation lock list is released and
+        * unlinked. In any other mode the entry is detached from its
+        * EntryInfo (if it has one) and freed.
+        *
+        * rw is passed through to the cache-return calls. Always returns 0,
+        * except that the server-mode "not cached" path returns whatever
+        * mdb_entry_return() returns.
+        */
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       struct mdb_op_info *boi;
+       OpExtra *oex;
+       /* slapMode : SLAP_SERVER_MODE, SLAP_TOOL_MODE,
+                       SLAP_TRUNCATE_MODE, SLAP_UNDEFINED_MODE */
+       if ( slapMode == SLAP_SERVER_MODE ) {
+               /* If not in our cache, just free it */
+               if ( !e->e_private ) {
+#ifdef SLAP_ZONE_ALLOC
+                       return mdb_entry_return( mdb, e, -1 );
+#else
+                       return mdb_entry_return( e );
+#endif
+               }
+               /* free entry and reader or writer lock.
+                * First look for this backend's record among the
+                * operation's extras; records are keyed by the mdb pointer.
+                */
+               LDAP_SLIST_FOREACH( oex, &op->o_extra, oe_next ) {
+                       if ( oex->oe_key == mdb ) break;
+               }
+               boi = (struct mdb_op_info *)oex;
+
+               /* lock is freed with txn; the same unlocked return is also
+                * used when no per-operation record was found at all
+                */
+               if ( !boi || boi->boi_txn ) {
+                       mdb_unlocked_cache_return_entry_rw( mdb, e, rw );
+               } else {
+                       struct mdb_lock_info *bli, *prev;       /* prev trails bli so the match can be unlinked.
+                        * NOTE(review): prev starts as a cast of &boi->boi_locks, so
+                        * writing prev->bli_next updates the list head only if
+                        * bli_next is the first member of struct mdb_lock_info -
+                        * confirm against the struct definition.
+                        */
+                       for ( prev=(struct mdb_lock_info *)&boi->boi_locks,
+                               bli = boi->boi_locks; bli; prev=bli, bli=bli->bli_next ) {
+                               if ( bli->bli_id == e->e_id ) {
+                                       mdb_cache_return_entry_rw( mdb, e, rw, &bli->bli_lock );
+                                       prev->bli_next = bli->bli_next;
+                                       /* Cleanup, or let caller know we unlocked */
+                                       if ( bli->bli_flag & BLI_DONTFREE )
+                                               bli->bli_flag = 0;
+                                       else
+                                               op->o_tmpfree( bli, op->o_tmpmemctx );
+                                       break;
+                               }
+                       }
+                       if ( !boi->boi_locks ) {        /* no locks left: drop the per-operation record too */
+                               LDAP_SLIST_REMOVE( &op->o_extra, &boi->boi_oe, OpExtra, oe_next );
+                               if ( !(boi->boi_flag & BOI_DONTFREE))
+                                       op->o_tmpfree( boi, op->o_tmpmemctx );
+                       }
+               }
+       } else {        /* not server mode: detach from the EntryInfo, then free */
+#ifdef SLAP_ZONE_ALLOC
+               int zseq = -1;
+               if (e->e_private != NULL) {
+                       BEI(e)->bei_e = NULL;
+                       zseq = BEI(e)->bei_zseq;
+               }
+#else
+               if (e->e_private != NULL)
+                       BEI(e)->bei_e = NULL;
+#endif
+               e->e_private = NULL;
+#ifdef SLAP_ZONE_ALLOC
+               mdb_entry_return ( mdb, e, zseq );
+#else
+               mdb_entry_return ( e );
+#endif
+       }
+       return 0;
+}
+
+/*
+ * Look up the entry named by ndn, optionally requiring that it has
+ * objectClass oc and/or carries attribute at (either may be NULL).
+ *
+ * Returns LDAP_SUCCESS IFF we can retrieve the specified entry and it
+ * meets those requirements; only then is *ent written. In server mode
+ * *ent is the located entry itself and, when the operation has no
+ * transaction, its lock is recorded in the per-operation lock list so
+ * that mdb_entry_release() can drop it later. In other modes *ent is a
+ * duplicate and the located entry is returned to the cache at once.
+ *
+ * Other return values:
+ *   LDAP_OTHER              no reader transaction, or unexpected lookup error
+ *   LDAP_BUSY               lookup hit a deadlock / lock-not-granted, or
+ *                           mdb_dn2entry() itself returned LDAP_BUSY
+ *   LDAP_NO_SUCH_OBJECT     no entry was found for ndn
+ *   LDAP_NO_SUCH_ATTRIBUTE  the oc or at requirement is not met
+ */
+int mdb_entry_get(
+       Operation *op,
+       struct berval *ndn,
+       ObjectClass *oc,
+       AttributeDescription *at,
+       int rw,
+       Entry **ent )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       struct mdb_op_info *boi = NULL;
+       DB_TXN *txn = NULL;
+       Entry *e = NULL;
+       /* Start from NULL: ei is tested below even when the lookup reports
+        * DB_NOTFOUND, and nothing visible here guarantees that
+        * mdb_dn2entry() stores into it on that path.
+        */
+       EntryInfo *ei = NULL;
+       int     rc;
+       const char *at_name = at ? at->ad_cname.bv_val : "(null)";
+
+       DB_LOCK         lock;
+
+       Debug( LDAP_DEBUG_ARGS,
+               "=> mdb_entry_get: ndn: \"%s\"\n", ndn->bv_val, 0, 0 );
+       Debug( LDAP_DEBUG_ARGS,
+               "=> mdb_entry_get: oc: \"%s\", at: \"%s\"\n",
+               oc ? oc->soc_cname.bv_val : "(null)", at_name, 0);
+
+       /* pick up the transaction of this operation, if it has one */
+       if( op ) {
+               OpExtra *oex;
+               LDAP_SLIST_FOREACH( oex, &op->o_extra, oe_next ) {
+                       if ( oex->oe_key == mdb ) break;
+               }
+               boi = (struct mdb_op_info *)oex;
+               if ( boi )
+                       txn = boi->boi_txn;
+       }
+
+       if ( !txn ) {
+               rc = mdb_reader_get( op, mdb->bi_dbenv, &txn );
+               switch(rc) {
+               case 0:
+                       break;
+               default:
+                       return LDAP_OTHER;
+               }
+       }
+
+dn2entry_retry:
+       /* can we find entry */
+       rc = mdb_dn2entry( op, txn, ndn, &ei, 0, &lock );
+       switch( rc ) {
+       case DB_NOTFOUND:
+       case 0:
+               break;
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               /* the txn must abort and retry */
+               if ( txn ) {
+                       if ( boi ) boi->boi_err = rc;
+                       return LDAP_BUSY;
+               }
+               ldap_pvt_thread_yield();
+               goto dn2entry_retry;
+       default:
+               if ( boi ) boi->boi_err = rc;
+               return (rc != LDAP_BUSY) ? LDAP_OTHER : LDAP_BUSY;
+       }
+       if (ei) e = ei->bei_e;
+       if (e == NULL) {
+               Debug( LDAP_DEBUG_ACL,
+                       "=> mdb_entry_get: cannot find entry: \"%s\"\n",
+                               ndn->bv_val, 0, 0 );
+               return LDAP_NO_SUCH_OBJECT;
+       }
+
+       Debug( LDAP_DEBUG_ACL,
+               "=> mdb_entry_get: found entry: \"%s\"\n",
+               ndn->bv_val, 0, 0 );
+
+       if ( oc && !is_entry_objectclass( e, oc, 0 )) {
+               Debug( LDAP_DEBUG_ACL,
+                       "<= mdb_entry_get: failed to find objectClass %s\n",
+                       oc->soc_cname.bv_val, 0, 0 );
+               rc = LDAP_NO_SUCH_ATTRIBUTE;
+               goto return_results;
+       }
+
+       /* NOTE: attr_find() or attrs_find()? */
+       if ( at && attr_find( e->e_attrs, at ) == NULL ) {
+               Debug( LDAP_DEBUG_ACL,
+                       "<= mdb_entry_get: failed to find attribute %s\n",
+                       at->ad_cname.bv_val, 0, 0 );
+               rc = LDAP_NO_SUCH_ATTRIBUTE;
+               goto return_results;
+       }
+
+return_results:
+       if( rc != LDAP_SUCCESS ) {
+               /* free entry */
+               mdb_cache_return_entry_rw(mdb, e, rw, &lock);
+
+       } else {
+               if ( slapMode == SLAP_SERVER_MODE ) {
+                       *ent = e;
+                       /* big drag. we need a place to store a read lock so we can
+                        * release it later?? If we're in a txn, nothing is needed
+                        * here because the locks will go away with the txn.
+                        */
+                       if ( op ) {
+                               if ( !boi ) {
+                                       boi = op->o_tmpcalloc(1,sizeof(struct mdb_op_info),op->o_tmpmemctx);
+                                       boi->boi_oe.oe_key = mdb;
+                                       LDAP_SLIST_INSERT_HEAD( &op->o_extra, &boi->boi_oe, oe_next );
+                               }
+                               if ( !boi->boi_txn ) {
+                                       struct mdb_lock_info *bli;
+                                       bli = op->o_tmpalloc( sizeof(struct mdb_lock_info),
+                                               op->o_tmpmemctx );
+                                       bli->bli_next = boi->boi_locks;
+                                       bli->bli_id = e->e_id;
+                                       bli->bli_flag = 0;
+                                       bli->bli_lock = lock;
+                                       boi->boi_locks = bli;
+                               }
+                       }
+               } else {
+                       *ent = entry_dup( e );
+                       mdb_cache_return_entry_rw(mdb, e, rw, &lock);
+               }
+       }
+
+       Debug( LDAP_DEBUG_TRACE,
+               "mdb_entry_get: rc=%d\n",
+               rc, 0, 0 );
+       return(rc);
+}
diff --git a/servers/slapd/back-mdb/idl.c b/servers/slapd/back-mdb/idl.c
new file mode 100644 (file)
index 0000000..c111630
--- /dev/null
@@ -0,0 +1,1575 @@
+/* idl.c - ldap id list handling routines */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+#include "idl.h"
+
+/* Larger / smaller of two values. Arguments are fully parenthesized so
+ * that expressions can be passed safely; each argument may still be
+ * evaluated more than once, so avoid side effects.
+ */
+#define IDL_MAX(x,y)   ( (x) > (y) ? (x) : (y) )
+#define IDL_MIN(x,y)   ( (x) < (y) ? (x) : (y) )
+
+/* Three-way comparison: -1 if x < y, 1 if x > y, 0 if equal. */
+#define IDL_CMP(x,y)   ( (x) < (y) ? -1 : ( (x) > (y) ? 1 : 0 ) )
+
+#define IDL_LRU_DELETE( mdb, e ) do { /* unlink (e) from the LRU ring of (mdb), fixing head and tail; both arguments are evaluated several times */ \
+       if ( (e) == (mdb)->bi_idl_lru_head ) { \
+               if ( (e)->idl_lru_next == (mdb)->bi_idl_lru_head ) { /* (e) is its own successor: it was the only element */ \
+                       (mdb)->bi_idl_lru_head = NULL; \
+               } else { \
+                       (mdb)->bi_idl_lru_head = (e)->idl_lru_next; \
+               } \
+       } \
+       if ( (e) == (mdb)->bi_idl_lru_tail ) { \
+               if ( (e)->idl_lru_prev == (mdb)->bi_idl_lru_tail ) { /* (e) is its own predecessor: head must already be cleared */ \
+                       assert( (mdb)->bi_idl_lru_head == NULL ); \
+                       (mdb)->bi_idl_lru_tail = NULL; \
+               } else { \
+                       (mdb)->bi_idl_lru_tail = (e)->idl_lru_prev; \
+               } \
+       } \
+       (e)->idl_lru_next->idl_lru_prev = (e)->idl_lru_prev; /* splice (e) out in both directions */ \
+       (e)->idl_lru_prev->idl_lru_next = (e)->idl_lru_next; \
+} while ( 0 )
+
+/*
+ * Ordering function for IDL cache entries: compares the db pointers
+ * first, then the key lengths, and finally the key bytes.
+ */
+static int
+mdb_idl_entry_cmp( const void *v_idl1, const void *v_idl2 )
+{
+       const mdb_idl_cache_entry_t *lhs = v_idl1;
+       const mdb_idl_cache_entry_t *rhs = v_idl2;
+       int diff;
+
+       diff = SLAP_PTRCMP( lhs->db, rhs->db );
+       if ( diff != 0 ) {
+               return diff;
+       }
+
+       diff = lhs->kstr.bv_len - rhs->kstr.bv_len;
+       if ( diff != 0 ) {
+               return diff;
+       }
+
+       /* same db and same key length: the key bytes decide */
+       return memcmp( lhs->kstr.bv_val, rhs->kstr.bv_val, lhs->kstr.bv_len );
+}
+
+#if IDL_DEBUG > 0
+/*
+ * Debug-only sanity check of an ID list: a range must have
+ * first <= last, a plain list must be strictly ascending.
+ */
+static void idl_check( ID *ids )
+{
+       ID pos;
+
+       if( MDB_IDL_IS_RANGE( ids ) ) {
+               assert( MDB_IDL_RANGE_FIRST(ids) <= MDB_IDL_RANGE_LAST(ids) );
+               return;
+       }
+
+       /* compare every element with its predecessor */
+       for( pos = 2; pos <= ids[0]; pos++ ) {
+               assert( ids[pos-1] < ids[pos] );
+       }
+}
+
+#if IDL_DEBUG > 1
+/*
+ * Debug-only dump of an ID list through Debug(), followed by an
+ * idl_check() of the same list. Plain lists are printed 16 IDs per
+ * line.
+ */
+static void idl_dump( ID *ids )
+{
+       if( !MDB_IDL_IS_RANGE( ids ) ) {
+               ID n;
+
+               Debug( LDAP_DEBUG_ANY, "IDL: size %ld", (long) ids[0], 0, 0 );
+
+               for( n = 1; n <= ids[0]; n++ ) {
+                       /* start a new output line before IDs 1, 17, 33, ... */
+                       if( n % 16 == 1 ) {
+                               Debug( LDAP_DEBUG_ANY, "\n", 0, 0, 0 );
+                       }
+                       Debug( LDAP_DEBUG_ANY, "  %02lx", (long) ids[n], 0, 0 );
+               }
+
+               Debug( LDAP_DEBUG_ANY, "\n", 0, 0, 0 );
+
+       } else {
+               Debug( LDAP_DEBUG_ANY,
+                       "IDL: range ( %ld - %ld )\n",
+                       (long) MDB_IDL_RANGE_FIRST( ids ),
+                       (long) MDB_IDL_RANGE_LAST( ids ) );
+       }
+
+       idl_check( ids );
+}
+#endif /* IDL_DEBUG > 1 */
+#endif /* IDL_DEBUG > 0 */
+
+unsigned mdb_idl_search( ID *ids, ID id )
+{      /* Locate id in the ID list ids. ids[0] is read as the number of
+        * elements and the elements themselves are ids[1] .. ids[ids[0]].
+        * The return value is a 1-based slot: the slot holding id when it
+        * is present, otherwise the slot where id would have to be
+        * inserted. For an empty list the result is 1.
+        *
+        * NOTE(review): ids[0] is used as a count here, so a range IDL is
+        * presumably not meant to be passed in; mdb_idl_insert() and
+        * mdb_idl_delete() both test MDB_IDL_IS_RANGE() before calling.
+        */
+#define IDL_BINARY_SEARCH 1
+#ifdef IDL_BINARY_SEARCH
+       /*
+        * binary search of id in ids
+        * if found, returns position of id
+        * if not found, returns first position greater than id
+        */
+       unsigned base = 0;
+       unsigned cursor = 0;
+       int val = 0;
+       unsigned n = ids[0];
+
+#if IDL_DEBUG > 0
+       idl_check( ids );
+#endif
+
+       while( 0 < n ) {        /* n is the size of the window still to be searched */
+               int pivot = n >> 1;
+               cursor = base + pivot;
+               val = IDL_CMP( id, ids[cursor + 1] );   /* cursor is 0-based, the list is 1-based */
+
+               if( val < 0 ) {
+                       n = pivot;
+
+               } else if ( val > 0 ) {
+                       base = cursor + 1;
+                       n -= pivot + 1;
+
+               } else {
+                       return cursor + 1;
+               }
+       }
+       
+       if( val > 0 ) { /* not found: the last comparison tells on which side of cursor id belongs */
+               return cursor + 2;
+       } else {
+               return cursor + 1;
+       }
+
+#else
+       /* (reverse) linear search */
+       int i;
+
+#if IDL_DEBUG > 0
+       idl_check( ids );
+#endif
+
+       for( i=ids[0]; i; i-- ) {
+               if( id > ids[i] ) {
+                       break;
+               }
+       }
+
+       return i+1;
+#endif
+}
+
+/*
+ * Insert id into the ID list ids.
+ *
+ * For a range IDL the range is widened when id lies outside of it. For
+ * a plain list id is inserted at its sorted position; when the new
+ * element count reaches MDB_IDL_DB_MAX the list is converted into a
+ * range spanning its smallest and largest ID instead.
+ *
+ * Returns 0 on success, -1 if id is already present (or already inside
+ * the range), -2 on an internal error in the position lookup.
+ */
+int mdb_idl_insert( ID *ids, ID id )
+{
+       unsigned x;
+
+#if IDL_DEBUG > 1
+       idl_dump( ids );
+#elif IDL_DEBUG > 0
+       idl_check( ids );
+#endif
+
+       if (MDB_IDL_IS_RANGE( ids )) {
+               /* if already in range, treat as a dup */
+               if (id >= MDB_IDL_FIRST(ids) && id <= MDB_IDL_LAST(ids))
+                       return -1;
+               if (id < MDB_IDL_FIRST(ids))
+                       ids[1] = id;
+               else if (id > MDB_IDL_LAST(ids))
+                       ids[2] = id;
+               return 0;
+       }
+
+       x = mdb_idl_search( ids, id );
+       assert( x > 0 );
+
+#if IDL_DEBUG > 1
+       /* traced only now: x has no value before the search has run */
+       Debug( LDAP_DEBUG_ANY, "insert: %04lx at %d\n", (long) id, x, 0 );
+#endif
+
+       if( x < 1 ) {
+               /* internal error */
+               return -2;
+       }
+
+       if ( x <= ids[0] && ids[x] == id ) {
+               /* duplicate */
+               return -1;
+       }
+
+       if ( ++ids[0] >= MDB_IDL_DB_MAX ) {
+               /* too big for a list: collapse into a range. ids[0] has
+                * already been incremented, so ids[ids[0]-1] is the last
+                * (largest) ID of the old list.
+                */
+               if( id < ids[1] ) {
+                       ids[1] = id;
+                       ids[2] = ids[ids[0]-1];
+               } else if ( ids[ids[0]-1] < id ) {
+                       ids[2] = id;
+               } else {
+                       ids[2] = ids[ids[0]-1];
+               }
+               ids[0] = NOID;
+
+       } else {
+               /* insert id: shift the tail up by one slot.
+                * NOTE(review): source and destination overlap, so this
+                * assumes AC_MEMCPY copes with overlapping regions - confirm.
+                */
+               AC_MEMCPY( &ids[x+1], &ids[x], (ids[0]-x) * sizeof(ID) );
+               ids[x] = id;
+       }
+
+#if IDL_DEBUG > 1
+       idl_dump( ids );
+#elif IDL_DEBUG > 0
+       idl_check( ids );
+#endif
+
+       return 0;
+}
+
+/*
+ * Remove id from the ID list ids.
+ *
+ * For a range IDL only the boundaries can be adjusted: deleting the
+ * first or last ID shrinks the range, deleting anything else is a
+ * no-op. A range that becomes empty is turned into an empty list, and
+ * a range that shrinks to a single ID is turned into a one-element
+ * list.
+ *
+ * Returns 0 on success (always for a range), -1 if id is not in the
+ * list, -2 on an internal error in the position lookup, -3 if the list
+ * became empty but the removed slot was not slot 1.
+ */
+static int mdb_idl_delete( ID *ids, ID id )
+{
+       unsigned x;
+
+#if IDL_DEBUG > 1
+       idl_dump( ids );
+#elif IDL_DEBUG > 0
+       idl_check( ids );
+#endif
+
+       if (MDB_IDL_IS_RANGE( ids )) {
+               /* If deleting a range boundary, adjust */
+               if ( ids[1] == id )
+                       ids[1]++;
+               else if ( ids[2] == id )
+                       ids[2]--;
+               /* deleting from inside a range is a no-op */
+
+               /* If the range has collapsed, re-adjust */
+               if ( ids[1] > ids[2] )
+                       ids[0] = 0;
+               else if ( ids[1] == ids[2] )
+                       /* a single ID is left: make it a one-element list
+                        * with that ID in ids[1]. Overwriting ids[1] with 1
+                        * instead would widen the range to 1..ids[2].
+                        */
+                       ids[0] = 1;
+               return 0;
+       }
+
+       x = mdb_idl_search( ids, id );
+       assert( x > 0 );
+
+#if IDL_DEBUG > 1
+       /* traced only now: x has no value before the search has run */
+       Debug( LDAP_DEBUG_ANY, "delete: %04lx at %d\n", (long) id, x, 0 );
+#endif
+
+       if( x <= 0 ) {
+               /* internal error */
+               return -2;
+       }
+
+       if( x > ids[0] || ids[x] != id ) {
+               /* not found */
+               return -1;
+
+       } else if ( --ids[0] == 0 ) {
+               /* removed the only element; it must have been in slot 1 */
+               if( x != 1 ) {
+                       return -3;
+               }
+
+       } else {
+               /* close the gap by moving the tail down one slot */
+               AC_MEMCPY( &ids[x], &ids[x+1], (1+ids[0]-x) * sizeof(ID) );
+       }
+
+#if IDL_DEBUG > 1
+       idl_dump( ids );
+#elif IDL_DEBUG > 0
+       idl_check( ids );
+#endif
+
+       return 0;
+}
+
+/*
+ * Return a printable form of an index key. A key of exactly 4 bytes is
+ * formatted into buf as "[xxxxxxxx]" (hex) and buf is returned; any
+ * other key is returned as its raw data pointer.
+ * NOTE(review): buf must have room for 11 bytes including the
+ * terminator; its size is not checked here.
+ */
+static char *
+mdb_show_key(
+       DBT             *key,
+       char            *buf )
+{
+       unsigned char *bytes;
+
+       if ( key->size != 4 /* LUTIL_HASH_BYTES */ ) {
+               return key->data;
+       }
+
+       bytes = key->data;
+       sprintf( buf, "[%02x%02x%02x%02x]", bytes[0], bytes[1], bytes[2], bytes[3] );
+       return buf;
+}
+
+/* Look up a db/key pair in the IDL cache. A cached IDL is copied into
+ * ids when ids is non-NULL; with a NULL ids only the status is
+ * reported. A matching cache entry is flagged CACHE_ENTRY_REFERENCED.
+ *
+ * Returns LDAP_NO_SUCH_OBJECT when the pair is not cached, DB_NOTFOUND
+ * when it is cached without an IDL, LDAP_SUCCESS otherwise.
+ */
+int
+mdb_idl_cache_get(
+       struct mdb_info *mdb,
+       DB                      *db,
+       DBT                     *key,
+       ID                      *ids )
+{
+       mdb_idl_cache_entry_t probe;
+       mdb_idl_cache_entry_t *hit;
+       int rc = LDAP_NO_SUCH_OBJECT;
+
+       /* only db and kstr are filled in on the lookup template */
+       probe.db = db;
+       DBT2bv( key, &probe.kstr );
+
+       ldap_pvt_thread_rdwr_rlock( &mdb->bi_idl_tree_rwlock );
+       hit = avl_find( mdb->bi_idl_tree, &probe,
+                                     mdb_idl_entry_cmp );
+       if ( hit != NULL ) {
+               hit->idl_flags |= CACHE_ENTRY_REFERENCED;
+
+               if ( hit->idl == NULL ) {
+                       rc = DB_NOTFOUND;
+               } else {
+                       if ( ids ) {
+                               MDB_IDL_CPY( ids, hit->idl );
+                       }
+                       rc = LDAP_SUCCESS;
+               }
+       }
+       ldap_pvt_thread_rdwr_runlock( &mdb->bi_idl_tree_rwlock );
+
+       return rc;
+}
+
+/* Store a private copy of an IDL in the cache under a db/key pair.
+ *
+ * Nothing is stored when rc is DB_NOTFOUND or ids is an empty IDL.
+ * If avl_insert() reports failure (avl_dup_error is passed as the
+ * duplicate handler), the freshly built entry is released again and
+ * the cache is left unchanged. Otherwise the entry becomes the new
+ * head of the LRU list and, if the cache had already reached
+ * bi_idl_cache_max_size, entries are evicted starting at the LRU tail.
+ *
+ * Locking: the tree write lock is taken first, then the LRU mutex;
+ * they are released in the opposite order.
+ */
+void
+mdb_idl_cache_put(
+       struct mdb_info *mdb,
+       DB                      *db,
+       DBT                     *key,
+       ID                      *ids,
+       int                     rc )
+{
+       mdb_idl_cache_entry_t idl_tmp;
+       mdb_idl_cache_entry_t *ee, *eprev;
+
+       /* nothing worth caching */
+       if ( rc == DB_NOTFOUND || MDB_IDL_IS_ZERO( ids ))
+               return;
+
+       /* idl_tmp only serves as the source berval for ber_dupbv() below */
+       DBT2bv( key, &idl_tmp.kstr );
+
+       /* build the complete entry before taking any lock */
+       ee = (mdb_idl_cache_entry_t *) ch_malloc(
+               sizeof( mdb_idl_cache_entry_t ) );
+       ee->db = db;
+       ee->idl = (ID*) ch_malloc( MDB_IDL_SIZEOF ( ids ) );
+       MDB_IDL_CPY( ee->idl, ids );
+
+       ee->idl_lru_prev = NULL;
+       ee->idl_lru_next = NULL;
+       ee->idl_flags = 0;
+       ber_dupbv( &ee->kstr, &idl_tmp.kstr );
+       ldap_pvt_thread_rdwr_wlock( &mdb->bi_idl_tree_rwlock );
+       if ( avl_insert( &mdb->bi_idl_tree, (caddr_t) ee,
+               mdb_idl_entry_cmp, avl_dup_error ))
+       {
+               /* insert failed: drop our copy and leave the cache as it was */
+               ch_free( ee->kstr.bv_val );
+               ch_free( ee->idl );
+               ch_free( ee );
+               ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock );
+               return;
+       }
+       ldap_pvt_thread_mutex_lock( &mdb->bi_idl_tree_lrulock );
+       /* LRU_ADD */
+       /* The LRU list is circular: the new entry is linked in just
+        * before the current head and then becomes the head itself.
+        */
+       if ( mdb->bi_idl_lru_head ) {
+               assert( mdb->bi_idl_lru_tail != NULL );
+               assert( mdb->bi_idl_lru_head->idl_lru_prev != NULL );
+               assert( mdb->bi_idl_lru_head->idl_lru_next != NULL );
+
+               ee->idl_lru_next = mdb->bi_idl_lru_head;
+               ee->idl_lru_prev = mdb->bi_idl_lru_head->idl_lru_prev;
+               mdb->bi_idl_lru_head->idl_lru_prev->idl_lru_next = ee;
+               mdb->bi_idl_lru_head->idl_lru_prev = ee;
+       } else {
+               /* first entry: it is its own neighbour and also the tail */
+               ee->idl_lru_next = ee->idl_lru_prev = ee;
+               mdb->bi_idl_lru_tail = ee;
+       }
+       mdb->bi_idl_lru_head = ee;
+
+       /* The size test uses the count from before this insertion; the
+        * counter is only incremented at the end of the function.
+        */
+       if ( mdb->bi_idl_cache_size >= mdb->bi_idl_cache_max_size ) {
+               int i;
+               eprev = mdb->bi_idl_lru_tail;
+               /* Walk backwards from the tail while i < 10. Note that i is
+                * advanced by the loop header on every pass and once more
+                * for each entry that is actually evicted.
+                */
+               for ( i = 0; (ee = eprev) != NULL && i < 10; i++ ) {
+                       eprev = ee->idl_lru_prev;
+                       /* an entry linked to itself is the only one left */
+                       if ( eprev == ee ) {
+                               eprev = NULL;
+                       }
+                       /* recently referenced entries are spared this time,
+                        * but lose the flag so a later pass can evict them
+                        */
+                       if ( ee->idl_flags & CACHE_ENTRY_REFERENCED ) {
+                               ee->idl_flags ^= CACHE_ENTRY_REFERENCED;
+                               continue;
+                       }
+                       if ( avl_delete( &mdb->bi_idl_tree, (caddr_t) ee,
+                                   mdb_idl_entry_cmp ) == NULL ) {
+                               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_cache_put: "
+                                       "AVL delete failed\n",
+                                       0, 0, 0 );
+                       }
+                       /* IDL_LRU_DELETE is defined elsewhere; presumably it
+                        * unlinks ee from the LRU list -- confirm there
+                        */
+                       IDL_LRU_DELETE( mdb, ee );
+                       i++;
+                       --mdb->bi_idl_cache_size;
+                       ch_free( ee->kstr.bv_val );
+                       ch_free( ee->idl );
+                       ch_free( ee );
+               }
+               /* the walk stopped at eprev, which becomes the new tail */
+               mdb->bi_idl_lru_tail = eprev;
+               assert( mdb->bi_idl_lru_tail != NULL
+                       || mdb->bi_idl_lru_head == NULL );
+       }
+       mdb->bi_idl_cache_size++;
+       ldap_pvt_thread_mutex_unlock( &mdb->bi_idl_tree_lrulock );
+       ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock );
+}
+
+/* Remove the cache entry for a db/key pair, if there is one.
+ *
+ * The entry is taken out of the AVL tree while holding the tree write
+ * lock and out of the LRU list while additionally holding the LRU
+ * mutex, then its key copy, its IDL and the entry itself are released.
+ * Cache entries are allocated with ch_malloc()/ber_dupbv() in
+ * mdb_idl_cache_put(), so they are released with ch_free() here, the
+ * same way mdb_idl_cache_put() releases them.
+ *
+ * A missing entry is not an error; the function then does nothing.
+ */
+void
+mdb_idl_cache_del(
+       struct mdb_info *mdb,
+       DB                      *db,
+       DBT                     *key )
+{
+       mdb_idl_cache_entry_t *matched_idl_entry, idl_tmp;
+
+       /* search template: same db, same key bytes */
+       DBT2bv( key, &idl_tmp.kstr );
+       idl_tmp.db = db;
+       ldap_pvt_thread_rdwr_wlock( &mdb->bi_idl_tree_rwlock );
+       matched_idl_entry = avl_find( mdb->bi_idl_tree, &idl_tmp,
+                                     mdb_idl_entry_cmp );
+       if ( matched_idl_entry != NULL ) {
+               if ( avl_delete( &mdb->bi_idl_tree, (caddr_t) matched_idl_entry,
+                                   mdb_idl_entry_cmp ) == NULL ) {
+                       Debug( LDAP_DEBUG_ANY, "=> mdb_idl_cache_del: "
+                               "AVL delete failed\n",
+                               0, 0, 0 );
+               }
+               --mdb->bi_idl_cache_size;
+               ldap_pvt_thread_mutex_lock( &mdb->bi_idl_tree_lrulock );
+               IDL_LRU_DELETE( mdb, matched_idl_entry );
+               ldap_pvt_thread_mutex_unlock( &mdb->bi_idl_tree_lrulock );
+               ch_free( matched_idl_entry->kstr.bv_val );
+               if ( matched_idl_entry->idl )
+                       ch_free( matched_idl_entry->idl );
+               ch_free( matched_idl_entry );
+       }
+       ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock );
+}
+
+/* Add one ID to the cached IDL for a db/key pair.
+ *
+ * Does nothing when the pair is not cached. When the cached IDL is not
+ * a range and holds fewer than MDB_IDL_DB_MAX IDs, its buffer is first
+ * grown by one ID slot; the ID is then handed to mdb_idl_insert().
+ * The whole operation runs under the tree write lock.
+ */
+void
+mdb_idl_cache_add_id(
+       struct mdb_info *mdb,
+       DB                      *db,
+       DBT                     *key,
+       ID                      id )
+{
+       mdb_idl_cache_entry_t probe;
+       mdb_idl_cache_entry_t *found;
+
+       DBT2bv( key, &probe.kstr );
+       probe.db = db;
+
+       ldap_pvt_thread_rdwr_wlock( &mdb->bi_idl_tree_rwlock );
+       found = avl_find( mdb->bi_idl_tree, &probe, mdb_idl_entry_cmp );
+       if ( found != NULL ) {
+               /* make room for one more ID before inserting */
+               if ( !MDB_IDL_IS_RANGE( found->idl ) &&
+                       found->idl[0] < MDB_IDL_DB_MAX ) {
+                       size_t grown = MDB_IDL_SIZEOF( found->idl ) + sizeof(ID);
+
+                       found->idl = ch_realloc( found->idl, grown );
+               }
+               mdb_idl_insert( found->idl, id );
+       }
+       ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock );
+}
+
+/* Remove one ID from the cached IDL for a db/key pair.
+ *
+ * Does nothing when the pair is not cached. The ID is removed with
+ * mdb_idl_delete(); if that leaves the cached IDL with a zero count,
+ * the whole cache entry is dropped from the AVL tree and the LRU list
+ * and released. Cache entries are allocated with ch_malloc()/ber_dupbv()
+ * in mdb_idl_cache_put(), so they are released with ch_free() here.
+ * The whole operation runs under the tree write lock; the LRU mutex is
+ * taken only around the LRU unlink.
+ */
+void
+mdb_idl_cache_del_id(
+       struct mdb_info *mdb,
+       DB                      *db,
+       DBT                     *key,
+       ID                      id )
+{
+       mdb_idl_cache_entry_t *cache_entry, idl_tmp;
+
+       /* search template: same db, same key bytes */
+       DBT2bv( key, &idl_tmp.kstr );
+       idl_tmp.db = db;
+       ldap_pvt_thread_rdwr_wlock( &mdb->bi_idl_tree_rwlock );
+       cache_entry = avl_find( mdb->bi_idl_tree, &idl_tmp,
+                                     mdb_idl_entry_cmp );
+       if ( cache_entry != NULL ) {
+               mdb_idl_delete( cache_entry->idl, id );
+               /* an IDL that became empty is not worth keeping cached */
+               if ( cache_entry->idl[0] == 0 ) {
+                       if ( avl_delete( &mdb->bi_idl_tree, (caddr_t) cache_entry,
+                                               mdb_idl_entry_cmp ) == NULL ) {
+                               /* name this function, not mdb_idl_cache_del */
+                               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_cache_del_id: "
+                                       "AVL delete failed\n",
+                                       0, 0, 0 );
+                       }
+                       --mdb->bi_idl_cache_size;
+                       ldap_pvt_thread_mutex_lock( &mdb->bi_idl_tree_lrulock );
+                       IDL_LRU_DELETE( mdb, cache_entry );
+                       ldap_pvt_thread_mutex_unlock( &mdb->bi_idl_tree_lrulock );
+                       ch_free( cache_entry->kstr.bv_val );
+                       ch_free( cache_entry->idl );
+                       ch_free( cache_entry );
+               }
+       }
+       ldap_pvt_thread_rdwr_wunlock( &mdb->bi_idl_tree_rwlock );
+}
+
+/* Read the IDL stored under a key into ids.
+ *
+ * be            backend whose be_private is the struct mdb_info
+ * db, txn       database handle and transaction used to open a cursor
+ * key           key to look up
+ * ids           output IDL; must not be NULL (asserted)
+ * saved_cursor  optional; when *saved_cursor is already set the lookup
+ *               continues from that cursor with DB_NEXT. On success the
+ *               cursor is left open and stored in *saved_cursor.
+ * get_flag      LDAP_FILTER_GE positions with DB_SET_RANGE,
+ *               LDAP_FILTER_LE with DB_FIRST, anything else is an exact
+ *               DB_SET lookup
+ *
+ * Exact lookups are answered from the IDL cache when possible, and
+ * successful reads are offered to the cache when it is enabled.
+ *
+ * Returns 0 on success, DB_NOTFOUND when no data matches, another
+ * non-zero code when a cursor call fails, or -1 when the stored data
+ * is inconsistent.
+ */
+int
+mdb_idl_fetch_key(
+       BackendDB       *be,
+       DB                      *db,
+       DB_TXN          *txn,
+       DBT                     *key,
+       ID                      *ids,
+       DBC                     **saved_cursor,
+       int                     get_flag )
+{
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       int rc;
+       DBT data, key2, *kptr;
+       DBC *cursor;
+       ID *i;
+       void *ptr;
+       size_t len;
+       int rc2;
+       int flags = mdb->bi_db_opflags | DB_MULTIPLE;
+       int opflag;
+
+       /* If using BerkeleyDB 4.0, the buf must be large enough to
+        * grab the entire IDL in one get(), otherwise BDB will leak
+        * resources on subsequent get's.  We can safely call get()
+        * twice - once for the data, and once to get the DB_NOTFOUND
+        * result meaning there's no more data. See ITS#2040 for details.
+        * This bug is fixed in BDB 4.1 so a smaller buffer will work if
+        * stack space is too limited.
+        *
+        * configure now requires Berkeley DB 4.1.
+        */
+#if DB_VERSION_FULL < 0x04010000
+#      define MDB_ENOUGH 5
+#else
+       /* We sometimes test with tiny IDLs, and BDB always wants buffers
+        * that are at least one page in size.
+        */
+# if MDB_IDL_DB_SIZE < 4096
+#   define MDB_ENOUGH 2048
+# else
+#      define MDB_ENOUGH 1
+# endif
+#endif
+       ID buf[MDB_IDL_DB_SIZE*MDB_ENOUGH];
+
+       /* scratch for mdb_show_key() and, on LE/GE lookups, storage for
+        * the working copy of the key
+        */
+       char keybuf[16];
+
+       Debug( LDAP_DEBUG_ARGS,
+               "mdb_idl_fetch_key: %s\n", 
+               mdb_show_key( key, keybuf ), 0, 0 );
+
+       assert( ids != NULL );
+
+       /* choose how the first c_get positions the cursor */
+       if ( saved_cursor && *saved_cursor ) {
+               opflag = DB_NEXT;
+       } else if ( get_flag == LDAP_FILTER_GE ) {
+               opflag = DB_SET_RANGE;
+       } else if ( get_flag == LDAP_FILTER_LE ) {
+               opflag = DB_FIRST;
+       } else {
+               opflag = DB_SET;
+       }
+
+       /* only non-range lookups can use the IDL cache */
+       if ( mdb->bi_idl_cache_size && opflag == DB_SET ) {
+               rc = mdb_idl_cache_get( mdb, db, key, ids );
+               /* LDAP_NO_SUCH_OBJECT means "not cached": go to the database */
+               if ( rc != LDAP_NO_SUCH_OBJECT ) return rc;
+       }
+
+       DBTzero( &data );
+
+       /* bulk reads land in the caller-owned stack buffer */
+       data.data = buf;
+       data.ulen = sizeof(buf);
+       data.flags = DB_DBT_USERMEM;
+
+       /* If we're not reusing an existing cursor, get a new one */
+       if( opflag != DB_NEXT ) {
+               rc = db->cursor( db, txn, &cursor, mdb->bi_db_opflags );
+               if( rc != 0 ) {
+                       Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: "
+                               "cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 );
+                       return rc;
+               }
+       } else {
+               cursor = *saved_cursor;
+       }
+       
+       /* If this is a LE lookup, save original key so we can determine
+        * when to stop. If this is a GE lookup, save the key since it
+        * will be overwritten.
+        */
+       if ( get_flag == LDAP_FILTER_LE || get_flag == LDAP_FILTER_GE ) {
+               DBTzero( &key2 );
+               key2.flags = DB_DBT_USERMEM;
+               key2.ulen = sizeof(keybuf);
+               key2.data = keybuf;
+               key2.size = key->size;
+               /* NOTE(review): key->size bytes are copied into the 16-byte
+                * keybuf without a bounds check -- confirm that callers never
+                * pass a longer key on LE/GE lookups.
+                */
+               AC_MEMCPY( keybuf, key->data, key->size );
+               kptr = &key2;
+       } else {
+               kptr = key;
+       }
+       /* remember the requested key length; len is reused further down
+        * as the item length output of DB_MULTIPLE_NEXT
+        */
+       len = key->size;
+       rc = cursor->c_get( cursor, kptr, &data, flags | opflag );
+
+       /* skip presence key on range inequality lookups */
+       while (rc == 0 && kptr->size != len) {
+               rc = cursor->c_get( cursor, kptr, &data, flags | DB_NEXT_NODUP );
+       }
+       /* If we're doing a LE compare and the new key is greater than
+        * our search key, we're done
+        */
+       if (rc == 0 && get_flag == LDAP_FILTER_LE && memcmp( kptr->data,
+               key->data, key->size ) > 0 ) {
+               rc = DB_NOTFOUND;
+       }
+       if (rc == 0) {
+               /* i trails one slot behind the next free element so that
+                * ids[0] can receive the count afterwards
+                */
+               i = ids;
+               while (rc == 0) {
+                       u_int8_t *j;
+
+                       /* unpack every item of the bulk buffer into ids[] */
+                       DB_MULTIPLE_INIT( ptr, &data );
+                       while (ptr) {
+                               DB_MULTIPLE_NEXT(ptr, &data, j, len);
+                               if (j) {
+                                       ++i;
+                                       MDB_DISK2ID( j, i );
+                               }
+                       }
+                       /* fetch the next batch of duplicates; note that this
+                        * call passes key rather than kptr
+                        */
+                       rc = cursor->c_get( cursor, key, &data, flags | DB_NEXT_DUP );
+               }
+               /* running out of duplicates is the normal way to finish */
+               if ( rc == DB_NOTFOUND ) rc = 0;
+               ids[0] = i - ids;
+               /* On disk, a range is denoted by 0 in the first element */
+               if (ids[1] == 0) {
+                       if (ids[0] != MDB_IDL_RANGE_SIZE) {
+                               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: "
+                                       "range size mismatch: expected %d, got %ld\n",
+                                       MDB_IDL_RANGE_SIZE, ids[0], 0 );
+                               /* NOTE(review): if this cursor came from
+                                * *saved_cursor, the caller's pointer is not
+                                * reset after the close.
+                                */
+                               cursor->c_close( cursor );
+                               return -1;
+                       }
+                       MDB_IDL_RANGE( ids, ids[2], ids[3] );
+               }
+               /* data.size is set from the decoded IDL here, so the size
+                * checks below are evaluated against this value
+                */
+               data.size = MDB_IDL_SIZEOF(ids);
+       }
+
+       /* keep the cursor open for the caller only on success; in every
+        * other case it is closed here (*saved_cursor itself is not
+        * modified when a previously saved cursor gets closed)
+        */
+       if ( saved_cursor && rc == 0 ) {
+               if ( !*saved_cursor )
+                       *saved_cursor = cursor;
+               rc2 = 0;
+       }
+       else
+               rc2 = cursor->c_close( cursor );
+       if (rc2) {
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: "
+                       "close failed: %s (%d)\n", db_strerror(rc2), rc2, 0 );
+               return rc2;
+       }
+
+       if( rc == DB_NOTFOUND ) {
+               /* not logged: a missing key is an ordinary outcome */
+               return rc;
+
+       } else if( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: "
+                       "get failed: %s (%d)\n",
+                       db_strerror(rc), rc, 0 );
+               return rc;
+
+       } else if ( data.size == 0 || data.size % sizeof( ID ) ) {
+               /* size not multiple of ID size */
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: "
+                       "odd size: expected %ld multiple, got %ld\n",
+                       (long) sizeof( ID ), (long) data.size, 0 );
+               return -1;
+
+       } else if ( data.size != MDB_IDL_SIZEOF(ids) ) {
+               /* size mismatch */
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_fetch_key: "
+                       "get size mismatch: expected %ld, got %ld\n",
+                       (long) ((1 + ids[0]) * sizeof( ID )), (long) data.size, 0 );
+               return -1;
+       }
+
+       /* offer the result to the cache whenever caching is enabled */
+       if ( mdb->bi_idl_cache_max_size ) {
+               mdb_idl_cache_put( mdb, db, key, ids, rc );
+       }
+
+       return rc;
+}
+
+
+/* Add an ID to the on-disk IDL stored under a key.
+ *
+ * The IDL is kept as duplicate data items of the key. A first item of
+ * 0 marks a range, which is followed by a lo and a hi item.
+ *
+ *  - key not present:         the ID is stored as the first item
+ *  - list with room:          the ID is added (already present is fine)
+ *  - list at MDB_IDL_DB_MAX:  all items are deleted and replaced by the
+ *                             range marker plus lo and hi
+ *  - already a range:         lo or hi is rewritten when the ID falls
+ *                             outside the current bounds
+ *
+ * When every database step returned 0 and the IDL cache is enabled,
+ * the cached IDL for the key is updated as well.
+ *
+ * Returns the failing call's code when a database step fails;
+ * otherwise the result of closing the cursor.
+ */
+int
+mdb_idl_insert_key(
+       BackendDB       *be,
+       DB                      *db,
+       DB_TXN          *tid,
+       DBT                     *key,
+       ID                      id )
+{
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       int     rc;
+       DBT data;
+       DBC *cursor;
+       ID lo, hi, nlo, nhi, nid;
+       char *err;
+
+       {
+               char buf[16];
+               Debug( LDAP_DEBUG_ARGS,
+                       "mdb_idl_insert_key: %lx %s\n", 
+                       (long) id, mdb_show_key( key, buf ), 0 );
+       }
+
+       assert( id != NOID );
+
+       /* every item read or written below is exactly one ID wide */
+       DBTzero( &data );
+       data.size = sizeof( ID );
+       data.ulen = data.size;
+       data.flags = DB_DBT_USERMEM;
+
+       /* nid holds the ID in its on-disk form */
+       MDB_ID2DISK( id, &nid );
+
+       rc = db->cursor( db, tid, &cursor, mdb->bi_db_opflags );
+       if ( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_insert_key: "
+                       "cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 );
+               return rc;
+       }
+       data.data = &nlo;
+       /* Fetch the first data item for this key, to see if it
+        * exists and if it's a range.
+        */
+       rc = cursor->c_get( cursor, key, &data, DB_SET );
+       /* err names the step reported by the fail: handler */
+       err = "c_get";
+       if ( rc == 0 ) {
+               if ( nlo != 0 ) {
+                       /* not a range, count the number of items */
+                       db_recno_t count;
+                       rc = cursor->c_count( cursor, &count, 0 );
+                       if ( rc != 0 ) {
+                               err = "c_count";
+                               goto fail;
+                       }
+                       if ( count >= MDB_IDL_DB_MAX ) {
+                       /* No room, convert to a range */
+                               /* key2 is a partial-get copy of the key used for
+                                * cursor moves; presumably this keeps those calls
+                                * from writing key bytes back -- confirm against
+                                * the DB_DBT_PARTIAL documentation
+                                */
+                               DBT key2 = *key;
+                               db_recno_t i;
+
+                               key2.dlen = key2.ulen;
+                               key2.flags |= DB_DBT_PARTIAL;
+
+                               /* the first item read above is the current lo */
+                               MDB_DISK2ID( &nlo, &lo );
+                               data.data = &nhi;
+
+                               /* Find the last item of this key: step to the
+                                * next key and back up one item, or take the last
+                                * item of the database when there is no next key.
+                                */
+                               rc = cursor->c_get( cursor, &key2, &data, DB_NEXT_NODUP );
+                               if ( rc != 0 && rc != DB_NOTFOUND ) {
+                                       err = "c_get next_nodup";
+                                       goto fail;
+                               }
+                               if ( rc == DB_NOTFOUND ) {
+                                       rc = cursor->c_get( cursor, key, &data, DB_LAST );
+                                       if ( rc != 0 ) {
+                                               err = "c_get last";
+                                               goto fail;
+                                       }
+                               } else {
+                                       rc = cursor->c_get( cursor, key, &data, DB_PREV );
+                                       if ( rc != 0 ) {
+                                               err = "c_get prev";
+                                               goto fail;
+                                       }
+                               }
+                               MDB_DISK2ID( &nhi, &hi );
+                               /* Update hi/lo if needed, then delete all the items
+                                * between lo and hi
+                                */
+                               if ( id < lo ) {
+                                       lo = id;
+                                       nlo = nid;
+                               } else if ( id > hi ) {
+                                       hi = id;
+                                       nhi = nid;
+                               }
+                               /* from here on nid is only scratch storage for the
+                                * items being written; nlo and nhi hold the bounds
+                                */
+                               data.data = &nid;
+                               /* Don't fetch anything, just position cursor */
+                               data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+                               data.dlen = data.ulen = 0;
+                               rc = cursor->c_get( cursor, key, &data, DB_SET );
+                               if ( rc != 0 ) {
+                                       err = "c_get 2";
+                                       goto fail;
+                               }
+                               rc = cursor->c_del( cursor, 0 );
+                               if ( rc != 0 ) {
+                                       err = "c_del range1";
+                                       goto fail;
+                               }
+                               /* Delete all the records */
+                               /* starts at 1: the first item was deleted above */
+                               for ( i=1; i<count; i++ ) {
+                                       rc = cursor->c_get( cursor, &key2, &data, DB_NEXT_DUP );
+                                       if ( rc != 0 ) {
+                                               err = "c_get next_dup";
+                                               goto fail;
+                                       }
+                                       rc = cursor->c_del( cursor, 0 );
+                                       if ( rc != 0 ) {
+                                               err = "c_del range";
+                                               goto fail;
+                                       }
+                               }
+                               /* Store the range marker */
+                               /* back to full-width items for the three writes */
+                               data.size = data.ulen = sizeof(ID);
+                               data.flags = DB_DBT_USERMEM;
+                               nid = 0;
+                               rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST );
+                               if ( rc != 0 ) {
+                                       err = "c_put range";
+                                       goto fail;
+                               }
+                               nid = nlo;
+                               rc = cursor->c_put( cursor, key, &data, DB_KEYLAST );
+                               if ( rc != 0 ) {
+                                       err = "c_put lo";
+                                       goto fail;
+                               }
+                               nid = nhi;
+                               rc = cursor->c_put( cursor, key, &data, DB_KEYLAST );
+                               if ( rc != 0 ) {
+                                       err = "c_put hi";
+                                       goto fail;
+                               }
+                       } else {
+                       /* There's room, just store it */
+                               goto put1;
+                       }
+               } else {
+                       /* It's a range, see if we need to rewrite
+                        * the boundaries
+                        */
+                       /* hi starts out as id so that "id > hi" below stays
+                        * false when the hi item is never read
+                        */
+                       hi = id;
+                       data.data = &nlo;
+                       rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP );
+                       if ( rc != 0 ) {
+                               err = "c_get lo";
+                               goto fail;
+                       }
+                       MDB_DISK2ID( &nlo, &lo );
+                       /* only step on to the hi item when id is above lo;
+                        * the cursor is thus left on the item that may need
+                        * replacing
+                        */
+                       if ( id > lo ) {
+                               data.data = &nhi;
+                               rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP );
+                               if ( rc != 0 ) {
+                                       err = "c_get hi";
+                                       goto fail;
+                               }
+                               MDB_DISK2ID( &nhi, &hi );
+                       }
+                       if ( id < lo || id > hi ) {
+                               /* Delete the current lo/hi */
+                               rc = cursor->c_del( cursor, 0 );
+                               if ( rc != 0 ) {
+                                       err = "c_del";
+                                       goto fail;
+                               }
+                               /* write the new bound in its place */
+                               data.data = &nid;
+                               rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST );
+                               if ( rc != 0 ) {
+                                       err = "c_put lo/hi";
+                                       goto fail;
+                               }
+                       }
+               }
+       } else if ( rc == DB_NOTFOUND ) {
+               /* put1 is also the jump target for "list with room" above */
+put1:          data.data = &nid;
+               rc = cursor->c_put( cursor, key, &data, DB_NODUPDATA );
+               /* Don't worry if it's already there */
+               if ( rc != 0 && rc != DB_KEYEXIST ) {
+                       err = "c_put id";
+                       goto fail;
+               }
+       } else {
+               /* initial c_get failed, nothing was done */
+               /* fail is also the common error exit for every step above */
+fail:
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_insert_key: "
+                       "%s failed: %s (%d)\n", err, db_strerror(rc), rc );
+               cursor->c_close( cursor );
+               return rc;
+       }
+       /* If key was added (didn't already exist) and using IDL cache,
+        * update key in IDL cache.
+        */
+       /* rc is DB_KEYEXIST here when the ID was already stored, which
+        * skips the cache update
+        */
+       if ( !rc && mdb->bi_idl_cache_max_size ) {
+               mdb_idl_cache_add_id( mdb, db, key, id );
+       }
+       /* the value returned from here on is the result of the close */
+       rc = cursor->c_close( cursor );
+       if( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_insert_key: "
+                       "c_close failed: %s (%d)\n",
+                       db_strerror(rc), rc, 0 );
+       }
+       return rc;
+}
+
+/* Remove an ID from the on-disk IDL stored under a key.
+ *
+ * The IDL is kept as duplicate data items of the key. A first item of
+ * 0 marks a range, which is followed by a lo and a hi item.
+ *
+ *  - plain list:  the item holding the ID is located and deleted
+ *  - range:       only an ID equal to lo or hi changes anything; the
+ *                 bound is moved inward by one, and when the bounds
+ *                 meet or cross the whole key is deleted (and, if
+ *                 exactly one ID remains, stored again as a single item)
+ *
+ * Any cached IDL for the key is dropped before the database is touched.
+ *
+ * Returns the failing call's code when a database step fails
+ * (DB_NOTFOUND is returned without being logged); otherwise the result
+ * of closing the cursor.
+ */
+int
+mdb_idl_delete_key(
+       BackendDB       *be,
+       DB                      *db,
+       DB_TXN          *tid,
+       DBT                     *key,
+       ID                      id )
+{
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       int     rc;
+       DBT data;
+       DBC *cursor;
+       ID lo, hi, tmp, nid, nlo, nhi;
+       char *err;
+
+       {
+               char buf[16];
+               Debug( LDAP_DEBUG_ARGS,
+                       "mdb_idl_delete_key: %lx %s\n",
+                       (long) id, mdb_show_key( key, buf ), 0 );
+       }
+       assert( id != NOID );
+
+       /* the cached copy would go stale, so simply discard it */
+       if ( mdb->bi_idl_cache_size ) {
+               mdb_idl_cache_del( mdb, db, key );
+       }
+
+       /* nid holds the ID in its on-disk form */
+       MDB_ID2DISK( id, &nid );
+
+       /* every item read or written below is exactly one ID wide */
+       DBTzero( &data );
+       data.data = &tmp;
+       data.size = sizeof( id );
+       data.ulen = data.size;
+       data.flags = DB_DBT_USERMEM;
+
+       rc = db->cursor( db, tid, &cursor, mdb->bi_db_opflags );
+       if ( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_delete_key: "
+                       "cursor failed: %s (%d)\n", db_strerror(rc), rc, 0 );
+               return rc;
+       }
+       /* Fetch the first data item for this key, to see if it
+        * exists and if it's a range.
+        */
+       rc = cursor->c_get( cursor, key, &data, DB_SET );
+       /* err names the step reported by the fail: handler */
+       err = "c_get";
+       if ( rc == 0 ) {
+               if ( tmp != 0 ) {
+                       /* Not a range, just delete it */
+                       if (tmp != nid) {
+                               /* position to correct item */
+                               tmp = nid;
+                               rc = cursor->c_get( cursor, key, &data, DB_GET_BOTH );
+                               if ( rc != 0 ) {
+                                       err = "c_get id";
+                                       goto fail;
+                               }
+                       }
+                       rc = cursor->c_del( cursor, 0 );
+                       if ( rc != 0 ) {
+                               err = "c_del id";
+                               goto fail;
+                       }
+               } else {
+                       /* It's a range, see if we need to rewrite
+                        * the boundaries
+                        */
+                       data.data = &nlo;
+                       rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP );
+                       if ( rc != 0 ) {
+                               err = "c_get lo";
+                               goto fail;
+                       }
+                       MDB_DISK2ID( &nlo, &lo );
+                       data.data = &nhi;
+                       rc = cursor->c_get( cursor, key, &data, DB_NEXT_DUP );
+                       if ( rc != 0 ) {
+                               err = "c_get hi";
+                               goto fail;
+                       }
+                       MDB_DISK2ID( &nhi, &hi );
+                       /* the cursor now rests on the hi item */
+                       if ( id == lo || id == hi ) {
+                               /* move the affected bound inward; id becomes
+                                * the new value of that bound
+                                */
+                               if ( id == lo ) {
+                                       id++;
+                                       lo = id;
+                               } else if ( id == hi ) {
+                                       id--;
+                                       hi = id;
+                               }
+                               if ( lo >= hi ) {
+                               /* The range has collapsed... */
+                                       rc = db->del( db, tid, key, 0 );
+                                       if ( rc != 0 ) {
+                                               err = "del";
+                                               goto fail;
+                                       }
+                               } else {
+                                       if ( id == lo ) {
+                                               /* reposition on lo slot */
+                                               data.data = &nlo;
+                                               /* A failed reposition must not be
+                                                * ignored: the delete below would
+                                                * otherwise remove the hi item.
+                                                */
+                                               rc = cursor->c_get( cursor, key, &data, DB_PREV );
+                                               if ( rc != 0 ) {
+                                                       err = "c_get prev";
+                                                       goto fail;
+                                               }
+                                       }
+                                       rc = cursor->c_del( cursor, 0 );
+                                       if ( rc != 0 ) {
+                                               err = "c_del";
+                                               goto fail;
+                                       }
+                               }
+                               /* store the moved bound, or the single
+                                * remaining ID when lo == hi; nothing is
+                                * written when the bounds have crossed
+                                */
+                               if ( lo <= hi ) {
+                                       MDB_ID2DISK( id, &nid );
+                                       data.data = &nid;
+                                       rc = cursor->c_put( cursor, key, &data, DB_KEYFIRST );
+                                       if ( rc != 0 ) {
+                                               err = "c_put lo/hi";
+                                               goto fail;
+                                       }
+                               }
+                       }
+               }
+       } else {
+               /* initial c_get failed, nothing was done */
+               /* fail is also the common error exit for every step above */
+fail:
+               if ( rc != DB_NOTFOUND ) {
+               Debug( LDAP_DEBUG_ANY, "=> mdb_idl_delete_key: "
+                       "%s failed: %s (%d)\n", err, db_strerror(rc), rc );
+               }
+               cursor->c_close( cursor );
+               return rc;
+       }
+       /* the value returned from here on is the result of the close */
+       rc = cursor->c_close( cursor );
+       if( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "=> mdb_idl_delete_key: c_close failed: %s (%d)\n",
+                       db_strerror(rc), rc, 0 );
+       }
+
+       return rc;
+}
+
+
+/*
+ * idl_intersection - return a = a intersection b
+ *
+ * The result is written into a's storage; the function always
+ * returns 0. Either argument may be a range IDL. When a is a range
+ * and b is a list, the work is done in b's storage and copied back
+ * into a at the end, so b's contents may be modified as well.
+ */
+int
+mdb_idl_intersection(
+       ID *a,
+       ID *b )
+{
+       ID ida, idb;
+       ID idmax, idmin;
+       ID cursora = 0, cursorb = 0, cursorc;
+       int swap = 0;
+
+       /* an empty operand makes the intersection empty */
+       if ( MDB_IDL_IS_ZERO( a ) || MDB_IDL_IS_ZERO( b ) ) {
+               a[0] = 0;
+               return 0;
+       }
+
+       /* only IDs within [idmin, idmax] can belong to both operands */
+       idmin = IDL_MAX( MDB_IDL_FIRST(a), MDB_IDL_FIRST(b) );
+       idmax = IDL_MIN( MDB_IDL_LAST(a), MDB_IDL_LAST(b) );
+       if ( idmin > idmax ) {
+               a[0] = 0;
+               return 0;
+       } else if ( idmin == idmax ) {
+               /* NOTE(review): idmin is returned without checking that it
+                * is a member of both operands (e.g. a one-element list
+                * whose ID lies between two elements of the other list) --
+                * confirm that callers tolerate a superset here.
+                */
+               a[0] = 1;
+               a[1] = idmin;
+               return 0;
+       }
+
+       if ( MDB_IDL_IS_RANGE( a ) ) {
+               if ( MDB_IDL_IS_RANGE(b) ) {
+               /* If both are ranges, just shrink the boundaries */
+                       a[1] = idmin;
+                       a[2] = idmax;
+                       return 0;
+               } else {
+               /* Else swap so that b is the range, a is a list */
+                       ID *tmp = a;
+                       a = b;
+                       b = tmp;
+                       swap = 1;
+               }
+       }
+
+       /* If a range completely covers the list, the result is
+        * just the list. If idmin to idmax is contiguous, just
+        * turn it into a range.
+        */
+       if ( MDB_IDL_IS_RANGE( b )
+               && MDB_IDL_FIRST( b ) <= MDB_IDL_FIRST( a )
+               && MDB_IDL_LAST( b ) >= MDB_IDL_LAST( a ) ) {
+               /* as many IDs as the span is wide: the list has no gaps */
+               if (idmax - idmin + 1 == a[0])
+               {
+                       a[0] = NOID;
+                       a[1] = idmin;
+                       a[2] = idmax;
+               }
+               goto done;
+       }
+
+       /* Fine, do the intersection one element at a time.
+        * First advance to idmin in both IDLs.
+        */
+       cursora = cursorb = idmin;
+       ida = mdb_idl_first( a, &cursora );
+       idb = mdb_idl_first( b, &cursorb );
+       /* cursorc counts the IDs written so far; the output overwrites
+        * a[] in place while a[] is still being read through cursora
+        */
+       cursorc = 0;
+
+       while( ida <= idmax || idb <= idmax ) {
+               if( ida == idb ) {
+                       /* common ID: keep it and advance both sides */
+                       a[++cursorc] = ida;
+                       ida = mdb_idl_next( a, &cursora );
+                       idb = mdb_idl_next( b, &cursorb );
+               } else if ( ida < idb ) {
+                       ida = mdb_idl_next( a, &cursora );
+               } else {
+                       idb = mdb_idl_next( b, &cursorb );
+               }
+       }
+       a[0] = cursorc;
+done:
+       /* after a swap, b points at the caller's a: copy the result there */
+       if (swap)
+               MDB_IDL_CPY( b, a );
+
+       return 0;
+}
+
+
+/*
+ * idl_union - return a = a union b
+ *
+ * Always returns 0; the result is written into a.
+ *
+ * - If b is empty, a is left untouched; if a is empty, b is copied
+ *   into it.
+ * - If either input is a range, or the merge below would need a slot
+ *   index above MDB_IDL_UM_MAX, a becomes the single range running from
+ *   the lower of the two first IDs to the higher of the two last IDs.
+ * - Otherwise both inputs are lists and they are merged in two passes
+ *   (see the comments in the body).
+ *
+ * b[0] and b[1..b[0]] are not changed, but the slots following b[b[0]]
+ * are overwritten as scratch space, so the caller has to provide a b
+ * that is large enough for that.
+ */
+int
+mdb_idl_union(
+       ID      *a,
+       ID      *b )
+{
+       ID ida, idb;
+       ID cursora = 0, cursorb = 0, cursorc;   /* a cursor of 0 makes mdb_idl_first() start at slot 1 */
+
+       if ( MDB_IDL_IS_ZERO( b ) ) {
+               return 0;
+       }
+
+       if ( MDB_IDL_IS_ZERO( a ) ) {
+               MDB_IDL_CPY( a, b );
+               return 0;
+       }
+
+       if ( MDB_IDL_IS_RANGE( a ) || MDB_IDL_IS_RANGE(b) ) {
+over:          ida = IDL_MIN( MDB_IDL_FIRST(a), MDB_IDL_FIRST(b) );    /* also entered by goto when the scratch area overflows; a is still unmodified then */
+               idb = IDL_MAX( MDB_IDL_LAST(a), MDB_IDL_LAST(b) );
+               a[0] = NOID;
+               a[1] = ida;
+               a[2] = idb;
+               return 0;
+       }
+
+       ida = mdb_idl_first( a, &cursora );
+       idb = mdb_idl_first( b, &cursorb );
+
+       cursorc = b[0]; /* the first scratch slot will be b[b[0]+1] */
+
+       /* The distinct elements of a are cat'd to b, i.e. every ID of a
+        * that does not occur in b is stored behind b's last element.
+        * The ida < idb test must also succeed once b is exhausted and
+        * idb is NOID, so this assumes NOID compares greater than any
+        * real ID.
+        */
+       while( ida != NOID || idb != NOID ) {
+               if ( ida < idb ) {
+                       if( ++cursorc > MDB_IDL_UM_MAX ) {
+                               goto over;
+                       }
+                       b[cursorc] = ida;
+                       ida = mdb_idl_next( a, &cursora );
+
+               } else {
+                       if ( ida == idb )
+                               ida = mdb_idl_next( a, &cursora );
+                       idb = mdb_idl_next( b, &cursorb );
+               }
+       }
+
+       /* b is copied back to a in sorted order: b's own elements (walked
+        * by cursorb) are merged with the scratch elements stored above
+        * (walked by cursorc), and the output is written at cursora.
+        */
+       a[0] = cursorc; /* number of IDs in the union */
+       cursora = 1;
+       cursorb = 1;
+       cursorc = b[0]+1;
+       while (cursorb <= b[0] || cursorc <= a[0]) {
+               if (cursorc > a[0])
+                       idb = NOID;     /* scratch part used up: sentinel */
+               else
+                       idb = b[cursorc];
+               if (cursorb <= b[0] && b[cursorb] < idb)
+                       a[cursora++] = b[cursorb++];
+               else {
+                       a[cursora++] = idb;
+                       cursorc++;
+               }
+       }
+
+       return 0;
+}
+
+
+#if 0
+/*
+ * mdb_idl_notin - return a intersection ~b (or a minus b)
+ *
+ * The result is written to ids; a and b are only read. Always
+ * returns 0.
+ *
+ * When a is empty, when b is empty or a range, or when a is a range,
+ * ids is simply a copy of a (nothing is removed in those cases).
+ * Otherwise both inputs are lists and ids receives, in order, the IDs
+ * of a that do not occur in b.
+ *
+ * This function is currently compiled out by the enclosing #if 0.
+ */
+int
+mdb_idl_notin(
+       ID      *a,
+       ID      *b,
+       ID *ids )
+{
+       ID ida, idb;
+       ID cursora = 0, cursorb = 0;
+
+       if( MDB_IDL_IS_ZERO( a ) ||
+               MDB_IDL_IS_ZERO( b ) ||
+               MDB_IDL_IS_RANGE( b ) )
+       {
+               MDB_IDL_CPY( ids, a );
+               return 0;
+       }
+
+       if( MDB_IDL_IS_RANGE( a ) ) {
+               MDB_IDL_CPY( ids, a );
+               return 0;
+       }
+
+       ida = mdb_idl_first( a, &cursora ),     /* comma operator: both assignments form one statement */
+       idb = mdb_idl_first( b, &cursorb );
+
+       ids[0] = 0;     /* ids[0] doubles as the output count while the list is built */
+
+       while( ida != NOID ) {
+               if ( idb == NOID ) {
+                       /* we could shortcut this */
+                       ids[++ids[0]] = ida;
+                       ida = mdb_idl_next( a, &cursora );
+
+               } else if ( ida < idb ) {
+                       ids[++ids[0]] = ida;
+                       ida = mdb_idl_next( a, &cursora );
+
+               } else if ( ida > idb ) {
+                       idb = mdb_idl_next( b, &cursorb );
+
+               } else {
+                       ida = mdb_idl_next( a, &cursora );      /* ID present in both: drop it */
+                       idb = mdb_idl_next( b, &cursorb );
+               }
+       }
+
+       return 0;
+}
+#endif
+
+/*
+ * mdb_idl_first - begin a walk over an IDL
+ *
+ * ids:    the IDL (empty, list or range form).
+ * cursor: in/out. On entry the ID to start from; 0 means "from the
+ *         first element" for a list.
+ *
+ * Empty IDL: *cursor is set to NOID and NOID is returned.
+ * Range:     *cursor is raised to the lower bound ids[1] if it is below
+ *            it and then returned; it is not compared with the upper
+ *            bound here.
+ * List:      the starting slot is 1 for a zero cursor, otherwise the
+ *            result of mdb_idl_search() for the cursor value. If that
+ *            slot lies beyond ids[0], NOID is returned and *cursor is
+ *            left alone; otherwise *cursor becomes the slot number and
+ *            the ID stored there is returned.
+ */
+ID mdb_idl_first( ID *ids, ID *cursor )
+{
+       ID slot;
+
+       /* nothing to walk */
+       if ( MDB_IDL_IS_ZERO( ids ) ) {
+               *cursor = NOID;
+               return NOID;
+       }
+
+       /* for a range the cursor is itself an ID */
+       if ( MDB_IDL_IS_RANGE( ids ) ) {
+               if ( ids[1] > *cursor ) {
+                       *cursor = ids[1];
+               }
+               return *cursor;
+       }
+
+       /* for a list the cursor is turned into a slot number */
+       if ( *cursor != 0 ) {
+               slot = mdb_idl_search( ids, *cursor );
+       } else {
+               slot = 1;
+       }
+
+       if ( slot <= ids[0] ) {
+               *cursor = slot;
+               return ids[slot];
+       }
+
+       return NOID;
+}
+
+/*
+ * mdb_idl_next - advance a walk started with mdb_idl_first()
+ *
+ * *cursor is incremented in every case, including when NOID is
+ * returned.
+ *
+ * Range: the incremented cursor is the next ID; NOID is returned once
+ *        it has passed the upper bound ids[2].
+ * List:  the incremented cursor is the next slot; the ID in that slot
+ *        is returned, or NOID once the slot number exceeds ids[0].
+ */
+ID mdb_idl_next( ID *ids, ID *cursor )
+{
+       if ( MDB_IDL_IS_RANGE( ids ) ) {
+               *cursor += 1;
+               return ( *cursor > ids[2] ) ? NOID : *cursor;
+       }
+
+       *cursor += 1;
+       if ( *cursor > ids[0] ) {
+               return NOID;
+       }
+       return ids[*cursor];
+}
+
+#ifdef MDB_HIER
+
+/* Add a single ID to an unsorted IDL.
+ *
+ * The list is kept with its smallest ID in slot 1 and its largest ID in
+ * the last slot, so that it can be collapsed into a range cheaply;
+ * the slots in between are in no particular order.
+ *
+ * Range IDL: returns -1 if id already lies inside the range, otherwise
+ *            widens the matching bound and returns 0.
+ * List IDL:  stores id (no duplicate check) and returns 0. Once the
+ *            element count reaches MDB_IDL_UM_MAX the list is turned
+ *            into the range from ids[1] to the largest ID.
+ */
+int mdb_idl_append_one( ID *ids, ID id )
+{
+       ID count;
+
+       if ( MDB_IDL_IS_RANGE( ids ) ) {
+               if ( id < ids[1] ) {
+                       ids[1] = id;
+                       return 0;
+               }
+               if ( id > ids[2] ) {
+                       ids[2] = id;
+                       return 0;
+               }
+               /* first <= id <= last: treat as a dup */
+               return -1;
+       }
+
+       count = ids[0];
+       if ( count != 0 ) {
+               ID displaced;
+
+               /* a new minimum takes over slot 1; carry the old one on */
+               if ( id < ids[1] ) {
+                       displaced = ids[1];
+                       ids[1] = id;
+                       id = displaced;
+               }
+               /* whatever is carried must end up as the maximum: if it is
+                * smaller than the current last element, trade places
+                */
+               if ( count > 1 && id < ids[count] ) {
+                       displaced = ids[count];
+                       ids[count] = id;
+                       id = displaced;
+               }
+       }
+
+       count++;
+       ids[0] = count;
+       if ( count < MDB_IDL_UM_MAX ) {
+               ids[count] = id;
+       } else {
+               /* too long for a list: keep only the bounds */
+               ids[0] = NOID;
+               ids[2] = id;
+       }
+       return 0;
+}
+
+/* Append sorted list b to sorted list a. The result is unsorted but
+ * a[1] is the min of the result and a[a[0]] is the max.
+ *
+ * Always returns 0. b is unchanged on return (its last element is
+ * exchanged temporarily while copying and then put back).
+ *
+ * - Empty b: a is left alone. Empty a: b is copied into a.
+ * - If either IDL is a range, or the combined element count would
+ *   reach MDB_IDL_UM_MAX, a becomes the range from the smaller first ID
+ *   to the larger last ID.
+ * - A one-element b is handed to mdb_idl_append_one(), which keeps the
+ *   min/max-at-the-ends invariant; storing that element in the last
+ *   slot would not when it is smaller than a's current maximum.
+ * - Otherwise the elements of b are copied behind those of a, after
+ *   making sure that the overall minimum sits in a[1] and the overall
+ *   maximum is the last element copied.
+ */
+int mdb_idl_append( ID *a, ID *b )
+{
+       ID ida, idb, tmp, swap = 0;
+       int i;
+
+       if ( MDB_IDL_IS_ZERO( b ) ) {
+               return 0;
+       }
+
+       if ( MDB_IDL_IS_ZERO( a ) ) {
+               MDB_IDL_CPY( a, b );
+               return 0;
+       }
+
+       ida = MDB_IDL_LAST( a );
+       idb = MDB_IDL_LAST( b );
+       if ( MDB_IDL_IS_RANGE( a ) || MDB_IDL_IS_RANGE(b) ||
+               a[0] + b[0] >= MDB_IDL_UM_MAX ) {
+               a[2] = IDL_MAX( ida, idb );
+               a[1] = IDL_MIN( a[1], b[1] );
+               a[0] = NOID;
+               return 0;
+       }
+
+       /* Single ID: a is a list with room for one more element here, so
+        * mdb_idl_append_one() takes its plain list path.
+        */
+       if ( b[0] == 1 ) {
+               (void) mdb_idl_append_one( a, b[1] );
+               return 0;
+       }
+
+       /* From here on b has at least two elements. Its last element is
+        * the last one copied, so it has to be the overall maximum: if
+        * a's maximum is larger, exchange the two last elements and
+        * remember b's original one for restoring it below.
+        */
+       if ( ida > idb ) {
+               swap = idb;
+               a[a[0]] = idb;
+               b[b[0]] = ida;
+       }
+
+       /* The smaller of the two first elements goes to a[1], the other
+        * one is stored right behind a's current elements.
+        */
+       if ( b[1] < a[1] ) {
+               tmp = a[1];
+               a[1] = b[1];
+       } else {
+               tmp = b[1];
+       }
+       a[0]++;
+       a[a[0]] = tmp;
+
+       /* copy b[2..b[0]] behind it */
+       i = b[0] - 1;
+       AC_MEMCPY(a+a[0]+1, b+2, i * sizeof(ID));
+       a[0] += i;
+
+       /* undo the temporary change to b */
+       if ( swap ) {
+               b[b[0]] = swap;
+       }
+       return 0;
+}
+
+#if 1
+
+/* Quicksort + Insertion sort for small arrays */
+
+#define SMALL  8
+#define        SWAP(a,b)       itmp=(a);(a)=(b);(b)=itmp
+
+void
+mdb_idl_sort( ID *ids, ID *tmp )
+{
+       int *istack = (int *)tmp;
+       int i,j,k,l,ir,jstack;
+       ID a, itmp;
+
+       if ( MDB_IDL_IS_RANGE( ids ))
+               return;
+
+       ir = ids[0];
+       l = 1;
+       jstack = 0;
+       for(;;) {
+               if (ir - l < SMALL) {   /* Insertion sort */
+                       for (j=l+1;j<=ir;j++) {
+                               a = ids[j];
+                               for (i=j-1;i>=1;i--) {
+                                       if (ids[i] <= a) break;
+                                       ids[i+1] = ids[i];
+                               }
+                               ids[i+1] = a;
+                       }
+                       if (jstack == 0) break;
+                       ir = istack[jstack--];
+                       l = istack[jstack--];
+               } else {
+                       k = (l + ir) >> 1;      /* Choose median of left, center, right */
+                       SWAP(ids[k], ids[l+1]);
+                       if (ids[l] > ids[ir]) {
+                               SWAP(ids[l], ids[ir]);
+                       }
+                       if (ids[l+1] > ids[ir]) {
+                               SWAP(ids[l+1], ids[ir]);
+                       }
+                       if (ids[l] > ids[l+1]) {
+                               SWAP(ids[l], ids[l+1]);
+                       }
+                       i = l+1;
+                       j = ir;
+                       a = ids[l+1];
+                       for(;;) {
+                               do i++; while(ids[i] < a);
+                               do j--; while(ids[j] > a);
+                               if (j < i) break;
+                               SWAP(ids[i],ids[j]);
+                       }
+                       ids[l+1] = ids[j];
+                       ids[j] = a;
+                       jstack += 2;
+                       if (ir-i+1 >= j-1) {
+                               istack[jstack] = ir;
+                               istack[jstack-1] = i;
+                               ir = j-1;
+                       } else {
+                               istack[jstack] = j-1;
+                               istack[jstack-1] = l;
+                               l = i;
+                       }
+               }
+       }
+}
+
+#else
+
+/* 8 bit Radix sort + insertion sort
+ * 
+ * based on code from http://www.cubic.org/docs/radix.htm
+ * with improvements by mbackes@symas.com and hyc@symas.com
+ *
+ * This code is O(n) but has a relatively high constant factor. For lists
+ * up to ~50 Quicksort is slightly faster; up to ~100 they are even.
+ * Much faster than quicksort for lists longer than ~100. Insertion
+ * sort is actually superior for lists <50.
+ *
+ * ids is sorted in place; a range IDL is left untouched. tmp is the
+ * second buffer of the radix passes and must hold ids[0]+1 IDs.
+ * One pass is made per byte of an ID, least significant byte first,
+ * stopping at the most significant non-zero byte of the last element
+ * ids[ids[0]]. That presumes the last element is the largest ID of the
+ * list (the append routines document this invariant) - TODO confirm for
+ * every caller.
+ *
+ * This variant sits in the #else branch of the "#if 1" above, so it is
+ * not compiled at present.
+ */
+
+#define BUCKETS        (1<<8)
+#define SMALL  50
+
+void
+mdb_idl_sort( ID *ids, ID *tmp )
+{
+       int count, soft_limit, phase = 0, size = ids[0];
+       ID *idls[2];
+       unsigned char *maxv = (unsigned char *)&ids[size];      /* bytes of the last element */
+
+       if ( MDB_IDL_IS_RANGE( ids ))
+               return;
+
+       /* Use insertion sort for small lists */
+       if ( size <= SMALL ) {
+               int i,j;
+               ID a;
+
+               for (j=1;j<=size;j++) {
+                       a = ids[j];
+                       for (i=j-1;i>=1;i--) {
+                               if (ids[i] <= a) break;
+                               ids[i+1] = ids[i];
+                       }
+                       ids[i+1] = a;
+               }
+               return;
+       }
+
+       tmp[0] = size;
+       idls[0] = ids;  /* phase selects the source buffer, phase^1 the destination */
+       idls[1] = tmp;
+
+#if BYTE_ORDER == BIG_ENDIAN
+    for (soft_limit = 0; !maxv[soft_limit]; soft_limit++);
+#else
+    for (soft_limit = sizeof(ID)-1; !maxv[soft_limit]; soft_limit--);
+#endif
+
+       for (
+#if BYTE_ORDER == BIG_ENDIAN
+       count = sizeof(ID)-1; count >= soft_limit; --count
+#else
+       count = 0; count <= soft_limit; ++count
+#endif
+       ) {
+               unsigned int num[BUCKETS], * np, n, sum;
+               int i;
+        ID *sp, *source, *dest;
+        unsigned char *bp, *source_start;
+
+               source = idls[phase]+1;
+               dest = idls[phase^1]+1;
+               source_start =  ((unsigned char *) source) + count;     /* byte number count of the first ID */
+
+        np = num;
+        for ( i = BUCKETS; i > 0; --i ) *np++ = 0;
+
+               /* count occurrences of every byte value */
+               bp = source_start;
+        for ( i = size; i > 0; --i, bp += sizeof(ID) )
+                               num[*bp]++;
+
+               /* transform count into index by summing elements and storing
+                * into same array
+                */
+        sum = 0;
+        np = num;
+        for ( i = BUCKETS; i > 0; --i ) {
+                n = *np;
+                *np++ = sum;
+                sum += n;
+        }
+
+               /* fill dest with the right values in the right place */
+               bp = source_start;
+        sp = source;
+        for ( i = size; i > 0; --i, bp += sizeof(ID) ) {
+                np = num + *bp;
+                dest[*np] = *sp++;
+                ++(*np);
+        }
+               phase ^= 1;
+       }
+
+       /* copy back from temp if needed */
+       if ( phase ) {
+               ids++; tmp++;
+               for ( count = 0; count < size; ++count ) 
+                       *ids++ = *tmp++;
+       }
+}
+#endif /* Quick vs Radix */
+
+#endif /* MDB_HIER */
diff --git a/servers/slapd/back-mdb/idl.h b/servers/slapd/back-mdb/idl.h
new file mode 100644 (file)
index 0000000..be70dbb
--- /dev/null
@@ -0,0 +1,74 @@
+/* idl.h - ldap mdb back-end ID list header file */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#ifndef _MDB_IDL_H_
+#define _MDB_IDL_H_
+
+/* IDL sizes - likely should be even bigger
+ *   limiting factors: sizeof(ID), thread stack size
+ */
+#define        MDB_IDL_LOGN    16      /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
+#define MDB_IDL_DB_SIZE                (1<<MDB_IDL_LOGN)
+#define MDB_IDL_UM_SIZE                (1<<(MDB_IDL_LOGN+1))
+#define MDB_IDL_UM_SIZEOF      (MDB_IDL_UM_SIZE * sizeof(ID))
+
+#define MDB_IDL_DB_MAX         (MDB_IDL_DB_SIZE-1)
+
+#define MDB_IDL_UM_MAX         (MDB_IDL_UM_SIZE-1)
+
+#define MDB_IDL_IS_RANGE(ids)  ((ids)[0] == NOID)
+#define MDB_IDL_RANGE_SIZE             (3)
+#define MDB_IDL_RANGE_SIZEOF   (MDB_IDL_RANGE_SIZE * sizeof(ID))
+#define MDB_IDL_SIZEOF(ids)            ((MDB_IDL_IS_RANGE(ids) \
+       ? MDB_IDL_RANGE_SIZE : ((ids)[0]+1)) * sizeof(ID))
+
+#define MDB_IDL_RANGE_FIRST(ids)       ((ids)[1])
+#define MDB_IDL_RANGE_LAST(ids)                ((ids)[2])
+
+#define MDB_IDL_RANGE( ids, f, l ) \
+       do { \
+               (ids)[0] = NOID; \
+               (ids)[1] = (f);  \
+               (ids)[2] = (l);  \
+       } while(0)
+
+#define MDB_IDL_ZERO(ids) \
+       do { \
+               (ids)[0] = 0; \
+               (ids)[1] = 0; \
+               (ids)[2] = 0; \
+       } while(0)
+
+#define MDB_IDL_IS_ZERO(ids) ( (ids)[0] == 0 )
+#define MDB_IDL_IS_ALL( range, ids ) ( (ids)[0] == NOID \
+       && (ids)[1] <= (range)[1] && (range)[2] <= (ids)[2] )
+
+#define MDB_IDL_CPY( dst, src ) (AC_MEMCPY( dst, src, MDB_IDL_SIZEOF( src ) ))
+
+#define MDB_IDL_ID( mdb, ids, id ) MDB_IDL_RANGE( ids, id, ((mdb)->bi_lastid) )
+#define MDB_IDL_ALL( mdb, ids ) MDB_IDL_RANGE( ids, 1, ((mdb)->bi_lastid) )
+
+#define MDB_IDL_FIRST( ids )   ( ids[1] )
+#define MDB_IDL_LAST( ids )            ( MDB_IDL_IS_RANGE(ids) \
+       ? ids[2] : ids[ids[0]] )
+
+#define MDB_IDL_N( ids )               ( MDB_IDL_IS_RANGE(ids) \
+       ? (ids[2]-ids[1])+1 : ids[0] )
+
+LDAP_BEGIN_DECL
+LDAP_END_DECL
+
+#endif
diff --git a/servers/slapd/back-mdb/index.c b/servers/slapd/back-mdb/index.c
new file mode 100644 (file)
index 0000000..7a9453b
--- /dev/null
@@ -0,0 +1,574 @@
+/* index.c - routines for dealing with attribute indexes */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+
+#include <ac/string.h>
+#include <ac/socket.h>
+
+#include "slap.h"
+#include "back-mdb.h"
+#include "lutil_hash.h"
+
+static char presence_keyval[] = {0,0}; /* raw bytes backing presence_key */
+static struct berval presence_key = BER_BVC(presence_keyval);  /* key of the presence index: written by indexer() and
+                                * handed out as the prefix for LDAP_FILTER_PRESENT by
+                                * mdb_index_param() */
+
+/*
+ * mdb_index_mask - find the index configuration that applies to desc
+ *
+ * Tries, in this order:
+ *   1. desc itself;
+ *   2. for a tagged desc, the untagged description of its type, unless
+ *      that entry has SLAP_INDEX_NOTAGS set;
+ *   3. the type of desc and then each of its supertypes, skipping
+ *      types without a description and entries that have
+ *      SLAP_INDEX_NOSUBTYPES set.
+ *
+ * On a match the AttrInfo is returned and *atname is set to the name
+ * of the description or type that matched. Without a match NULL is
+ * returned and *atname is not written.
+ */
+AttrInfo *mdb_index_mask(
+       Backend *be,
+       AttributeDescription *desc,
+       struct berval *atname )
+{
+       AttributeType *cur;
+       AttrInfo *info;
+
+       /* exact match on the description */
+       info = mdb_attr_mask( be->be_private, desc );
+       if ( info != NULL ) {
+               *atname = desc->ad_cname;
+               return info;
+       }
+
+       /* tagged description: fall back to the index of the base type,
+        * provided it was not restricted to untagged use
+        */
+       if ( slap_ad_is_tagged( desc ) && desc->ad_type->sat_ad != desc ) {
+               info = mdb_attr_mask( be->be_private, desc->ad_type->sat_ad );
+
+               if ( info != NULL
+                       && ( info->ai_indexmask & SLAP_INDEX_NOTAGS ) == 0 ) {
+                       *atname = desc->ad_type->sat_cname;
+                       return info;
+               }
+       }
+
+       /* walk up the supertype chain looking for an inheritable index */
+       cur = desc->ad_type;
+       while ( cur != NULL ) {
+               /* a type without a description was never indexed */
+               if ( cur->sat_ad != NULL ) {
+                       info = mdb_attr_mask( be->be_private, cur->sat_ad );
+
+                       if ( info != NULL
+                               && ( info->ai_indexmask & SLAP_INDEX_NOSUBTYPES ) == 0 ) {
+                               *atname = cur->sat_cname;
+                               return info;
+                       }
+               }
+               cur = cur->sat_sup;
+       }
+
+       return NULL;
+}
+
+/* This function is only called when evaluating search filters.
+ *
+ * It decides whether a filter of type ftype on desc can be answered
+ * from an index and, if so, hands back what is needed to read it.
+ *
+ * be:      backend whose index configuration is consulted
+ * desc:    attribute description used in the filter
+ * ftype:   LDAP_FILTER_PRESENT, _APPROX, _EQUALITY or _SUBSTRINGS
+ * dbp:     out, set on success to the DB opened through mdb_db_cache()
+ * maskp:   out, set on success to the index mask of the attribute
+ * prefixp: out, set by mdb_index_mask() to the name of the matching
+ *          attribute; for a usable PRESENT filter it is then replaced
+ *          by presence_key
+ *
+ * Returns LDAP_SUCCESS when a suitable index exists,
+ * LDAP_INAPPROPRIATE_MATCHING when the attribute is not indexed at all
+ * or not for this filter type, LDAP_OTHER for any other ftype, or the
+ * error returned by mdb_db_cache(). *dbp and *maskp are written only on
+ * success. An APPROX filter on a type without an approximate matching
+ * rule is served by the EQUALITY index.
+ *
+ * With MDB_MONITOR_IDX defined, each lookup that ends in
+ * LDAP_INAPPROPRIATE_MATCHING is also reported through
+ * mdb_monitor_idx_add().
+ */
+int mdb_index_param(
+       Backend *be,
+       AttributeDescription *desc,
+       int ftype,
+       DB **dbp,
+       slap_mask_t *maskp,
+       struct berval *prefixp )
+{
+       AttrInfo *ai;
+       int rc;
+       slap_mask_t mask, type = 0;     /* type is only consumed by the MDB_MONITOR_IDX code */
+       DB *db;
+
+       ai = mdb_index_mask( be, desc, prefixp );
+
+       if ( !ai ) {
+#ifdef MDB_MONITOR_IDX
+               switch ( ftype ) {
+               case LDAP_FILTER_PRESENT:
+                       type = SLAP_INDEX_PRESENT;
+                       break;
+               case LDAP_FILTER_APPROX:
+                       type = SLAP_INDEX_APPROX;
+                       break;
+               case LDAP_FILTER_EQUALITY:
+                       type = SLAP_INDEX_EQUALITY;
+                       break;
+               case LDAP_FILTER_SUBSTRINGS:
+                       type = SLAP_INDEX_SUBSTR;
+                       break;
+               default:
+                       return LDAP_INAPPROPRIATE_MATCHING;
+               }
+               mdb_monitor_idx_add( be->be_private, desc, type );
+#endif /* MDB_MONITOR_IDX */
+
+               return LDAP_INAPPROPRIATE_MATCHING;
+       }
+       mask = ai->ai_indexmask;
+
+       rc = mdb_db_cache( be, prefixp, &db );
+
+       if( rc != LDAP_SUCCESS ) {
+               return rc;
+       }
+
+       switch( ftype ) {
+       case LDAP_FILTER_PRESENT:
+               type = SLAP_INDEX_PRESENT;
+               if( IS_SLAP_INDEX( mask, SLAP_INDEX_PRESENT ) ) {
+                       *prefixp = presence_key;
+                       goto done;
+               }
+               break;
+
+       case LDAP_FILTER_APPROX:
+               type = SLAP_INDEX_APPROX;
+               if ( desc->ad_type->sat_approx ) {
+                       if( IS_SLAP_INDEX( mask, SLAP_INDEX_APPROX ) ) {
+                               goto done;
+                       }
+                       break;
+               }
+
+               /* Use EQUALITY rule and index for approximate match */
+               /* fall thru */
+
+       case LDAP_FILTER_EQUALITY:
+               type = SLAP_INDEX_EQUALITY;
+               if( IS_SLAP_INDEX( mask, SLAP_INDEX_EQUALITY ) ) {
+                       goto done;
+               }
+               break;
+
+       case LDAP_FILTER_SUBSTRINGS:
+               type = SLAP_INDEX_SUBSTR;
+               if( IS_SLAP_INDEX( mask, SLAP_INDEX_SUBSTR ) ) {
+                       goto done;
+               }
+               break;
+
+       default:
+               return LDAP_OTHER;
+       }
+
+#ifdef MDB_MONITOR_IDX
+       mdb_monitor_idx_add( be->be_private, desc, type );
+#endif /* MDB_MONITOR_IDX */
+
+       return LDAP_INAPPROPRIATE_MATCHING;     /* attribute is indexed, but not for this filter type */
+
+done:
+       *dbp = db;
+       *maskp = mask;
+       return LDAP_SUCCESS;
+}
+
+/*
+ * Run the indexer of one matching rule over vals and apply every key
+ * it produces to db through mdb_key_change().
+ *
+ * Returns 0 when the rule's indexer fails or delivers no key array, or
+ * when all keys were applied; otherwise the non-zero result of the
+ * mdb_key_change() call that failed. A delivered key array is always
+ * released again.
+ */
+static int indexer_apply_rule(
+       Operation *op,
+       DB *db,
+       DB_TXN *txn,
+       AttributeDescription *ad,
+       MatchingRule *rule,
+       slap_mask_t use,
+       slap_mask_t mask,
+       struct berval *atname,
+       BerVarray vals,
+       ID id,
+       int opid )
+{
+       struct berval *keys;
+       int n, err;
+
+       err = rule->smr_indexer( use, mask,
+               ad->ad_type->sat_syntax, rule,
+               atname, vals, &keys, op->o_tmpmemctx );
+       if ( err != LDAP_SUCCESS || keys == NULL ) {
+               /* nothing to store; not treated as an error */
+               return 0;
+       }
+
+       err = 0;
+       for ( n = 0; keys[n].bv_val != NULL; n++ ) {
+               err = mdb_key_change( op->o_bd, db, txn, &keys[n], id, opid );
+               if ( err ) {
+                       break;
+               }
+       }
+       ber_bvarray_free_x( keys, op->o_tmpmemctx );
+
+       return err;
+}
+
+/*
+ * Update every index selected by mask for the values vals of entry id.
+ *
+ * atname names the DB that is opened through mdb_db_cache() and is also
+ * passed to the matching rule indexers; opid is passed unchanged to
+ * mdb_key_change(). The presence index is handled first, followed by
+ * the equality, approximate and substring indexes; processing stops at
+ * the first mdb_key_change() failure.
+ *
+ * Returns 0 on success, DB_LOCK_DEADLOCK or DB_LOCK_NOTGRANTED when
+ * mdb_key_change() reported one of these, and LDAP_OTHER for every
+ * other failure, including a DB that cannot be opened.
+ */
+static int indexer(
+       Operation *op,
+       DB_TXN *txn,
+       AttributeDescription *ad,
+       struct berval *atname,
+       BerVarray vals,
+       ID id,
+       int opid,
+       slap_mask_t mask )
+{
+       int rc;
+       DB *db;
+
+       assert( mask != 0 );
+
+       rc = mdb_db_cache( op->o_bd, atname, &db );
+       if ( rc != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_ANY,
+                       "mdb_index_read: Could not open DB %s\n",
+                       atname->bv_val, 0, 0 );
+               return LDAP_OTHER;
+       }
+
+       if ( IS_SLAP_INDEX( mask, SLAP_INDEX_PRESENT ) ) {
+               rc = mdb_key_change( op->o_bd, db, txn, &presence_key, id, opid );
+               if ( rc ) {
+                       goto done;
+               }
+       }
+
+       if ( IS_SLAP_INDEX( mask, SLAP_INDEX_EQUALITY ) ) {
+               rc = indexer_apply_rule( op, db, txn, ad,
+                       ad->ad_type->sat_equality, LDAP_FILTER_EQUALITY,
+                       mask, atname, vals, id, opid );
+               if ( rc ) {
+                       goto done;
+               }
+               rc = LDAP_SUCCESS;
+       }
+
+       if ( IS_SLAP_INDEX( mask, SLAP_INDEX_APPROX ) ) {
+               rc = indexer_apply_rule( op, db, txn, ad,
+                       ad->ad_type->sat_approx, LDAP_FILTER_APPROX,
+                       mask, atname, vals, id, opid );
+               if ( rc ) {
+                       goto done;
+               }
+               rc = LDAP_SUCCESS;
+       }
+
+       if ( IS_SLAP_INDEX( mask, SLAP_INDEX_SUBSTR ) ) {
+               rc = indexer_apply_rule( op, db, txn, ad,
+                       ad->ad_type->sat_substr, LDAP_FILTER_SUBSTRINGS,
+                       mask, atname, vals, id, opid );
+               if ( rc ) {
+                       goto done;
+               }
+               rc = LDAP_SUCCESS;
+       }
+
+done:
+       /* The callers all know how to deal with success and with the two
+        * lock results; anything else is bad news.
+        */
+       if ( rc != 0 && rc != DB_LOCK_DEADLOCK && rc != DB_LOCK_NOTGRANTED ) {
+               rc = LDAP_OTHER;
+       }
+       return rc;
+}
+/*
+ * Index the values vals of entry id under attribute type type.
+ *
+ * The supertype chain is handled first, by recursion with the original
+ * opid. Then, if type has a description with an index configuration,
+ * indexer() is run for it, and finally, when tags is non-empty, for
+ * the tagged description found by ad_find_tags().
+ *
+ * MDB_INDEX_UPDATE_OP is passed to indexer() as SLAP_INDEX_ADD_OP and
+ * restricts the work to the index bits of ai_newmask that are not in
+ * ai_indexmask; for every other opid ai_newmask is used when it is
+ * non-zero and ai_indexmask otherwise.
+ *
+ * The incoming ad is not read: it is replaced by type->sat_ad before
+ * its only use (the recursion passes NULL).
+ *
+ * Returns the first non-zero result of the recursion or of these
+ * indexer() calls, LDAP_SUCCESS otherwise. The results of the
+ * component indexer() calls under LDAP_COMP_MATCH are not checked.
+ */
+static int index_at_values(
+       Operation *op,
+       DB_TXN *txn,
+       AttributeDescription *ad,
+       AttributeType *type,
+       struct berval *tags,
+       BerVarray vals,
+       ID id,
+       int opid )
+{
+       int rc;
+       slap_mask_t mask = 0;
+       int ixop = opid;        /* operation code handed to indexer() */
+       AttrInfo *ai = NULL;
+
+       if ( opid == MDB_INDEX_UPDATE_OP )
+               ixop = SLAP_INDEX_ADD_OP;
+
+       if( type->sat_sup ) {
+               /* recurse */
+               rc = index_at_values( op, txn, NULL,
+                       type->sat_sup, tags,
+                       vals, id, opid );
+
+               if( rc ) return rc;
+       }
+
+       /* If this type has no AD, we've never used it before */
+       if( type->sat_ad ) {
+               ai = mdb_attr_mask( op->o_bd->be_private, type->sat_ad );
+               if ( ai ) {
+#ifdef LDAP_COMP_MATCH
+                       /* component indexing */
+                       if ( ai->ai_cr ) {
+                               ComponentReference *cr;
+                               for( cr = ai->ai_cr ; cr ; cr = cr->cr_next ) {
+                                       rc = indexer( op, txn, cr->cr_ad, &type->sat_cname,
+                                               cr->cr_nvals, id, ixop,
+                                               cr->cr_indexmask );
+                               }
+                       }
+#endif
+                       ad = type->sat_ad;
+                       /* If we're updating the index, just set the new bits that aren't
+                        * already in the old mask.
+                        */
+                       if ( opid == MDB_INDEX_UPDATE_OP )
+                               mask = ai->ai_newmask & ~ai->ai_indexmask;
+                       else
+                       /* For regular updates, if there is a newmask use it. Otherwise
+                        * just use the old mask.
+                        */
+                               mask = ai->ai_newmask ? ai->ai_newmask : ai->ai_indexmask;
+                       if( mask ) {
+                               rc = indexer( op, txn, ad, &type->sat_cname,
+                                       vals, id, ixop, mask );
+
+                               if( rc ) return rc;
+                       }
+               }
+       }
+
+       if( tags->bv_len ) {
+               AttributeDescription *desc;
+
+               desc = ad_find_tags( type, tags );
+               if( desc ) {
+                       ai = mdb_attr_mask( op->o_bd->be_private, desc );
+
+                       if( ai ) {
+                               if ( opid == MDB_INDEX_UPDATE_OP )
+                                       mask = ai->ai_newmask & ~ai->ai_indexmask;
+                               else
+                                       mask = ai->ai_newmask ? ai->ai_newmask : ai->ai_indexmask;
+                               if ( mask ) {
+                                       rc = indexer( op, txn, desc, &desc->ad_cname,
+                                               vals, id, ixop, mask );
+
+                                       if( rc ) {
+                                               return rc;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       return LDAP_SUCCESS;
+}
+
+/*
+ * mdb_index_values - apply the index operation opid for the values
+ * vals of attribute desc in entry id.
+ *
+ * Nothing is done for entry ID 0, which is never indexed. Otherwise
+ * the work is delegated to index_at_values() for the type and tags of
+ * desc, and its result is returned.
+ */
+int mdb_index_values(
+       Operation *op,
+       DB_TXN *txn,
+       AttributeDescription *desc,
+       BerVarray vals,
+       ID id,
+       int opid )
+{
+       if ( id != 0 ) {
+               return index_at_values( op, txn, desc,
+                       desc->ad_type, &desc->ad_tags,
+                       vals, id, opid );
+       }
+
+       /* Never index ID 0 */
+       return 0;
+}
+
+/* Note in ir that attribute a has to be run through index slot slot:
+ * the slot's AttrInfo is recorded and a new list node for a is put at
+ * the head of the slot's attribute list.
+ */
+static void index_rec_add(
+       struct mdb_info *mdb,
+       IndexRec *ir,
+       int slot,
+       Attribute *a )
+{
+       IndexRec *rec = &ir[slot];
+       AttrList *node;
+
+       rec->ai = mdb->bi_attrs[slot];
+       node = ch_malloc( sizeof( AttrList ));
+       node->attr = a;
+       node->next = rec->attrs;
+       rec->attrs = node;
+}
+
+/* Collect, in ir, the index slots that apply to attribute a.
+ *
+ * The supertypes of type are visited first (by recursion), then type
+ * itself and, when tags is non-empty, the tagged description found by
+ * ad_find_tags(). Each description that mdb_attr_slot() maps to a slot
+ * gets a queued on that slot. Returns LDAP_SUCCESS, or the non-zero
+ * result of the recursion.
+ */
+int
+mdb_index_recset(
+       struct mdb_info *mdb,
+       Attribute *a,
+       AttributeType *type,
+       struct berval *tags,
+       IndexRec *ir )
+{
+       int slot;
+
+       if ( type->sat_sup != NULL ) {
+               int rc = mdb_index_recset( mdb, a, type->sat_sup, tags, ir );
+
+               if ( rc != 0 ) {
+                       return rc;
+               }
+       }
+
+       /* a type without an AD has never been used, hence is not indexed */
+       if ( type->sat_ad != NULL ) {
+               slot = mdb_attr_slot( mdb, type->sat_ad, NULL );
+               if ( slot >= 0 ) {
+                       index_rec_add( mdb, ir, slot, a );
+               }
+       }
+
+       if ( tags->bv_len != 0 ) {
+               AttributeDescription *tagged = ad_find_tags( type, tags );
+
+               if ( tagged != NULL ) {
+                       slot = mdb_attr_slot( mdb, tagged, NULL );
+                       if ( slot >= 0 ) {
+                               index_rec_add( mdb, ir, slot, a );
+                       }
+               }
+       }
+
+       return LDAP_SUCCESS;
+}
+
+/* Apply the indices for the recset
+ *
+ * Walks the slots base, base + slap_tool_thread_max, ... of ir0 and,
+ * for every slot that has an AttrInfo, runs indexer() with
+ * SLAP_INDEX_ADD_OP for each queued attribute, freeing the list nodes
+ * as it goes. Entry ID 0 is never indexed.
+ *
+ * A failing indexer() call ends the work on its slot (the nodes still
+ * queued there are left in place); the remaining slots are still
+ * processed. The return value is the first non-zero indexer() result,
+ * so a failure is reported even when a later slot is indexed
+ * successfully; 0 means that every call succeeded.
+ */
+int mdb_index_recrun(
+       Operation *op,
+       struct mdb_info *mdb,
+       IndexRec *ir0,
+       ID id,
+       int base )
+{
+       IndexRec *ir;
+       AttrList *al;
+       int i, rc;
+       int first_err = 0;
+
+       /* Never index ID 0 */
+       if ( id == 0 )
+               return 0;
+
+       for (i=base; i<mdb->bi_nattrs; i+=slap_tool_thread_max) {
+               ir = ir0 + i;
+               if ( !ir->ai ) continue;
+               while (( al = ir->attrs )) {
+                       ir->attrs = al->next;
+                       rc = indexer( op, NULL, ir->ai->ai_desc,
+                               &ir->ai->ai_desc->ad_type->sat_cname,
+                               al->attr->a_nvals, id, SLAP_INDEX_ADD_OP,
+                               ir->ai->ai_indexmask );
+                       free( al );
+                       if ( rc ) {
+                               /* keep the first failure; rc itself is
+                                * overwritten by the next slot's calls
+                                */
+                               if ( !first_err )
+                                       first_err = rc;
+                               break;
+                       }
+               }
+       }
+       return first_err;
+}
+
+/*
+ * Run mdb_index_values() for every attribute of entry e, using the
+ * entry's ID and the given opid (the trace output prints "del" for
+ * SLAP_INDEX_DELETE_OP and "add" for SLAP_INDEX_ADD_OP).
+ *
+ * Returns 0 without touching anything when e->e_id is 0, LDAP_SUCCESS
+ * when every attribute was processed, or the first non-success result
+ * of mdb_index_values().
+ *
+ * The component-matching (LDAP_COMP_MATCH) sections are disabled with
+ * "#if 0" and are not compiled.
+ */
+int
+mdb_index_entry(
+       Operation *op,
+       DB_TXN *txn,
+       int opid,
+       Entry   *e )
+{
+       int rc;
+       Attribute *ap = e->e_attrs;
+#if 0 /* ifdef LDAP_COMP_MATCH */
+       ComponentReference *cr_list = NULL;
+       ComponentReference *cr = NULL, *dupped_cr = NULL;
+       void* decoded_comp;
+       ComponentSyntaxInfo* csi_attr;
+       Syntax* syn;
+       AttributeType* at;
+       int i, num_attr;
+       void* mem_op;
+       struct berval value = {0};
+#endif
+
+       /* Never index ID 0 */
+       if ( e->e_id == 0 )
+               return 0;
+
+       Debug( LDAP_DEBUG_TRACE, "=> index_entry_%s( %ld, \"%s\" )\n",
+               opid == SLAP_INDEX_DELETE_OP ? "del" : "add",
+               (long) e->e_id, e->e_dn );
+
+       /* add each attribute to the indexes */
+       for ( ; ap != NULL; ap = ap->a_next ) {
+#if 0 /* ifdef LDAP_COMP_MATCH */
+               AttrInfo *ai;
+               /* see if attribute has components to be indexed */
+               ai = mdb_attr_mask( op->o_bd->be_private, ap->a_desc->ad_type->sat_ad );
+               if ( !ai ) continue;
+               cr_list = ai->ai_cr;
+               if ( attr_converter && cr_list ) {
+                       syn = ap->a_desc->ad_type->sat_syntax;
+                       ap->a_comp_data = op->o_tmpalloc( sizeof( ComponentData ), op->o_tmpmemctx );
+                       /* Memory chunk(nibble) pre-allocation for decoders */
+                       mem_op = nibble_mem_allocator ( 1024*16, 1024*4 );
+                       ap->a_comp_data->cd_mem_op = mem_op;
+                       for( cr = cr_list ; cr ; cr = cr->cr_next ) {
+                               /* count how many values in an attribute */
+                               for( num_attr=0; ap->a_vals[num_attr].bv_val != NULL; num_attr++ );
+                               num_attr++;
+                               cr->cr_nvals = (BerVarray)op->o_tmpalloc( sizeof( struct berval )*num_attr, op->o_tmpmemctx );
+                               for( i=0; ap->a_vals[i].bv_val != NULL; i++ ) {
+                                       /* decoding attribute value */
+                                       decoded_comp = attr_converter ( ap, syn, &ap->a_vals[i] );
+                                       if ( !decoded_comp )
+                                               return LDAP_DECODING_ERROR;
+                                       /* extracting the referenced component */
+                                       dupped_cr = dup_comp_ref( op, cr );
+                                       csi_attr = ((ComponentSyntaxInfo*)decoded_comp)->csi_comp_desc->cd_extract_i( mem_op, dupped_cr, decoded_comp );
+                                       if ( !csi_attr )
+                                               return LDAP_DECODING_ERROR;
+                                       cr->cr_asn_type_id = csi_attr->csi_comp_desc->cd_type_id;
+                                       cr->cr_ad = (AttributeDescription*)get_component_description ( cr->cr_asn_type_id );
+                                       if ( !cr->cr_ad )
+                                               return LDAP_INVALID_SYNTAX;
+                                       at = cr->cr_ad->ad_type;
+                                       /* encoding the value of component in GSER */
+                                       rc = component_encoder( mem_op, csi_attr, &value );
+                                       if ( rc != LDAP_SUCCESS )
+                                               return LDAP_ENCODING_ERROR;
+                                       /* Normalize the encoded component values */
+                                       if ( at->sat_equality && at->sat_equality->smr_normalize ) {
+                                               rc = at->sat_equality->smr_normalize (
+                                                       SLAP_MR_VALUE_OF_ATTRIBUTE_SYNTAX,
+                                                       at->sat_syntax, at->sat_equality,
+                                                       &value, &cr->cr_nvals[i], op->o_tmpmemctx );
+                                       } else {
+                                               cr->cr_nvals[i] = value;
+                                       }
+                               }
+                               /* The end of BerVarray */
+                               cr->cr_nvals[num_attr-1].bv_val = NULL;
+                               cr->cr_nvals[num_attr-1].bv_len = 0;
+                       }
+                       op->o_tmpfree( ap->a_comp_data, op->o_tmpmemctx );
+                       nibble_mem_free ( mem_op );
+                       ap->a_comp_data = NULL;
+               }
+#endif
+               /* index (or un-index) the normalized values of this attribute */
+               rc = mdb_index_values( op, txn, ap->a_desc,
+                       ap->a_nvals, e->e_id, opid );
+
+               /* stop at the first attribute that fails */
+               if( rc != LDAP_SUCCESS ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<= index_entry_%s( %ld, \"%s\" ) failure\n",
+                               opid == SLAP_INDEX_ADD_OP ? "add" : "del",
+                               (long) e->e_id, e->e_dn );
+                       return rc;
+               }
+       }
+
+       Debug( LDAP_DEBUG_TRACE, "<= index_entry_%s( %ld, \"%s\" ) success\n",
+               opid == SLAP_INDEX_DELETE_OP ? "del" : "add",
+               (long) e->e_id, e->e_dn );
+
+       return LDAP_SUCCESS;
+}
diff --git a/servers/slapd/back-mdb/init.c b/servers/slapd/back-mdb/init.c
new file mode 100644 (file)
index 0000000..dcda605
--- /dev/null
@@ -0,0 +1,448 @@
+/* init.c - initialize mdb backend */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+#include <ac/unistd.h>
+#include <ac/stdlib.h>
+#include <ac/errno.h>
+#include <sys/stat.h>
+#include "back-mdb.h"
+#include <lutil.h>
+#include <ldap_rq.h>
+#include "config.h"
+
+/* Names of the main databases opened by mdb_db_open(), terminated by
+ * BER_BVNULL.  mdb_db_open() compares the array index against
+ * MDB_ID2ENTRY and MDB_DN2ID, so the order of the entries matters.
+ */
+static const struct berval mdmi_databases[] = {
+       BER_BVC("ad2i"),
+       BER_BVC("dn2i"),
+       BER_BVC("id2e"),
+       BER_BVNULL
+};
+
+/*
+ * Per-database initialisation: allocate the zero-filled mdb_info, store
+ * the default environment directory, flags and mode plus the default
+ * search stack depth, attach the structure to the backend and finish
+ * with mdb_monitor_db_init(), whose result is returned.
+ */
+static int
+mdb_db_init( BackendDB *be, ConfigReply *cr )
+{
+       struct mdb_info *info;
+
+       Debug( LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_db_init) ": Initializing mdb database\n",
+               0, 0, 0 );
+
+       /* private state of this database instance */
+       info = ch_calloc( 1, sizeof(*info) );
+
+       /* environment defaults */
+       info->mi_dbenv_home = ch_strdup( SLAPD_DEFAULT_DB_DIR );
+       info->mi_dbenv_flags = 0;
+       info->mi_dbenv_mode = SLAPD_DEFAULT_DB_MODE;
+
+       /* search stack defaults */
+       info->mi_search_stack_depth = DEFAULT_SEARCH_STACK_DEPTH;
+       info->mi_search_stack = NULL;
+
+       ldap_pvt_thread_mutex_init( &info->mi_database_mutex );
+
+       /* hook the new state and the config object classes into the backend */
+       be->be_private = info;
+       be->be_cf_ocs = be->bd_info->bi_cf_ocs;
+
+#ifndef MDB_MULTIPLE_SUFFIXES
+       SLAP_DBFLAGS( be ) |= SLAP_DBFLAG_ONE_SUFFIX;
+#endif
+
+       return mdb_monitor_db_init( be );
+}
+
+static int
+mdb_db_close( BackendDB *be, ConfigReply *cr );
+
+/*
+ * Open the database environment and the main databases.
+ *
+ * Steps: require a suffix, stat() the environment directory, create the
+ * environment, apply the configured map size (if any) and the maximum
+ * number of databases, open the environment, then open every database
+ * named in mdmi_databases inside one transaction, commit it, run the
+ * monitor hook and set MDB_IS_OPEN.
+ *
+ * Returns 0 on success and -1 when the suffix is missing or the
+ * directory cannot be accessed.  Any later failure aborts the pending
+ * transaction (if one is still open), calls mdb_db_close() and returns
+ * the failing call's result.
+ */
+static int
+mdb_db_open( BackendDB *be, ConfigReply *cr )
+{
+       int rc, i;
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       struct stat stat1;
+       u_int32_t flags;
+       char path[MAXPATHLEN];
+       char *dbhome;
+       int quick = 0;
+       MDB_txn *txn = NULL;
+
+       if ( be->be_suffix == NULL ) {
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_db_open) ": need suffix.\n",
+                       1, 0, 0 );
+               return -1;
+       }
+
+       Debug( LDAP_DEBUG_ARGS,
+               LDAP_XSTRING(mdb_db_open) ": \"%s\"\n",
+               be->be_suffix[0].bv_val, 0, 0 );
+
+       /* Check existence of dbenv_home. Any error means trouble */
+       rc = stat( mdb->mi_dbenv_home, &stat1 );
+       if( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+                       "cannot access database directory \"%s\" (%d).\n",
+                       be->be_suffix[0].bv_val, mdb->mi_dbenv_home, errno );
+               return -1;
+       }
+
+       /* mdb is always clean */
+       be->be_flags |= SLAP_DBFLAG_CLEAN;
+
+       rc = mdb_env_create( &mdb->mi_dbenv );
+       if( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+                       "mdb_env_create failed: %s (%d).\n",
+                       be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+               goto fail;
+       }
+
+       if ( mdb->mi_mapsize ) {
+               rc = mdb_env_set_mapsize( mdb->mi_dbenv, mdb->mi_mapsize );
+               if( rc != 0 ) {
+                       Debug( LDAP_DEBUG_ANY,
+                               LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+                               "mdb_env_set_mapsize failed: %s (%d).\n",
+                               be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+                       goto fail;
+               }
+       }
+
+       rc = mdb_env_set_maxdbs( mdb->mi_dbenv, MDB_INDICES );
+       if( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+                       "mdb_env_set_maxdbs failed: %s (%d).\n",
+                       be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+               goto fail;
+       }
+
+#ifdef HAVE_EBCDIC
+       strcpy( path, mdb->mi_dbenv_home );
+       __atoe( path );
+       dbhome = path;
+#else
+       dbhome = mdb->mi_dbenv_home;
+#endif
+
+       Debug( LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+               "dbenv_open(%s).\n",
+               be->be_suffix[0].bv_val, mdb->mi_dbenv_home, 0);
+
+       flags = mdb->mi_dbenv_flags;
+
+       if ( quick )
+               flags |= MDB_NOSYNC;
+
+       if ( slapMode & SLAP_TOOL_READONLY)
+               flags |= MDB_RDONLY;
+
+       rc = mdb_env_open( mdb->mi_dbenv, dbhome,
+                       flags, mdb->mi_dbenv_mode );
+
+       if ( rc ) {
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_db_open) ": database \"%s\" cannot be opened, err %d. "
+                       "Restore from backup!\n",
+                       be->be_suffix[0].bv_val, rc, 0 );
+               goto fail;
+       }
+
+       mdb->mi_databases = (struct mdb_db_info **) ch_malloc(
+               MDB_INDICES * sizeof(struct mdb_db_info *) );
+       /* nothing is recorded yet; mdb_db_close() trusts this count */
+       mdb->mi_ndatabases = 0;
+
+       rc = mdb_txn_begin( mdb->mi_dbenv, 0, &txn );
+       if ( rc ) {
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_db_open) ": database \"%s\" cannot be opened, err %d. "
+                       "Restore from backup!\n",
+                       be->be_suffix[0].bv_val, rc, 0 );
+               txn = NULL;
+               goto fail;
+       }
+
+       /* open (and create) main databases */
+       for( i = 0; mdmi_databases[i].bv_val; i++ ) {
+               struct mdb_db_info *db;
+
+               db = (struct mdb_db_info *) ch_calloc(1, sizeof(struct mdb_db_info));
+
+               flags = MDB_INTEGERKEY;
+               if( i == MDB_ID2ENTRY ) {
+                       if ( !(slapMode & (SLAP_TOOL_READMAIN|SLAP_TOOL_READONLY) ))
+                               flags |= MDB_CREATE;
+               } else {
+                       if ( i == MDB_DN2ID )
+                               flags |= MDB_DUPSORT;
+                       if ( !(slapMode & SLAP_TOOL_READONLY) )
+                               flags |= MDB_CREATE;
+               }
+
+               rc = mdb_open( txn,
+                       mdmi_databases[i].bv_val,
+                       flags,
+                       &db->mdi_dbi );
+
+               if ( rc != 0 ) {
+                       snprintf( cr->msg, sizeof(cr->msg), "database \"%s\": "
+                               "mdb_open(%s/%s) failed: %s (%d).",
+                               be->be_suffix[0].bv_val,
+                               mdb->mi_dbenv_home, mdmi_databases[i].bv_val,
+                               mdb_strerror(rc), rc );
+                       Debug( LDAP_DEBUG_ANY,
+                               LDAP_XSTRING(mdb_db_open) ": %s\n",
+                               cr->msg, 0, 0 );
+                       /* not recorded in mi_databases yet, so release it here */
+                       ch_free( db );
+                       goto fail;
+               }
+
+               db->mdi_name = mdmi_databases[i];
+               mdb->mi_databases[i] = db;
+               /* keep the count current so a failure further on lets
+                * mdb_db_close() release this slot too
+                */
+               mdb->mi_ndatabases = i + 1;
+       }
+
+       /* NOTE(review): assumes mdb_txn_commit() releases the handle even
+        * when it fails, as the LMDB API documents -- confirm against this
+        * tree's mdb.c.
+        */
+       rc = mdb_txn_commit(txn);
+       txn = NULL;
+       if ( rc != 0 ) {
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_db_open) ": database \"%s\": "
+                       "mdb_txn_commit failed: %s (%d).\n",
+                       be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+               goto fail;
+       }
+
+       mdb->mi_databases[i] = NULL;
+       mdb->mi_ndatabases = i;
+
+       /* monitor setup */
+       rc = mdb_monitor_db_open( be );
+       if ( rc != 0 ) {
+               goto fail;
+       }
+
+       mdb->mi_flags |= MDB_IS_OPEN;
+
+       return 0;
+
+fail:
+       /* drop the transaction begun above if it is still pending */
+       if ( txn )
+               mdb_txn_abort( txn );
+       mdb_db_close( be, NULL );
+       return rc;
+}
+
+/*
+ * Close the databases and the environment of this backend.
+ *
+ * Runs the monitor close hook, clears MDB_IS_OPEN, flushes readers,
+ * closes and frees every entry recorded in mi_databases, then syncs and
+ * closes the environment.  The function is also used by mdb_db_open()
+ * to clean up after a partial open, so it copes with an environment
+ * that was never created and with a transaction that cannot be started.
+ *
+ * Always returns 0; a failing mdb_txn_begin() or mdb_env_sync() is only
+ * logged.
+ */
+static int
+mdb_db_close( BackendDB *be, ConfigReply *cr )
+{
+       int rc;
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       struct mdb_db_info *db;
+       MDB_txn *txn = NULL;
+
+       /* monitor handling */
+       (void)mdb_monitor_db_close( be );
+
+       mdb->mi_flags &= ~MDB_IS_OPEN;
+
+       if( mdb->mi_dbenv ) {
+               mdb_reader_flush( mdb->mi_dbenv );
+
+               /* a transaction only makes sense with an environment */
+               rc = mdb_txn_begin( mdb->mi_dbenv, 1, &txn );
+               if( rc != 0 ) {
+                       Debug( LDAP_DEBUG_ANY,
+                               "mdb_db_close: database \"%s\": "
+                               "mdb_txn_begin failed: %s (%d).\n",
+                               be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+                       txn = NULL;
+               }
+       }
+
+       if( mdb->mi_databases ) {
+               while( mdb->mi_ndatabases > 0 ) {
+                       mdb->mi_ndatabases--;
+                       db = mdb->mi_databases[mdb->mi_ndatabases];
+                       /* without a transaction only the bookkeeping is freed */
+                       if( txn )
+                               mdb_close( txn, db->mdi_dbi );
+                       /* Lower numbered names are not strdup'd */
+                       if( mdb->mi_ndatabases >= MDB_NDB )
+                               free( db->mdi_name.bv_val );
+                       free( db );
+               }
+       }
+       /* leave a clean count behind instead of one decremented past zero */
+       mdb->mi_ndatabases = 0;
+
+       if( txn )
+               mdb_txn_abort( txn );
+
+       free( mdb->mi_databases );
+       mdb->mi_databases = NULL;
+
+       /* close db environment */
+       if( mdb->mi_dbenv ) {
+               /* force a sync */
+               rc = mdb_env_sync( mdb->mi_dbenv, 1 );
+               if( rc != 0 ) {
+                       Debug( LDAP_DEBUG_ANY,
+                               "mdb_db_close: database \"%s\": "
+                               "mdb_env_sync failed: %s (%d).\n",
+                               be->be_suffix[0].bv_val, mdb_strerror(rc), rc );
+               }
+
+               mdb_env_close( mdb->mi_dbenv );
+               mdb->mi_dbenv = NULL;
+       }
+
+       return 0;
+}
+
+/*
+ * Tear down the per-database state: remove the checkpoint task from the
+ * run queue (stopping it first when it is running), run the monitor
+ * destroy hook, then free the environment path, the attribute index
+ * configuration, the database mutex and finally the mdb_info itself.
+ * Always returns 0.
+ */
+static int
+mdb_db_destroy( BackendDB *be, ConfigReply *cr )
+{
+       struct mdb_info *info = (struct mdb_info *) be->be_private;
+       struct re_s *task = info->mi_txn_cp_task;
+
+       /* stop and remove checkpoint task */
+       if ( task ) {
+               info->mi_txn_cp_task = NULL;
+               ldap_pvt_thread_mutex_lock( &slapd_rq.rq_mutex );
+               if ( ldap_pvt_runqueue_isrunning( &slapd_rq, task ) ) {
+                       ldap_pvt_runqueue_stoptask( &slapd_rq, task );
+               }
+               ldap_pvt_runqueue_remove( &slapd_rq, task );
+               ldap_pvt_thread_mutex_unlock( &slapd_rq.rq_mutex );
+       }
+
+       /* monitor handling */
+       (void)mdb_monitor_db_destroy( be );
+
+       if ( info->mi_dbenv_home != NULL ) {
+               ch_free( info->mi_dbenv_home );
+       }
+
+       mdb_attr_index_destroy( info );
+       ldap_pvt_thread_mutex_destroy( &info->mi_database_mutex );
+
+       /* the structure itself goes last */
+       ch_free( info );
+       be->be_private = NULL;
+
+       return 0;
+}
+
+/*
+ * Backend-type initialisation: publish the supported controls and
+ * backend flags, verify that the MDB library in use reports the version
+ * this code was built against, install the database, operation and
+ * slap-tool handlers, and finish with mdb_back_init_cf().
+ *
+ * Returns -1 on a library version mismatch, otherwise the result of
+ * mdb_back_init_cf().
+ */
+int
+mdb_back_initialize(
+       BackendInfo     *bi )
+{
+       /* static: bi->bi_controls keeps pointing at this table */
+       static char *controls[] = {
+               LDAP_CONTROL_ASSERT,
+               LDAP_CONTROL_MANAGEDSAIT,
+               LDAP_CONTROL_NOOP,
+               LDAP_CONTROL_PAGEDRESULTS,
+               LDAP_CONTROL_PRE_READ,
+               LDAP_CONTROL_POST_READ,
+               LDAP_CONTROL_SUBENTRIES,
+               LDAP_CONTROL_X_PERMISSIVE_MODIFY,
+#ifdef LDAP_X_TXN
+               LDAP_CONTROL_X_TXN_SPEC,
+#endif
+               NULL
+       };
+       int major, minor, patch, linked;
+       char *version;
+#ifdef HAVE_EBCDIC
+       char v2[1024];
+#endif
+
+       /* initialize the underlying database system */
+       Debug( LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_back_initialize) ": initialize "
+               MDB_UCTYPE " backend\n", 0, 0, 0 );
+
+       bi->bi_flags |=
+               SLAP_BFLAG_INCREMENT |
+               SLAP_BFLAG_SUBENTRIES |
+               SLAP_BFLAG_ALIASES |
+               SLAP_BFLAG_REFERRALS;
+
+       bi->bi_controls = controls;
+
+       /* version check */
+       version = mdb_version( &major, &minor, &patch );
+#ifdef HAVE_EBCDIC
+       /* All our stdio does an ASCII to EBCDIC conversion on
+        * the output. Strings from the MDB library are already
+        * in EBCDIC; we have to go back and forth...
+        */
+       strcpy( v2, version );
+       __etoa( v2 );
+       version = v2;
+#endif
+       linked = (major << 24) | (minor << 16) | patch;
+       if( linked != MDB_VERSION_FULL ) {
+               /* fail if the versions don't match */
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_back_initialize) ": "
+                       "MDB library version mismatch:"
+                       " expected " MDB_VERSION_STRING ","
+                       " got %s\n", version, 0, 0 );
+               return -1;
+       }
+
+       Debug( LDAP_DEBUG_TRACE, LDAP_XSTRING(mdb_back_initialize)
+               ": %s\n", version, 0, 0 );
+
+       /* no backend-type level hooks */
+       bi->bi_open = NULL;
+       bi->bi_close = NULL;
+       bi->bi_config = NULL;
+       bi->bi_destroy = NULL;
+
+       /* per-database lifecycle */
+       bi->bi_db_init = mdb_db_init;
+       bi->bi_db_config = config_generic_wrapper;
+       bi->bi_db_open = mdb_db_open;
+       bi->bi_db_close = mdb_db_close;
+       bi->bi_db_destroy = mdb_db_destroy;
+
+       /* LDAP operations */
+       bi->bi_op_add = mdb_add;
+       bi->bi_op_bind = mdb_bind;
+       bi->bi_op_compare = mdb_compare;
+       bi->bi_op_delete = mdb_delete;
+       bi->bi_op_modify = mdb_modify;
+       bi->bi_op_modrdn = mdb_modrdn;
+       bi->bi_op_search = mdb_search;
+
+       bi->bi_op_unbind = NULL;
+
+       bi->bi_extended = mdb_extended;
+
+       bi->bi_chk_referrals = mdb_referrals;
+       bi->bi_operational = mdb_operational;
+       bi->bi_has_subordinates = mdb_hasSubordinates;
+       bi->bi_entry_release_rw = mdb_entry_release;
+       bi->bi_entry_get_rw = mdb_entry_get;
+
+       /*
+        * hooks for slap tools
+        */
+       bi->bi_tool_entry_open = mdb_tool_entry_open;
+       bi->bi_tool_entry_close = mdb_tool_entry_close;
+       bi->bi_tool_entry_first = backend_tool_entry_first;
+       bi->bi_tool_entry_first_x = mdb_tool_entry_first_x;
+       bi->bi_tool_entry_next = mdb_tool_entry_next;
+       bi->bi_tool_entry_get = mdb_tool_entry_get;
+       bi->bi_tool_entry_put = mdb_tool_entry_put;
+       bi->bi_tool_entry_reindex = mdb_tool_entry_reindex;
+       bi->bi_tool_sync = NULL;
+       bi->bi_tool_dn2id_get = mdb_tool_dn2id_get;
+       bi->bi_tool_entry_modify = mdb_tool_entry_modify;
+
+       bi->bi_connection_init = NULL;
+       bi->bi_connection_destroy = NULL;
+
+       return mdb_back_init_cf( bi );
+}
+
+/* Only emitted when the backend is configured as a dynamic module
+ * (SLAPD_MDB == SLAPD_MOD_DYNAMIC).
+ * NOTE(review): SLAP_BACKEND_INIT_MODULE presumably generates the module
+ * entry point that registers this backend -- confirm in slap.h.
+ */
+#if    (SLAPD_MDB == SLAPD_MOD_DYNAMIC)
+
+SLAP_BACKEND_INIT_MODULE( mdb )
+
+#endif /* SLAPD_MDB == SLAPD_MOD_DYNAMIC */
+
diff --git a/servers/slapd/back-mdb/key.c b/servers/slapd/back-mdb/key.c
new file mode 100644 (file)
index 0000000..62377b5
--- /dev/null
@@ -0,0 +1,98 @@
+/* key.c - routines for reading and changing index keys */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+
+#include <ac/string.h>
+#include <ac/socket.h>
+
+#include "slap.h"
+#include "back-mdb.h"
+#include "idl.h"
+
+/*
+ * Read the ID list stored under key k.
+ *
+ * The berval is wrapped in a DBT flagged DB_DBT_USERMEM and handed to
+ * mdb_idl_fetch_key(), which fills ids; saved_cursor and get_flag are
+ * passed through unchanged.  Returns the result of mdb_idl_fetch_key().
+ */
+int
+mdb_key_read(
+       Backend *be,
+       DB *db,
+       DB_TXN *txn,
+       struct berval *k,
+       ID *ids,
+       DBC **saved_cursor,
+       int get_flag
+)
+{
+       DBT dbkey;
+       int result;
+
+       Debug( LDAP_DEBUG_TRACE, "=> key_read\n", 0, 0, 0 );
+
+       /* describe the caller's key buffer without copying it */
+       DBTzero( &dbkey );
+       bv2DBT(k,&dbkey);
+       dbkey.ulen = dbkey.size;
+       dbkey.flags = DB_DBT_USERMEM;
+
+       result = mdb_idl_fetch_key( be, db, txn, &dbkey, ids, saved_cursor, get_flag );
+
+       if( result == LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE, "<= mdb_index_read %ld candidates\n",
+                       (long) MDB_IDL_N(ids), 0, 0 );
+       } else {
+               Debug( LDAP_DEBUG_TRACE, "<= mdb_index_read: failed (%d)\n",
+                       result, 0, 0 );
+       }
+
+       return result;
+}
+
+/*
+ * Add id to, or remove it from, the ID list stored under key k.
+ *
+ * op == SLAP_INDEX_ADD_OP inserts through mdb_idl_insert_key(); any
+ * other op deletes through mdb_idl_delete_key().  DB_KEYEXIST on insert
+ * and DB_NOTFOUND on delete are reported as success (0); every other
+ * result is returned unchanged.
+ */
+int
+mdb_key_change(
+       Backend *be,
+       DB *db,
+       DB_TXN *txn,
+       struct berval *k,
+       ID id,
+       int op
+)
+{
+       DBT     dbkey;
+       int     result;
+       const int adding = ( op == SLAP_INDEX_ADD_OP );
+
+       Debug( LDAP_DEBUG_TRACE, "=> key_change(%s,%lx)\n",
+               adding ? "ADD":"DELETE", (long) id, 0 );
+
+       /* describe the caller's key buffer without copying it */
+       DBTzero( &dbkey );
+       bv2DBT(k,&dbkey);
+       dbkey.ulen = dbkey.size;
+       dbkey.flags = DB_DBT_USERMEM;
+
+       if ( !adding ) {
+               /* Delete values; an ID that is already gone is fine */
+               result = mdb_idl_delete_key( be, db, txn, &dbkey, id );
+               if ( result == DB_NOTFOUND ) result = 0;
+       } else {
+               /* Add values; an ID that is already present is fine */
+               result = mdb_idl_insert_key( be, db, txn, &dbkey, id );
+               if ( result == DB_KEYEXIST ) result = 0;
+       }
+
+       Debug( LDAP_DEBUG_TRACE, "<= key_change %d\n", result, 0, 0 );
+
+       return result;
+}
diff --git a/servers/slapd/back-mdb/mdb.c b/servers/slapd/back-mdb/mdb.c
new file mode 100644 (file)
index 0000000..d724655
--- /dev/null
@@ -0,0 +1,3246 @@
+/* mdb.c - memory-mapped database library */
+/*
+ * Copyright 2011 Howard Chu, Symas Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ *
+ * This code is derived from btree.c written by Martin Hedenfalk.
+ *
+ * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+#ifdef HAVE_SYS_FILE_H
+#include <sys/file.h>
+#endif
+#include <fcntl.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "mdb.h"
+
+/* ULONG is shorthand for unsigned long; page numbers (pgno_t) use it. */
+#define ULONG          unsigned long
+typedef ULONG          pgno_t;
+
+#include "midl.h"
+
+/* Debug tracing.  DEBUG defaults to 1 when the build does not define it,
+ * so with a compiler that defines __GNUC__ DPRINTF() prints
+ * "function:line: message" to stderr; otherwise it expands to a no-op.
+ */
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+
+#if DEBUG && defined(__GNUC__)
+# define DPRINTF(fmt, ...) \
+       fprintf(stderr, "%s:%d: " fmt "\n", __func__, __LINE__, ##__VA_ARGS__)
+#else
+# define DPRINTF(...)  ((void) 0)
+#endif
+
+/* Compile-time constants of the library.
+ * NOTE(review): the exact role of PAGESIZE, MDB_MINKEYS and MAXKEYSIZE is
+ * not visible in this part of the file -- confirm at their uses.
+ */
+#define PAGESIZE        4096
+#define MDB_MINKEYS     4
+#define MDB_MAGIC       0xBEEFC0DE
+#define MDB_VERSION     1
+#define MAXKEYSIZE      511
+
+/* all-ones unsigned long, used as the "no page" marker */
+#define P_INVALID       (~0UL)
+
+/* true only when every bit of f is set in w */
+#define F_ISSET(w, f)   (((w) & (f)) == (f))
+
+/* 16-bit offset/index type used for the slots in a page (see mp_ptrs) */
+typedef uint16_t        indx_t;
+
+#define DEFAULT_READERS        126
+#define DEFAULT_MAPSIZE        1048576
+
+/* Lock descriptor stuff */
+/* RXBODY lists the fields of one reader record: a transaction id, a
+ * process id and a thread id.  MDB_rxbody wraps just those fields so
+ * that sizeof(MDB_rxbody) can be used to size the padding below.
+ */
+#define RXBODY \
+       ULONG           mr_txnid; \
+       pid_t           mr_pid; \
+       pthread_t       mr_tid
+typedef struct MDB_rxbody {
+       RXBODY;
+} MDB_rxbody;
+
+/* CACHELINE may be predefined by the build; otherwise 128 on Apple
+ * platforms and 64 elsewhere.
+ */
+#ifndef CACHELINE
+# ifdef __APPLE__
+#  define CACHELINE    128     /* 64 is too small to contain a mutex */
+# else
+#  define CACHELINE    64      /* most CPUs. Itanium uses 128 */
+# endif
+#endif
+
+/* One reader record: the RXBODY fields followed by padding computed as
+ * CACHELINE - sizeof(MDB_rxbody), which assumes the fields are smaller
+ * than CACHELINE.
+ */
+typedef struct MDB_reader {
+       RXBODY;
+       /* cache line alignment */
+       char pad[CACHELINE-sizeof(MDB_rxbody)];
+} MDB_reader;
+
+/* TXBODY lists the leading fields of MDB_txninfo: magic, version, a
+ * mutex, a transaction id and a reader count.  MDB_txbody wraps just
+ * those fields so that sizeof(MDB_txbody) can size the padding below.
+ */
+#define        TXBODY \
+       uint32_t        mt_magic;       \
+       uint32_t        mt_version;     \
+       pthread_mutex_t mt_mutex;       \
+       ULONG           mt_txnid;       \
+       uint32_t        mt_numreaders
+typedef struct MDB_txbody {
+       TXBODY;
+} MDB_txbody;
+
+/* The TXBODY fields and a second mutex (mt_wmutex), each followed by
+ * padding computed as CACHELINE minus the size of what precedes it,
+ * then the reader records.  The pad sizes assume MDB_txbody and
+ * pthread_mutex_t are each smaller than CACHELINE.
+ * NOTE(review): mt_readers is declared with one element; presumably the
+ * structure is over-allocated for the real number of readers -- confirm.
+ */
+typedef struct MDB_txninfo {
+       TXBODY;
+       char pad[CACHELINE-sizeof(MDB_txbody)];
+       pthread_mutex_t mt_wmutex;
+       char pad2[CACHELINE-sizeof(pthread_mutex_t)];
+       MDB_reader      mt_readers[1];
+} MDB_txninfo;
+
+/* Common header for all page types. Overflow pages
+ * occupy a number of contiguous pages with no
+ * headers on any page after the first.
+ *
+ * mp_pgno, mp_lower, mp_upper and mp_pages are macros that reach into
+ * the unions below; mp_lower/mp_upper share their storage with
+ * mp_pages.
+ */
+typedef struct MDB_page {              /* represents a page of storage */
+#define        mp_pgno         mp_p.p_pgno
+       /* the page number shares a union with a pointer-sized member */
+       union padded {
+               pgno_t          p_pgno;         /* page number */
+               void *          p_pad;
+       } mp_p;
+#define        P_BRANCH         0x01           /* branch page */
+#define        P_LEAF           0x02           /* leaf page */
+#define        P_OVERFLOW       0x04           /* overflow page */
+#define        P_META           0x08           /* meta page */
+#define        P_DIRTY          0x10           /* dirty page */
+       uint32_t        mp_flags;               /* combination of the P_* bits above */
+#define mp_lower       mp_pb.pb.pb_lower
+#define mp_upper       mp_pb.pb.pb_upper
+#define mp_pages       mp_pb.pb_pages
+       union page_bounds {
+               struct {
+                       indx_t          pb_lower;               /* lower bound of free space */
+                       indx_t          pb_upper;               /* upper bound of free space */
+               } pb;
+               uint32_t        pb_pages;       /* number of overflow pages */
+       } mp_pb;
+       indx_t          mp_ptrs[1];             /* dynamic size */
+} MDB_page;
+
+/* size of the page header: everything in MDB_page before mp_ptrs */
+#define PAGEHDRSZ       ((unsigned) offsetof(MDB_page, mp_ptrs))
+
+/* number of 2-byte mp_ptrs slots between the header and mp_lower */
+#define NUMKEYS(p)      (((p)->mp_lower - PAGEHDRSZ) >> 1)
+/* bytes between mp_lower and mp_upper */
+#define SIZELEFT(p)     (indx_t)((p)->mp_upper - (p)->mp_lower)
+/* used share of the page body, scaled so that 1000 means completely full */
+#define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \
+                               ((env)->me_psize - PAGEHDRSZ))
+/* page type tests on mp_flags */
+#define IS_LEAF(p)      F_ISSET((p)->mp_flags, P_LEAF)
+#define IS_BRANCH(p)    F_ISSET((p)->mp_flags, P_BRANCH)
+#define IS_OVERFLOW(p)  F_ISSET((p)->mp_flags, P_OVERFLOW)
+
+/* Number of psize-byte pages needed to hold a page header plus size
+ * bytes, rounded up.  Arguments and the whole expansion are
+ * parenthesised and no semicolon is embedded, so the macro can be used
+ * inside larger expressions.
+ */
+#define OVPAGES(size, psize)   ((PAGEHDRSZ + (size) + (psize) - 1) / (psize))
+
+/* Per-database record: flags, depth, page and entry counters and the
+ * root page number.  MDB_meta stores two of these and reuses md_pad and
+ * md_flags of the first one through its mm_psize / mm_flags macros.
+ */
+typedef struct MDB_db {
+       uint32_t        md_pad;
+       uint16_t        md_flags;
+       uint16_t        md_depth;
+       ULONG           md_branch_pages;
+       ULONG           md_leaf_pages;
+       ULONG           md_overflow_pages;
+       ULONG           md_entries;
+       pgno_t          md_root;
+} MDB_db;
+
+/* Indices into mm_dbs (see the mm_dbs comment in MDB_meta). */
+#define        FREE_DBI        0
+#define        MAIN_DBI        1
+
+typedef struct MDB_meta {                      /* meta (footer) page content */
+       uint32_t        mm_magic;
+       uint32_t        mm_version;
+       void            *mm_address;            /* address for fixed mapping */
+       size_t          mm_mapsize;                     /* size of mmap region */
+       MDB_db          mm_dbs[2];                      /* first is free space, 2nd is main db */
+/* mm_psize and mm_flags alias fields of the first (free space) record */
+#define        mm_psize        mm_dbs[0].md_pad
+#define        mm_flags        mm_dbs[0].md_flags
+       pgno_t          mm_last_pg;                     /* last used page in file */
+       ULONG           mm_txnid;                       /* txnid that committed this page */
+} MDB_meta;
+
+/* Bookkeeping kept in front of a dirty page: the queue link, the parent
+ * page with the index inside it, and md_num.
+ * NOTE(review): md_num presumably counts the pages covered -- confirm.
+ */
+typedef struct MDB_dhead {                                     /* a dirty page */
+       STAILQ_ENTRY(MDB_dpage)  md_next;       /* queue of dirty pages */
+       MDB_page        *md_parent;
+       unsigned        md_pi;                          /* parent index */
+       int                     md_num;
+} MDB_dhead;
+
+/* A dirty page: the MDB_dhead bookkeeping followed directly by the page. */
+typedef struct MDB_dpage {
+       MDB_dhead       h;
+       MDB_page        p;
+} MDB_dpage;
+
+STAILQ_HEAD(dirty_queue, MDB_dpage);   /* FIXME: use a sorted data structure */
+
+/* Singly linked list node carrying a transaction id and a page-number
+ * array that is declared with one element but marked dynamic.
+ */
+typedef struct MDB_oldpages {
+       struct MDB_oldpages *mo_next;
+       ULONG           mo_txnid;
+       pgno_t          mo_pages[1];    /* dynamic */
+} MDB_oldpages;
+
+/* A page together with its parent page and an index (mp_pi);
+ * mdb_touch() takes one of these.
+ * NOTE(review): mp_pi is presumably the slot of the page within the
+ * parent -- confirm.
+ */
+typedef struct MDB_pageparent {
+       MDB_page *mp_page;
+       MDB_page *mp_parent;
+       unsigned mp_pi;
+} MDB_pageparent;
+
+static MDB_dpage *mdb_alloc_page(MDB_txn *txn, MDB_page *parent, unsigned int parent_idx, int num);
+static int             mdb_touch(MDB_txn *txn, MDB_pageparent *mp);
+
+/* One element of a cursor's page stack: the page and the cursor's index
+ * on that page, linked through mp_entry.
+ */
+typedef struct MDB_ppage {                                     /* ordered list of pages */
+       SLIST_ENTRY(MDB_ppage)   mp_entry;
+       MDB_page                *mp_page;
+       unsigned int    mp_ki;          /* cursor index on page */
+} MDB_ppage;
+SLIST_HEAD(page_stack, MDB_ppage);
+
+/* FIXME: tree depth is mostly bounded, we should just
+ * use a fixed array and avoid malloc/pointer chasing
+ */
+/* Stack operations on a cursor's mc_stack, built on the SLIST macros. */
+#define CURSOR_EMPTY(c)                 SLIST_EMPTY(&(c)->mc_stack)
+#define CURSOR_TOP(c)           SLIST_FIRST(&(c)->mc_stack)
+#define CURSOR_POP(c)           SLIST_REMOVE_HEAD(&(c)->mc_stack, mp_entry)
+#define CURSOR_PUSH(c,p)        SLIST_INSERT_HEAD(&(c)->mc_stack, p, mp_entry)
+
+struct MDB_xcursor;
+
+/* Cursor state: the owning transaction, the page stack, the database
+ * handle, two status flags and a pointer to an MDB_xcursor.
+ */
+struct MDB_cursor {
+       MDB_txn         *mc_txn;
+       struct page_stack        mc_stack;              /* stack of parent pages */
+       MDB_dbi         mc_dbi;
+       short           mc_initialized; /* 1 if initialized */
+       short           mc_eof;         /* 1 if end is reached */
+       struct MDB_xcursor      *mc_xcursor;
+};
+
+/* NOTE(review): MDB_meta as declared in this file has no mm_hash member,
+ * so any expansion of METAHASHLEN would fail to compile.  It looks like
+ * a leftover and is kept unchanged here; consider removing it.
+ */
+#define METAHASHLEN     offsetof(MDB_meta, mm_hash)
+/* Address of the data that follows the page header of page p.  The
+ * argument is parenthesised so that an expression can be passed safely.
+ */
+#define METADATA(p)     ((void *)((char *)(p) + PAGEHDRSZ))
+
+/* Header of one node stored in a page.  The leading union holds either
+ * a child page number or the leaf data size; the key bytes start at
+ * mn_data (see NODEKEY / NODEDATA).  mn_ksize is a 12-bit field.
+ */
+typedef struct MDB_node {
+#define mn_pgno                 mn_p.np_pgno
+#define mn_dsize        mn_p.np_dsize
+       union {
+               pgno_t           np_pgno;       /* child page number */
+               uint32_t         np_dsize;      /* leaf data size */
+       } mn_p;
+       unsigned int    mn_flags:4;
+       unsigned int    mn_ksize:12;                    /* key size */
+#define F_BIGDATA       0x01                   /* data put on overflow page */
+#define F_SUBDATA       0x02                   /* data is a sub-database */
+       char            mn_data[1];
+} MDB_node;
+
+/* Auxiliary per-database information: the name, the user-supplied
+ * callbacks, a parent database handle and a dirty marker.
+ */
+typedef struct MDB_dbx {
+       MDB_val         md_name;
+       MDB_cmp_func    *md_cmp;                /* user compare function */
+       MDB_cmp_func    *md_dcmp;               /* user dupsort function */
+       MDB_rel_func    *md_rel;                /* user relocate function */
+       MDB_dbi md_parent;
+       unsigned int    md_dirty;
+} MDB_dbx;
+
+/* Transaction state.  mt_u is a union holding either a dirty-page queue
+ * or a reader record.
+ * NOTE(review): presumably the queue belongs to write transactions and
+ * the reader record to MDB_TXN_RDONLY ones -- confirm at the uses.
+ */
+struct MDB_txn {
+       pgno_t          mt_next_pgno;   /* next unallocated page */
+       ULONG           mt_txnid;
+       ULONG           mt_oldest;
+       MDB_env         *mt_env;        
+       pgno_t          *mt_free_pgs;   /* this is an IDL */
+       union {
+               struct dirty_queue      *dirty_queue;   /* modified pages */
+               MDB_reader      *reader;
+       } mt_u;
+       MDB_dbx         *mt_dbxs;               /* array */
+       MDB_db          *mt_dbs;
+       unsigned int    mt_numdbs;
+
+#define MDB_TXN_RDONLY          0x01           /* read-only transaction */
+#define MDB_TXN_ERROR           0x02           /* an error has occurred */
+#define MDB_TXN_METOGGLE       0x04            /* used meta page 1 */
+       unsigned int    mt_flags;               /* combination of the MDB_TXN_* bits above */
+};
+
+/* Context for sorted-dup records: bundles a cursor, a transaction
+ * object and small fixed-size (4-entry) database arrays in one struct.
+ */
+typedef struct MDB_xcursor {
+       MDB_cursor mx_cursor;
+       MDB_txn mx_txn;
+       MDB_dbx mx_dbxs[4];
+       MDB_db  mx_dbs[4];
+} MDB_xcursor;
+
+/* Environment handle: file descriptors, memory map, meta page pointers,
+ * database tables and the reader/writer bookkeeping shared by all
+ * transactions opened on it.
+ */
+struct MDB_env {
+       int                     me_fd;          /* data file descriptor */
+       int                     me_lfd;         /* lock file descriptor */
+       uint32_t        me_flags;
+       unsigned int    me_maxreaders;
+       unsigned int    me_numdbs;
+       unsigned int    me_maxdbs;
+       char            *me_path;
+       char            *me_map;        /* start of the mmap()ed data file */
+       MDB_txninfo     *me_txns;       /* reader table, txnid counter and mutexes */
+       MDB_meta        *me_metas[2];   /* the two meta pages inside me_map */
+       MDB_meta        *me_meta;       /* whichever of me_metas[] is currently selected */
+       MDB_txn         *me_txn;                /* current write transaction */
+       size_t          me_mapsize;
+       off_t           me_size;                /* current file size */
+       unsigned int    me_psize;       /* page size, taken from the meta page */
+       int                     me_db_toggle;   /* index into me_dbs[] currently in use */
+       MDB_dbx         *me_dbxs;               /* array */
+       MDB_db          *me_dbs[2];
+       MDB_oldpages *me_pghead;        /* list of old-page records loaded from the free DB */
+       pthread_key_t   me_txkey;       /* thread-key for readers */
+       pgno_t          me_free_pgs[MDB_IDL_UM_SIZE];   /* buffer used as the write txn's mt_free_pgs */
+};
+
+/* Size of the fixed node header, i.e. the offset of the payload. */
+#define NODESIZE        offsetof(MDB_node, mn_data)
+
+/* INDXSIZE: header plus key (k may be NULL); LEAFSIZE: header plus key
+ * and data. NODEPTR: node at slot i of page p, located via mp_ptrs[i].
+ * NODEKEY / NODEDATA: key bytes, and the data that follows mn_ksize
+ * bytes later. NODEPGNO / NODEDSZ: the two views of the node's union.
+ */
+#define INDXSIZE(k)     (NODESIZE + ((k) == NULL ? 0 : (k)->mv_size))
+#define LEAFSIZE(k, d)  (NODESIZE + (k)->mv_size + (d)->mv_size)
+#define NODEPTR(p, i)   ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i]))
+#define NODEKEY(node)   (void *)((node)->mn_data)
+#define NODEDATA(node)  (void *)((char *)(node)->mn_data + (node)->mn_ksize)
+#define NODEPGNO(node)  ((node)->mn_pgno)
+#define NODEDSZ(node)   ((node)->mn_dsize)
+
+#define MDB_COMMIT_PAGES        64     /* max number of pages to write in one commit */
+#define MDB_MAXCACHE_DEF        1024   /* max number of pages to keep in cache  */
+
+/* Forward declarations of the file-local helpers, grouped by area:
+ * page search, environment meta handling, node manipulation, tree
+ * rebalancing, cursor movement, sorted-dup cursors, size helpers and
+ * the two byte-string comparators.
+ */
+static int  mdb_search_page_root(MDB_txn *txn,
+                           MDB_dbi dbi, MDB_val *key,
+                           MDB_cursor *cursor, int modify,
+                           MDB_pageparent *mpp);
+static int  mdb_search_page(MDB_txn *txn,
+                           MDB_dbi dbi, MDB_val *key,
+                           MDB_cursor *cursor, int modify,
+                           MDB_pageparent *mpp);
+
+static int  mdb_env_read_header(MDB_env *env, MDB_meta *meta);
+static int  mdb_env_read_meta(MDB_env *env, int *which);
+static int  mdb_env_write_meta(MDB_txn *txn);
+static MDB_page *mdb_get_page(MDB_txn *txn, pgno_t pgno);
+
+static MDB_node *mdb_search_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp,
+                           MDB_val *key, int *exactp, unsigned int *kip);
+static int  mdb_add_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp,
+                           indx_t indx, MDB_val *key, MDB_val *data,
+                           pgno_t pgno, uint8_t flags);
+static void mdb_del_node(MDB_page *mp, indx_t indx);
+static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, unsigned int ki,
+    MDB_pageparent *mpp, MDB_node *leaf);
+static int mdb_put0(MDB_txn *txn, MDB_dbi dbi,
+    MDB_val *key, MDB_val *data, unsigned int flags);
+static int  mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
+
+static int              mdb_rebalance(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *mp);
+static int              mdb_update_key(MDB_page *mp, indx_t indx, MDB_val *key);
+static int              mdb_move_node(MDB_txn *txn, MDB_dbi dbi, 
+                               MDB_pageparent *src, indx_t srcindx,
+                               MDB_pageparent *dst, indx_t dstindx);
+static int              mdb_merge(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *src,
+                           MDB_pageparent *dst);
+static int              mdb_split(MDB_txn *txn, MDB_dbi dbi, MDB_page **mpp,
+                           unsigned int *newindxp, MDB_val *newkey,
+                           MDB_val *newdata, pgno_t newpgno);
+static MDB_dpage *mdb_new_page(MDB_txn *txn, MDB_dbi dbi, uint32_t flags, int num);
+
+static void             cursor_pop_page(MDB_cursor *cursor);
+static MDB_ppage *cursor_push_page(MDB_cursor *cursor,
+                           MDB_page *mp);
+
+static int              mdb_set_key(MDB_node *node, MDB_val *key);
+static int              mdb_sibling(MDB_cursor *cursor, int move_right);
+static int              mdb_cursor_next(MDB_cursor *cursor,
+                           MDB_val *key, MDB_val *data, MDB_cursor_op op);
+static int              mdb_cursor_prev(MDB_cursor *cursor,
+                           MDB_val *key, MDB_val *data, MDB_cursor_op op);
+static int              mdb_cursor_set(MDB_cursor *cursor,
+                           MDB_val *key, MDB_val *data, MDB_cursor_op op, int *exactp);
+static int              mdb_cursor_first(MDB_cursor *cursor,
+                           MDB_val *key, MDB_val *data);
+static int              mdb_cursor_last(MDB_cursor *cursor,
+                           MDB_val *key, MDB_val *data);
+
+static void            mdb_xcursor_init0(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx);
+static void            mdb_xcursor_init1(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx, MDB_node *node);
+static void            mdb_xcursor_fini(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx);
+
+static size_t           mdb_leaf_size(MDB_env *env, MDB_val *key,
+                           MDB_val *data);
+static size_t           mdb_branch_size(MDB_env *env, MDB_val *key);
+
+static int              memncmp(const void *s1, size_t n1,
+                                const void *s2, size_t n2);
+static int              memnrcmp(const void *s1, size_t n1,
+                                 const void *s2, size_t n2);
+
+/* Compare two byte strings of possibly different lengths.
+ * The common prefix is compared with memcmp(); if it is identical the
+ * shorter string sorts first.
+ * Returns the memcmp() result when the prefixes differ, otherwise
+ * -1, 0 or 1 for n1 < n2, n1 == n2, n1 > n2.
+ */
+static int
+memncmp(const void *s1, size_t n1, const void *s2, size_t n2)
+{
+       size_t common = (n1 < n2) ? n1 : n2;
+       int prefix_cmp = memcmp(s1, s2, common);
+
+       if (prefix_cmp != 0)
+               return prefix_cmp;
+       if (n1 < n2)
+               return -1;
+       return n1 > n2;
+}
+
+/* Compare two byte strings starting from their LAST bytes and moving
+ * towards the front (used for reverse-key and little-endian integer
+ * ordering).
+ *
+ * Only the min(n1, n2) trailing bytes are examined, so neither buffer
+ * is ever read before its start; previously the loop limit was taken
+ * from s2 but tested against the s1 pointer, which over-ran both
+ * buffers whenever s2 was a proper suffix of s1.
+ *
+ * Returns the difference of the first differing bytes, otherwise
+ * -1, 0 or 1 for n1 < n2, n1 == n2, n1 > n2.
+ */
+static int
+memnrcmp(const void *s1, size_t n1, const void *s2, size_t n2)
+{
+       const unsigned char     *p1, *p2;
+       size_t  common = (n1 < n2) ? n1 : n2;
+
+       if (common != 0) {
+               /* Start one past the end of each string and step back. */
+               p1 = (const unsigned char *)s1 + n1;
+               p2 = (const unsigned char *)s2 + n2;
+
+               while (common-- > 0) {
+                       --p1;
+                       --p2;
+                       if (*p1 != *p2)
+                               return *p1 - *p2;
+               }
+       }
+
+       /* All shared trailing bytes are equal: the shorter string sorts first. */
+       return (n1 > n2) - (n1 < n2);
+}
+
+/* Report the library version.
+ * Each of maj, min and pat receives the corresponding MDB_VERSION_*
+ * number; any of them may be NULL if the caller is not interested.
+ * Returns MDB_VERSION_STRING.
+ */
+char *
+mdb_version(int *maj, int *min, int *pat)
+{
+       if (maj != NULL)
+               *maj = MDB_VERSION_MAJOR;
+       if (min != NULL)
+               *min = MDB_VERSION_MINOR;
+       if (pat != NULL)
+               *pat = MDB_VERSION_PATCH;
+       return MDB_VERSION_STRING;
+}
+
+/* Compare two keys with the compare function registered for database
+ * dbi in txn's mt_dbxs table, and return that function's result.
+ */
+int
+mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
+{
+       MDB_cmp_func *compare = txn->mt_dbxs[dbi].md_cmp;
+
+       return compare(a, b);
+}
+
+/* Built-in key comparison for database dbi.
+ * Keys are compared back-to-front with memnrcmp() when the database has
+ * MDB_REVERSEKEY set, or, on little-endian builds only, MDB_INTEGERKEY.
+ * All other databases are compared front-to-back with memncmp().
+ */
+static int
+_mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *key1, const MDB_val *key2)
+{
+       unsigned int reverse_mask = MDB_REVERSEKEY;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+       reverse_mask |= MDB_INTEGERKEY;
+#endif
+
+       if (!(txn->mt_dbs[dbi].md_flags & reverse_mask))
+               return memncmp(key1->mv_data, key1->mv_size,
+                       key2->mv_data, key2->mv_size);
+
+       return memnrcmp(key1->mv_data, key1->mv_size,
+               key2->mv_data, key2->mv_size);
+}
+
+/* Allocate new page(s) for writing */
+static MDB_dpage *
+mdb_alloc_page(MDB_txn *txn, MDB_page *parent, unsigned int parent_idx, int num)
+{
+       MDB_dpage *dp;
+       pgno_t pgno = P_INVALID;
+       ULONG oldest;
+
+       if (txn->mt_txnid > 2) {
+
+       oldest = txn->mt_txnid - 2;
+       if (!txn->mt_env->me_pghead && txn->mt_dbs[FREE_DBI].md_root != P_INVALID) {
+               /* See if there's anything in the free DB */
+               MDB_pageparent mpp;
+               MDB_node *leaf;
+               ULONG *kptr;
+
+               mpp.mp_parent = NULL;
+               mpp.mp_pi = 0;
+               mdb_search_page(txn, FREE_DBI, NULL, NULL, 0, &mpp);
+               leaf = NODEPTR(mpp.mp_page, 0);
+               kptr = (ULONG *)NODEKEY(leaf);
+
+               /* It's potentially usable, unless there are still
+                * older readers outstanding. Grab it.
+                */
+               if (oldest > *kptr) {
+                       MDB_oldpages *mop;
+                       MDB_val data;
+                       pgno_t *idl;
+
+                       mdb_read_data(txn, leaf, &data);
+                       idl = (ULONG *)data.mv_data;
+                       mop = malloc(sizeof(MDB_oldpages) + MDB_IDL_SIZEOF(idl) - sizeof(pgno_t));
+                       mop->mo_next = txn->mt_env->me_pghead;
+                       mop->mo_txnid = *kptr;
+                       txn->mt_env->me_pghead = mop;
+                       memcpy(mop->mo_pages, idl, MDB_IDL_SIZEOF(idl));
+
+#if DEBUG > 1
+                       {
+                               unsigned int i;
+                               DPRINTF("IDL read txn %lu root %lu num %lu",
+                                       mop->mo_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
+                               for (i=0; i<idl[0]; i++) {
+                                       DPRINTF("IDL %lu", idl[i+1]);
+                               }
+                       }
+#endif
+                       /* drop this IDL from the DB */
+                       mpp.mp_parent = NULL;
+                       mpp.mp_pi = 0;
+                       mdb_search_page(txn, FREE_DBI, NULL, NULL, 1, &mpp);
+                       leaf = NODEPTR(mpp.mp_page, 0);
+                       mdb_del0(txn, FREE_DBI, 0, &mpp, leaf);
+               }
+       }
+       if (txn->mt_env->me_pghead) {
+               unsigned int i;
+               for (i=0; i<txn->mt_env->me_txns->mt_numreaders; i++) {
+                       ULONG mr = txn->mt_env->me_txns->mt_readers[i].mr_txnid;
+                       if (!mr) continue;
+                       if (mr < oldest)
+                               oldest = txn->mt_env->me_txns->mt_readers[i].mr_txnid;
+               }
+               if (oldest > txn->mt_env->me_pghead->mo_txnid) {
+                       MDB_oldpages *mop = txn->mt_env->me_pghead;
+                       txn->mt_oldest = oldest;
+                       if (num > 1) {
+                               /* FIXME: For now, always use fresh pages. We
+                                * really ought to search the free list for a
+                                * contiguous range.
+                                */
+                               ;
+                       } else {
+                               /* peel pages off tail, so we only have to truncate the list */
+                               pgno = MDB_IDL_LAST(mop->mo_pages);
+                               if (MDB_IDL_IS_RANGE(mop->mo_pages)) {
+                                       mop->mo_pages[2]++;
+                                       if (mop->mo_pages[2] > mop->mo_pages[1])
+                                               mop->mo_pages[0] = 0;
+                               } else {
+                                       mop->mo_pages[0]--;
+                               }
+                               if (MDB_IDL_IS_ZERO(mop->mo_pages)) {
+                                       txn->mt_env->me_pghead = mop->mo_next;
+                                       free(mop);
+                               }
+                       }
+               }
+       }
+       }
+
+       if ((dp = malloc(txn->mt_env->me_psize * num + sizeof(MDB_dhead))) == NULL)
+               return NULL;
+       dp->h.md_num = num;
+       dp->h.md_parent = parent;
+       dp->h.md_pi = parent_idx;
+       STAILQ_INSERT_TAIL(txn->mt_u.dirty_queue, dp, h.md_next);
+       if (pgno == P_INVALID) {
+               dp->p.mp_pgno = txn->mt_next_pgno;
+               txn->mt_next_pgno += num;
+       } else {
+               dp->p.mp_pgno = pgno;
+       }
+
+       return dp;
+}
+
+/* Touch a page: make it dirty and re-insert into tree with updated pgno.
+ *
+ * If pp->mp_page is not yet flagged P_DIRTY, a new dirty page is
+ * allocated, the page contents are copied into it, the old page number
+ * is added to the transaction's free-page list and, when there is a
+ * parent, the parent's node is pointed at the new page number.
+ * pp->mp_page is updated to refer to the dirty copy.
+ *
+ * Returns 0 on success, ENOMEM if no page could be allocated.
+ */
+static int
+mdb_touch(MDB_txn *txn, MDB_pageparent *pp)
+{
+       MDB_page *mp;
+       pgno_t  pgno;
+
+       assert(txn != NULL);
+       assert(pp != NULL);
+
+       /* Read through pp only after it has been asserted non-NULL. */
+       mp = pp->mp_page;
+
+       if (!F_ISSET(mp->mp_flags, P_DIRTY)) {
+               MDB_dpage *dp;
+               if ((dp = mdb_alloc_page(txn, pp->mp_parent, pp->mp_pi, 1)) == NULL)
+                       return ENOMEM;
+               DPRINTF("touched page %lu -> %lu", mp->mp_pgno, dp->p.mp_pgno);
+               mdb_midl_insert(txn->mt_free_pgs, mp->mp_pgno);
+               /* Save the new number: the memcpy below overwrites the header. */
+               pgno = dp->p.mp_pgno;
+               memcpy(&dp->p, mp, txn->mt_env->me_psize);
+               mp = &dp->p;
+               mp->mp_pgno = pgno;
+               mp->mp_flags |= P_DIRTY;
+
+               /* Update the page number to new touched page. */
+               if (pp->mp_parent != NULL)
+                       NODEPGNO(NODEPTR(pp->mp_parent, pp->mp_pi)) = mp->mp_pgno;
+               pp->mp_page = mp;
+       }
+       return 0;
+}
+
+/* Flush the data file to disk with fsync().
+ * The flush is skipped when the environment has MDB_NOSYNC set, unless
+ * force is nonzero.
+ * Returns 0 on success (or when skipped), otherwise the errno value
+ * left by fsync().
+ */
+int
+mdb_env_sync(MDB_env *env, int force)
+{
+       if (!force && F_ISSET(env->me_flags, MDB_NOSYNC))
+               return 0;
+
+       if (fsync(env->me_fd) != 0)
+               return errno;
+
+       return 0;
+}
+
+/* Begin a transaction on env.
+ *
+ * rdonly != 0 starts a read-only transaction: the calling thread's
+ * slot in the reader table is looked up (or claimed on first use) and
+ * stamped with the current transaction id.
+ * rdonly == 0 starts a write transaction: the writer mutex is taken
+ * (and stays held until commit/abort), the shared transaction id is
+ * incremented and an empty dirty-page queue is created.
+ *
+ * On success *ret receives the new transaction and MDB_SUCCESS is
+ * returned. Returns ENOMEM when memory cannot be allocated and ENOSPC
+ * when the reader table is full; in both cases nothing is left locked
+ * and nothing is leaked.
+ */
+int
+mdb_txn_begin(MDB_env *env, int rdonly, MDB_txn **ret)
+{
+       MDB_txn *txn;
+       int rc, toggle;
+
+       if ((txn = calloc(1, sizeof(MDB_txn))) == NULL) {
+               DPRINTF("calloc: %s", strerror(errno));
+               return ENOMEM;
+       }
+
+       /* Allocate the per-transaction DB array before any lock is taken
+        * so that an allocation failure needs no unwinding.
+        */
+       txn->mt_dbs = malloc(env->me_maxdbs * sizeof(MDB_db));
+       if (txn->mt_dbs == NULL) {
+               free(txn);
+               return ENOMEM;
+       }
+
+       if (rdonly) {
+               txn->mt_flags |= MDB_TXN_RDONLY;
+       } else {
+               txn->mt_u.dirty_queue = calloc(1, sizeof(*txn->mt_u.dirty_queue));
+               if (txn->mt_u.dirty_queue == NULL) {
+                       free(txn->mt_dbs);
+                       free(txn);
+                       return ENOMEM;
+               }
+               STAILQ_INIT(txn->mt_u.dirty_queue);
+
+               pthread_mutex_lock(&env->me_txns->mt_wmutex);
+               env->me_txns->mt_txnid++;
+               txn->mt_free_pgs = env->me_free_pgs;
+               txn->mt_free_pgs[0] = 0;
+       }
+
+       txn->mt_txnid = env->me_txns->mt_txnid;
+       if (rdonly) {
+               MDB_reader *r = pthread_getspecific(env->me_txkey);
+               if (!r) {
+                       unsigned int i;
+                       /* First read txn on this thread: claim a free slot. */
+                       pthread_mutex_lock(&env->me_txns->mt_mutex);
+                       for (i=0; i<env->me_txns->mt_numreaders; i++)
+                               if (env->me_txns->mt_readers[i].mr_pid == 0)
+                                       break;
+                       if (i == env->me_maxreaders) {
+                               /* Table full: release the mutex and the txn. */
+                               pthread_mutex_unlock(&env->me_txns->mt_mutex);
+                               free(txn->mt_dbs);
+                               free(txn);
+                               return ENOSPC;
+                       }
+                       env->me_txns->mt_readers[i].mr_pid = getpid();
+                       env->me_txns->mt_readers[i].mr_tid = pthread_self();
+                       r = &env->me_txns->mt_readers[i];
+                       pthread_setspecific(env->me_txkey, r);
+                       if (i >= env->me_txns->mt_numreaders)
+                               env->me_txns->mt_numreaders = i+1;
+                       pthread_mutex_unlock(&env->me_txns->mt_mutex);
+               }
+               r->mr_txnid = txn->mt_txnid;
+               txn->mt_u.reader = r;
+       } else {
+               env->me_txn = txn;
+       }
+
+       txn->mt_env = env;
+
+       if ((rc = mdb_env_read_meta(env, &toggle)) != MDB_SUCCESS) {
+               mdb_txn_abort(txn);
+               return rc;
+       }
+
+       /* Copy the DB arrays */
+       txn->mt_numdbs = env->me_numdbs;
+       txn->mt_dbxs = env->me_dbxs;    /* mostly static anyway */
+       memcpy(txn->mt_dbs, env->me_meta->mm_dbs, 2 * sizeof(MDB_db));
+       if (txn->mt_numdbs > 2)
+               memcpy(txn->mt_dbs+2, env->me_dbs[env->me_db_toggle]+2,
+                       (txn->mt_numdbs - 2) * sizeof(MDB_db));
+
+       if (!rdonly) {
+               if (toggle)
+                       txn->mt_flags |= MDB_TXN_METOGGLE;
+               txn->mt_next_pgno = env->me_meta->mm_last_pg+1;
+       }
+
+       DPRINTF("begin transaction %lu on mdbenv %p, root page %lu",
+               txn->mt_txnid, (void *) env, txn->mt_dbs[MAIN_DBI].md_root);
+
+       *ret = txn;
+       return MDB_SUCCESS;
+}
+
+/* Abandon a transaction and release everything it owns.
+ *
+ * A NULL txn is ignored. For a read-only transaction the reader slot's
+ * transaction id is cleared. For a write transaction all dirty pages
+ * and the old-page list are freed, the shared transaction id is rolled
+ * back, the dirty markers of the named databases are cleared and the
+ * writer mutex is released.
+ */
+void
+mdb_txn_abort(MDB_txn *txn)
+{
+       MDB_env *env;
+
+       if (txn == NULL)
+               return;
+
+       env = txn->mt_env;
+       DPRINTF("abort transaction %lu on mdbenv %p, root page %lu",
+               txn->mt_txnid, (void *) env, txn->mt_dbs[MAIN_DBI].md_root);
+
+       free(txn->mt_dbs);
+
+       if (!F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
+               struct dirty_queue *dirty = txn->mt_u.dirty_queue;
+               MDB_oldpages *old;
+               MDB_dpage *page;
+               unsigned int dbi;
+
+               /* Discard all dirty pages. */
+               while ((page = STAILQ_FIRST(dirty)) != NULL) {
+                       STAILQ_REMOVE_HEAD(dirty, h.md_next);
+                       free(page);
+               }
+               free(dirty);
+
+               /* Release the list of old-page records. */
+               for (old = env->me_pghead; old != NULL; old = env->me_pghead) {
+                       env->me_pghead = old->mo_next;
+                       free(old);
+               }
+
+               env->me_txn = NULL;
+               env->me_txns->mt_txnid--;
+               for (dbi = 2; dbi < env->me_numdbs; dbi++)
+                       env->me_dbxs[dbi].md_dirty = 0;
+               pthread_mutex_unlock(&env->me_txns->mt_wmutex);
+       } else {
+               txn->mt_u.reader->mr_txnid = 0;
+       }
+
+       free(txn);
+}
+
+int
+mdb_txn_commit(MDB_txn *txn)
+{
+       int              n, done;
+       unsigned int i;
+       ssize_t          rc;
+       off_t            size;
+       MDB_dpage       *dp;
+       MDB_env *env;
+       pgno_t  next;
+       struct iovec     iov[MDB_COMMIT_PAGES];
+
+       assert(txn != NULL);
+       assert(txn->mt_env != NULL);
+
+       env = txn->mt_env;
+
+       if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
+               DPRINTF("attempt to commit read-only transaction");
+               mdb_txn_abort(txn);
+               return EPERM;
+       }
+
+       if (txn != env->me_txn) {
+               DPRINTF("attempt to commit unknown transaction");
+               mdb_txn_abort(txn);
+               return EINVAL;
+       }
+
+       if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) {
+               DPRINTF("error flag is set, can't commit");
+               mdb_txn_abort(txn);
+               return EINVAL;
+       }
+
+       if (STAILQ_EMPTY(txn->mt_u.dirty_queue))
+               goto done;
+
+       DPRINTF("committing transaction %lu on mdbenv %p, root page %lu",
+           txn->mt_txnid, (void *) env, txn->mt_dbs[MAIN_DBI].md_root);
+
+       /* should only be one record now */
+       if (env->me_pghead) {
+               MDB_val key, data;
+               MDB_oldpages *mop;
+
+               mop = env->me_pghead;
+               key.mv_size = sizeof(pgno_t);
+               key.mv_data = (char *)&mop->mo_txnid;
+               data.mv_size = MDB_IDL_SIZEOF(mop->mo_pages);
+               data.mv_data = mop->mo_pages;
+               mdb_put0(txn, FREE_DBI, &key, &data, 0);
+               free(env->me_pghead);
+               env->me_pghead = NULL;
+       }
+       /* save to free list */
+       if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) {
+               MDB_val key, data;
+               MDB_pageparent mpp;
+
+               /* make sure last page of freeDB is touched and on freelist */
+               key.mv_size = MAXKEYSIZE+1;
+               key.mv_data = NULL;
+               mpp.mp_parent = NULL;
+               mpp.mp_pi = 0;
+               mdb_search_page(txn, FREE_DBI, &key, NULL, 1, &mpp);
+
+#if DEBUG > 1
+               {
+                       unsigned int i;
+                       ULONG *idl = txn->mt_free_pgs;
+                       DPRINTF("IDL write txn %lu root %lu num %lu",
+                               txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
+                       for (i=0; i<idl[0]; i++) {
+                               DPRINTF("IDL %lu", idl[i+1]);
+                       }
+               }
+#endif
+               /* write to last page of freeDB */
+               key.mv_size = sizeof(pgno_t);
+               key.mv_data = (char *)&txn->mt_txnid;
+               data.mv_size = MDB_IDL_SIZEOF(txn->mt_free_pgs);
+               data.mv_data = txn->mt_free_pgs;
+               mdb_put0(txn, FREE_DBI, &key, &data, 0);
+       }
+
+       /* Update DB root pointers. Their pages have already been
+        * touched so this is all in-place and cannot fail.
+        */
+       {
+               MDB_val data;
+               data.mv_size = sizeof(MDB_db);
+
+               for (i = 2; i < txn->mt_numdbs; i++) {
+                       if (txn->mt_dbxs[i].md_dirty) {
+                               data.mv_data = &txn->mt_dbs[i];
+                               mdb_put0(txn, MAIN_DBI, &txn->mt_dbxs[i].md_name, &data, 0);
+                       }
+               }
+       }
+
+       /* Commit up to MDB_COMMIT_PAGES dirty pages to disk until done.
+        */
+       next = 0;
+       do {
+               n = 0;
+               done = 1;
+               size = 0;
+               STAILQ_FOREACH(dp, txn->mt_u.dirty_queue, h.md_next) {
+                       if (dp->p.mp_pgno != next) {
+                               if (n) {
+                                       DPRINTF("committing %u dirty pages", n);
+                                       rc = writev(env->me_fd, iov, n);
+                                       if (rc != size) {
+                                               n = errno;
+                                               if (rc > 0)
+                                                       DPRINTF("short write, filesystem full?");
+                                               else
+                                                       DPRINTF("writev: %s", strerror(errno));
+                                               mdb_txn_abort(txn);
+                                               return n;
+                                       }
+                                       n = 0;
+                                       size = 0;
+                               }
+                               lseek(env->me_fd, dp->p.mp_pgno * env->me_psize, SEEK_SET);
+                               next = dp->p.mp_pgno;
+                       }
+                       DPRINTF("committing page %lu", dp->p.mp_pgno);
+                       iov[n].iov_len = env->me_psize * dp->h.md_num;
+                       iov[n].iov_base = &dp->p;
+                       size += iov[n].iov_len;
+                       next = dp->p.mp_pgno + dp->h.md_num;
+                       /* clear dirty flag */
+                       dp->p.mp_flags &= ~P_DIRTY;
+                       if (++n >= MDB_COMMIT_PAGES) {
+                               done = 0;
+                               break;
+                       }
+               }
+
+               if (n == 0)
+                       break;
+
+               DPRINTF("committing %u dirty pages", n);
+               rc = writev(env->me_fd, iov, n);
+               if (rc != size) {
+                       n = errno;
+                       if (rc > 0)
+                               DPRINTF("short write, filesystem full?");
+                       else
+                               DPRINTF("writev: %s", strerror(errno));
+                       mdb_txn_abort(txn);
+                       return n;
+               }
+
+       } while (!done);
+
+       /* Drop the dirty pages.
+        */
+       while (!STAILQ_EMPTY(txn->mt_u.dirty_queue)) {
+               dp = STAILQ_FIRST(txn->mt_u.dirty_queue);
+               STAILQ_REMOVE_HEAD(txn->mt_u.dirty_queue, h.md_next);
+               free(dp);
+       }
+
+       if ((n = mdb_env_sync(env, 0)) != 0 ||
+           (n = mdb_env_write_meta(txn)) != MDB_SUCCESS ||
+           (n = mdb_env_sync(env, 0)) != 0) {
+               mdb_txn_abort(txn);
+               return n;
+       }
+       env->me_txn = NULL;
+
+       /* update the DB tables */
+       {
+               int toggle = !env->me_db_toggle;
+
+               for (i = 2; i < env->me_numdbs; i++) {
+                       if (txn->mt_dbxs[i].md_dirty) {
+                               env->me_dbs[toggle][i] = txn->mt_dbs[i];
+                               txn->mt_dbxs[i].md_dirty = 0;
+                       }
+               }
+               for (i = env->me_numdbs; i < txn->mt_numdbs; i++) {
+                       txn->mt_dbxs[i].md_dirty = 0;
+                       env->me_dbxs[i] = txn->mt_dbxs[i];
+                       env->me_dbs[toggle][i] = txn->mt_dbs[i];
+               }
+               env->me_db_toggle = toggle;
+               env->me_numdbs = txn->mt_numdbs;
+
+               free(txn->mt_dbs);
+       }
+
+       pthread_mutex_unlock(&env->me_txns->mt_wmutex);
+       free(txn->mt_u.dirty_queue);
+       free(txn);
+       txn = NULL;
+
+done:
+       mdb_txn_abort(txn);
+
+       return MDB_SUCCESS;
+}
+
+/* Read and validate the first meta page of the data file.
+ *
+ * Reads PAGESIZE bytes from offset 0, checks the P_META page flag, the
+ * magic number and the format version, then copies the meta record
+ * into *meta.
+ *
+ * Returns 0 on success, ENOENT if the file is empty, EINVAL for a
+ * short read or an invalid page, MDB_VERSION_MISMATCH for a different
+ * format version, or the errno value of a failed pread().
+ */
+static int
+mdb_env_read_header(MDB_env *env, MDB_meta *meta)
+{
+       char             page[PAGESIZE];
+       MDB_page        *p;
+       MDB_meta        *m;
+       int              rc;
+
+       assert(env != NULL);
+
+       /* We don't know the page size yet, so use a minimum value.
+        */
+
+       if ((rc = pread(env->me_fd, page, PAGESIZE, 0)) == 0) {
+               return ENOENT;
+       } else if (rc != PAGESIZE) {
+               /* Pick the error code before logging can disturb errno. */
+               int err = (rc > 0) ? EINVAL : errno;
+               DPRINTF("read: %s", strerror(err));
+               return err;
+       }
+
+       p = (MDB_page *)page;
+
+       if (!F_ISSET(p->mp_flags, P_META)) {
+               DPRINTF("page %lu not a meta page", p->mp_pgno);
+               return EINVAL;
+       }
+
+       m = METADATA(p);
+       if (m->mm_magic != MDB_MAGIC) {
+               DPRINTF("meta has invalid magic");
+               return EINVAL;
+       }
+
+       if (m->mm_version != MDB_VERSION) {
+               DPRINTF("database is version %u, expected version %u",
+                   m->mm_version, MDB_VERSION);
+               return MDB_VERSION_MISMATCH;
+       }
+
+       memcpy(meta, m, sizeof(*m));
+       return 0;
+}
+
+/* Initialize *meta for a brand-new environment and write the two
+ * initial meta pages (page numbers 0 and 1) to the data file.
+ *
+ * The page size is taken from sysconf(_SC_PAGE_SIZE); both built-in
+ * databases start with an invalid root.
+ *
+ * Returns MDB_SUCCESS, ENOMEM if the page buffer cannot be allocated,
+ * the errno value of a failed write(), or EIO on a short write.
+ */
+static int
+mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
+{
+       MDB_page *p, *q;
+       MDB_meta *m;
+       int rc, err;
+       unsigned int     psize;
+
+       DPRINTF("writing new meta page");
+       psize = sysconf(_SC_PAGE_SIZE);
+
+       meta->mm_magic = MDB_MAGIC;
+       meta->mm_version = MDB_VERSION;
+       meta->mm_psize = psize;
+       meta->mm_last_pg = 1;
+       meta->mm_flags = env->me_flags & 0xffff;
+       meta->mm_flags |= MDB_INTEGERKEY;
+       meta->mm_dbs[0].md_root = P_INVALID;
+       meta->mm_dbs[1].md_root = P_INVALID;
+
+       /* One zeroed buffer holding both meta pages back to back. */
+       p = calloc(2, psize);
+       if (p == NULL)
+               return ENOMEM;
+       p->mp_pgno = 0;
+       p->mp_flags = P_META;
+
+       m = METADATA(p);
+       memcpy(m, meta, sizeof(*meta));
+
+       q = (MDB_page *)((char *)p + psize);
+
+       q->mp_pgno = 1;
+       q->mp_flags = P_META;
+
+       m = METADATA(q);
+       memcpy(m, meta, sizeof(*meta));
+
+       rc = write(env->me_fd, p, psize * 2);
+       /* Decide the result before free() gets a chance to touch errno. */
+       if (rc < 0)
+               err = errno;
+       else if (rc != (int)psize * 2)
+               err = EIO;
+       else
+               err = MDB_SUCCESS;
+       free(p);
+       return err;
+}
+
+/* Write the transaction's meta information to a meta page on disk.
+ *
+ * Only the tail of MDB_meta, starting at mm_dbs[0].md_depth, is
+ * written. The target is the second meta page when the transaction
+ * does not carry MDB_TXN_METOGGLE, otherwise the first.
+ *
+ * Returns MDB_SUCCESS, or errno when the write does not transfer the
+ * expected number of bytes.
+ */
+static int
+mdb_env_write_meta(MDB_txn *txn)
+{
+       MDB_meta        meta;
+       MDB_env *env;
+       off_t skip, pos;
+       int written, len;
+       char *src;
+
+       assert(txn != NULL);
+       assert(txn->mt_env != NULL);
+
+       DPRINTF("writing meta page %d for root page %lu",
+               !F_ISSET(txn->mt_flags, MDB_TXN_METOGGLE), txn->mt_dbs[MAIN_DBI].md_root);
+
+       env = txn->mt_env;
+
+       /* Portion of the meta record that gets rewritten. */
+       skip = offsetof(MDB_meta, mm_dbs[0].md_depth);
+       len = sizeof(MDB_meta) - skip;
+       src = (char *)&meta + skip;
+
+       meta.mm_dbs[0] = txn->mt_dbs[0];
+       meta.mm_dbs[1] = txn->mt_dbs[1];
+       meta.mm_last_pg = txn->mt_next_pgno - 1;
+       meta.mm_txnid = txn->mt_txnid;
+
+       /* File position: past the page header of the chosen meta page. */
+       pos = skip;
+       if (!F_ISSET(txn->mt_flags, MDB_TXN_METOGGLE))
+               pos += env->me_psize;
+       pos += PAGEHDRSZ;
+
+       lseek(env->me_fd, pos, SEEK_SET);
+       written = write(env->me_fd, src, len);
+       if (written != len) {
+               DPRINTF("write failed, disk error?");
+               return errno;
+       }
+
+       return MDB_SUCCESS;
+}
+
+/* Select the meta page with the higher transaction id and make
+ * env->me_meta point at it. Page 0 is chosen when both ids are equal.
+ * If which is non-NULL it receives the chosen index (0 or 1).
+ * Always returns MDB_SUCCESS.
+ */
+static int
+mdb_env_read_meta(MDB_env *env, int *which)
+{
+       int newest;
+
+       assert(env != NULL);
+
+       newest = (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid) ? 1 : 0;
+
+       if (env->me_meta != env->me_metas[newest])
+               env->me_meta = env->me_metas[newest];
+       if (which != NULL)
+               *which = newest;
+
+       DPRINTF("Using meta page %d", newest);
+
+       return MDB_SUCCESS;
+}
+
+/* Allocate a zeroed environment handle with default settings:
+ * DEFAULT_READERS reader slots, two databases and no open files.
+ * On success *env receives the handle and MDB_SUCCESS is returned;
+ * on allocation failure ENOMEM is returned and *env is left untouched.
+ */
+int
+mdb_env_create(MDB_env **env)
+{
+       MDB_env *handle = calloc(1, sizeof(MDB_env));
+
+       if (handle == NULL)
+               return ENOMEM;
+
+       /* -1 marks both descriptors as not yet opened. */
+       handle->me_fd = -1;
+       handle->me_lfd = -1;
+       handle->me_maxdbs = 2;
+       handle->me_maxreaders = DEFAULT_READERS;
+
+       *env = handle;
+       return MDB_SUCCESS;
+}
+
+/* Set the size of the memory map to use for this environment.
+ * Returns EINVAL when the environment already has a map (me_map is
+ * set), MDB_SUCCESS otherwise.
+ */
+int
+mdb_env_set_mapsize(MDB_env *env, size_t size)
+{
+       if (env->me_map != NULL)
+               return EINVAL;
+
+       env->me_mapsize = size;
+       return MDB_SUCCESS;
+}
+
+/* Set the maximum number of databases for this environment.
+ * Returns EINVAL for a NULL env or a negative count (which would
+ * otherwise wrap to a huge unsigned value), MDB_SUCCESS otherwise.
+ */
+int
+mdb_env_set_maxdbs(MDB_env *env, int dbs)
+{
+       if (!env || dbs < 0)
+               return EINVAL;
+       env->me_maxdbs = dbs;
+       return MDB_SUCCESS;
+}
+
+/* Set the maximum number of reader slots for this environment.
+ * Returns EINVAL for a NULL env or a negative count (which would
+ * otherwise wrap to a huge unsigned value), MDB_SUCCESS otherwise.
+ */
+int
+mdb_env_set_maxreaders(MDB_env *env, int readers)
+{
+       if (!env || readers < 0)
+               return EINVAL;
+       env->me_maxreaders = readers;
+       return MDB_SUCCESS;
+}
+
+/* Retrieve the configured maximum number of reader slots.
+ * Returns EINVAL if env or readers is NULL, otherwise stores the value
+ * in *readers and returns MDB_SUCCESS.
+ */
+int
+mdb_env_get_maxreaders(MDB_env *env, int *readers)
+{
+       if (env == NULL || readers == NULL)
+               return EINVAL;
+
+       *readers = env->me_maxreaders;
+       return MDB_SUCCESS;
+}
+
+/* Second stage of opening an environment: read (or create) the meta
+ * pages and map the data file.
+ *
+ * If the data file has no header yet it is treated as a new
+ * environment and fresh meta pages are written. The file is mapped
+ * read-only and shared; MAP_FIXED is requested only when a stored
+ * address exists and MDB_FIXEDMAP is set. Afterwards me_metas[] point
+ * at the two meta pages inside the map and the current one is selected.
+ *
+ * Returns MDB_SUCCESS or an error code from reading the header,
+ * mmap(), or writing the initial meta pages.
+ */
+static int
+mdb_env_open2(MDB_env *env, unsigned int flags)
+{
+       int i, newenv = 0;
+       MDB_meta meta;
+       MDB_page *p;
+
+       env->me_flags = flags;
+
+       memset(&meta, 0, sizeof(meta));
+
+       if ((i = mdb_env_read_header(env, &meta)) != 0) {
+               if (i != ENOENT)
+                       return i;
+               DPRINTF("new mdbenv");
+               newenv = 1;
+       }
+
+       /* An explicitly configured map size wins over the stored one. */
+       if (!env->me_mapsize) {
+               env->me_mapsize = newenv ? DEFAULT_MAPSIZE : meta.mm_mapsize;
+       }
+
+       i = MAP_SHARED;
+       if (meta.mm_address && (flags & MDB_FIXEDMAP))
+               i |= MAP_FIXED;
+       env->me_map = mmap(meta.mm_address, env->me_mapsize, PROT_READ, i,
+               env->me_fd, 0);
+       if (env->me_map == MAP_FAILED)
+               return errno;
+
+       if (newenv) {
+               meta.mm_mapsize = env->me_mapsize;
+               if (flags & MDB_FIXEDMAP)
+                       meta.mm_address = env->me_map;
+               i = mdb_env_init_meta(env, &meta);
+               if (i != MDB_SUCCESS) {
+                       munmap(env->me_map, env->me_mapsize);
+                       return i;
+               }
+       }
+       env->me_psize = meta.mm_psize;
+
+       /* The two meta pages sit at the start of the map, one page apart. */
+       p = (MDB_page *)env->me_map;
+       env->me_metas[0] = METADATA(p);
+       env->me_metas[1] = (MDB_meta *)((char *)env->me_metas[0] + meta.mm_psize);
+
+       if ((i = mdb_env_read_meta(env, NULL)) != 0)
+               return i;
+
+       DPRINTF("opened database version %u, pagesize %u",
+           env->me_meta->mm_version, env->me_psize);
+       DPRINTF("depth: %u", env->me_meta->mm_dbs[MAIN_DBI].md_depth);
+       DPRINTF("entries: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_entries);
+       DPRINTF("branch pages: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_branch_pages);
+       DPRINTF("leaf pages: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_leaf_pages);
+       DPRINTF("overflow pages: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_overflow_pages);
+       DPRINTF("root: %lu", env->me_meta->mm_dbs[MAIN_DBI].md_root);
+
+       return MDB_SUCCESS;
+}
+
+/* Reset a reader slot: clears its transaction id, process id and thread
+ * id. mdb_env_open() registers this as the destructor of env->me_txkey.
+ */
+static void
+mdb_env_reader_dest(void *ptr)
+{
+       MDB_reader *slot = (MDB_reader *)ptr;
+
+       slot->mr_txnid = 0;
+       slot->mr_pid = 0;
+       slot->mr_tid = 0;
+}
+
+/* Downgrade the exclusive lock on the region back to shared.
+ * Before doing so, copy the transaction id of the current meta page
+ * into the lock region.
+ */
+static void
+mdb_env_share_locks(MDB_env *env)
+{
+       struct flock rdlock;
+
+       env->me_txns->mt_txnid = env->me_meta->mm_txnid;
+
+       /* shared lock on the first byte of the lock file */
+       memset((void *)&rdlock, 0, sizeof(rdlock));
+       rdlock.l_whence = SEEK_SET;
+       rdlock.l_start = 0;
+       rdlock.l_len = 1;
+       rdlock.l_type = F_RDLCK;
+       fcntl(env->me_lfd, F_SETLK, &rdlock);
+}
+
+/* Open the lock file lpath, lock it, and map the shared reader table
+ * into env->me_txns.
+ *
+ * An exclusive fcntl() lock on byte 0 is attempted first. If it is
+ * granted, *excl is set to 1 and this process initializes the region:
+ * the file is grown if it is too small, the two process-shared mutexes
+ * are created, and magic/version/txnid/numreaders are written.
+ * Otherwise a shared lock is taken and the existing region's magic and
+ * version are validated. Whenever the file is not grown here, the region
+ * size and env->me_maxreaders are derived from the file's current size.
+ *
+ * Returns MDB_SUCCESS, an errno value, EINVAL or MDB_VERSION_MISMATCH.
+ * On failure the lock file is closed, any mapping made here is removed,
+ * and env->me_lfd / env->me_txns are reset so that a later
+ * mdb_env_close() does not release them a second time.
+ */
+static int
+mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
+{
+       int rc;
+       int lock_errno = 0;     /* errno of the refused exclusive lock */
+       int mapped = 0;         /* set once env->me_txns is a live mapping */
+       off_t size, rsize;
+       struct flock lock_info;
+
+       *excl = 0;
+
+       if ((env->me_lfd = open(lpath, O_RDWR|O_CREAT, mode)) == -1) {
+               rc = errno;
+               return rc;
+       }
+       /* Try to get exclusive lock. If we succeed, then
+        * nobody is using the lock region and we should initialize it.
+        */
+       memset((void *)&lock_info, 0, sizeof(lock_info));
+       lock_info.l_type = F_WRLCK;
+       lock_info.l_whence = SEEK_SET;
+       lock_info.l_start = 0;
+       lock_info.l_len = 1;
+       rc = fcntl(env->me_lfd, F_SETLK, &lock_info);
+       if (rc == 0) {
+               *excl = 1;
+       } else {
+               /* Save the reason now; later calls may overwrite errno. */
+               lock_errno = errno;
+               lock_info.l_type = F_RDLCK;
+               rc = fcntl(env->me_lfd, F_SETLK, &lock_info);
+               if (rc) {
+                       rc = errno;
+                       goto fail;
+               }
+       }
+       size = lseek(env->me_lfd, 0, SEEK_END);
+       if (size == -1) {
+               rc = errno;
+               goto fail;
+       }
+       rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo);
+       if (size < rsize && *excl) {
+               if (ftruncate(env->me_lfd, rsize) != 0) {
+                       rc = errno;
+                       goto fail;
+               }
+       } else {
+               /* An existing region must at least hold the header, or the
+                * size arithmetic and the magic check below are meaningless.
+                */
+               if ((size_t)size < sizeof(MDB_txninfo)) {
+                       rc = EINVAL;
+                       goto fail;
+               }
+               rsize = size;
+               size = rsize - sizeof(MDB_txninfo);
+               env->me_maxreaders = size/sizeof(MDB_reader) + 1;
+       }
+       env->me_txns = mmap(0, rsize, PROT_READ|PROT_WRITE, MAP_SHARED,
+               env->me_lfd, 0);
+       if (env->me_txns == MAP_FAILED) {
+               rc = errno;
+               env->me_txns = NULL;
+               goto fail;
+       }
+       mapped = 1;
+       if (*excl) {
+               pthread_mutexattr_t mattr;
+
+               pthread_mutexattr_init(&mattr);
+               pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
+               pthread_mutex_init(&env->me_txns->mt_mutex, &mattr);
+               pthread_mutex_init(&env->me_txns->mt_wmutex, &mattr);
+               pthread_mutexattr_destroy(&mattr);
+               env->me_txns->mt_version = MDB_VERSION;
+               env->me_txns->mt_magic = MDB_MAGIC;
+               env->me_txns->mt_txnid = 0;
+               env->me_txns->mt_numreaders = 0;
+
+       } else {
+               if (env->me_txns->mt_magic != MDB_MAGIC) {
+                       DPRINTF("lock region has invalid magic");
+                       rc = EINVAL;
+                       goto fail;
+               }
+               if (env->me_txns->mt_version != MDB_VERSION) {
+                       DPRINTF("lock region is version %u, expected version %u",
+                               env->me_txns->mt_version, MDB_VERSION);
+                       rc = MDB_VERSION_MISMATCH;
+                       goto fail;
+               }
+               /* Only "somebody else holds the lock" is an acceptable
+                * reason for the exclusive lock to have been refused.
+                */
+               if (lock_errno != EACCES && lock_errno != EAGAIN) {
+                       rc = lock_errno;
+                       goto fail;
+               }
+       }
+       return MDB_SUCCESS;
+
+fail:
+       if (mapped) {
+               munmap(env->me_txns, (size_t)rsize);
+               env->me_txns = NULL;
+       }
+       close(env->me_lfd);
+       env->me_lfd = -1;
+       return rc;
+
+}
+
+#define LOCKNAME       "/lock.mdb"
+#define DATANAME       "/data.mdb"
+/* Open the environment stored in directory path.
+ *
+ * Builds the lock-file and data-file names from path, sets up the lock
+ * region (mdb_env_setup_locks), opens the data file read-only when
+ * MDB_RDONLY is set in flags and read-write/create otherwise, then maps
+ * it (mdb_env_open2). On success the path is saved, the thread key for
+ * reader slots is created, an exclusively held lock is downgraded to
+ * shared, and the per-database tables are allocated.
+ *
+ * mode is the permission mode used when files are created.
+ * Returns MDB_SUCCESS, ENOMEM, or the error code of the failing step.
+ */
+int
+mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode)
+{
+       int             oflags, rc, len, excl;
+       char *lpath, *dpath;
+
+       /* One allocation holds both names; sizeof() counts each NUL. */
+       len = strlen(path);
+       lpath = malloc(len + sizeof(LOCKNAME) + len + sizeof(DATANAME));
+       if (!lpath)
+               return ENOMEM;
+       dpath = lpath + len + sizeof(LOCKNAME);
+       sprintf(lpath, "%s" LOCKNAME, path);
+       sprintf(dpath, "%s" DATANAME, path);
+
+       rc = mdb_env_setup_locks(env, lpath, mode, &excl);
+       if (rc)
+               goto leave;
+
+       if (F_ISSET(flags, MDB_RDONLY))
+               oflags = O_RDONLY;
+       else
+               oflags = O_RDWR | O_CREAT;
+
+       if ((env->me_fd = open(dpath, oflags, mode)) == -1) {
+               /* leave through the common exit so lpath is released */
+               rc = errno;
+               goto leave;
+       }
+
+       if ((rc = mdb_env_open2(env, flags)) != MDB_SUCCESS) {
+               close(env->me_fd);
+               env->me_fd = -1;
+       } else {
+               /* NOTE(review): the strdup()/calloc() results below are not
+                * checked for NULL -- confirm how callers cope with that.
+                */
+               env->me_path = strdup(path);
+               DPRINTF("opened dbenv %p", (void *) env);
+               pthread_key_create(&env->me_txkey, mdb_env_reader_dest);
+               if (excl)
+                       mdb_env_share_locks(env);
+               env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx));
+               env->me_dbs[0] = calloc(env->me_maxdbs, sizeof(MDB_db));
+               env->me_dbs[1] = calloc(env->me_maxdbs, sizeof(MDB_db));
+               env->me_numdbs = 2;
+       }
+
+leave:
+       free(lpath);
+       return rc;
+}
+
+/* Release everything held by env, then env itself. A NULL env is
+ * ignored.
+ */
+void
+mdb_env_close(MDB_env *env)
+{
+       if (!env)
+               return;
+
+       /* per-database tables and the saved path */
+       free(env->me_dbs[1]);
+       free(env->me_dbs[0]);
+       free(env->me_dbxs);
+       free(env->me_path);
+
+       /* data file: mapping first, then the descriptor */
+       if (env->me_map != NULL)
+               munmap(env->me_map, env->me_mapsize);
+       close(env->me_fd);
+
+       /* lock file: the region size is derived from me_maxreaders */
+       if (env->me_txns != NULL) {
+               size_t region = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo);
+
+               munmap(env->me_txns, region);
+       }
+       close(env->me_lfd);
+
+       free(env);
+}
+
+/* Search for key within a leaf page, using binary search.
+ * Returns the smallest entry larger or equal to the key.
+ * If exactp is non-null, stores whether the found entry was an exact match
+ * in *exactp (1 or 0).
+ * If kip is non-null, stores the index of the found entry in *kip.
+ * If no entry larger or equal to the key is found, returns NULL.
+ *
+ * The function is also called on branch pages; there the search range
+ * starts at index 1 instead of 0.
+ * Keys are compared with the database's md_cmp callback when one is set,
+ * otherwise with _mdb_cmp().
+ * On the NULL return path neither *exactp nor *kip is written, so
+ * callers must not read them in that case.
+ * The page must hold at least one key (asserted).
+ */
+static MDB_node *
+mdb_search_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp, MDB_val *key,
+    int *exactp, unsigned int *kip)
+{
+       unsigned int     i = 0;
+       int              low, high;
+       int              rc = 0;
+       MDB_node        *node;
+       MDB_val  nodekey;
+
+       DPRINTF("searching %u keys in %s page %lu",
+           NUMKEYS(mp),
+           IS_LEAF(mp) ? "leaf" : "branch",
+           mp->mp_pgno);
+
+       assert(NUMKEYS(mp) > 0);
+
+       memset(&nodekey, 0, sizeof(nodekey));
+
+       low = IS_LEAF(mp) ? 0 : 1;      /* presumably index 0 of a branch has no usable key -- confirm */
+       high = NUMKEYS(mp) - 1;
+       while (low <= high) {
+               i = (low + high) >> 1;  /* midpoint of the remaining range */
+               node = NODEPTR(mp, i);
+
+               nodekey.mv_size = node->mn_ksize;
+               nodekey.mv_data = NODEKEY(node);
+
+               if (txn->mt_dbxs[dbi].md_cmp)
+                       rc = txn->mt_dbxs[dbi].md_cmp(key, &nodekey);
+               else
+                       rc = _mdb_cmp(txn, dbi, key, &nodekey);
+
+               if (IS_LEAF(mp))
+                       DPRINTF("found leaf index %u [%.*s], rc = %i",
+                           i, (int)nodekey.mv_size, (char *)nodekey.mv_data, rc);
+               else
+                       DPRINTF("found branch index %u [%.*s -> %lu], rc = %i",
+                           i, (int)node->mn_ksize, (char *)NODEKEY(node),
+                           node->mn_pgno, rc);
+
+               if (rc == 0)
+                       break;
+               if (rc > 0)
+                       low = i + 1;
+               else
+                       high = i - 1;
+       }
+
+       /* Here i and rc describe the last node that was compared. */
+       if (rc > 0) {   /* Found entry is less than the key. */
+               i++;    /* Skip to get the smallest entry larger than key. */
+               if (i >= NUMKEYS(mp))
+                       /* There is no entry larger or equal to the key. */
+                       return NULL;
+       }
+       if (exactp)
+               *exactp = (rc == 0);
+       if (kip)        /* Store the key index if requested. */
+               *kip = i;
+
+       return NODEPTR(mp, i);
+}
+
+/* Take the top page reference off the cursor's stack and free it. */
+static void
+cursor_pop_page(MDB_cursor *cursor)
+{
+       MDB_ppage       *popped = CURSOR_TOP(cursor);
+
+       CURSOR_POP(cursor);
+
+       /* log before freeing: the message reads through the popped entry */
+       DPRINTF("popped page %lu off cursor %p", popped->mp_page->mp_pgno, (void *) cursor);
+
+       free(popped);
+}
+
+/* Allocate a zeroed stack entry referring to mp and push it on the
+ * cursor's stack. Returns the new entry, or NULL if allocation failed
+ * (in which case the stack is left untouched).
+ */
+static MDB_ppage *
+cursor_push_page(MDB_cursor *cursor, MDB_page *mp)
+{
+       MDB_ppage       *entry;
+
+       DPRINTF("pushing page %lu on cursor %p", mp->mp_pgno, (void *) cursor);
+
+       entry = calloc(1, sizeof(MDB_ppage));
+       if (entry != NULL) {
+               entry->mp_page = mp;
+               CURSOR_PUSH(cursor, entry);
+       }
+       return entry;
+}
+
+/* Return the page numbered pgno as seen by txn.
+ * For a transaction that is not read-only, its dirty queue is searched
+ * first. Otherwise (or if the page is not dirty) the page is located in
+ * the environment's map; NULL is returned when pgno lies beyond the
+ * meta page's mm_last_pg.
+ */
+static MDB_page *
+mdb_get_page(MDB_txn *txn, pgno_t pgno)
+{
+       MDB_env *env;
+
+       if (!F_ISSET(txn->mt_flags, MDB_TXN_RDONLY) && !STAILQ_EMPTY(txn->mt_u.dirty_queue)) {
+               MDB_dpage *dirty;
+               STAILQ_FOREACH(dirty, txn->mt_u.dirty_queue, h.md_next) {
+                       if (dirty->p.mp_pgno == pgno)
+                               return &dirty->p;
+               }
+       }
+
+       env = txn->mt_env;
+       if (pgno > env->me_meta->mm_last_pg)
+               return NULL;
+       return (MDB_page *)(env->me_map + env->me_psize * pgno);
+}
+
+static int     /* Descend from the page in mpp->mp_page to a leaf page.
+                *
+                * At each branch page the child to follow is chosen as:
+                *  - key == NULL: index 0 (leftmost path);
+                *  - key->mv_size > MAXKEYSIZE with NULL mv_data: the last
+                *    index (rightmost path; mdb_cursor_last builds such a key);
+                *  - otherwise the result of mdb_search_node(), stepping one
+                *    index back on an inexact match and using the last index
+                *    when no entry >= key exists.
+                * If cursor is non-null every visited page is pushed on its
+                * stack and the chosen index is recorded in the entry.
+                * mpp is updated along the way with the parent page, the
+                * index within the parent, and the current page.
+                * If modify is set, mdb_touch() is called for each child and
+                * the MDB_dhead in front of the resulting page receives the
+                * parent pointer and index.
+                *
+                * Returns MDB_SUCCESS with mpp->mp_page on a leaf, MDB_FAIL if
+                * a push or page lookup fails or the walk ends on a non-leaf
+                * page, or the error returned by mdb_touch().
+                */
+mdb_search_page_root(MDB_txn *txn, MDB_dbi dbi, MDB_val *key,
+    MDB_cursor *cursor, int modify, MDB_pageparent *mpp)
+{
+       MDB_page        *mp = mpp->mp_page;
+       int rc;
+
+       if (cursor && cursor_push_page(cursor, mp) == NULL)
+               return MDB_FAIL;
+
+       while (IS_BRANCH(mp)) {
+               unsigned int     i = 0;
+               MDB_node        *node;
+
+               DPRINTF("branch page %lu has %u keys", mp->mp_pgno, NUMKEYS(mp));
+               assert(NUMKEYS(mp) > 1);
+               DPRINTF("found index 0 to page %lu", NODEPGNO(NODEPTR(mp, 0)));
+
+               if (key == NULL)        /* Initialize cursor to first page. */
+                       i = 0;
+               else if (key->mv_size > MAXKEYSIZE && key->mv_data == NULL) {
+                                                       /* cursor to last page */
+                       i = NUMKEYS(mp)-1;
+               } else {
+                       int      exact;
+                       node = mdb_search_node(txn, dbi, mp, key, &exact, &i);
+                       if (node == NULL)       /* key is beyond every entry: take the last child */
+                               i = NUMKEYS(mp) - 1;
+                       else if (!exact) {      /* entry i is larger than key: follow the one before it */
+                               assert(i > 0);
+                               i--;
+                       }
+               }
+
+               if (key)
+                       DPRINTF("following index %u for key %.*s",
+                           i, (int)key->mv_size, (char *)key->mv_data);
+               assert(i < NUMKEYS(mp));
+               node = NODEPTR(mp, i);
+
+               if (cursor)
+                       CURSOR_TOP(cursor)->mp_ki = i;
+
+               mpp->mp_parent = mp;
+               if ((mp = mdb_get_page(txn, NODEPGNO(node))) == NULL)
+                       return MDB_FAIL;
+               mpp->mp_pi = i;
+               mpp->mp_page = mp;
+
+               if (cursor && cursor_push_page(cursor, mp) == NULL)
+                       return MDB_FAIL;
+
+               if (modify) {
+                       MDB_dhead *dh = ((MDB_dhead *)mp)-1;    /* NOTE(review): this value is never read; dh is reassigned below */
+                       if ((rc = mdb_touch(txn, mpp)) != 0)
+                               return rc;
+                       dh = ((MDB_dhead *)mpp->mp_page)-1;     /* header stored just before the page in mpp after mdb_touch() */
+                       dh->md_parent = mpp->mp_parent;
+                       dh->md_pi = mpp->mp_pi;
+               }
+
+               mp = mpp->mp_page;      /* continue from the page now in mpp */
+       }
+
+       if (!IS_LEAF(mp)) {
+               DPRINTF("internal error, index points to a %02X page!?",
+                   mp->mp_flags);
+               return MDB_FAIL;
+       }
+
+       DPRINTF("found leaf page %lu for key %.*s", mp->mp_pgno,
+           key ? (int)key->mv_size : 0, key ? (char *)key->mv_data : NULL);
+
+       return MDB_SUCCESS;
+}
+
+/* Search for the page a given key should be in.
+ * Stores a pointer to the found page in *mpp.
+ * If key is NULL, search for the lowest page (used by mdb_cursor_first).
+ * If cursor is non-null, pushes parent pages on the cursor stack.
+ * If modify is true, visited pages are updated with new page numbers.
+ *
+ * Returns EINVAL if the transaction carries MDB_TXN_ERROR, MDB_NOTFOUND
+ * if the database's root is P_INVALID, MDB_FAIL if the root page cannot
+ * be fetched, an error from mdb_touch() or from the recursive search,
+ * and otherwise the result of mdb_search_page_root().
+ */
+static int
+mdb_search_page(MDB_txn *txn, MDB_dbi dbi, MDB_val *key,
+    MDB_cursor *cursor, int modify, MDB_pageparent *mpp)
+{
+       int              rc;
+       pgno_t           root;
+
+       /* A transaction flagged with MDB_TXN_ERROR is refused outright.
+        * Otherwise the search starts from the root page number that the
+        * transaction holds for this database.
+        */
+       if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) {
+               DPRINTF("transaction has failed, must abort");
+               return EINVAL;
+       } else
+               root = txn->mt_dbs[dbi].md_root;
+
+       if (root == P_INVALID) {                /* Tree is empty. */
+               DPRINTF("tree is empty");
+               return MDB_NOTFOUND;
+       }
+
+       if ((mpp->mp_page = mdb_get_page(txn, root)) == NULL)
+               return MDB_FAIL;
+
+       DPRINTF("root page has flags 0x%X", mpp->mp_page->mp_flags);
+
+       if (modify) {
+               /* For sub-databases, update main root first: a modifying
+                * search for the sub-database's name is run on MAIN_DBI once,
+                * after which the sub-database is marked dirty.
+                */
+               if (dbi > MAIN_DBI && !txn->mt_dbxs[dbi].md_dirty) {
+                       MDB_pageparent mp2;
+                       rc = mdb_search_page(txn, MAIN_DBI, &txn->mt_dbxs[dbi].md_name,
+                               NULL, 1, &mp2);
+                       if (rc)
+                               return rc;
+                       txn->mt_dbxs[dbi].md_dirty = 1;
+               }
+               /* Touch a root that is not dirty yet and record the page
+                * number it has afterwards as the new root.
+                */
+               if (!F_ISSET(mpp->mp_page->mp_flags, P_DIRTY)) {
+                       mpp->mp_parent = NULL;
+                       mpp->mp_pi = 0;
+                       if ((rc = mdb_touch(txn, mpp)))
+                               return rc;
+                       txn->mt_dbs[dbi].md_root = mpp->mp_page->mp_pgno;
+               }
+       }
+
+       return mdb_search_page_root(txn, dbi, key, cursor, modify, mpp);
+}
+
+/* Fill *data with the size and location of a leaf node's data.
+ * For ordinary nodes the data lives inside the node. For F_BIGDATA nodes
+ * the node stores a page number; that page is fetched and mv_data is
+ * pointed at it. Returns MDB_SUCCESS, or MDB_FAIL if the overflow page
+ * cannot be fetched (mv_size has already been set by then).
+ */
+static int
+mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data)
+{
+       MDB_page        *ovpage;        /* overflow page */
+       pgno_t           ovpgno;
+
+       /* The reported size is the node's data size in both cases. */
+       data->mv_size = leaf->mn_dsize;
+
+       if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
+               memcpy(&ovpgno, NODEDATA(leaf), sizeof(ovpgno));
+               ovpage = mdb_get_page(txn, ovpgno);
+               if (ovpage == NULL) {
+                       DPRINTF("read overflow page %lu failed", ovpgno);
+                       return MDB_FAIL;
+               }
+               /* NOTE(review): this is the start of the page, page header
+                * included -- confirm that is where the data was written.
+                */
+               data->mv_data = ovpage;
+       } else {
+               data->mv_data = NODEDATA(leaf);
+       }
+
+       return MDB_SUCCESS;
+}
+
+/* Look up key in database dbi and return its data through *data.
+ * For MDB_DUPSORT databases the first duplicate is returned.
+ * key and data must be non-NULL (asserted).
+ * Returns EINVAL for a NULL txn, a dbi of 0 or one not below
+ * txn->mt_numdbs, or a key size of 0 or above MAXKEYSIZE; MDB_NOTFOUND
+ * when there is no exact match; otherwise the result of the page search
+ * or of mdb_read_data().
+ */
+int
+mdb_get(MDB_txn *txn, MDB_dbi dbi,
+    MDB_val *key, MDB_val *data)
+{
+       int              rc, exact;
+       MDB_node        *leaf;
+       MDB_pageparent mpp;
+
+       assert(key);
+       assert(data);
+       DPRINTF("===> get key [%.*s]", (int)key->mv_size, (char *)key->mv_data);
+
+       if (txn == NULL || !dbi || dbi >= txn->mt_numdbs)
+               return EINVAL;
+       if (key->mv_size == 0 || key->mv_size > MAXKEYSIZE)
+               return EINVAL;
+
+       rc = mdb_search_page(txn, dbi, key, NULL, 0, &mpp);
+       if (rc != MDB_SUCCESS)
+               return rc;
+
+       leaf = mdb_search_node(txn, dbi, mpp.mp_page, key, &exact, NULL);
+       /* exact is only meaningful when a node was returned */
+       if (leaf == NULL || !exact)
+               return MDB_NOTFOUND;
+
+       if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
+               /* Return first duplicate data item */
+               MDB_xcursor mx;
+
+               mdb_xcursor_init0(txn, dbi, &mx);
+               mdb_xcursor_init1(txn, dbi, &mx, leaf);
+               rc = mdb_search_page(&mx.mx_txn, mx.mx_cursor.mc_dbi, NULL, NULL, 0, &mpp);
+               if (rc != MDB_SUCCESS)
+                       return rc;
+               leaf = NODEPTR(mpp.mp_page, 0);
+       }
+
+       return mdb_read_data(txn, leaf, data);
+}
+
+/* Move the cursor from the page on top of its stack to that page's
+ * right (move_right != 0) or left sibling.
+ *
+ * The top entry is popped; the parent's key index is stepped by one, or,
+ * when the parent has no further key in that direction, the parent is
+ * first replaced by its own sibling (recursively). The page the parent
+ * then points at is pushed as the new top.
+ *
+ * Returns MDB_SUCCESS, MDB_NOTFOUND when the top page is the root (it
+ * has no siblings), or MDB_FAIL when the sibling page cannot be fetched
+ * or pushed.
+ */
+static int
+mdb_sibling(MDB_cursor *cursor, int move_right)
+{
+       int              rc;
+       MDB_node        *indx;
+       MDB_ppage       *parent, *top;
+       MDB_page        *mp;
+
+       top = CURSOR_TOP(cursor);
+       if ((parent = SLIST_NEXT(top, mp_entry)) == NULL) {
+               return MDB_NOTFOUND;            /* root has no siblings */
+       }
+
+       DPRINTF("parent page is page %lu, index %u",
+           parent->mp_page->mp_pgno, parent->mp_ki);
+
+       cursor_pop_page(cursor);
+       if (move_right ? (parent->mp_ki + 1 >= NUMKEYS(parent->mp_page))
+                      : (parent->mp_ki == 0)) {
+               DPRINTF("no more keys left, moving to %s sibling",
+                   move_right ? "right" : "left");
+               if ((rc = mdb_sibling(cursor, move_right)) != MDB_SUCCESS)
+                       return rc;
+               /* the recursive call replaced the parent entry */
+               parent = CURSOR_TOP(cursor);
+       } else {
+               if (move_right)
+                       parent->mp_ki++;
+               else
+                       parent->mp_ki--;
+               DPRINTF("just moving to %s index key %u",
+                   move_right ? "right" : "left", parent->mp_ki);
+       }
+       assert(IS_BRANCH(parent->mp_page));
+
+       indx = NODEPTR(parent->mp_page, parent->mp_ki);
+       if ((mp = mdb_get_page(cursor->mc_txn, indx->mn_pgno)) == NULL)
+               return MDB_FAIL;
+
+       /* Without this check an allocation failure would be reported as
+        * success while the stack top is still the parent (branch) page.
+        */
+       if (cursor_push_page(cursor, mp) == NULL)
+               return MDB_FAIL;
+
+       return MDB_SUCCESS;
+}
+
+/* Point *key at the key stored in node (no copy is made).
+ * A NULL key is allowed and ignored. Always returns 0.
+ */
+static int
+mdb_set_key(MDB_node *node, MDB_val *key)
+{
+       if (key != NULL) {
+               key->mv_size = node->mn_ksize;
+               key->mv_data = NODEKEY(node);
+       }
+
+       return 0;
+}
+
+/* Advance the cursor to the next entry and return it through key/data
+ * (either may be NULL).
+ *
+ * For MDB_DUPSORT databases, MDB_NEXT and MDB_NEXT_DUP first try to
+ * advance the nested duplicate cursor; MDB_NEXT_DUP returns that result
+ * as is, MDB_NEXT falls through to the next key when the duplicates are
+ * exhausted. When the current page has no further key the cursor moves
+ * to the right sibling page; if there is none, mc_eof is set.
+ *
+ * The cursor must be initialized (asserted).
+ * Returns MDB_SUCCESS, MDB_NOTFOUND at the end of the data, or the error
+ * code of a failing read.
+ */
+static int
+mdb_cursor_next(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op op)
+{
+       MDB_ppage       *top;
+       MDB_page        *mp;
+       MDB_node        *leaf;
+       int rc;
+
+       if (cursor->mc_eof) {
+               return MDB_NOTFOUND;
+       }
+
+       assert(cursor->mc_initialized);
+
+       if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+               if (op == MDB_NEXT || op == MDB_NEXT_DUP) {
+                       rc = mdb_cursor_next(&cursor->mc_xcursor->mx_cursor, data, NULL, MDB_NEXT);
+                       if (op != MDB_NEXT || rc == MDB_SUCCESS)
+                               return rc;
+               }
+       }
+
+       top = CURSOR_TOP(cursor);
+       mp = top->mp_page;
+
+       DPRINTF("cursor_next: top page is %lu in cursor %p", mp->mp_pgno, (void *) cursor);
+
+       if (top->mp_ki + 1 >= NUMKEYS(mp)) {
+               DPRINTF("=====> move to next sibling page");
+               if (mdb_sibling(cursor, 1) != MDB_SUCCESS) {
+                       cursor->mc_eof = 1;
+                       return MDB_NOTFOUND;
+               }
+               top = CURSOR_TOP(cursor);
+               mp = top->mp_page;
+               DPRINTF("next page is %lu, key index %u", mp->mp_pgno, top->mp_ki);
+       } else
+               top->mp_ki++;
+
+       DPRINTF("==> cursor points to page %lu with %u keys, key index %u",
+           mp->mp_pgno, NUMKEYS(mp), top->mp_ki);
+
+       assert(IS_LEAF(mp));
+       leaf = NODEPTR(mp, top->mp_ki);
+
+       if (data) {
+               /* The assignment must be parenthesized on its own, otherwise
+                * rc receives the result of the comparison (0 or 1).
+                */
+               if ((rc = mdb_read_data(cursor->mc_txn, leaf, data)) != MDB_SUCCESS)
+                       return rc;
+
+               if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+                       mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, leaf);
+                       rc = mdb_cursor_first(&cursor->mc_xcursor->mx_cursor, data, NULL);
+                       if (rc != MDB_SUCCESS)
+                               return rc;
+               }
+       }
+
+       return mdb_set_key(leaf, key);
+}
+
+/* Move the cursor to the previous entry and return it through key/data
+ * (either may be NULL).
+ *
+ * For MDB_DUPSORT databases, MDB_PREV and MDB_PREV_DUP first try to step
+ * the nested duplicate cursor back; MDB_PREV_DUP returns that result as
+ * is, MDB_PREV falls through to the previous key when no earlier
+ * duplicate exists. At index 0 of a page the cursor moves to the left
+ * sibling page and is placed on its last key.
+ *
+ * The cursor must be initialized (asserted). mc_eof is cleared once the
+ * cursor has moved.
+ * Returns MDB_SUCCESS, MDB_NOTFOUND when there is no previous entry, or
+ * the error code of a failing read.
+ */
+static int
+mdb_cursor_prev(MDB_cursor *cursor, MDB_val *key, MDB_val *data, MDB_cursor_op op)
+{
+       MDB_ppage       *top;
+       MDB_page        *mp;
+       MDB_node        *leaf;
+       int rc;
+
+       assert(cursor->mc_initialized);
+
+       if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+               if (op == MDB_PREV || op == MDB_PREV_DUP) {
+                       rc = mdb_cursor_prev(&cursor->mc_xcursor->mx_cursor, data, NULL, MDB_PREV);
+                       if (op != MDB_PREV || rc == MDB_SUCCESS)
+                               return rc;
+               }
+       }
+
+       top = CURSOR_TOP(cursor);
+       mp = top->mp_page;
+
+       DPRINTF("cursor_prev: top page is %lu in cursor %p", mp->mp_pgno, (void *) cursor);
+
+       if (top->mp_ki == 0)  {
+               DPRINTF("=====> move to prev sibling page");
+               if (mdb_sibling(cursor, 0) != MDB_SUCCESS) {
+                       return MDB_NOTFOUND;
+               }
+               top = CURSOR_TOP(cursor);
+               mp = top->mp_page;
+               top->mp_ki = NUMKEYS(mp) - 1;
+               DPRINTF("prev page is %lu, key index %u", mp->mp_pgno, top->mp_ki);
+       } else
+               top->mp_ki--;
+
+       cursor->mc_eof = 0;
+
+       DPRINTF("==> cursor points to page %lu with %u keys, key index %u",
+           mp->mp_pgno, NUMKEYS(mp), top->mp_ki);
+
+       assert(IS_LEAF(mp));
+       leaf = NODEPTR(mp, top->mp_ki);
+
+       if (data) {
+               /* The assignment must be parenthesized on its own, otherwise
+                * rc receives the result of the comparison (0 or 1).
+                */
+               if ((rc = mdb_read_data(cursor->mc_txn, leaf, data)) != MDB_SUCCESS)
+                       return rc;
+
+               if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+                       mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, leaf);
+                       rc = mdb_cursor_last(&cursor->mc_xcursor->mx_cursor, data, NULL);
+                       if (rc != MDB_SUCCESS)
+                               return rc;
+               }
+       }
+
+       return mdb_set_key(leaf, key);
+}
+
+/* Position the cursor at key, or at the first entry after it.
+ *
+ * The cursor stack is emptied and rebuilt by a fresh search. If exactp
+ * is non-NULL only an exact key match is accepted and MDB_NOTFOUND is
+ * returned otherwise; if exactp is NULL the cursor is placed on the
+ * smallest entry >= key, moving to the right sibling page when the leaf
+ * holds no such entry.
+ *
+ * When data is non-NULL the entry's data is read. For MDB_DUPSORT
+ * databases the nested cursor is then positioned: on the first duplicate
+ * for MDB_SET / MDB_SET_RANGE, on an exact data match for MDB_GET_BOTH,
+ * and on the nearest duplicate for any other op.
+ *
+ * On success *key is pointed at the stored key.
+ * Returns MDB_SUCCESS, MDB_NOTFOUND, or the error code of a failing step.
+ */
+static int
+mdb_cursor_set(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
+    MDB_cursor_op op, int *exactp)
+{
+       int              rc;
+       MDB_node        *leaf;
+       MDB_ppage       *top;
+       MDB_pageparent mpp;
+
+       assert(cursor);
+       assert(key);
+       assert(key->mv_size > 0);
+
+       while (CURSOR_TOP(cursor) != NULL)
+               cursor_pop_page(cursor);
+
+       rc = mdb_search_page(cursor->mc_txn, cursor->mc_dbi, key, cursor, 0, &mpp);
+       if (rc != MDB_SUCCESS)
+               return rc;
+       assert(IS_LEAF(mpp.mp_page));
+
+       top = CURSOR_TOP(cursor);
+       leaf = mdb_search_node(cursor->mc_txn, cursor->mc_dbi, mpp.mp_page, key, exactp, &top->mp_ki);
+       /* mdb_search_node() leaves *exactp unwritten when it returns NULL,
+        * so test leaf first; no node at all is certainly not an exact match.
+        */
+       if (exactp != NULL && (leaf == NULL || !*exactp)) {
+               /* MDB_SET specified and not an exact match. */
+               return MDB_NOTFOUND;
+       }
+
+       if (leaf == NULL) {
+               DPRINTF("===> inexact leaf not found, goto sibling");
+               if ((rc = mdb_sibling(cursor, 1)) != MDB_SUCCESS)
+                       return rc;              /* no entries matched */
+               top = CURSOR_TOP(cursor);
+               top->mp_ki = 0;
+               mpp.mp_page = top->mp_page;
+               assert(IS_LEAF(mpp.mp_page));
+               leaf = NODEPTR(mpp.mp_page, 0);
+       }
+
+       cursor->mc_initialized = 1;
+       cursor->mc_eof = 0;
+
+       if (data) {
+               if ((rc = mdb_read_data(cursor->mc_txn, leaf, data)) != MDB_SUCCESS)
+                       return rc;
+
+               if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+                       mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, leaf);
+                       if (op == MDB_SET || op == MDB_SET_RANGE) {
+                               rc = mdb_cursor_first(&cursor->mc_xcursor->mx_cursor, data, NULL);
+                       } else {
+                               int ex2 = 0, *ex2p;
+                               MDB_cursor_op op2;
+                               if (op == MDB_GET_BOTH) {
+                                       ex2p = &ex2;
+                                       op2 = MDB_SET;
+                               } else {
+                                       ex2p = NULL;
+                                       op2 = MDB_SET_RANGE;
+                               }
+                               rc = mdb_cursor_set(&cursor->mc_xcursor->mx_cursor, data, NULL, op2, ex2p);
+                       }
+                       /* checked for both ways of positioning the nested cursor */
+                       if (rc != MDB_SUCCESS)
+                               return rc;
+               }
+       }
+
+       rc = mdb_set_key(leaf, key);
+       if (rc == MDB_SUCCESS) {
+               DPRINTF("==> cursor placed on key %.*s",
+                       (int)key->mv_size, (char *)key->mv_data);
+       }
+
+       return rc;
+}
+
+/* Position the cursor on the first entry of its database and return it
+ * through key/data (either may be NULL). For MDB_DUPSORT databases the
+ * nested cursor is placed on the first duplicate when data is requested.
+ * Returns MDB_SUCCESS or the error code of the failing step.
+ */
+static int
+mdb_cursor_first(MDB_cursor *cursor, MDB_val *key, MDB_val *data)
+{
+       MDB_pageparent  found;
+       MDB_node        *first;
+       int              rc;
+
+       /* discard whatever position the cursor held before */
+       while (CURSOR_TOP(cursor) != NULL)
+               cursor_pop_page(cursor);
+
+       /* a NULL key selects the leftmost leaf */
+       rc = mdb_search_page(cursor->mc_txn, cursor->mc_dbi, NULL, cursor, 0, &found);
+       if (rc != MDB_SUCCESS)
+               return rc;
+       assert(IS_LEAF(found.mp_page));
+
+       first = NODEPTR(found.mp_page, 0);
+       cursor->mc_initialized = 1;
+       cursor->mc_eof = 0;
+
+       if (data != NULL) {
+               rc = mdb_read_data(cursor->mc_txn, first, data);
+               if (rc != MDB_SUCCESS)
+                       return rc;
+
+               if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+                       mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, first);
+                       rc = mdb_cursor_first(&cursor->mc_xcursor->mx_cursor, data, NULL);
+                       if (rc != 0)
+                               return rc;
+               }
+       }
+       return mdb_set_key(first, key);
+}
+
+/* Position the cursor on the last entry of its database and return it
+ * through key/data (either may be NULL). For MDB_DUPSORT databases the
+ * nested cursor is placed on the last duplicate when data is requested.
+ * Returns MDB_SUCCESS or the error code of the failing step.
+ */
+static int
+mdb_cursor_last(MDB_cursor *cursor, MDB_val *key, MDB_val *data)
+{
+       MDB_pageparent  found;
+       MDB_ppage       *top;
+       MDB_node        *last;
+       MDB_val          sentinel;
+       int              rc;
+
+       /* discard whatever position the cursor held before */
+       while (CURSOR_TOP(cursor) != NULL)
+               cursor_pop_page(cursor);
+
+       /* An oversized key with no data makes the page search follow the
+        * last index of every branch page.
+        */
+       sentinel.mv_size = MAXKEYSIZE+1;
+       sentinel.mv_data = NULL;
+
+       rc = mdb_search_page(cursor->mc_txn, cursor->mc_dbi, &sentinel, cursor, 0, &found);
+       if (rc != MDB_SUCCESS)
+               return rc;
+       assert(IS_LEAF(found.mp_page));
+
+       last = NODEPTR(found.mp_page, NUMKEYS(found.mp_page)-1);
+       cursor->mc_initialized = 1;
+       cursor->mc_eof = 0;
+
+       /* record the position in the stack entry of the leaf */
+       top = CURSOR_TOP(cursor);
+       top->mp_ki = NUMKEYS(top->mp_page) - 1;
+
+       if (data != NULL) {
+               rc = mdb_read_data(cursor->mc_txn, last, data);
+               if (rc != MDB_SUCCESS)
+                       return rc;
+
+               if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+                       mdb_xcursor_init1(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor, last);
+                       rc = mdb_cursor_last(&cursor->mc_xcursor->mx_cursor, data, NULL);
+                       if (rc != 0)
+                               return rc;
+               }
+       }
+
+       return mdb_set_key(last, key);
+}
+
+/* Retrieve an entry through the cursor according to op.
+ *
+ *  MDB_SET, MDB_GET_BOTH, MDB_GET_BOTH_RANGE
+ *      position on key, which must match exactly (the GET_BOTH variants
+ *      additionally require data);
+ *  MDB_SET_RANGE
+ *      position on the first key greater than or equal to key;
+ *  MDB_NEXT*, MDB_PREV*
+ *      step forward/backward; an uninitialized cursor starts at the
+ *      first/last entry, and a backward step from end-of-data restarts
+ *      at the last entry;
+ *  MDB_FIRST, MDB_LAST
+ *      position on the first/last entry.
+ *
+ * Returns EINVAL for an unknown op, for a missing or empty key or one
+ * larger than MAXKEYSIZE where a key is required, or for missing data
+ * with the GET_BOTH variants; otherwise the result of the selected
+ * operation.
+ */
+int
+mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
+    MDB_cursor_op op)
+{
+       int              rc;
+       int              exact = 0;
+
+       assert(cursor);
+
+       switch (op) {
+       case MDB_GET_BOTH:
+       case MDB_GET_BOTH_RANGE:
+               if (data == NULL) {
+                       rc = EINVAL;
+                       break;
+               }
+               /* FALLTHRU */
+       case MDB_SET:
+       case MDB_SET_RANGE:
+               if (key == NULL || key->mv_size == 0 || key->mv_size > MAXKEYSIZE) {
+                       rc = EINVAL;
+               } else if (op == MDB_SET_RANGE)
+                       /* a NULL exact pointer lets mdb_cursor_set() accept
+                        * the nearest following key
+                        */
+                       rc = mdb_cursor_set(cursor, key, data, op, NULL);
+               else
+                       /* every other op here needs the key itself */
+                       rc = mdb_cursor_set(cursor, key, data, op, &exact);
+               break;
+       case MDB_NEXT:
+       case MDB_NEXT_DUP:
+       case MDB_NEXT_NODUP:
+               if (!cursor->mc_initialized)
+                       rc = mdb_cursor_first(cursor, key, data);
+               else
+                       rc = mdb_cursor_next(cursor, key, data, op);
+               break;
+       case MDB_PREV:
+       case MDB_PREV_DUP:
+       case MDB_PREV_NODUP:
+               if (!cursor->mc_initialized || cursor->mc_eof)
+                       rc = mdb_cursor_last(cursor, key, data);
+               else
+                       rc = mdb_cursor_prev(cursor, key, data, op);
+               break;
+       case MDB_FIRST:
+               rc = mdb_cursor_first(cursor, key, data);
+               break;
+       case MDB_LAST:
+               rc = mdb_cursor_last(cursor, key, data);
+               break;
+       default:
+               DPRINTF("unhandled/unimplemented cursor operation %u", op);
+               rc = EINVAL;
+               break;
+       }
+
+       return rc;
+}
+
+/* Allocate num page(s) through mdb_alloc_page() and initialize the page
+ * header: the given flags plus P_DIRTY, and empty lower/upper bounds.
+ * The matching page counter of database dbi is incremented (overflow
+ * pages count num and record it in mp_pages).
+ * Returns the new dirty page, or NULL if allocation failed.
+ */
+static MDB_dpage *
+mdb_new_page(MDB_txn *txn, MDB_dbi dbi, uint32_t flags, int num)
+{
+       MDB_dpage       *fresh;
+
+       fresh = mdb_alloc_page(txn, NULL, 0, num);
+       if (fresh == NULL)
+               return NULL;
+
+       DPRINTF("allocated new mpage %lu, page size %u",
+           fresh->p.mp_pgno, txn->mt_env->me_psize);
+
+       /* empty page: free space spans from the header to the page end */
+       fresh->p.mp_flags = flags | P_DIRTY;
+       fresh->p.mp_lower = PAGEHDRSZ;
+       fresh->p.mp_upper = txn->mt_env->me_psize;
+
+       if (IS_BRANCH(&fresh->p)) {
+               txn->mt_dbs[dbi].md_branch_pages++;
+       } else if (IS_LEAF(&fresh->p)) {
+               txn->mt_dbs[dbi].md_leaf_pages++;
+       } else if (IS_OVERFLOW(&fresh->p)) {
+               txn->mt_dbs[dbi].md_overflow_pages += num;
+               fresh->p.mp_pages = num;
+       }
+
+       return fresh;
+}
+
+/* Space a leaf node for key/data needs on a page, including its slot in
+ * the page's index array. Data of at least me_psize / MDB_MINKEYS bytes
+ * is counted as a page number only, since it goes to an overflow page.
+ */
+static size_t
+mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data)
+{
+       size_t           need = LEAFSIZE(key, data);
+
+       if (data->mv_size >= env->me_psize / MDB_MINKEYS) {
+               /* put on overflow page */
+               need -= data->mv_size - sizeof(pgno_t);
+       }
+
+       return need + sizeof(indx_t);
+}
+
+/* Space a branch node for key needs on a page, including its slot in
+ * the page's index array.
+ */
+static size_t
+mdb_branch_size(MDB_env *env, MDB_val *key)
+{
+       size_t           need = INDXSIZE(key);
+
+       if (need >= env->me_psize / MDB_MINKEYS) {
+               /* Such a key would belong on an overflow page, but that is
+                * not implemented, so the size is left unchanged:
+                * need -= key->size - sizeof(pgno_t);
+                */
+       }
+
+       return need + sizeof(indx_t);
+}
+
+/* Insert a new node into page <mp> at slot <indx>.
+ *
+ * Leaf pages store <key,data>. Data of at least me_psize / MDB_MINKEYS
+ * bytes is written to newly allocated overflow page(s) and the node only
+ * stores the overflow page number. If <flags> already has F_BIGDATA set,
+ * sizeof(pgno_t) bytes are copied from data->mv_data into the node and
+ * data->mv_size is recorded as the data size.
+ * Branch pages store <key,pgno>; <key> may be NULL and <data> is unused.
+ *
+ * Returns MDB_SUCCESS, ENOSPC if the node does not fit in the page, or
+ * MDB_FAIL if the overflow page(s) could not be allocated.
+ */
+static int
+mdb_add_node(MDB_txn *txn, MDB_dbi dbi, MDB_page *mp, indx_t indx,
+    MDB_val *key, MDB_val *data, pgno_t pgno, uint8_t flags)
+{
+       unsigned int     i;
+       size_t           node_size = NODESIZE;
+       indx_t           ofs;
+       MDB_node        *node;
+       MDB_dpage       *ofp = NULL;            /* overflow page */
+       int              need_ovf = 0;          /* data must go to overflow page(s) */
+
+       assert(mp->mp_upper >= mp->mp_lower);
+
+       DPRINTF("add node [%.*s] to %s page %lu at index %i, key size %zu",
+           key ? (int)key->mv_size : 0, key ? (char *)key->mv_data : NULL,
+           IS_LEAF(mp) ? "leaf" : "branch",
+           mp->mp_pgno, indx, key ? key->mv_size : 0);
+
+       if (key != NULL)
+               node_size += key->mv_size;
+
+       if (IS_LEAF(mp)) {
+               assert(data);
+               if (F_ISSET(flags, F_BIGDATA)) {
+                       /* Data already on overflow page. */
+                       node_size += sizeof(pgno_t);
+               } else if (data->mv_size >= txn->mt_env->me_psize / MDB_MINKEYS) {
+                       /* Data goes on overflow page(s); they are only
+                        * allocated below, once the node is known to fit.
+                        */
+                       need_ovf = 1;
+                       node_size += sizeof(pgno_t);
+               } else {
+                       node_size += data->mv_size;
+               }
+       }
+
+       /* Check for room before allocating anything, so that an ENOSPC
+        * return does not leave freshly allocated overflow pages behind.
+        */
+       if (node_size + sizeof(indx_t) > SIZELEFT(mp)) {
+               DPRINTF("not enough room in page %lu, got %u ptrs",
+                   mp->mp_pgno, NUMKEYS(mp));
+               DPRINTF("upper - lower = %u - %u = %u", mp->mp_upper, mp->mp_lower,
+                   mp->mp_upper - mp->mp_lower);
+               DPRINTF("node size = %zu", node_size);
+               return ENOSPC;
+       }
+
+       if (need_ovf) {
+               int ovpages = OVPAGES(data->mv_size, txn->mt_env->me_psize);
+               /* Put data on overflow page. */
+               DPRINTF("data size is %zu, put on overflow page",
+                   data->mv_size);
+               if ((ofp = mdb_new_page(txn, dbi, P_OVERFLOW, ovpages)) == NULL)
+                       return MDB_FAIL;
+               DPRINTF("allocated overflow page %lu", ofp->p.mp_pgno);
+               flags |= F_BIGDATA;
+       }
+
+       /* Move higher pointers up one slot. */
+       for (i = NUMKEYS(mp); i > indx; i--)
+               mp->mp_ptrs[i] = mp->mp_ptrs[i - 1];
+
+       /* Adjust free space offsets. */
+       ofs = mp->mp_upper - node_size;
+       assert(ofs >= mp->mp_lower + sizeof(indx_t));
+       mp->mp_ptrs[indx] = ofs;
+       mp->mp_upper = ofs;
+       mp->mp_lower += sizeof(indx_t);
+
+       /* Write the node data. */
+       node = NODEPTR(mp, indx);
+       node->mn_ksize = (key == NULL) ? 0 : key->mv_size;
+       node->mn_flags = flags;
+       if (IS_LEAF(mp))
+               node->mn_dsize = data->mv_size;
+       else
+               node->mn_pgno = pgno;
+
+       if (key)
+               memcpy(NODEKEY(node), key->mv_data, key->mv_size);
+
+       if (IS_LEAF(mp)) {
+               assert(key);
+               if (ofp == NULL) {
+                       /* Either an existing overflow page number or the
+                        * data itself is stored right after the key.
+                        */
+                       if (F_ISSET(flags, F_BIGDATA))
+                               memcpy(node->mn_data + key->mv_size, data->mv_data,
+                                   sizeof(pgno_t));
+                       else
+                               memcpy(node->mn_data + key->mv_size, data->mv_data,
+                                   data->mv_size);
+               } else {
+                       /* Store the new overflow page number in the node
+                        * and the data on the overflow page.
+                        */
+                       memcpy(node->mn_data + key->mv_size, &ofp->p.mp_pgno,
+                           sizeof(pgno_t));
+                       memcpy(METADATA(&ofp->p), data->mv_data, data->mv_size);
+               }
+       }
+
+       return MDB_SUCCESS;
+}
+
+/* Remove node <indx> from page <mp>: drop its slot from the pointer
+ * array, close the gap it leaves in the node area, and give the freed
+ * space back by adjusting mp_lower and mp_upper.
+ */
+static void
+mdb_del_node(MDB_page *mp, indx_t indx)
+{
+       unsigned int     sz;
+       indx_t           from, to, nkeys, victim_ofs;
+       MDB_node        *victim;
+       char            *low;
+
+       DPRINTF("delete node %u on %s page %lu", indx,
+           IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno);
+       assert(indx < NUMKEYS(mp));
+
+       /* Size of the node being removed. */
+       victim = NODEPTR(mp, indx);
+       sz = NODESIZE + victim->mn_ksize;
+       if (IS_LEAF(mp)) {
+               if (F_ISSET(victim->mn_flags, F_BIGDATA))
+                       sz += sizeof(pgno_t);
+               else
+                       sz += NODEDSZ(victim);
+       }
+
+       /* Compact the pointer array, skipping the removed slot. Nodes
+        * stored below the removed one are about to move up by sz bytes.
+        */
+       victim_ofs = mp->mp_ptrs[indx];
+       nkeys = NUMKEYS(mp);
+       for (from = to = 0; from < nkeys; from++) {
+               indx_t ofs;
+
+               if (from == indx)
+                       continue;
+               ofs = mp->mp_ptrs[from];
+               if (ofs < victim_ofs)
+                       ofs += sz;
+               mp->mp_ptrs[to++] = ofs;
+       }
+
+       /* Slide the node data below the removed node over it. */
+       low = (char *)mp + mp->mp_upper;
+       memmove(low + sz, low, victim_ofs - mp->mp_upper);
+
+       mp->mp_lower -= sizeof(indx_t);
+       mp->mp_upper += sz;
+}
+
+/* First-stage setup of sub-cursor <mx> for database <dbi> of <txn>.
+ * mx->mx_txn becomes a copy of *txn that uses mx's own mx_dbxs / mx_dbs
+ * arrays. Slots 0 and 1 mirror the transaction's; when dbi > 1 slot 2
+ * mirrors <dbi>. The slot after the parent is set up for the child DB:
+ * it takes the parent's md_dcmp as its comparison function and the
+ * parent's md_rel, and the sub-cursor is pointed at it.
+ */
+static void
+mdb_xcursor_init0(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
+{
+       MDB_dbi parent = 1;     /* slot of the parent DB in mx_dbxs */
+       MDB_dbi child;          /* slot of the child DB in mx_dbxs */
+
+       mx->mx_txn = *txn;
+       mx->mx_txn.mt_dbxs = mx->mx_dbxs;
+       mx->mx_txn.mt_dbs = mx->mx_dbs;
+
+       mx->mx_dbxs[0] = txn->mt_dbxs[0];
+       mx->mx_dbxs[1] = txn->mt_dbxs[1];
+       if (dbi > 1) {
+               parent = 2;
+               mx->mx_dbxs[parent] = txn->mt_dbxs[dbi];
+       }
+       child = parent + 1;
+
+       mx->mx_dbxs[child].md_parent = parent;
+       mx->mx_dbxs[child].md_cmp = mx->mx_dbxs[parent].md_dcmp;
+       mx->mx_dbxs[child].md_rel = mx->mx_dbxs[parent].md_rel;
+       mx->mx_dbxs[child].md_dirty = 0;
+       mx->mx_txn.mt_numdbs = child + 1;
+
+       SLIST_INIT(&mx->mx_cursor.mc_stack);
+       mx->mx_cursor.mc_txn = &mx->mx_txn;
+       mx->mx_cursor.mc_dbi = child;
+}
+
+/* Second-stage setup of sub-cursor <mx>: bind it to the child DB whose
+ * MDB_db record is the data of <node>. The transaction's DB records are
+ * copied into mx->mx_dbs, the child's record is taken from the node, the
+ * child's name is set to the node's key, and the transaction's
+ * mt_next_pgno, mt_oldest and mt_u are refreshed in mx->mx_txn.
+ */
+static void
+mdb_xcursor_init1(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx, MDB_node *node)
+{
+       MDB_dbi child = (dbi > 1) ? 3 : 2;      /* slot of the child DB */
+       MDB_db *rec = NODEDATA(node);
+
+       mx->mx_dbs[0] = txn->mt_dbs[0];
+       mx->mx_dbs[1] = txn->mt_dbs[1];
+       if (dbi > 1)
+               mx->mx_dbs[2] = txn->mt_dbs[dbi];
+
+       mx->mx_dbs[child] = *rec;
+       mx->mx_dbxs[child].md_name.mv_data = NODEKEY(node);
+       mx->mx_dbxs[child].md_name.mv_size = node->mn_ksize;
+
+       mx->mx_txn.mt_next_pgno = txn->mt_next_pgno;
+       mx->mx_txn.mt_oldest = txn->mt_oldest;
+       mx->mx_txn.mt_u = txn->mt_u;
+}
+
+/* Copy state changed through sub-cursor <mx> back into <txn>:
+ * mt_next_pgno, mt_oldest, mt_u, plus the DB records and dirty flags of
+ * slots 0 and 1 and, when dbi > 1, of <dbi> (kept in slot 2 of mx).
+ */
+static void
+mdb_xcursor_fini(MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
+{
+       MDB_dbi slot;
+
+       txn->mt_next_pgno = mx->mx_txn.mt_next_pgno;
+       txn->mt_oldest = mx->mx_txn.mt_oldest;
+       txn->mt_u = mx->mx_txn.mt_u;
+
+       for (slot = 0; slot < 2; slot++)
+               txn->mt_dbs[slot] = mx->mx_dbs[slot];
+       for (slot = 0; slot < 2; slot++)
+               txn->mt_dbxs[slot].md_dirty = mx->mx_dbxs[slot].md_dirty;
+
+       if (dbi > 1) {
+               txn->mt_dbs[dbi] = mx->mx_dbs[2];
+               txn->mt_dbxs[dbi].md_dirty = mx->mx_dbxs[2].md_dirty;
+       }
+}
+
+/* Create a cursor on database <dbi> of <txn> and return it in *ret.
+ * For MDB_DUPSORT databases an MDB_xcursor is allocated in the same block,
+ * directly after the cursor, and given its first-stage initialization.
+ * Returns MDB_SUCCESS, EINVAL on bad arguments, or ENOMEM.
+ */
+int
+mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
+{
+       MDB_cursor      *mc;
+       size_t           alloc_size = sizeof(MDB_cursor);
+       int              dupsort;
+
+       if (txn == NULL || ret == NULL || !dbi || dbi >= txn->mt_numdbs)
+               return EINVAL;
+
+       dupsort = (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) != 0;
+       if (dupsort)
+               alloc_size += sizeof(MDB_xcursor);
+
+       mc = calloc(1, alloc_size);
+       if (mc == NULL)
+               return ENOMEM;
+
+       SLIST_INIT(&mc->mc_stack);
+       mc->mc_dbi = dbi;
+       mc->mc_txn = txn;
+       if (dupsort) {
+               /* The sub-cursor lives right behind the cursor itself. */
+               MDB_xcursor *sub = (MDB_xcursor *)(mc + 1);
+               mc->mc_xcursor = sub;
+               mdb_xcursor_init0(txn, dbi, sub);
+       }
+
+       *ret = mc;
+
+       return MDB_SUCCESS;
+}
+
+/* Store in *countp the md_entries value of the sub-cursor's database,
+ * i.e. the number of duplicate data items for the current key.
+ * Returns EINVAL on NULL arguments, if the cursor's database is not
+ * MDB_DUPSORT, or if the sub-cursor has not been initialized.
+ */
+int
+mdb_cursor_count(MDB_cursor *mc, unsigned long *countp)
+{
+       MDB_xcursor *sub;
+
+       if (mc == NULL || countp == NULL)
+               return EINVAL;
+
+       if (!(mc->mc_txn->mt_dbs[mc->mc_dbi].md_flags & MDB_DUPSORT))
+               return EINVAL;
+
+       sub = mc->mc_xcursor;
+       if (!sub->mx_cursor.mc_initialized)
+               return EINVAL;
+
+       *countp = sub->mx_txn.mt_dbs[sub->mx_cursor.mc_dbi].md_entries;
+       return MDB_SUCCESS;
+}
+
+/* Release <cursor>: pop every page off its stack, and for MDB_DUPSORT
+ * databases write the sub-cursor's state back to the transaction and pop
+ * the sub-cursor's stack as well. A NULL cursor is ignored.
+ */
+void
+mdb_cursor_close(MDB_cursor *cursor)
+{
+       MDB_cursor *sub;
+
+       if (cursor == NULL)
+               return;
+
+       while (!CURSOR_EMPTY(cursor))
+               cursor_pop_page(cursor);
+
+       if (cursor->mc_txn->mt_dbs[cursor->mc_dbi].md_flags & MDB_DUPSORT) {
+               mdb_xcursor_fini(cursor->mc_txn, cursor->mc_dbi, cursor->mc_xcursor);
+               sub = &cursor->mc_xcursor->mx_cursor;
+               while (!CURSOR_EMPTY(sub))
+                       cursor_pop_page(sub);
+       }
+
+       free(cursor);
+}
+
+/* Replace the key of node <indx> on page <mp> with <key>.
+ * When the new key has a different length, the node area from mp_upper
+ * up to and including this node's header is shifted by the difference,
+ * so the node grows or shrinks in place.
+ * Returns MDB_SUCCESS, or ENOSPC if the key grows by more than the free
+ * space left in the page.
+ */
+static int
+mdb_update_key(MDB_page *mp, indx_t indx, MDB_val *key)
+{
+       indx_t                   ptr, i, numkeys;
+       int                      delta;
+       size_t                   len;
+       MDB_node                *node;
+       char                    *base;
+
+       node = NODEPTR(mp, indx);
+       ptr = mp->mp_ptrs[indx];
+       DPRINTF("update key %u (ofs %u) [%.*s] to [%.*s] on page %lu",
+           indx, ptr,
+           (int)node->mn_ksize, (char *)NODEKEY(node),
+           (int)key->mv_size, (char *)key->mv_data,
+           mp->mp_pgno);
+
+       /* delta > 0: the key grows; delta < 0: the key shrinks. */
+       delta = key->mv_size - node->mn_ksize;
+       if (delta) {
+               if (delta > 0 && SIZELEFT(mp) < delta) {
+                       DPRINTF("OUCH! Not enough room, delta = %d", delta);
+                       return ENOSPC;
+               }
+
+               /* Every node stored at or below this node's offset
+                * (this node included) moves by delta bytes.
+                */
+               numkeys = NUMKEYS(mp);
+               for (i = 0; i < numkeys; i++) {
+                       if (mp->mp_ptrs[i] <= ptr)
+                               mp->mp_ptrs[i] -= delta;
+               }
+
+               /* Shift everything from mp_upper through this node's
+                * header; the bytes after the header are left in place.
+                */
+               base = (char *)mp + mp->mp_upper;
+               len = ptr - mp->mp_upper + NODESIZE;
+               memmove(base - delta, base, len);
+               mp->mp_upper -= delta;
+
+               /* The node has moved: look it up again before writing. */
+               node = NODEPTR(mp, indx);
+               node->mn_ksize = key->mv_size;
+       }
+
+       memcpy(NODEKEY(node), key->mv_data, key->mv_size);
+
+       return MDB_SUCCESS;
+}
+
+/* Move node <srcindx> of page <src> to slot <dstindx> of page <dst>,
+ * then fix up the separator keys:
+ *  - if the first node of <src> was moved, the parent's separator for
+ *    <src> becomes the key of the node that is now first in <src>;
+ *  - if the node became the first node of <dst>, the parent's separator
+ *    for <dst> becomes the moved key;
+ *  - on branch pages the node at index 0 is given an empty key.
+ *
+ * Returns MDB_SUCCESS, or the error returned by mdb_touch(),
+ * mdb_add_node() or mdb_update_key().
+ */
+static int
+mdb_move_node(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *src, indx_t srcindx,
+    MDB_pageparent *dst, indx_t dstindx)
+{
+       int                      rc;
+       MDB_node                *srcnode;
+       MDB_node                *first;
+       MDB_val          key, data;
+       MDB_val          nullkey;
+
+       /* Empty key for index 0 of a branch page. mv_data points at a
+        * valid object because mdb_update_key() passes it to memcpy().
+        */
+       nullkey.mv_size = 0;
+       nullkey.mv_data = "";
+
+       srcnode = NODEPTR(src->mp_page, srcindx);
+       DPRINTF("moving %s node %u [%.*s] on page %lu to node %u on page %lu",
+           IS_LEAF(src->mp_page) ? "leaf" : "branch",
+           srcindx,
+           (int)srcnode->mn_ksize, (char *)NODEKEY(srcnode),
+           src->mp_page->mp_pgno,
+           dstindx, dst->mp_page->mp_pgno);
+
+       /* Mark src and dst as dirty. */
+       if ((rc = mdb_touch(txn, src)) ||
+           (rc = mdb_touch(txn, dst)))
+               return rc;
+
+       /* Add the node to the destination page.
+        */
+       key.mv_size = srcnode->mn_ksize;
+       key.mv_data = NODEKEY(srcnode);
+       data.mv_size = NODEDSZ(srcnode);
+       data.mv_data = NODEDATA(srcnode);
+       rc = mdb_add_node(txn, dbi, dst->mp_page, dstindx, &key, &data, NODEPGNO(srcnode),
+           srcnode->mn_flags);
+       if (rc != MDB_SUCCESS)
+               return rc;
+
+       /* Delete the node from the source page.
+        */
+       mdb_del_node(src->mp_page, srcindx);
+
+       /* Update the parent separators.
+        * key/data pointed into the source page, whose node area has just
+        * been shifted by mdb_del_node(), so the keys are read again from
+        * the pages instead of reusing the stale pointers.
+        */
+       if (srcindx == 0 && src->mp_pi != 0 && NUMKEYS(src->mp_page) > 0) {
+               /* The lowest key of src is now the node after the moved one. */
+               first = NODEPTR(src->mp_page, 0);
+               key.mv_size = first->mn_ksize;
+               key.mv_data = NODEKEY(first);
+               DPRINTF("update separator for source page %lu to [%.*s]",
+                   src->mp_page->mp_pgno, (int)key.mv_size, (char *)key.mv_data);
+               if ((rc = mdb_update_key(src->mp_parent, src->mp_pi,
+                   &key)) != MDB_SUCCESS)
+                       return rc;
+       }
+
+       if (srcindx == 0 && IS_BRANCH(src->mp_page)) {
+               /* Must not live inside assert(): the call has side
+                * effects and would vanish when NDEBUG is defined.
+                */
+               rc = mdb_update_key(src->mp_page, 0, &nullkey);
+               assert(rc == MDB_SUCCESS);
+               if (rc != MDB_SUCCESS)
+                       return rc;
+       }
+
+       if (dstindx == 0 && dst->mp_pi != 0) {
+               /* The moved node is now the first node of dst. */
+               first = NODEPTR(dst->mp_page, 0);
+               key.mv_size = first->mn_ksize;
+               key.mv_data = NODEKEY(first);
+               DPRINTF("update separator for destination page %lu to [%.*s]",
+                   dst->mp_page->mp_pgno, (int)key.mv_size, (char *)key.mv_data);
+               if ((rc = mdb_update_key(dst->mp_parent, dst->mp_pi,
+                   &key)) != MDB_SUCCESS)
+                       return rc;
+       }
+
+       if (dstindx == 0 && IS_BRANCH(dst->mp_page)) {
+               rc = mdb_update_key(dst->mp_page, 0, &nullkey);
+               assert(rc == MDB_SUCCESS);
+               if (rc != MDB_SUCCESS)
+                       return rc;
+       }
+
+       return MDB_SUCCESS;
+}
+
+/* Merge page <src> into page <dst>: every node of src is appended to
+ * dst, src's entry is removed from its parent page, the page counters
+ * are adjusted, and the parent page is then rebalanced.
+ * Neither page may be a root page (both must have a parent).
+ * Returns MDB_SUCCESS, or the error from mdb_touch(), mdb_add_node(),
+ * mdb_update_key() or mdb_rebalance().
+ */
+static int
+mdb_merge(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *src, MDB_pageparent *dst)
+{
+       int                      rc;
+       indx_t                   i;
+       MDB_node                *srcnode;
+       MDB_val          key, data;
+       MDB_pageparent  mpp;
+       MDB_dhead *dh;
+
+       DPRINTF("merging page %lu and %lu", src->mp_page->mp_pgno, dst->mp_page->mp_pgno);
+
+       assert(txn != NULL);
+       assert(src->mp_parent); /* can't merge root page */
+       assert(dst->mp_parent);
+
+       /* Mark src and dst as dirty. */
+       if ((rc = mdb_touch(txn, src)) ||
+           (rc = mdb_touch(txn, dst)))
+               return rc;
+
+       /* Move all nodes from src to dst.
+        * Each node is appended at the current end of dst; src itself is
+        * left unmodified by this loop.
+        */
+       for (i = 0; i < NUMKEYS(src->mp_page); i++) {
+               srcnode = NODEPTR(src->mp_page, i);
+
+               key.mv_size = srcnode->mn_ksize;
+               key.mv_data = NODEKEY(srcnode);
+               data.mv_size = NODEDSZ(srcnode);
+               data.mv_data = NODEDATA(srcnode);
+               rc = mdb_add_node(txn, dbi, dst->mp_page, NUMKEYS(dst->mp_page), &key,
+                   &data, NODEPGNO(srcnode), srcnode->mn_flags);
+               if (rc != MDB_SUCCESS)
+                       return rc;
+       }
+
+       DPRINTF("dst page %lu now has %u keys (%.1f%% filled)",
+           dst->mp_page->mp_pgno, NUMKEYS(dst->mp_page), (float)PAGEFILL(txn->mt_env, dst->mp_page) / 10);
+
+       /* Unlink the src page from parent.
+        * If src was the parent's first child, the node that is now first
+        * in the parent gets an empty key.
+        */
+       mdb_del_node(src->mp_parent, src->mp_pi);
+       if (src->mp_pi == 0) {
+               key.mv_size = 0;
+               if ((rc = mdb_update_key(src->mp_parent, 0, &key)) != MDB_SUCCESS)
+                       return rc;
+       }
+
+       /* NOTE(review): only the page counters are adjusted here; this
+        * function does not put the emptied src page on any free list.
+        */
+       if (IS_LEAF(src->mp_page))
+               txn->mt_dbs[dbi].md_leaf_pages--;
+       else
+               txn->mt_dbs[dbi].md_branch_pages--;
+
+       /* Describe the parent page for mdb_rebalance(). Its own parent and
+        * parent index are read from the MDB_dhead located directly in
+        * front of the parent page in memory.
+        */
+       mpp.mp_page = src->mp_parent;
+       dh = (MDB_dhead *)src->mp_parent;
+       dh--;
+       mpp.mp_parent = dh->md_parent;
+       mpp.mp_pi = dh->md_pi;
+
+       return mdb_rebalance(txn, dbi, &mpp);
+}
+
+/* Minimum PAGEFILL() value below which a page gets rebalanced. The debug
+ * output prints PAGEFILL() / 10 as a percentage, so 250 presumably means
+ * 25.0% full.
+ */
+#define FILL_THRESHOLD  250
+
+/* Rebalance the tree after entries were removed from page <mpp>.
+ * Pages at or above FILL_THRESHOLD are left alone. An empty root marks
+ * the tree as empty, and a root branch page with a single child is
+ * replaced by that child. Any other page either takes one node from a
+ * neighbor (mdb_move_node) or is merged with it (mdb_merge).
+ * Returns MDB_SUCCESS, MDB_FAIL if a page could not be read, or the
+ * result of mdb_move_node() / mdb_merge().
+ */
+static int
+mdb_rebalance(MDB_txn *txn, MDB_dbi dbi, MDB_pageparent *mpp)
+{
+       MDB_node        *node;
+       MDB_page        *root;
+       MDB_pageparent npp;
+       indx_t           si = 0, di = 0;
+
+       assert(txn != NULL);
+       assert(mpp != NULL);
+
+       DPRINTF("rebalancing %s page %lu (has %u keys, %.1f%% full)",
+           IS_LEAF(mpp->mp_page) ? "leaf" : "branch",
+           mpp->mp_page->mp_pgno, NUMKEYS(mpp->mp_page), (float)PAGEFILL(txn->mt_env, mpp->mp_page) / 10);
+
+       if (PAGEFILL(txn->mt_env, mpp->mp_page) >= FILL_THRESHOLD) {
+               DPRINTF("no need to rebalance page %lu, above fill threshold",
+                   mpp->mp_page->mp_pgno);
+               return MDB_SUCCESS;
+       }
+
+       /* No parent: this is the root page. */
+       if (mpp->mp_parent == NULL) {
+               if (NUMKEYS(mpp->mp_page) == 0) {
+                       DPRINTF("tree is completely empty");
+                       txn->mt_dbs[dbi].md_root = P_INVALID;
+                       txn->mt_dbs[dbi].md_depth--;
+                       txn->mt_dbs[dbi].md_leaf_pages--;
+               } else if (IS_BRANCH(mpp->mp_page) && NUMKEYS(mpp->mp_page) == 1) {
+                       /* The only child becomes the new root. Note that
+                        * md_root is already updated when the page read
+                        * below fails.
+                        */
+                       DPRINTF("collapsing root page!");
+                       txn->mt_dbs[dbi].md_root = NODEPGNO(NODEPTR(mpp->mp_page, 0));
+                       if ((root = mdb_get_page(txn, txn->mt_dbs[dbi].md_root)) == NULL)
+                               return MDB_FAIL;
+                       txn->mt_dbs[dbi].md_depth--;
+                       txn->mt_dbs[dbi].md_branch_pages--;
+               } else
+                       DPRINTF("root page doesn't need rebalancing");
+               return MDB_SUCCESS;
+       }
+
+       /* The parent (branch page) must have at least 2 pointers,
+        * otherwise the tree is invalid.
+        */
+       assert(NUMKEYS(mpp->mp_parent) > 1);
+
+       /* Leaf page fill factor is below the threshold.
+        * Try to move keys from left or right neighbor, or
+        * merge with a neighbor page.
+        */
+
+       /* Find neighbors.
+        * si is the index of the node to take from the neighbor and di the
+        * index at which it would be inserted into this page.
+        */
+       if (mpp->mp_pi == 0) {
+               /* We're the leftmost leaf in our parent.
+                */
+               DPRINTF("reading right neighbor");
+               node = NODEPTR(mpp->mp_parent, mpp->mp_pi + 1);
+               if ((npp.mp_page = mdb_get_page(txn, NODEPGNO(node))) == NULL)
+                       return MDB_FAIL;
+               npp.mp_pi = mpp->mp_pi + 1;
+               /* Take the neighbor's first node, append it to this page. */
+               si = 0;
+               di = NUMKEYS(mpp->mp_page);
+       } else {
+               /* There is at least one neighbor to the left.
+                */
+               DPRINTF("reading left neighbor");
+               node = NODEPTR(mpp->mp_parent, mpp->mp_pi - 1);
+               if ((npp.mp_page = mdb_get_page(txn, NODEPGNO(node))) == NULL)
+                       return MDB_FAIL;
+               npp.mp_pi = mpp->mp_pi - 1;
+               /* Take the neighbor's last node, prepend it to this page. */
+               si = NUMKEYS(npp.mp_page) - 1;
+               di = 0;
+       }
+       npp.mp_parent = mpp->mp_parent;
+
+       DPRINTF("found neighbor page %lu (%u keys, %.1f%% full)",
+           npp.mp_page->mp_pgno, NUMKEYS(npp.mp_page), (float)PAGEFILL(txn->mt_env, npp.mp_page) / 10);
+
+       /* If the neighbor page is above threshold and has at least two
+        * keys, move one key from it.
+        *
+        * Otherwise we should try to merge them.
+        * The right-hand page of the pair is always merged into the
+        * left-hand one.
+        */
+       if (PAGEFILL(txn->mt_env, npp.mp_page) >= FILL_THRESHOLD && NUMKEYS(npp.mp_page) >= 2)
+               return mdb_move_node(txn, dbi, &npp, si, mpp, di);
+       else { /* FIXME: if (has_enough_room()) */
+               if (mpp->mp_pi == 0)
+                       return mdb_merge(txn, dbi, &npp, mpp);
+               else
+                       return mdb_merge(txn, dbi, mpp, &npp);
+       }
+}
+
+/* Delete node <ki> (<leaf>) from the leaf page described by <mpp>.
+ * If the node's data is stored on overflow pages, each of those page
+ * numbers is first added to the transaction's free page list. The entry
+ * count is decremented and the page is rebalanced; a rebalance failure
+ * sets MDB_TXN_ERROR on the transaction.
+ * Returns the result of mdb_rebalance().
+ */
+static int
+mdb_del0(MDB_txn *txn, MDB_dbi dbi, unsigned int ki, MDB_pageparent *mpp, MDB_node *leaf)
+{
+       int status;
+
+       /* add overflow pages to free list */
+       if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
+               pgno_t pg;
+               int remaining;
+
+               memcpy(&pg, NODEDATA(leaf), sizeof(pg));
+               remaining = OVPAGES(NODEDSZ(leaf), txn->mt_env->me_psize);
+               for (; remaining > 0; remaining--, pg++) {
+                       DPRINTF("freed ov page %lu", pg);
+                       mdb_midl_insert(txn->mt_free_pgs, pg);
+               }
+       }
+
+       mdb_del_node(mpp->mp_page, ki);
+       txn->mt_dbs[dbi].md_entries--;
+
+       status = mdb_rebalance(txn, dbi, mpp);
+       if (status != MDB_SUCCESS)
+               txn->mt_flags |= MDB_TXN_ERROR;
+
+       return status;
+}
+
+/* Delete <key> from database <dbi>.
+ *
+ * For MDB_DUPSORT databases:
+ *  - flags == MDB_DEL_DUP deletes only the duplicate <data> from the
+ *    key's child DB; the key itself is removed only when the child DB
+ *    becomes empty. <data> must not be NULL in this mode.
+ *  - otherwise pages of the key's child DB are added to the free page
+ *    list and the key is removed.
+ * If <data> is non-NULL, the node's data is read into it before the
+ * node is deleted.
+ *
+ * Returns MDB_SUCCESS, EINVAL on invalid arguments or a read-only
+ * transaction, MDB_NOTFOUND if the key does not exist, MDB_FAIL if a
+ * child DB page could not be read, or an error from the lower layers.
+ */
+int
+mdb_del(MDB_txn *txn, MDB_dbi dbi,
+    MDB_val *key, MDB_val *data,
+       unsigned int flags)
+{
+       int              rc, exact;
+       unsigned int     ki;
+       MDB_node        *leaf;
+       MDB_pageparent  mpp;
+
+       /* Validate key before the trace below dereferences it. */
+       assert(key != NULL);
+
+       if (txn == NULL || key == NULL || !dbi || dbi >= txn->mt_numdbs)
+               return EINVAL;
+
+       DPRINTF("========> delete key %.*s", (int)key->mv_size, (char *)key->mv_data);
+
+       if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
+               return EINVAL;
+       }
+
+       if (key->mv_size == 0 || key->mv_size > MAXKEYSIZE) {
+               return EINVAL;
+       }
+
+       mpp.mp_parent = NULL;
+       mpp.mp_pi = 0;
+       if ((rc = mdb_search_page(txn, dbi, key, NULL, 1, &mpp)) != MDB_SUCCESS)
+               return rc;
+
+       leaf = mdb_search_node(txn, dbi, mpp.mp_page, key, &exact, &ki);
+       if (leaf == NULL || !exact) {
+               return MDB_NOTFOUND;
+       }
+
+       if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
+               MDB_xcursor mx;
+               MDB_pageparent mp2;
+
+               /* The duplicate to delete is the key of the child DB. */
+               if (flags == MDB_DEL_DUP && data == NULL)
+                       return EINVAL;
+
+               mdb_xcursor_init0(txn, dbi, &mx);
+               mdb_xcursor_init1(txn, dbi, &mx, leaf);
+               if (flags == MDB_DEL_DUP) {
+                       rc = mdb_del(&mx.mx_txn, mx.mx_cursor.mc_dbi, data, NULL, 0);
+                       mdb_xcursor_fini(txn, dbi, &mx);
+                       if (rc != MDB_SUCCESS)
+                               return rc;
+                       /* If sub-DB still has entries, we're done */
+                       if (mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi].md_root != P_INVALID) {
+                               memcpy(NODEDATA(leaf), &mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi],
+                                       sizeof(MDB_db));
+                               return rc;
+                       }
+                       /* otherwise fall thru and delete the sub-DB */
+               } else {
+                       int failed = 0;
+
+                       /* add all the child DB's pages to the free list */
+                       /* NOTE(review): the walk below stops as soon as the
+                        * top of the stack has no parent, so the children of
+                        * the child DB's root page are not visited - confirm
+                        * whether that is intended.
+                        */
+                       rc = mdb_search_page(&mx.mx_txn, mx.mx_cursor.mc_dbi,
+                               NULL, &mx.mx_cursor, 0, &mp2);
+                       if (rc == MDB_SUCCESS) {
+                               MDB_ppage *top, *parent;
+                               MDB_node *ni;
+                               unsigned int i;
+
+                               cursor_pop_page(&mx.mx_cursor);
+                               top = CURSOR_TOP(&mx.mx_cursor);
+                               if (top != NULL) {
+                                       parent = SLIST_NEXT(top, mp_entry);
+                                       while (parent != NULL) {
+                                               for (i=0; i<NUMKEYS(top->mp_page); i++) {
+                                                       ni = NODEPTR(top->mp_page, i);
+                                                       mdb_midl_insert(txn->mt_free_pgs, ni->mn_pgno);
+                                               }
+                                               parent->mp_ki++;
+                                               if (parent->mp_ki >= NUMKEYS(parent->mp_page)) {
+                                                       /* Parent exhausted: go up one level. */
+                                                       cursor_pop_page(&mx.mx_cursor);
+                                                       top = CURSOR_TOP(&mx.mx_cursor);
+                                                       parent = SLIST_NEXT(top, mp_entry);
+                                               } else {
+                                                       /* Step to the next sibling page. */
+                                                       ni = NODEPTR(parent->mp_page, parent->mp_ki);
+                                                       top->mp_page = mdb_get_page(&mx.mx_txn, ni->mn_pgno);
+                                                       if (top->mp_page == NULL) {
+                                                               failed = 1;
+                                                               break;
+                                                       }
+                                               }
+                                       }
+                               }
+                               if (!failed)
+                                       mdb_midl_insert(txn->mt_free_pgs, mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi].md_root);
+                       }
+
+                       /* Pop whatever is still on the temporary cursor's
+                        * stack, as mdb_cursor_close() does for real cursors.
+                        */
+                       while (!CURSOR_EMPTY(&mx.mx_cursor))
+                               cursor_pop_page(&mx.mx_cursor);
+
+                       if (failed) {
+                               /* The free list was already modified. */
+                               txn->mt_flags |= MDB_TXN_ERROR;
+                               return MDB_FAIL;
+                       }
+               }
+       }
+
+       if (data && (rc = mdb_read_data(txn, leaf, data)) != MDB_SUCCESS)
+               return rc;
+
+       return mdb_del0(txn, dbi, ki, &mpp, leaf);
+}
+
+/* Split page <*mpp>, and insert <key,(data|newpgno)> in either left or
+ * right sibling, at index <*newindxp> (as if unsplit). Updates *mpp and
+ * *newindxp with the actual values after split, ie if *mpp and *newindxp
+ * refer to a node in the new right sibling page.
+ *
+ * If the page is a root page, a new branch page is created above it
+ * first. If the parent has no room for the separator key, the parent is
+ * split recursively.
+ *
+ * Returns MDB_SUCCESS, MDB_FAIL if a page or the scratch copy could not
+ * be allocated or the separator could not be stored, or the error of the
+ * first mdb_add_node() call that fails while redistributing the nodes.
+ */
+static int
+mdb_split(MDB_txn *txn, MDB_dbi dbi, MDB_page **mpp, unsigned int *newindxp,
+    MDB_val *newkey, MDB_val *newdata, pgno_t newpgno)
+{
+       uint8_t          flags;
+       int              rc = MDB_SUCCESS, ins_new = 0;
+       indx_t           newindx;
+       pgno_t           pgno = 0;
+       unsigned int     i, j, split_indx;
+       MDB_node        *node;
+       MDB_val  sepkey, rkey, rdata;
+       MDB_page        *copy;
+       MDB_dpage       *mdp, *rdp, *pdp;
+       MDB_dhead *dh;
+
+       assert(txn != NULL);
+
+       /* The MDB_dhead sits directly in front of the page in memory. */
+       dh = ((MDB_dhead *)*mpp) - 1;
+       mdp = (MDB_dpage *)dh;
+       newindx = *newindxp;
+
+       DPRINTF("-----> splitting %s page %lu and adding [%.*s] at index %i",
+           IS_LEAF(&mdp->p) ? "leaf" : "branch", mdp->p.mp_pgno,
+           (int)newkey->mv_size, (char *)newkey->mv_data, *newindxp);
+
+       if (mdp->h.md_parent == NULL) {
+               /* Splitting the root: grow the tree by one level. */
+               if ((pdp = mdb_new_page(txn, dbi, P_BRANCH, 1)) == NULL)
+                       return MDB_FAIL;
+               mdp->h.md_pi = 0;
+               mdp->h.md_parent = &pdp->p;
+               txn->mt_dbs[dbi].md_root = pdp->p.mp_pgno;
+               DPRINTF("root split! new root = %lu", pdp->p.mp_pgno);
+               txn->mt_dbs[dbi].md_depth++;
+
+               /* Add left (implicit) pointer. */
+               if (mdb_add_node(txn, dbi, &pdp->p, 0, NULL, NULL,
+                   mdp->p.mp_pgno, 0) != MDB_SUCCESS)
+                       return MDB_FAIL;
+       } else {
+               DPRINTF("parent branch page is %lu", mdp->h.md_parent->mp_pgno);
+       }
+
+       /* Create a right sibling. */
+       if ((rdp = mdb_new_page(txn, dbi, mdp->p.mp_flags, 1)) == NULL)
+               return MDB_FAIL;
+       rdp->h.md_parent = mdp->h.md_parent;
+       rdp->h.md_pi = mdp->h.md_pi + 1;
+       DPRINTF("new right sibling: page %lu", rdp->p.mp_pgno);
+
+       /* Move half of the keys to the right sibling.
+        * The page is saved in a scratch copy and then emptied; all nodes
+        * are re-inserted from the copy below.
+        */
+       if ((copy = malloc(txn->mt_env->me_psize)) == NULL)
+               return MDB_FAIL;
+       memcpy(copy, &mdp->p, txn->mt_env->me_psize);
+       memset(&mdp->p.mp_ptrs, 0, txn->mt_env->me_psize - PAGEHDRSZ);
+       mdp->p.mp_lower = PAGEHDRSZ;
+       mdp->p.mp_upper = txn->mt_env->me_psize;
+
+       split_indx = NUMKEYS(copy) / 2 + 1;
+
+       /* First find the separating key between the split pages.
+        */
+       memset(&sepkey, 0, sizeof(sepkey));
+       if (newindx == split_indx) {
+               sepkey.mv_size = newkey->mv_size;
+               sepkey.mv_data = newkey->mv_data;
+       } else {
+               node = NODEPTR(copy, split_indx);
+               sepkey.mv_size = node->mn_ksize;
+               sepkey.mv_data = NODEKEY(node);
+       }
+
+       DPRINTF("separator is [%.*s]", (int)sepkey.mv_size, (char *)sepkey.mv_data);
+
+       /* Copy separator key to the parent.
+        */
+       if (SIZELEFT(rdp->h.md_parent) < mdb_branch_size(txn->mt_env, &sepkey)) {
+               rc = mdb_split(txn, dbi, &rdp->h.md_parent, &rdp->h.md_pi,
+                   &sepkey, NULL, rdp->p.mp_pgno);
+
+               /* Right page might now have changed parent.
+                * Check if left page also changed parent.
+                */
+               if (rdp->h.md_parent != mdp->h.md_parent &&
+                   mdp->h.md_pi >= NUMKEYS(mdp->h.md_parent)) {
+                       mdp->h.md_parent = rdp->h.md_parent;
+                       mdp->h.md_pi = rdp->h.md_pi - 1;
+               }
+       } else {
+               rc = mdb_add_node(txn, dbi, rdp->h.md_parent, rdp->h.md_pi,
+                   &sepkey, NULL, rdp->p.mp_pgno, 0);
+       }
+       if (rc != MDB_SUCCESS) {
+               free(copy);
+               return MDB_FAIL;
+       }
+
+       /* Redistribute the nodes: i walks the nodes of the copy, j is the
+        * insert index in whichever sibling currently receives nodes. The
+        * new entry is slotted in when i reaches newindx.
+        */
+       for (i = j = 0; i <= NUMKEYS(copy); j++) {
+               if (i < split_indx) {
+                       /* Re-insert in left sibling. */
+                       pdp = mdp;
+               } else {
+                       /* Insert in right sibling. */
+                       if (i == split_indx)
+                               /* Reset insert index for right sibling. */
+                               j = (i == newindx && ins_new);
+                       pdp = rdp;
+               }
+
+               if (i == newindx && !ins_new) {
+                       /* Insert the original entry that caused the split. */
+                       rkey.mv_data = newkey->mv_data;
+                       rkey.mv_size = newkey->mv_size;
+                       if (IS_LEAF(&mdp->p)) {
+                               rdata.mv_data = newdata->mv_data;
+                               rdata.mv_size = newdata->mv_size;
+                       } else
+                               pgno = newpgno;
+                       flags = 0;
+
+                       ins_new = 1;
+
+                       /* Update page and index for the new key. */
+                       *newindxp = j;
+                       *mpp = &pdp->p;
+               } else if (i == NUMKEYS(copy)) {
+                       break;
+               } else {
+                       node = NODEPTR(copy, i);
+                       rkey.mv_data = NODEKEY(node);
+                       rkey.mv_size = node->mn_ksize;
+                       if (IS_LEAF(&mdp->p)) {
+                               rdata.mv_data = NODEDATA(node);
+                               rdata.mv_size = node->mn_dsize;
+                       } else
+                               pgno = node->mn_pgno;
+                       flags = node->mn_flags;
+
+                       i++;
+               }
+
+               if (!IS_LEAF(&mdp->p) && j == 0) {
+                       /* First branch index doesn't need key data. */
+                       rkey.mv_size = 0;
+               }
+
+               rc = mdb_add_node(txn, dbi, &pdp->p, j, &rkey, &rdata, pgno,flags);
+               /* Stop at the first failure: carrying on would silently
+                * drop this node and could still report success.
+                */
+               if (rc != MDB_SUCCESS)
+                       break;
+       }
+
+       free(copy);
+       return rc;
+}
+
+/*
+ * Insert or replace a key/data pair in database 'dbi' of 'txn'.
+ * Internal worker behind mdb_put() and mdb_open(); the arguments are
+ * not validated here.
+ *
+ * Plain databases: an exact key match is rejected with MDB_KEYEXIST when
+ * flags == MDB_NOOVERWRITE, overwritten in place when the new data has
+ * the same size, and otherwise deleted and re-added.
+ * MDB_DUPSORT databases: the node stored under the key is an MDB_db
+ * record for a child DB, and 'data' is stored as a key of that child DB
+ * through a recursive call.
+ * flags may also carry F_SUBDATA (as passed by mdb_open()) to mark the
+ * stored node as a sub-database record.
+ *
+ * Returns MDB_SUCCESS, MDB_KEYEXIST, ENOMEM if a new root page cannot be
+ * allocated, or the error reported by the page search, split or node
+ * insertion.  A failed split/insertion also sets MDB_TXN_ERROR on txn.
+ */
+static int
+mdb_put0(MDB_txn *txn, MDB_dbi dbi,
+    MDB_val *key, MDB_val *data, unsigned int flags)
+{
+       int              rc = MDB_SUCCESS, exact;
+       int              replaced = 0;  /* an existing node was deleted for re-insertion */
+       unsigned int     ki;
+       MDB_node        *leaf;
+       MDB_pageparent  mpp;
+       MDB_val xdata, *rdata;
+       MDB_db dummy;
+
+       DPRINTF("==> put key %.*s, size %zu, data size %zu",
+               (int)key->mv_size, (char *)key->mv_data, key->mv_size, data->mv_size);
+
+       mpp.mp_parent = NULL;
+       mpp.mp_pi = 0;
+       rc = mdb_search_page(txn, dbi, key, NULL, 1, &mpp);
+       if (rc == MDB_SUCCESS) {
+               leaf = mdb_search_node(txn, dbi, mpp.mp_page, key, &exact, &ki);
+               if (leaf && exact) {
+                       /* Key exists: for sorted dups only the child DB changes */
+                       if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
+                               goto put_sub;
+                       }
+                       if (flags == MDB_NOOVERWRITE) {
+                               DPRINTF("duplicate key %.*s",
+                                   (int)key->mv_size, (char *)key->mv_data);
+                               return MDB_KEYEXIST;
+                       }
+                       /* same size, just replace it */
+                       if (NODEDSZ(leaf) == data->mv_size) {
+                               memcpy(NODEDATA(leaf), data->mv_data, data->mv_size);
+                               goto done;
+                       }
+                       /* different size: drop the old node and re-add it below;
+                        * this is a replacement, not a new entry
+                        */
+                       mdb_del_node(mpp.mp_page, ki);
+                       replaced = 1;
+               }
+               if (leaf == NULL) {             /* append if not found */
+                       ki = NUMKEYS(mpp.mp_page);
+                       DPRINTF("appending key at index %i", ki);
+               }
+       } else if (rc == MDB_NOTFOUND) {
+               MDB_dpage *dp;
+               /* new file, just write a root leaf page */
+               DPRINTF("allocating new root leaf page");
+               if ((dp = mdb_new_page(txn, dbi, P_LEAF, 1)) == NULL) {
+                       return ENOMEM;
+               }
+               mpp.mp_page = &dp->p;
+               txn->mt_dbs[dbi].md_root = mpp.mp_page->mp_pgno;
+               txn->mt_dbs[dbi].md_depth++;
+               ki = 0;
+       }
+       else
+               goto done;
+
+       assert(IS_LEAF(mpp.mp_page));
+       DPRINTF("there are %u keys, should insert new key at index %i",
+               NUMKEYS(mpp.mp_page), ki);
+
+       /* For sorted dups, the data item at this level is a DB record
+        * for a child DB; the actual data elements are stored as keys
+        * in the child DB.
+        */
+       if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
+               rdata = &xdata;
+               xdata.mv_size = sizeof(MDB_db);
+               xdata.mv_data = &dummy;
+               memset(&dummy, 0, sizeof(dummy));
+               dummy.md_root = P_INVALID;
+       } else {
+               rdata = data;
+       }
+
+       if (SIZELEFT(mpp.mp_page) < mdb_leaf_size(txn->mt_env, key, rdata)) {
+               rc = mdb_split(txn, dbi, &mpp.mp_page, &ki, key, rdata, P_INVALID);
+       } else {
+               /* There is room already in this leaf page. */
+               rc = mdb_add_node(txn, dbi, mpp.mp_page, ki, key, rdata, 0, 0);
+       }
+
+       if (rc != MDB_SUCCESS)
+               txn->mt_flags |= MDB_TXN_ERROR;
+       else {
+               /* Count the key unless it only replaced an existing node */
+               if (!replaced)
+                       txn->mt_dbs[dbi].md_entries++;
+
+               /* Remember if we just added a subdatabase */
+               if (flags & F_SUBDATA) {
+                       leaf = NODEPTR(mpp.mp_page, ki);
+                       leaf->mn_flags |= F_SUBDATA;
+               }
+
+               /* Now store the actual data in the child DB. Note that we're
+                * storing the user data in the keys field, so there are strict
+                * size limits on dupdata. The actual data fields of the child
+                * DB are all zero size.
+                */
+               if (F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
+                       MDB_xcursor mx;
+
+                       leaf = NODEPTR(mpp.mp_page, ki);
+put_sub:
+                       /* also entered by goto when the key already existed */
+                       mdb_xcursor_init0(txn, dbi, &mx);
+                       mdb_xcursor_init1(txn, dbi, &mx, leaf);
+                       xdata.mv_size = 0;
+                       xdata.mv_data = "";
+                       /* "no duplicate data" is "no overwrite" inside the child DB */
+                       if (flags == MDB_NODUPDATA)
+                               flags = MDB_NOOVERWRITE;
+                       rc = mdb_put0(&mx.mx_txn, mx.mx_cursor.mc_dbi, data, &xdata, flags);
+                       mdb_xcursor_fini(txn, dbi, &mx);
+                       /* write the updated child DB record back into the node */
+                       memcpy(NODEDATA(leaf), &mx.mx_txn.mt_dbs[mx.mx_cursor.mc_dbi],
+                               sizeof(MDB_db));
+               }
+       }
+
+done:
+       return rc;
+}
+
+/*
+ * Public store operation: validate the arguments, then hand over to
+ * mdb_put0().
+ *
+ * key and data must be non-NULL (asserted).  EINVAL is returned for a NULL
+ * txn, for handle 0 or a handle beyond the transaction's table, for a
+ * read-only transaction, for an empty key or one longer than MAXKEYSIZE,
+ * and for any flag other than MDB_NOOVERWRITE / MDB_NODUPDATA.
+ */
+int
+mdb_put(MDB_txn *txn, MDB_dbi dbi,
+    MDB_val *key, MDB_val *data, unsigned int flags)
+{
+       const unsigned int allowed = MDB_NOOVERWRITE|MDB_NODUPDATA;
+
+       assert(key != NULL);
+       assert(data != NULL);
+
+       /* need a transaction and a usable, non-zero handle */
+       if (txn == NULL || dbi == 0 || dbi >= txn->mt_numdbs)
+               return EINVAL;
+
+       /* no writes through a read-only transaction */
+       if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
+               return EINVAL;
+
+       /* key length must lie in 1..MAXKEYSIZE */
+       if (key->mv_size < 1 || key->mv_size > MAXKEYSIZE)
+               return EINVAL;
+
+       /* refuse any flag bit outside the supported set */
+       if (flags & ~allowed)
+               return EINVAL;
+
+       return mdb_put0(txn, dbi, key, data, flags);
+}
+
+/*
+ * Store the environment's flag word (me_flags) in *arg.
+ * Returns MDB_SUCCESS, or EINVAL when env or arg is NULL.
+ */
+int
+mdb_env_get_flags(MDB_env *env, unsigned int *arg)
+{
+       if (env != NULL && arg != NULL) {
+               *arg = env->me_flags;
+               return MDB_SUCCESS;
+       }
+
+       return EINVAL;
+}
+
+/*
+ * Store the environment's path pointer (me_path) in *arg.  The pointer
+ * itself is handed out; the string is not copied.
+ * Returns MDB_SUCCESS, or EINVAL when env or arg is NULL.
+ */
+int
+mdb_env_get_path(MDB_env *env, const char **arg)
+{
+       if (env != NULL && arg != NULL) {
+               *arg = env->me_path;
+               return MDB_SUCCESS;
+       }
+
+       return EINVAL;
+}
+
+/*
+ * Copy the counters of one database record, together with the
+ * environment's page size, into the caller's MDB_stat.
+ * Always returns MDB_SUCCESS; the pointers are not checked here.
+ */
+static int
+mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg)
+{
+       /* tree shape */
+       arg->ms_depth = db->md_depth;
+       arg->ms_entries = db->md_entries;
+
+       /* page accounting */
+       arg->ms_psize = env->me_psize;
+       arg->ms_branch_pages = db->md_branch_pages;
+       arg->ms_leaf_pages = db->md_leaf_pages;
+       arg->ms_overflow_pages = db->md_overflow_pages;
+
+       return MDB_SUCCESS;
+}
+
+/*
+ * Report statistics for the main database as recorded in the
+ * environment's meta data (me_meta).
+ * Returns EINVAL when env or arg is NULL, else the result of mdb_stat0().
+ */
+int
+mdb_env_stat(MDB_env *env, MDB_stat *arg)
+{
+       MDB_db *maindb;
+
+       if (!env || !arg)
+               return EINVAL;
+
+       maindb = &env->me_meta->mm_dbs[MAIN_DBI];
+       return mdb_stat0(env, maindb, arg);
+}
+
+/*
+ * Look up, and optionally create, a database within a transaction and
+ * return its handle through *dbi.
+ *
+ * name == NULL selects the main database; any of MDB_DUPSORT,
+ * MDB_REVERSEKEY and MDB_INTEGERKEY present in flags are then OR-ed into
+ * its flags.  A named database already present in the transaction's table
+ * is returned as is.  Otherwise its MDB_db record is fetched from the main
+ * database (and, with MDB_CREATE, stored there first if missing) and a new
+ * table slot is filled in.
+ *
+ * Returns MDB_SUCCESS, EINVAL for a NULL txn or dbi, ENFILE when the
+ * handle table is full, ENOMEM when the name cannot be copied, or the
+ * error from the lookup/creation.
+ */
+int mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi)
+{
+       MDB_val key, data;
+       /* Function scope on purpose: data.mv_data may point at this record
+        * until the memcpy() into the handle table at the end.
+        */
+       MDB_db dummy;
+       MDB_dbi i;
+       int rc, dirty = 0;
+       size_t len;
+
+       if (txn == NULL || dbi == NULL)
+               return EINVAL;
+
+       /* main DB? */
+       if (!name) {
+               *dbi = MAIN_DBI;
+               if (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY))
+                       txn->mt_dbs[MAIN_DBI].md_flags |= (flags & (MDB_DUPSORT|MDB_REVERSEKEY|MDB_INTEGERKEY));
+               return MDB_SUCCESS;
+       }
+
+       /* Is the DB already open?  Slots released by mdb_close() carry a
+        * NULL name and must not be handed to strncmp().
+        */
+       len = strlen(name);
+       for (i=2; i<txn->mt_numdbs; i++) {
+               if (txn->mt_dbxs[i].md_name.mv_data != NULL &&
+                       len == txn->mt_dbxs[i].md_name.mv_size &&
+                       !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
+                       *dbi = i;
+                       return MDB_SUCCESS;
+               }
+       }
+
+       if (txn->mt_numdbs >= txn->mt_env->me_maxdbs - 1)
+               return ENFILE;
+
+       /* Find the DB info */
+       key.mv_size = len;
+       key.mv_data = (void *)name;
+       rc = mdb_get(txn, MAIN_DBI, &key, &data);
+
+       /* Create if requested */
+       if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
+               data.mv_size = sizeof(MDB_db);
+               data.mv_data = &dummy;
+               memset(&dummy, 0, sizeof(dummy));
+               dummy.md_root = P_INVALID;
+               /* only the low 16 bits of flags are kept as DB flags */
+               dummy.md_flags = flags & 0xffff;
+               rc = mdb_put0(txn, MAIN_DBI, &key, &data, F_SUBDATA);
+               dirty = 1;
+       }
+
+       /* OK, got info, add to table */
+       if (rc == MDB_SUCCESS) {
+               MDB_dbi slot = txn->mt_numdbs;
+               char *namecopy = strdup(name);
+
+               /* never publish a slot whose name could not be copied */
+               if (namecopy == NULL)
+                       return ENOMEM;
+
+               txn->mt_dbxs[slot].md_name.mv_data = namecopy;
+               txn->mt_dbxs[slot].md_name.mv_size = len;
+               txn->mt_dbxs[slot].md_cmp = NULL;
+               txn->mt_dbxs[slot].md_dcmp = NULL;
+               txn->mt_dbxs[slot].md_rel = NULL;
+               txn->mt_dbxs[slot].md_parent = MAIN_DBI;
+               txn->mt_dbxs[slot].md_dirty = dirty;
+               memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
+               *dbi = slot;
+               txn->mt_numdbs++;
+       }
+
+       return rc;
+}
+
+/*
+ * Report statistics for database handle dbi of a transaction.
+ * Returns EINVAL when txn or arg is NULL or dbi is beyond the
+ * transaction's table, else the result of mdb_stat0().
+ */
+int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg)
+{
+       if (txn != NULL && arg != NULL && dbi < txn->mt_numdbs)
+               return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg);
+
+       return EINVAL;
+}
+
+/*
+ * Release the name stored for a named database handle.  Handles up to and
+ * including MAIN_DBI, and handles beyond the transaction's table, are
+ * ignored.  txn itself is not checked.
+ */
+void mdb_close(MDB_txn *txn, MDB_dbi dbi)
+{
+       if (dbi > MAIN_DBI && dbi < txn->mt_numdbs) {
+               char *name = txn->mt_dbxs[dbi].md_name.mv_data;
+
+               /* empty the slot first, then free the strdup()ed name */
+               txn->mt_dbxs[dbi].md_name.mv_data = NULL;
+               txn->mt_dbxs[dbi].md_name.mv_size = 0;
+               free(name);
+       }
+}
+
+/*
+ * Record cmp as the md_cmp callback of database handle dbi.
+ * Returns MDB_SUCCESS, or EINVAL for a NULL txn, handle 0, or a handle
+ * beyond the transaction's table.
+ */
+int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
+{
+       if (txn != NULL && dbi != 0 && dbi < txn->mt_numdbs) {
+               txn->mt_dbxs[dbi].md_cmp = cmp;
+               return MDB_SUCCESS;
+       }
+
+       return EINVAL;
+}
+
+/*
+ * Record cmp as the md_dcmp callback of database handle dbi.
+ * Returns MDB_SUCCESS, or EINVAL for a NULL txn, handle 0, or a handle
+ * beyond the transaction's table.
+ */
+int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
+{
+       if (txn != NULL && dbi != 0 && dbi < txn->mt_numdbs) {
+               txn->mt_dbxs[dbi].md_dcmp = cmp;
+               return MDB_SUCCESS;
+       }
+
+       return EINVAL;
+}
+
+/*
+ * Record rel as the md_rel callback of database handle dbi.
+ * Returns MDB_SUCCESS, or EINVAL for a NULL txn, handle 0, or a handle
+ * beyond the transaction's table.
+ */
+int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel)
+{
+       if (txn != NULL && dbi != 0 && dbi < txn->mt_numdbs) {
+               txn->mt_dbxs[dbi].md_rel = rel;
+               return MDB_SUCCESS;
+       }
+
+       return EINVAL;
+}
diff --git a/servers/slapd/back-mdb/mdb.h b/servers/slapd/back-mdb/mdb.h
new file mode 100644 (file)
index 0000000..f7e98e5
--- /dev/null
@@ -0,0 +1,153 @@
+/* mdb.h - memory-mapped database library header file */
+/*
+ * Copyright 2011 Howard Chu, Symas Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ *
+ * This code is derived from btree.c written by Martin Hedenfalk.
+ *
+ * Copyright (c) 2009, 2010 Martin Hedenfalk <martin@bzero.se>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef _MDB_H_
+#define _MDB_H_
+
+#include <sys/types.h>
+
+/* Library version triple */
+#define MDB_VERSION_MAJOR      0
+#define MDB_VERSION_MINOR      8
+#define MDB_VERSION_PATCH      0
+/* Pack a version triple into one integer: (a << 24) | (b << 16) | c */
+#define MDB_VERINT(a,b,c)      (((a) << 24) | ((b) << 16) | (c))
+#define MDB_VERSION_FULL       \
+       MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
+#define MDB_VERSION_DATE       "August 11, 2011"
+/* The '#' operator stringifies its operand without expanding it, so the
+ * version macros are passed through MDB_VERFOO first: that extra level
+ * expands them to their values before MDB_VERSTR turns them into text.
+ * The date is already a string literal and is concatenated as is.
+ */
+#define MDB_VERSTR(a,b,c,d)    "MDB " #a "." #b "." #c "(" d ")"
+#define MDB_VERFOO(a,b,c,d)    MDB_VERSTR(a,b,c,d)
+#define        MDB_VERSION_STRING      \
+       MDB_VERFOO(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH,MDB_VERSION_DATE)
+
+/* Opaque handles: the structures are only declared here, not defined. */
+struct MDB_cursor;
+struct MDB_txn;
+struct MDB_env;
+
+typedef struct MDB_cursor MDB_cursor;
+typedef struct MDB_txn MDB_txn;
+typedef struct MDB_env MDB_env;
+
+/* Database handle as returned by mdb_open(); an index into the
+ * per-transaction database tables.
+ */
+typedef unsigned int   MDB_dbi;
+
+/* Generic key or data item: a byte count plus a pointer to the bytes. */
+typedef struct MDB_val {
+       size_t           mv_size;
+       void            *mv_data;
+} MDB_val;
+
+/* Callback types installed with mdb_set_compare()/mdb_set_dupsort()
+ * (MDB_cmp_func) and mdb_set_relfunc() (MDB_rel_func).
+ * NOTE(review): the expected return convention of MDB_cmp_func and the
+ * moment MDB_rel_func is invoked are not visible from this header - confirm
+ * against the implementation.
+ */
+typedef int  (MDB_cmp_func)(const MDB_val *a, const MDB_val *b);
+typedef void (MDB_rel_func)(void *ptr, void *oldptr);
+
+/* Write flags.  mdb_put() accepts only MDB_NOOVERWRITE and MDB_NODUPDATA:
+ * MDB_NOOVERWRITE makes a store to an existing key of a non-DUPSORT
+ * database fail with MDB_KEYEXIST; MDB_NODUPDATA is applied as
+ * MDB_NOOVERWRITE to the data item inside the duplicate sub-database.
+ * NOTE(review): MDB_DEL_DUP is presumably meant for mdb_del() - confirm.
+ */
+#define MDB_NOOVERWRITE        0x10
+#define MDB_NODUPDATA  0x20
+#define MDB_DEL_DUP            0x40
+
+/* Positioning operation passed as the 'op' argument of mdb_cursor_get().
+ * NOTE(review): the handling of each operation lives in the cursor code,
+ * outside this header; the _DUP/_NODUP variants presumably relate to
+ * MDB_DUPSORT databases - confirm against mdb_cursor_get().
+ */
+typedef enum MDB_cursor_op {           /* cursor operations */
+       MDB_FIRST,
+       MDB_GET_BOTH,                   /* position at key/data */
+       MDB_GET_BOTH_RANGE,             /* position at key, nearest data */
+       MDB_LAST,
+       MDB_NEXT,
+       MDB_NEXT_DUP,
+       MDB_NEXT_NODUP,
+       MDB_PREV,
+       MDB_PREV_DUP,
+       MDB_PREV_NODUP,
+       MDB_SET,                                /* position at key, or fail */
+       MDB_SET_RANGE                   /* position at given key */
+} MDB_cursor_op;
+
+/* return codes: zero is success and the library's own errors are negative.
+ * Functions also return positive errno values (EINVAL, ENOMEM, ENFILE).
+ */
+#define MDB_SUCCESS     0
+#define MDB_FAIL               -1
+#define MDB_KEYEXIST   -2
+#define MDB_NOTFOUND   -3
+#define MDB_VERSION_MISMATCH   -4
+
+/* DB flags: mdb_open() keeps the low 16 bits of its flags argument as the
+ * flags of a database it creates.
+ */
+#define MDB_REVERSEKEY 0x02            /* use reverse string keys */
+#define MDB_DUPSORT            0x04            /* use sorted duplicates */
+#define MDB_INTEGERKEY 0x08            /* numeric keys in native byte order */
+
+/* environment flags */
+#define MDB_FIXEDMAP   0x01            /* mmap at a fixed address */
+#define MDB_NOSYNC             0x10000         /* don't fsync after commit */
+#define MDB_RDONLY             0x20000         /* read only */
+
+/* DB or env flags */
+#define MDB_CREATE             0x40000         /* create if not present */
+
+/* Statistics filled in by mdb_stat() and mdb_env_stat(). */
+typedef struct MDB_stat {
+       unsigned int    ms_psize;               /* page size of the environment */
+       unsigned int    ms_depth;               /* depth recorded for the database */
+       unsigned long   ms_branch_pages;        /* branch page count */
+       unsigned long   ms_leaf_pages;          /* leaf page count */
+       unsigned long   ms_overflow_pages;      /* overflow page count */
+       unsigned long   ms_entries;             /* entry count */
+} MDB_stat;
+
+/* Version and error-string accessors. */
+char *mdb_version(int *major, int *minor, int *patch);
+char *mdb_strerror(int rc);
+
+/* Environment functions.
+ * mdb_env_stat() reports the main database's statistics;
+ * mdb_env_get_flags()/mdb_env_get_path() store the environment's flag word
+ * and path pointer through their second argument.  All three return EINVAL
+ * when either argument is NULL.
+ */
+int  mdb_env_create(MDB_env **env);
+int  mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mode_t mode);
+int  mdb_env_stat(MDB_env *env, MDB_stat *stat);
+int  mdb_env_sync(MDB_env *env, int force);
+void mdb_env_close(MDB_env *env);
+int  mdb_env_get_flags(MDB_env *env, unsigned int *flags);
+int  mdb_env_get_path(MDB_env *env, const char **path);
+int  mdb_env_set_mapsize(MDB_env *env, size_t size);
+int  mdb_env_set_maxreaders(MDB_env *env, int readers);
+int  mdb_env_get_maxreaders(MDB_env *env, int *readers);
+int  mdb_env_set_maxdbs(MDB_env *env, int dbs);
+
+/* Transactions. */
+int  mdb_txn_begin(MDB_env *env, int rdonly, MDB_txn **txn);
+int  mdb_txn_commit(MDB_txn *txn);
+void mdb_txn_abort(MDB_txn *txn);
+
+/* Database handles.
+ * mdb_open(): a NULL name selects the main database; otherwise the named
+ * database is looked up and, with MDB_CREATE, created when missing.  The
+ * handle is stored in *dbi.
+ * mdb_stat(): EINVAL for a NULL txn/stat or an out-of-range handle.
+ * mdb_close(): frees the name kept for a named handle; other handles are
+ * ignored.
+ */
+int  mdb_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
+int  mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
+void mdb_close(MDB_txn *txn, MDB_dbi dbi);
+
+/* Per-database callbacks; each returns EINVAL for a NULL txn, handle 0 or
+ * an out-of-range handle.
+ */
+int  mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
+int  mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
+int  mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel);
+
+/* Data access.
+ * mdb_put(): flags may contain only MDB_NOOVERWRITE and/or MDB_NODUPDATA;
+ * EINVAL is returned for a NULL txn, handle 0 or an out-of-range handle, a
+ * read-only transaction, or an empty or oversized key.
+ */
+int  mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
+int  mdb_put(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
+                           unsigned int flags);
+int  mdb_del(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data,
+                           unsigned int flags);
+
+/* Cursors. */
+int  mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **cursor);
+void mdb_cursor_close(MDB_cursor *cursor);
+int  mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
+                           MDB_cursor_op op);
+int  mdb_cursor_count(MDB_cursor *cursor, unsigned long *countp);
+
+/* Compare two items for database dbi. */
+int  mdb_cmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
+
+#endif /* _MDB_H_ */
diff --git a/servers/slapd/back-mdb/midl.c b/servers/slapd/back-mdb/midl.c
new file mode 100644 (file)
index 0000000..8b39aca
--- /dev/null
@@ -0,0 +1,109 @@
+/* midl.c - mdb ID list functions (adapted from the back-bdb IDL code) */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include <string.h>
+#include <sys/types.h>
+#include <assert.h>
+#include "midl.h"
+
+typedef unsigned long pgno_t;
+
+/* Sort the IDLs from highest to lowest: yields -1 when x is the larger
+ * value, 1 when it is the smaller one and 0 when both are equal.
+ * Both arguments are evaluated more than once.
+ */
+#define IDL_CMP(x,y)    ( (x) > (y) ? -1 : ( (x) < (y) ? 1 : 0 ) )
+
+/*
+ * Binary search for id in the list ids, where ids[0] holds the element
+ * count and the elements start at ids[1], ordered as defined by IDL_CMP.
+ * Returns the 1-based position of id when it is present; otherwise the
+ * 1-based position at which id would have to be inserted.
+ */
+unsigned mdb_midl_search( ID *ids, ID id )
+{
+       unsigned lo = 0;                /* elements before the current window */
+       unsigned pos = 0;               /* 0-based index probed last */
+       unsigned remaining = ids[0];    /* size of the current window */
+       int cmp = 0;
+
+       while( remaining > 0 ) {
+               int half = remaining >> 1;
+
+               pos = lo + half;
+               cmp = IDL_CMP( id, ids[pos + 1] );
+
+               if( cmp == 0 ) {
+                       return pos + 1;
+               }
+
+               if( cmp < 0 ) {
+                       /* continue in the part before the probe */
+                       remaining = half;
+               } else {
+                       /* continue in the part after the probe */
+                       lo = pos + 1;
+                       remaining -= half + 1;
+               }
+       }
+
+       /* not found: the slot is after the last probe if id sorts behind it */
+       return ( cmp > 0 ) ? pos + 2 : pos + 1;
+}
+
+/*
+ * Insert id into the ID list ids.
+ *
+ * A list is either a range (ids[0] == NOID, ids[1] = lowest, ids[2] =
+ * highest) or a counted list (ids[0] = count, elements from ids[1] on,
+ * sorted from highest to lowest).  A range is simply widened to include
+ * id.  A counted list that reaches MDB_IDL_DB_MAX elements is collapsed
+ * into the range spanning all of its elements plus id.
+ *
+ * Returns 0 on success, -1 when id is already present (or lies inside the
+ * range), -2 on internal error.
+ */
+int mdb_midl_insert( ID *ids, ID id )
+{
+       unsigned x;
+
+       if (MDB_IDL_IS_RANGE( ids )) {
+               /* if already in range, treat as a dup */
+               if (id >= MDB_IDL_FIRST(ids) && id <= MDB_IDL_LAST(ids))
+                       return -1;
+               if (id < MDB_IDL_FIRST(ids))
+                       ids[1] = id;
+               else if (id > MDB_IDL_LAST(ids))
+                       ids[2] = id;
+               return 0;
+       }
+
+       x = mdb_midl_search( ids, id );
+       assert( x > 0 );
+
+       if( x < 1 ) {
+               /* internal error */
+               return -2;
+       }
+
+       if ( x <= ids[0] && ids[x] == id ) {
+               /* duplicate */
+               return -1;
+       }
+
+       if ( ++ids[0] >= MDB_IDL_DB_MAX ) {
+               /* List is full: collapse it into a range.  The list is
+                * sorted from highest to lowest, so the highest element is
+                * ids[1] and the lowest is the last one (the count was
+                * already incremented above, hence ids[0]-1).
+                */
+               ID lo = ids[ids[0]-1];
+               ID hi = ids[1];
+
+               if ( id < lo ) lo = id;
+               if ( id > hi ) hi = id;
+               ids[1] = lo;
+               ids[2] = hi;
+               ids[0] = NOID;
+
+       } else {
+               /* insert id: source and destination overlap, so the tail
+                * must be shifted with memmove(), not memcpy()
+                */
+               memmove( &ids[x+1], &ids[x], (ids[0]-x) * sizeof(ID) );
+               ids[x] = id;
+       }
+
+       return 0;
+}
diff --git a/servers/slapd/back-mdb/midl.h b/servers/slapd/back-mdb/midl.h
new file mode 100644 (file)
index 0000000..aeb0af6
--- /dev/null
@@ -0,0 +1,78 @@
+/* midl.h - mdb ID list header file (adapted from the back-bdb IDL code) */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#ifndef _MDB_MIDL_H_
+#define _MDB_MIDL_H_
+
+/* Copy helper.  It is used to shift list elements within one array, i.e.
+ * on overlapping regions, so it must map to memmove(), not memcpy().
+ * The including file is expected to provide <string.h>.
+ */
+#define AC_MEMCPY(dst,src,size)        memmove((dst),(src),(size))
+
+#define        ID      unsigned long
+#define        NOID    ((ID)~0)
+
+/* IDL sizes - likely should be even bigger
+ *   limiting factors: sizeof(ID), thread stack size
+ */
+#define        MDB_IDL_LOGN    16      /* DB_SIZE is 2^16, UM_SIZE is 2^17 */
+#define MDB_IDL_DB_SIZE                (1<<MDB_IDL_LOGN)
+#define MDB_IDL_UM_SIZE                (1<<(MDB_IDL_LOGN+1))
+#define MDB_IDL_UM_SIZEOF      (MDB_IDL_UM_SIZE * sizeof(ID))
+
+#define MDB_IDL_DB_MAX         (MDB_IDL_DB_SIZE-1)
+
+#define MDB_IDL_UM_MAX         (MDB_IDL_UM_SIZE-1)
+
+/* A range list is marked by ids[0] == NOID and occupies three slots:
+ * the marker, the first ID (ids[1]) and the last ID (ids[2]).
+ */
+#define MDB_IDL_IS_RANGE(ids)  ((ids)[0] == NOID)
+#define MDB_IDL_RANGE_SIZE             (3)
+#define MDB_IDL_RANGE_SIZEOF   (MDB_IDL_RANGE_SIZE * sizeof(ID))
+/* Bytes occupied by a list: three slots for a range, count+1 otherwise */
+#define MDB_IDL_SIZEOF(ids)            ((MDB_IDL_IS_RANGE(ids) \
+       ? MDB_IDL_RANGE_SIZE : ((ids)[0]+1)) * sizeof(ID))
+
+#define MDB_IDL_RANGE_FIRST(ids)       ((ids)[1])
+#define MDB_IDL_RANGE_LAST(ids)                ((ids)[2])
+
+#define MDB_IDL_RANGE( ids, f, l ) \
+       do { \
+               (ids)[0] = NOID; \
+               (ids)[1] = (f);  \
+               (ids)[2] = (l);  \
+       } while(0)
+
+#define MDB_IDL_ZERO(ids) \
+       do { \
+               (ids)[0] = 0; \
+               (ids)[1] = 0; \
+               (ids)[2] = 0; \
+       } while(0)
+
+#define MDB_IDL_IS_ZERO(ids) ( (ids)[0] == 0 )
+#define MDB_IDL_IS_ALL( range, ids ) ( (ids)[0] == NOID \
+       && (ids)[1] <= (range)[1] && (range)[2] <= (ids)[2] )
+
+#define MDB_IDL_CPY( dst, src ) (AC_MEMCPY( dst, src, MDB_IDL_SIZEOF( src ) ))
+
+#define MDB_IDL_ID( bdb, ids, id ) MDB_IDL_RANGE( ids, id, ((bdb)->bi_lastid) )
+#define MDB_IDL_ALL( bdb, ids ) MDB_IDL_RANGE( ids, 1, ((bdb)->bi_lastid) )
+
+/* Element accessors; 'ids' is parenthesized so that any pointer
+ * expression may be passed.  Each evaluates 'ids' more than once.
+ */
+#define MDB_IDL_FIRST( ids )   ( (ids)[1] )
+#define MDB_IDL_LAST( ids )            ( MDB_IDL_IS_RANGE(ids) \
+       ? (ids)[2] : (ids)[(ids)[0]] )
+
+#define MDB_IDL_N( ids )               ( MDB_IDL_IS_RANGE(ids) \
+       ? ((ids)[2]-(ids)[1])+1 : (ids)[0] )
+
+/* Insert id into ids: 0 on success, -1 if already present, -2 on error */
+int mdb_midl_insert( ID *ids, ID id );
+
+#endif /* _MDB_MIDL_H_ */
diff --git a/servers/slapd/back-mdb/modify.c b/servers/slapd/back-mdb/modify.c
new file mode 100644 (file)
index 0000000..128077e
--- /dev/null
@@ -0,0 +1,782 @@
+/* modify.c - mdb backend modify routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+#include <ac/time.h>
+
+#include "back-mdb.h"
+
+/* Value list holding the single value "glue" followed by the BER_BVNULL
+ * terminator.  mdb_modify_internal() matches a structuralObjectClass value
+ * being added or replaced against scbva[0].
+ */
+static struct berval scbva[] = {
+       BER_BVC("glue"),
+       BER_BVNULL
+};
+
+/*
+ * Mark the attributes whose index entries need updating after 'desc' was
+ * modified.  Nothing happens unless mdb_index_mask() reports an index for
+ * desc.
+ *
+ * got_delete == 0: the attribute found for desc in newattrs is flagged
+ * SLAP_ATTR_IXADD.
+ * got_delete != 0: the attribute found for desc in oldattrs is flagged
+ * SLAP_ATTR_IXDEL, and every attribute in newattrs that maps to the same
+ * index slot as desc is flagged SLAP_ATTR_IXADD.
+ */
+static void
+mdb_modify_idxflags(
+       Operation *op,
+       AttributeDescription *desc,
+       int got_delete,
+       Attribute *newattrs,
+       Attribute *oldattrs )
+{
+       struct berval   slot;
+       AttrInfo        *info;
+       Attribute       *attr;
+
+       /* nothing to flag unless the modified attribute is indexed */
+       info = mdb_index_mask( op->o_bd, desc, &slot );
+       if ( info == NULL )
+               return;
+
+       if ( !got_delete ) {
+               /* no values were removed: only the new attribute is flagged */
+               attr = attr_find( newattrs, desc );
+               if ( attr != NULL )
+                       attr->a_flags |= SLAP_ATTR_IXADD;
+               return;
+       }
+
+       /* values were removed: flag the old attribute ... */
+       attr = attr_find( oldattrs, desc );
+       if ( attr != NULL )
+               attr->a_flags |= SLAP_ATTR_IXDEL;
+
+       /* ... and all new attrs that index to the same slot */
+       for ( attr = newattrs; attr != NULL; attr = attr->a_next ) {
+               struct berval   other;
+
+               info = mdb_index_mask( op->o_bd, attr->a_desc, &other );
+               if ( info != NULL && other.bv_val == slot.bv_val )
+                       attr->a_flags |= SLAP_ATTR_IXADD;
+       }
+}
+
+int mdb_modify_internal(
+       Operation *op,
+       DB_TXN *tid,
+       Modifications *modlist,
+       Entry *e,
+       const char **text,
+       char *textbuf,
+       size_t textlen )
+{
+       int rc, err;
+       Modification    *mod;
+       Modifications   *ml;
+       Attribute       *save_attrs;
+       Attribute       *ap;
+       int                     glue_attr_delete = 0;
+       int                     got_delete;
+
+       Debug( LDAP_DEBUG_TRACE, "mdb_modify_internal: 0x%08lx: %s\n",
+               e->e_id, e->e_dn, 0);
+
+       if ( !acl_check_modlist( op, e, modlist )) {
+               return LDAP_INSUFFICIENT_ACCESS;
+       }
+
+       /* save_attrs will be disposed of by mdb_cache_modify */
+       save_attrs = e->e_attrs;
+       e->e_attrs = attrs_dup( e->e_attrs );
+
+       for ( ml = modlist; ml != NULL; ml = ml->sml_next ) {
+               int match;
+               mod = &ml->sml_mod;
+               switch( mod->sm_op ) {
+               case LDAP_MOD_ADD:
+               case LDAP_MOD_REPLACE:
+                       if ( mod->sm_desc == slap_schema.si_ad_structuralObjectClass ) {
+                               value_match( &match, slap_schema.si_ad_structuralObjectClass,
+                                       slap_schema.si_ad_structuralObjectClass->
+                                               ad_type->sat_equality,
+                                       SLAP_MR_VALUE_OF_ATTRIBUTE_SYNTAX,
+                                       &mod->sm_values[0], &scbva[0], text );
+                               if ( !match ) glue_attr_delete = 1;
+                       }
+               }
+               if ( glue_attr_delete )
+                       break;
+       }
+
+       if ( glue_attr_delete ) {
+               Attribute       **app = &e->e_attrs;
+               while ( *app != NULL ) {
+                       if ( !is_at_operational( (*app)->a_desc->ad_type )) {
+                               Attribute *save = *app;
+                               *app = (*app)->a_next;
+                               attr_free( save );
+                               continue;
+                       }
+                       app = &(*app)->a_next;
+               }
+       }
+
+       for ( ml = modlist; ml != NULL; ml = ml->sml_next ) {
+               mod = &ml->sml_mod;
+               got_delete = 0;
+
+               switch ( mod->sm_op ) {
+               case LDAP_MOD_ADD:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "mdb_modify_internal: add %s\n",
+                               mod->sm_desc->ad_cname.bv_val, 0, 0);
+                       err = modify_add_values( e, mod, get_permissiveModify(op),
+                               text, textbuf, textlen );
+                       if( err != LDAP_SUCCESS ) {
+                               Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+                                       err, *text, 0);
+                       }
+                       break;
+
+               case LDAP_MOD_DELETE:
+                       if ( glue_attr_delete ) {
+                               err = LDAP_SUCCESS;
+                               break;
+                       }
+
+                       Debug(LDAP_DEBUG_ARGS,
+                               "mdb_modify_internal: delete %s\n",
+                               mod->sm_desc->ad_cname.bv_val, 0, 0);
+                       err = modify_delete_values( e, mod, get_permissiveModify(op),
+                               text, textbuf, textlen );
+                       if( err != LDAP_SUCCESS ) {
+                               Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+                                       err, *text, 0);
+                       } else {
+                               got_delete = 1;
+                       }
+                       break;
+
+               case LDAP_MOD_REPLACE:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "mdb_modify_internal: replace %s\n",
+                               mod->sm_desc->ad_cname.bv_val, 0, 0);
+                       err = modify_replace_values( e, mod, get_permissiveModify(op),
+                               text, textbuf, textlen );
+                       if( err != LDAP_SUCCESS ) {
+                               Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+                                       err, *text, 0);
+                       } else {
+                               got_delete = 1;
+                       }
+                       break;
+
+               case LDAP_MOD_INCREMENT:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "mdb_modify_internal: increment %s\n",
+                               mod->sm_desc->ad_cname.bv_val, 0, 0);
+                       err = modify_increment_values( e, mod, get_permissiveModify(op),
+                               text, textbuf, textlen );
+                       if( err != LDAP_SUCCESS ) {
+                               Debug(LDAP_DEBUG_ARGS,
+                                       "mdb_modify_internal: %d %s\n",
+                                       err, *text, 0);
+                       } else {
+                               got_delete = 1;
+                       }
+                       break;
+
+               case SLAP_MOD_SOFTADD:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "mdb_modify_internal: softadd %s\n",
+                               mod->sm_desc->ad_cname.bv_val, 0, 0);
+                       /* Avoid problems in index_add_mods()
+                        * We need to add index if necessary.
+                        */
+                       mod->sm_op = LDAP_MOD_ADD;
+
+                       err = modify_add_values( e, mod, get_permissiveModify(op),
+                               text, textbuf, textlen );
+
+                       mod->sm_op = SLAP_MOD_SOFTADD;
+
+                       if ( err == LDAP_TYPE_OR_VALUE_EXISTS ) {
+                               err = LDAP_SUCCESS;
+                       }
+
+                       if( err != LDAP_SUCCESS ) {
+                               Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+                                       err, *text, 0);
+                       }
+                       break;
+
+               case SLAP_MOD_SOFTDEL:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "mdb_modify_internal: softdel %s\n",
+                               mod->sm_desc->ad_cname.bv_val, 0, 0);
+                       /* Avoid problems in index_delete_mods()
+                        * We need to add index if necessary.
+                        */
+                       mod->sm_op = LDAP_MOD_DELETE;
+
+                       err = modify_delete_values( e, mod, get_permissiveModify(op),
+                               text, textbuf, textlen );
+
+                       mod->sm_op = SLAP_MOD_SOFTDEL;
+
+                       if ( err == LDAP_NO_SUCH_ATTRIBUTE ) {
+                               err = LDAP_SUCCESS;
+                       }
+
+                       if( err != LDAP_SUCCESS ) {
+                               Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+                                       err, *text, 0);
+                       }
+                       break;
+
+               case SLAP_MOD_ADD_IF_NOT_PRESENT:
+                       if ( attr_find( e->e_attrs, mod->sm_desc ) != NULL ) {
+                               /* skip */
+                               err = LDAP_SUCCESS;
+                               break;
+                       }
+
+                       Debug(LDAP_DEBUG_ARGS,
+                               "mdb_modify_internal: add_if_not_present %s\n",
+                               mod->sm_desc->ad_cname.bv_val, 0, 0);
+                       /* Avoid problems in index_add_mods()
+                        * We need to add index if necessary.
+                        */
+                       mod->sm_op = LDAP_MOD_ADD;
+
+                       err = modify_add_values( e, mod, get_permissiveModify(op),
+                               text, textbuf, textlen );
+
+                       mod->sm_op = SLAP_MOD_ADD_IF_NOT_PRESENT;
+
+                       if( err != LDAP_SUCCESS ) {
+                               Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+                                       err, *text, 0);
+                       }
+                       break;
+
+               default:
+                       Debug(LDAP_DEBUG_ANY, "mdb_modify_internal: invalid op %d\n",
+                               mod->sm_op, 0, 0);
+                       *text = "Invalid modify operation";
+                       err = LDAP_OTHER;
+                       Debug(LDAP_DEBUG_ARGS, "mdb_modify_internal: %d %s\n",
+                               err, *text, 0);
+               }
+
+               if ( err != LDAP_SUCCESS ) {
+                       attrs_free( e->e_attrs );
+                       e->e_attrs = save_attrs;
+                       /* unlock entry, delete from cache */
+                       return err; 
+               }
+
+               /* If objectClass was modified, reset the flags */
+               if ( mod->sm_desc == slap_schema.si_ad_objectClass ) {
+                       e->e_ocflags = 0;
+               }
+
+               if ( glue_attr_delete ) e->e_ocflags = 0;
+
+
+               /* check if modified attribute was indexed
+                * but not in case of NOOP... */
+               if ( !op->o_noop ) {
+                       mdb_modify_idxflags( op, mod->sm_desc, got_delete, e->e_attrs, save_attrs );
+               }
+       }
+
+       /* check that the entry still obeys the schema */
+       ap = NULL;
+       rc = entry_schema_check( op, e, save_attrs, get_relax(op), 0, &ap,
+               text, textbuf, textlen );
+       if ( rc != LDAP_SUCCESS || op->o_noop ) {
+               attrs_free( e->e_attrs );
+               /* clear the indexing flags */
+               for ( ap = save_attrs; ap != NULL; ap = ap->a_next ) {
+                       ap->a_flags &= ~(SLAP_ATTR_IXADD|SLAP_ATTR_IXDEL);
+               }
+               e->e_attrs = save_attrs;
+
+               if ( rc != LDAP_SUCCESS ) {
+                       Debug( LDAP_DEBUG_ANY,
+                               "entry failed schema check: %s\n",
+                               *text, 0, 0 );
+               }
+
+               /* if NOOP then silently revert to saved attrs */
+               return rc;
+       }
+
+       /* structuralObjectClass modified! */
+       if ( ap ) {
+               assert( ap->a_desc == slap_schema.si_ad_structuralObjectClass );
+               if ( !op->o_noop ) {
+                       mdb_modify_idxflags( op, slap_schema.si_ad_structuralObjectClass,
+                               1, e->e_attrs, save_attrs );
+               }
+       }
+
+       /* update the indices of the modified attributes */
+
+       /* start with deleting the old index entries */
+       for ( ap = save_attrs; ap != NULL; ap = ap->a_next ) {
+               if ( ap->a_flags & SLAP_ATTR_IXDEL ) {
+                       struct berval *vals;
+                       Attribute *a2;
+                       ap->a_flags &= ~SLAP_ATTR_IXDEL;
+                       a2 = attr_find( e->e_attrs, ap->a_desc );
+                       if ( a2 ) {
+                               /* need to detect which values were deleted */
+                               int i, j;
+                               vals = op->o_tmpalloc( (ap->a_numvals + 1) *
+                                       sizeof(struct berval), op->o_tmpmemctx );
+                               j = 0;
+                               for ( i=0; i < ap->a_numvals; i++ ) {
+                                       rc = attr_valfind( a2, SLAP_MR_ASSERTED_VALUE_NORMALIZED_MATCH,
+                                               &ap->a_nvals[i], NULL, op->o_tmpmemctx );
+                                       /* Save deleted values */
+                                       if ( rc == LDAP_NO_SUCH_ATTRIBUTE )
+                                               vals[j++] = ap->a_nvals[i];
+                               }
+                               BER_BVZERO(vals+j);
+                       } else {
+                               /* attribute was completely deleted */
+                               vals = ap->a_nvals;
+                       }
+                       rc = 0;
+                       if ( !BER_BVISNULL( vals )) {
+                               rc = mdb_index_values( op, tid, ap->a_desc,
+                                       vals, e->e_id, SLAP_INDEX_DELETE_OP );
+                               if ( rc != LDAP_SUCCESS ) {
+                                       Debug( LDAP_DEBUG_ANY,
+                                               "%s: attribute \"%s\" index delete failure\n",
+                                               op->o_log_prefix, ap->a_desc->ad_cname.bv_val, 0 );
+                                       attrs_free( e->e_attrs );
+                                       e->e_attrs = save_attrs;
+                               }
+                       }
+                       if ( vals != ap->a_nvals )
+                               op->o_tmpfree( vals, op->o_tmpmemctx );
+                       if ( rc ) return rc;
+               }
+       }
+
+       /* add the new index entries */
+       for ( ap = e->e_attrs; ap != NULL; ap = ap->a_next ) {
+               if (ap->a_flags & SLAP_ATTR_IXADD) {
+                       ap->a_flags &= ~SLAP_ATTR_IXADD;
+                       rc = mdb_index_values( op, tid, ap->a_desc,
+                               ap->a_nvals,
+                               e->e_id, SLAP_INDEX_ADD_OP );
+                       if ( rc != LDAP_SUCCESS ) {
+                               Debug( LDAP_DEBUG_ANY,
+                                      "%s: attribute \"%s\" index add failure\n",
+                                       op->o_log_prefix, ap->a_desc->ad_cname.bv_val, 0 );
+                               attrs_free( e->e_attrs );
+                               e->e_attrs = save_attrs;
+                               return rc;
+                       }
+               }
+       }
+
+       return rc;
+}
+
+
+int
+mdb_modify( Operation *op, SlapReply *rs )
+{      /* LDAP Modify handler for the mdb backend.
+        *
+        * Inside a new transaction the entry named by op->o_req_ndn is looked
+        * up. A referral is returned when the entry is missing, or (without
+        * manageDSAit) when it is a glue or referral entry. Otherwise the
+        * assertion and pre-read controls are evaluated, op->orm_modlist is
+        * applied to a working copy ("dummy") under a nested transaction, and
+        * the copy is stored with mdb_id2entry_update(). Finally either
+        * mdb_cache_modify() is called and the outer transaction committed,
+        * or - when op->o_noop is set - the outer transaction is aborted and
+        * LDAP_X_NO_OPERATION is reported.
+        *
+        * DB_LOCK_DEADLOCK / DB_LOCK_NOTGRANTED from any step jump to the
+        * "retry" label, which rolls the attempt back and starts over.
+        *
+        * The result is sent from within this function (send_ldap_result);
+        * the return value is rs->sr_err.
+        */
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       Entry           *e = NULL;
+       EntryInfo       *ei = NULL;
+       int             manageDSAit = get_manageDSAit( op );
+       char textbuf[SLAP_TEXT_BUFLEN];
+       size_t textlen = sizeof textbuf;
+       DB_TXN  *ltid = NULL, *lt2;     /* outer txn / nested txn */
+       struct mdb_op_info opinfo = {{{ 0 }}};  /* linked into op->o_extra while a txn is open */
+       Entry           dummy = {0};    /* working copy of *e that receives the modifications */
+
+       DB_LOCK         lock;
+
+       int             num_retries = 0;
+
+       LDAPControl **preread_ctrl = NULL;
+       LDAPControl **postread_ctrl = NULL;
+       LDAPControl *ctrls[SLAP_MAX_RESPONSE_CONTROLS];
+       int num_ctrls = 0;
+
+       int rc;
+
+#ifdef LDAP_X_TXN
+       int settle = 0;
+#endif
+
+       Debug( LDAP_DEBUG_ARGS, LDAP_XSTRING(mdb_modify) ": %s\n",
+               op->o_req_dn.bv_val, 0, 0 );
+
+#ifdef LDAP_X_TXN
+       if( op->o_txnSpec ) {
+               /* acquire connection lock */
+               ldap_pvt_thread_mutex_lock( &op->o_conn->c_mutex );
+               if( op->o_conn->c_txn == CONN_TXN_INACTIVE ) {
+                       rs->sr_text = "invalid transaction identifier";
+                       rs->sr_err = LDAP_X_TXN_ID_INVALID;
+                       goto txnReturn;
+               } else if( op->o_conn->c_txn == CONN_TXN_SETTLE ) {
+                       settle=1;
+                       goto txnReturn;
+               }
+
+               if( op->o_conn->c_txn_backend == NULL ) {
+                       op->o_conn->c_txn_backend = op->o_bd;
+
+               } else if( op->o_conn->c_txn_backend != op->o_bd ) {
+                       rs->sr_text = "transaction cannot span multiple database contexts";
+                       rs->sr_err = LDAP_AFFECTS_MULTIPLE_DSAS;
+                       goto txnReturn;
+               }
+
+               /* insert operation into transaction */
+
+               rs->sr_text = "transaction specified";
+               rs->sr_err = LDAP_X_TXN_SPECIFY_OKAY;
+
+txnReturn:
+               /* release connection lock */
+               ldap_pvt_thread_mutex_unlock( &op->o_conn->c_mutex );
+
+               if( !settle ) { /* every case except CONN_TXN_SETTLE only
+                                * reports rs->sr_err and stops here */
+                       send_ldap_result( op, rs );
+                       return rs->sr_err;
+               }
+       }
+#endif
+
+       ctrls[num_ctrls] = NULL;        /* keep the response-control array NULL-terminated */
+
+       /* Don't touch the opattrs, if this is a contextCSN update
+        * initiated from updatedn */
+       if ( !be_isupdate(op) || !op->orm_modlist || op->orm_modlist->sml_next ||
+                op->orm_modlist->sml_desc != slap_schema.si_ad_contextCSN ) {
+
+               slap_mods_opattrs( op, &op->orm_modlist, 1 );
+       }
+
+       if( 0 ) {       /* never entered by falling through; the body is only
+                        * reached via "goto retry" */
+retry: /* transaction retry: undo the failed attempt (free dup'd attrs,
+        * return the entry, abort the txn, unlink opinfo), then fall
+        * through to begin a fresh transaction */
+               if ( dummy.e_attrs ) {
+                       attrs_free( dummy.e_attrs );
+                       dummy.e_attrs = NULL;
+               }
+               if( e != NULL ) {
+                       mdb_unlocked_cache_return_entry_w(&mdb->bi_cache, e);
+                       e = NULL;
+               }
+               Debug(LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modify) ": retrying...\n", 0, 0, 0);
+
+               rs->sr_err = TXN_ABORT( ltid );
+               ltid = NULL;
+               LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+               opinfo.boi_oe.oe_key = NULL;
+               op->o_do_not_cache = opinfo.boi_acl_cache;      /* restore the value saved at txn begin */
+               if( rs->sr_err != 0 ) {
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "internal error";
+                       goto return_results;
+               }
+               if ( op->o_abandon ) {
+                       rs->sr_err = SLAPD_ABANDON;
+                       goto return_results;
+               }
+               mdb_trans_backoff( ++num_retries );
+       }
+
+       /* begin transaction */
+       rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, NULL, &ltid, 
+               mdb->bi_db_opflags );
+       rs->sr_text = NULL;
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modify) ": txn_begin failed: "
+                       "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       opinfo.boi_oe.oe_key = mdb;     /* non-NULL key also tells done: that opinfo is still linked */
+       opinfo.boi_txn = ltid;
+       opinfo.boi_err = 0;
+       opinfo.boi_acl_cache = op->o_do_not_cache;
+       LDAP_SLIST_INSERT_HEAD( &op->o_extra, &opinfo.boi_oe, oe_next );
+
+       /* get entry or ancestor */
+       rs->sr_err = mdb_dn2entry( op, ltid, &op->o_req_ndn, &ei, 1,
+               &lock );
+
+       if ( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modify) ": dn2entry failed (%d)\n",
+                       rs->sr_err, 0, 0 );
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               case DB_NOTFOUND:
+                       break;  /* turned into a referral reply below */
+               case LDAP_BUSY:
+                       rs->sr_text = "ldap server busy";
+                       goto return_results;
+               default:
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "internal error";
+                       goto return_results;
+               }
+       }
+
+       e = ei->bei_e;  /* NOTE(review): ei is dereferenced on the DB_NOTFOUND
+                        * path too, so this relies on mdb_dn2entry() having
+                        * set it (presumably to the closest ancestor) - confirm */
+
+       /* no such entry, or a glue entry without manageDSAit:
+        * answer with a referral instead of modifying anything */
+       /* FIXME: dn2entry() should return non-glue entry */
+       if (( rs->sr_err == DB_NOTFOUND ) ||
+               ( !manageDSAit && e && is_entry_glue( e )))
+       {
+               if ( e != NULL ) {
+                       rs->sr_matched = ch_strdup( e->e_dn );
+                       rs->sr_ref = is_entry_referral( e )
+                               ? get_entry_referrals( op, e )
+                               : NULL;
+                       mdb_unlocked_cache_return_entry_r (&mdb->bi_cache, e);
+                       e = NULL;       /* already returned; done: must not release it again */
+
+               } else {
+                       rs->sr_ref = referral_rewrite( default_referral, NULL,
+                               &op->o_req_dn, LDAP_SCOPE_DEFAULT );
+               }
+
+               rs->sr_err = LDAP_REFERRAL;
+               send_ldap_result( op, rs );
+
+               if ( rs->sr_ref != default_referral ) { /* never free default_referral itself */
+                       ber_bvarray_free( rs->sr_ref );
+               }
+               free( (char *)rs->sr_matched );
+               rs->sr_ref = NULL;
+               rs->sr_matched = NULL;
+
+               goto done;      /* result already sent, so return_results is skipped */
+       }
+
+       if ( !manageDSAit && is_entry_referral( e ) ) {
+               /* entry is a referral, don't allow modify */
+               rs->sr_ref = get_entry_referrals( op, e );
+
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modify) ": entry is referral\n",
+                       0, 0, 0 );
+
+               rs->sr_err = LDAP_REFERRAL;
+               rs->sr_matched = e->e_name.bv_val;      /* borrowed from the entry: cleared below, not freed */
+               send_ldap_result( op, rs );
+
+               ber_bvarray_free( rs->sr_ref );
+               rs->sr_ref = NULL;
+               rs->sr_matched = NULL;
+               goto done;
+       }
+
+       if ( get_assert( op ) &&
+               ( test_filter( op, e, get_assertion( op )) != LDAP_COMPARE_TRUE ))
+       {       /* assertion filter did not evaluate to TRUE on the current entry */
+               rs->sr_err = LDAP_ASSERTION_FAILED;
+               goto return_results;
+       }
+
+       if( op->o_preread ) {
+               if( preread_ctrl == NULL ) {    /* reserve a ctrls[] slot once; it is kept across retries */
+                       preread_ctrl = &ctrls[num_ctrls++];
+                       ctrls[num_ctrls] = NULL;
+               }
+               if ( slap_read_controls( op, rs, e,
+                       &slap_pre_read_bv, preread_ctrl ) )
+               {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<=- " LDAP_XSTRING(mdb_modify) ": pre-read "
+                               "failed!\n", 0, 0, 0 );
+                       if ( op->o_preread & SLAP_CONTROL_CRITICAL ) {
+                               /* FIXME: is it correct to abort
+                                * operation if control fails? */
+                               goto return_results;
+                       }
+               }
+       }
+
+       /* nested transaction: lt2 is a child of ltid and is the txn handed
+        * to mdb_modify_internal() and mdb_id2entry_update() below */
+       rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, ltid, &lt2, mdb->bi_db_opflags );
+       rs->sr_text = NULL;
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modify) ": txn_begin(2) failed: " "%s (%d)\n",
+                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+       /* Modify the entry: dummy starts as a struct copy of *e, so
+        * dummy.e_attrs aliases e->e_attrs until it is replaced */
+       dummy = *e;
+       rs->sr_err = mdb_modify_internal( op, lt2, op->orm_modlist,
+               &dummy, &rs->sr_text, textbuf, textlen );
+
+       if( rs->sr_err != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modify) ": modify failed (%d)\n",
+                       rs->sr_err, 0, 0 );
+               if ( (rs->sr_err == LDAP_INSUFFICIENT_ACCESS) && opinfo.boi_err ) {
+                       rs->sr_err = opinfo.boi_err;    /* report the error recorded in opinfo instead */
+               }
+               /* Only free attrs if they were dup'd.  */
+               if ( dummy.e_attrs == e->e_attrs ) dummy.e_attrs = NULL;
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               goto return_results;
+       }
+
+       /* change the entry itself */
+       rs->sr_err = mdb_id2entry_update( op->o_bd, lt2, &dummy );
+       if ( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modify) ": id2entry update failed " "(%d)\n",
+                       rs->sr_err, 0, 0 );
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               rs->sr_text = "entry update failed";
+               goto return_results;
+       }
+
+       if ( TXN_COMMIT( lt2, 0 ) != 0 ) {      /* commits only the nested txn; ltid is still open */
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "txn_commit(2) failed";
+               goto return_results;
+       }
+
+       if( op->o_postread ) {
+               if( postread_ctrl == NULL ) {
+                       postread_ctrl = &ctrls[num_ctrls++];
+                       ctrls[num_ctrls] = NULL;
+               }
+               if( slap_read_controls( op, rs, &dummy,
+                       &slap_post_read_bv, postread_ctrl ) )
+               {
+                       Debug( LDAP_DEBUG_TRACE,
+                               "<=- " LDAP_XSTRING(mdb_modify)
+                               ": post-read failed!\n", 0, 0, 0 );
+                       if ( op->o_postread & SLAP_CONTROL_CRITICAL ) {
+                               /* FIXME: is it correct to abort
+                                * operation if control fails? */
+                               goto return_results;
+                       }
+               }
+       }
+
+       if( op->o_noop ) {      /* discard the changes by aborting the outer txn */
+               if ( ( rs->sr_err = TXN_ABORT( ltid ) ) != 0 ) {
+                       rs->sr_text = "txn_abort (no-op) failed";       /* replaced by "commit failed" in the sr_err check below */
+               } else {
+                       rs->sr_err = LDAP_X_NO_OPERATION;
+                       ltid = NULL;
+                       /* Only free attrs if they were dup'd.  */
+                       if ( dummy.e_attrs == e->e_attrs ) dummy.e_attrs = NULL;
+                       goto return_results;
+               }
+       } else {
+               /* may have changed in mdb_modify_internal() */
+               e->e_ocflags = dummy.e_ocflags;
+               rc = mdb_cache_modify( mdb, e, dummy.e_attrs, ltid, &lock );
+               switch( rc ) {  /* NOTE(review): any other non-zero rc is silently ignored */
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               dummy.e_attrs = NULL;   /* so return_results does not free the list handed to mdb_cache_modify() */
+
+               rs->sr_err = TXN_COMMIT( ltid, 0 );
+       }
+       ltid = NULL;    /* the outer txn has been committed or aborted; done: must not abort it again */
+       LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+       opinfo.boi_oe.oe_key = NULL;
+
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modify) ": txn_%s failed: %s (%d)\n",
+                       op->o_noop ? "abort (no-op)" : "commit",
+                       db_strerror(rs->sr_err), rs->sr_err );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "commit failed";
+
+               goto return_results;
+       }
+
+       Debug( LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_modify) ": updated%s id=%08lx dn=\"%s\"\n",
+               op->o_noop ? " (no-op)" : "",
+               dummy.e_id, op->o_req_dn.bv_val );
+
+       rs->sr_err = LDAP_SUCCESS;
+       rs->sr_text = NULL;
+       if( num_ctrls ) rs->sr_ctrls = ctrls;   /* NOTE(review): ctrls is a local array and
+                                                * rs->sr_ctrls is not reset before returning -
+                                                * confirm callers do not read it afterwards */
+
+return_results: /* common exit for paths that have not yet sent a result */
+       if( dummy.e_attrs ) {
+               attrs_free( dummy.e_attrs );
+       }
+       send_ldap_result( op, rs );
+
+       if( rs->sr_err == LDAP_SUCCESS && mdb->bi_txn_cp_kbyte ) {
+               TXN_CHECKPOINT( mdb->bi_dbenv,
+                       mdb->bi_txn_cp_kbyte, mdb->bi_txn_cp_min, 0 );
+       }
+
+done:  /* cleanup shared by every path that got past the LDAP_X_TXN block */
+       slap_graduate_commit_csn( op );
+
+       if( ltid != NULL ) {    /* still set only when the outer txn was neither committed nor aborted above */
+               TXN_ABORT( ltid );
+       }
+       if ( opinfo.boi_oe.oe_key ) {
+               LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+       }
+
+       if( e != NULL ) {
+               mdb_unlocked_cache_return_entry_w (&mdb->bi_cache, e);
+       }
+
+       if( preread_ctrl != NULL && (*preread_ctrl) != NULL ) { /* presumably filled in by slap_read_controls() */
+               slap_sl_free( (*preread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+               slap_sl_free( *preread_ctrl, op->o_tmpmemctx );
+       }
+       if( postread_ctrl != NULL && (*postread_ctrl) != NULL ) {
+               slap_sl_free( (*postread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+               slap_sl_free( *postread_ctrl, op->o_tmpmemctx );
+       }
+
+       rs->sr_text = NULL;     /* possibly pointed at the local textbuf; do not hand it back */
+
+       return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/modrdn.c b/servers/slapd/back-mdb/modrdn.c
new file mode 100644 (file)
index 0000000..01f236c
--- /dev/null
@@ -0,0 +1,838 @@
+/* modrdn.c - mdb backend modrdn routine */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+
+int
+mdb_modrdn( Operation  *op, SlapReply *rs )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       AttributeDescription *children = slap_schema.si_ad_children;
+       AttributeDescription *entry = slap_schema.si_ad_entry;
+       struct berval   p_dn, p_ndn;
+       struct berval   new_dn = {0, NULL}, new_ndn = {0, NULL};
+       Entry           *e = NULL;
+       Entry           *p = NULL;
+       EntryInfo       *ei = NULL, *eip = NULL, *nei = NULL, *neip = NULL;
+       /* LDAP v2 supporting correct attribute handling. */
+       char textbuf[SLAP_TEXT_BUFLEN];
+       size_t textlen = sizeof textbuf;
+       DB_TXN          *ltid = NULL, *lt2;
+       struct mdb_op_info opinfo = {{{ 0 }}};
+       Entry dummy = {0};
+
+       Entry           *np = NULL;                     /* newSuperior Entry */
+       struct berval   *np_dn = NULL;                  /* newSuperior dn */
+       struct berval   *np_ndn = NULL;                 /* newSuperior ndn */
+       struct berval   *new_parent_dn = NULL;  /* np_dn, p_dn, or NULL */
+
+       int             manageDSAit = get_manageDSAit( op );
+
+       DB_LOCK         lock, plock, nplock;
+
+       int             num_retries = 0;
+
+       LDAPControl **preread_ctrl = NULL;
+       LDAPControl **postread_ctrl = NULL;
+       LDAPControl *ctrls[SLAP_MAX_RESPONSE_CONTROLS];
+       int num_ctrls = 0;
+
+       int     rc;
+
+       int parent_is_glue = 0;
+       int parent_is_leaf = 0;
+
+#ifdef LDAP_X_TXN
+       int settle = 0;
+#endif
+
+       Debug( LDAP_DEBUG_TRACE, "==>" LDAP_XSTRING(mdb_modrdn) "(%s,%s,%s)\n",
+               op->o_req_dn.bv_val,op->oq_modrdn.rs_newrdn.bv_val,
+               op->oq_modrdn.rs_newSup ? op->oq_modrdn.rs_newSup->bv_val : "NULL" );
+
+#ifdef LDAP_X_TXN
+       if( op->o_txnSpec ) {
+               /* acquire connection lock */
+               ldap_pvt_thread_mutex_lock( &op->o_conn->c_mutex );
+               if( op->o_conn->c_txn == CONN_TXN_INACTIVE ) {
+                       rs->sr_text = "invalid transaction identifier";
+                       rs->sr_err = LDAP_X_TXN_ID_INVALID;
+                       goto txnReturn;
+               } else if( op->o_conn->c_txn == CONN_TXN_SETTLE ) {
+                       settle=1;
+                       goto txnReturn;
+               }
+
+               if( op->o_conn->c_txn_backend == NULL ) {
+                       op->o_conn->c_txn_backend = op->o_bd;
+
+               } else if( op->o_conn->c_txn_backend != op->o_bd ) {
+                       rs->sr_text = "transaction cannot span multiple database contexts";
+                       rs->sr_err = LDAP_AFFECTS_MULTIPLE_DSAS;
+                       goto txnReturn;
+               }
+
+               /* insert operation into transaction */
+
+               rs->sr_text = "transaction specified";
+               rs->sr_err = LDAP_X_TXN_SPECIFY_OKAY;
+
+txnReturn:
+               /* release connection lock */
+               ldap_pvt_thread_mutex_unlock( &op->o_conn->c_mutex );
+
+               if( !settle ) {
+                       send_ldap_result( op, rs );
+                       return rs->sr_err;
+               }
+       }
+#endif
+
+       ctrls[num_ctrls] = NULL;
+
+       slap_mods_opattrs( op, &op->orr_modlist, 1 );
+
+       if( 0 ) {
+retry: /* transaction retry */
+               if ( dummy.e_attrs ) {
+                       attrs_free( dummy.e_attrs );
+                       dummy.e_attrs = NULL;
+               }
+               if (e != NULL) {
+                       mdb_unlocked_cache_return_entry_w(&mdb->bi_cache, e);
+                       e = NULL;
+               }
+               if (p != NULL) {
+                       mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+                       p = NULL;
+               }
+               if (np != NULL) {
+                       mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, np);
+                       np = NULL;
+               }
+               Debug( LDAP_DEBUG_TRACE, "==>" LDAP_XSTRING(mdb_modrdn)
+                               ": retrying...\n", 0, 0, 0 );
+
+               rs->sr_err = TXN_ABORT( ltid );
+               ltid = NULL;
+               LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+               opinfo.boi_oe.oe_key = NULL;
+               op->o_do_not_cache = opinfo.boi_acl_cache;
+               if( rs->sr_err != 0 ) {
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "internal error";
+                       goto return_results;
+               }
+               if ( op->o_abandon ) {
+                       rs->sr_err = SLAPD_ABANDON;
+                       goto return_results;
+               }
+               parent_is_glue = 0;
+               parent_is_leaf = 0;
+               mdb_trans_backoff( ++num_retries );
+       }
+
+       /* begin transaction */
+       rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, NULL, &ltid, 
+               mdb->bi_db_opflags );
+       rs->sr_text = NULL;
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modrdn) ": txn_begin failed: "
+                       "%s (%d)\n", db_strerror(rs->sr_err), rs->sr_err, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       opinfo.boi_oe.oe_key = mdb;
+       opinfo.boi_txn = ltid;
+       opinfo.boi_err = 0;
+       opinfo.boi_acl_cache = op->o_do_not_cache;
+       LDAP_SLIST_INSERT_HEAD( &op->o_extra, &opinfo.boi_oe, oe_next );
+
+       /* get entry */
+       rs->sr_err = mdb_dn2entry( op, ltid, &op->o_req_ndn, &ei, 1,
+               &lock );
+
+       switch( rs->sr_err ) {
+       case 0:
+       case DB_NOTFOUND:
+               break;
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               goto retry;
+       case LDAP_BUSY:
+               rs->sr_text = "ldap server busy";
+               goto return_results;
+       default:
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       e = ei->bei_e;
+       /* FIXME: dn2entry() should return non-glue entry */
+       if (( rs->sr_err == DB_NOTFOUND ) ||
+               ( !manageDSAit && e && is_entry_glue( e )))
+       {
+               if( e != NULL ) {
+                       rs->sr_matched = ch_strdup( e->e_dn );
+                       rs->sr_ref = is_entry_referral( e )
+                               ? get_entry_referrals( op, e )
+                               : NULL;
+                       mdb_unlocked_cache_return_entry_r( &mdb->bi_cache, e);
+                       e = NULL;
+
+               } else {
+                       rs->sr_ref = referral_rewrite( default_referral, NULL,
+                                       &op->o_req_dn, LDAP_SCOPE_DEFAULT );
+               }
+
+               rs->sr_err = LDAP_REFERRAL;
+               send_ldap_result( op, rs );
+
+               ber_bvarray_free( rs->sr_ref );
+               free( (char *)rs->sr_matched );
+               rs->sr_ref = NULL;
+               rs->sr_matched = NULL;
+
+               goto done;
+       }
+
+       if ( get_assert( op ) &&
+               ( test_filter( op, e, get_assertion( op )) != LDAP_COMPARE_TRUE ))
+       {
+               rs->sr_err = LDAP_ASSERTION_FAILED;
+               goto return_results;
+       }
+
+       /* check write on old entry */
+       rs->sr_err = access_allowed( op, e, entry, NULL, ACL_WRITE, NULL );
+       if ( ! rs->sr_err ) {
+               switch( opinfo.boi_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+
+               Debug( LDAP_DEBUG_TRACE, "no access to entry\n", 0,
+                       0, 0 );
+               rs->sr_text = "no write access to old entry";
+               rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+               goto return_results;
+       }
+
+#ifndef MDB_HIER
+       rs->sr_err = mdb_cache_children( op, ltid, e );
+       if ( rs->sr_err != DB_NOTFOUND ) {
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               case 0:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "<=- " LDAP_XSTRING(mdb_modrdn)
+                               ": non-leaf %s\n",
+                               op->o_req_dn.bv_val, 0, 0);
+                       rs->sr_err = LDAP_NOT_ALLOWED_ON_NONLEAF;
+                       rs->sr_text = "subtree rename not supported";
+                       break;
+               default:
+                       Debug(LDAP_DEBUG_ARGS,
+                               "<=- " LDAP_XSTRING(mdb_modrdn)
+                               ": has_children failed: %s (%d)\n",
+                               db_strerror(rs->sr_err), rs->sr_err, 0 );
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "internal error";
+               }
+               goto return_results;
+       }
+       ei->bei_state |= CACHE_ENTRY_NO_KIDS;
+#endif
+
+       if (!manageDSAit && is_entry_referral( e ) ) {
+               /* parent is a referral, don't allow add */
+               rs->sr_ref = get_entry_referrals( op, e );
+
+               Debug( LDAP_DEBUG_TRACE, LDAP_XSTRING(mdb_modrdn)
+                       ": entry %s is referral\n", e->e_dn, 0, 0 );
+
+               rs->sr_err = LDAP_REFERRAL,
+               rs->sr_matched = e->e_name.bv_val;
+               send_ldap_result( op, rs );
+
+               ber_bvarray_free( rs->sr_ref );
+               rs->sr_ref = NULL;
+               rs->sr_matched = NULL;
+               goto done;
+       }
+
+       if ( be_issuffix( op->o_bd, &e->e_nname ) ) {
+#ifdef MDB_MULTIPLE_SUFFIXES
+               /* Allow renaming one suffix entry to another */
+               p_ndn = slap_empty_bv;
+#else
+               /* There can only be one suffix entry */
+               rs->sr_err = LDAP_NAMING_VIOLATION;
+               rs->sr_text = "cannot rename suffix entry";
+               goto return_results;
+#endif
+       } else {
+               dnParent( &e->e_nname, &p_ndn );
+       }
+       np_ndn = &p_ndn;
+       eip = ei->bei_parent;
+       if ( eip && eip->bei_id ) {
+               /* Make sure parent entry exist and we can write its 
+                * children.
+                */
+               rs->sr_err = mdb_cache_find_id( op, ltid,
+                       eip->bei_id, &eip, 0, &plock );
+
+               switch( rs->sr_err ) {
+               case 0:
+               case DB_NOTFOUND:
+                       break;
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               case LDAP_BUSY:
+                       rs->sr_text = "ldap server busy";
+                       goto return_results;
+               default:
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "internal error";
+                       goto return_results;
+               }
+
+               p = eip->bei_e;
+               if( p == NULL) {
+                       Debug( LDAP_DEBUG_TRACE, LDAP_XSTRING(mdb_modrdn)
+                               ": parent does not exist\n", 0, 0, 0);
+                       rs->sr_err = LDAP_OTHER;
+                       rs->sr_text = "old entry's parent does not exist";
+                       goto return_results;
+               }
+       } else {
+               p = (Entry *)&slap_entry_root;
+       }
+
+       /* check parent for "children" acl */
+       rs->sr_err = access_allowed( op, p,
+               children, NULL,
+               op->oq_modrdn.rs_newSup == NULL ?
+                       ACL_WRITE : ACL_WDEL,
+               NULL );
+
+       if ( !p_ndn.bv_len )
+               p = NULL;
+
+       if ( ! rs->sr_err ) {
+               switch( opinfo.boi_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+
+               rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+               Debug( LDAP_DEBUG_TRACE, "no access to parent\n", 0,
+                       0, 0 );
+               rs->sr_text = "no write access to old parent's children";
+               goto return_results;
+       }
+
+       Debug( LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_modrdn) ": wr to children "
+               "of entry %s OK\n", p_ndn.bv_val, 0, 0 );
+       
+       if ( p_ndn.bv_val == slap_empty_bv.bv_val ) {
+               p_dn = slap_empty_bv;
+       } else {
+               dnParent( &e->e_name, &p_dn );
+       }
+
+       Debug( LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_modrdn) ": parent dn=%s\n",
+               p_dn.bv_val, 0, 0 );
+
+       new_parent_dn = &p_dn;  /* New Parent unless newSuperior given */
+
+       if ( op->oq_modrdn.rs_newSup != NULL ) {
+               Debug( LDAP_DEBUG_TRACE, 
+                       LDAP_XSTRING(mdb_modrdn)
+                       ": new parent \"%s\" requested...\n",
+                       op->oq_modrdn.rs_newSup->bv_val, 0, 0 );
+
+               /*  newSuperior == oldParent? */
+               if( dn_match( &p_ndn, op->oq_modrdn.rs_nnewSup ) ) {
+                       Debug( LDAP_DEBUG_TRACE, "mdb_back_modrdn: "
+                               "new parent \"%s\" same as the old parent \"%s\"\n",
+                               op->oq_modrdn.rs_newSup->bv_val, p_dn.bv_val, 0 );
+                       op->oq_modrdn.rs_newSup = NULL; /* ignore newSuperior */
+               }
+       }
+
+       /* There's a MDB_MULTIPLE_SUFFIXES case here that this code doesn't
+        * support. E.g., two suffixes dc=foo,dc=com and dc=bar,dc=net.
+        * We do not allow modDN
+        *   dc=foo,dc=com
+        *    newrdn dc=bar
+        *    newsup dc=net
+        * and we probably should. But since MULTIPLE_SUFFIXES is deprecated
+        * I'm ignoring this problem for now.
+        */
+       if ( op->oq_modrdn.rs_newSup != NULL ) {
+               if ( op->oq_modrdn.rs_newSup->bv_len ) {
+                       np_dn = op->oq_modrdn.rs_newSup;
+                       np_ndn = op->oq_modrdn.rs_nnewSup;
+
+                       /* newSuperior == oldParent? - checked above */
+                       /* newSuperior == entry being moved?, if so ==> ERROR */
+                       if ( dnIsSuffix( np_ndn, &e->e_nname )) {
+                               rs->sr_err = LDAP_NO_SUCH_OBJECT;
+                               rs->sr_text = "new superior not found";
+                               goto return_results;
+                       }
+                       /* Get Entry with dn=newSuperior. Does newSuperior exist? */
+
+                       rs->sr_err = mdb_dn2entry( op, ltid, np_ndn,
+                               &neip, 0, &nplock );
+
+                       switch( rs->sr_err ) {
+                       case 0: np = neip->bei_e;
+                       case DB_NOTFOUND:
+                               break;
+                       case DB_LOCK_DEADLOCK:
+                       case DB_LOCK_NOTGRANTED:
+                               goto retry;
+                       case LDAP_BUSY:
+                               rs->sr_text = "ldap server busy";
+                               goto return_results;
+                       default:
+                               rs->sr_err = LDAP_OTHER;
+                               rs->sr_text = "internal error";
+                               goto return_results;
+                       }
+
+                       if( np == NULL) {
+                               Debug( LDAP_DEBUG_TRACE,
+                                       LDAP_XSTRING(mdb_modrdn)
+                                       ": newSup(ndn=%s) not here!\n",
+                                       np_ndn->bv_val, 0, 0);
+                               rs->sr_text = "new superior not found";
+                               rs->sr_err = LDAP_NO_SUCH_OBJECT;
+                               goto return_results;
+                       }
+
+                       Debug( LDAP_DEBUG_TRACE,
+                               LDAP_XSTRING(mdb_modrdn)
+                               ": wr to new parent OK np=%p, id=%ld\n",
+                               (void *) np, (long) np->e_id, 0 );
+
+                       /* check newSuperior for "children" acl */
+                       rs->sr_err = access_allowed( op, np, children,
+                               NULL, ACL_WADD, NULL );
+
+                       if( ! rs->sr_err ) {
+                               switch( opinfo.boi_err ) {
+                               case DB_LOCK_DEADLOCK:
+                               case DB_LOCK_NOTGRANTED:
+                                       goto retry;
+                               }
+
+                               Debug( LDAP_DEBUG_TRACE,
+                                       LDAP_XSTRING(mdb_modrdn)
+                                       ": no wr to newSup children\n",
+                                       0, 0, 0 );
+                               rs->sr_text = "no write access to new superior's children";
+                               rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+                               goto return_results;
+                       }
+
+                       if ( is_entry_alias( np ) ) {
+                               /* parent is an alias, don't allow add */
+                               Debug( LDAP_DEBUG_TRACE,
+                                       LDAP_XSTRING(mdb_modrdn)
+                                       ": entry is alias\n",
+                                       0, 0, 0 );
+                               rs->sr_text = "new superior is an alias";
+                               rs->sr_err = LDAP_ALIAS_PROBLEM;
+                               goto return_results;
+                       }
+
+                       if ( is_entry_referral( np ) ) {
+                               /* parent is a referral, don't allow add */
+                               Debug( LDAP_DEBUG_TRACE,
+                                       LDAP_XSTRING(mdb_modrdn)
+                                       ": entry is referral\n",
+                                       0, 0, 0 );
+                               rs->sr_text = "new superior is a referral";
+                               rs->sr_err = LDAP_OTHER;
+                               goto return_results;
+                       }
+
+               } else {
+                       np_dn = NULL;
+
+                       /* no parent, modrdn entry directly under root */
+                       if ( be_issuffix( op->o_bd, (struct berval *)&slap_empty_bv )
+                               || be_isupdate( op ) ) {
+                               np = (Entry *)&slap_entry_root;
+
+                               /* check parent for "children" acl */
+                               rs->sr_err = access_allowed( op, np,
+                                       children, NULL, ACL_WADD, NULL );
+
+                               np = NULL;
+
+                               if ( ! rs->sr_err ) {
+                                       switch( opinfo.boi_err ) {
+                                       case DB_LOCK_DEADLOCK:
+                                       case DB_LOCK_NOTGRANTED:
+                                               goto retry;
+                                       }
+
+                                       rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+                                       Debug( LDAP_DEBUG_TRACE, 
+                                               "no access to new superior\n", 
+                                               0, 0, 0 );
+                                       rs->sr_text =
+                                               "no write access to new superior's children";
+                                       goto return_results;
+                               }
+                       }
+               }
+
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modrdn)
+                       ": wr to new parent's children OK\n",
+                       0, 0, 0 );
+
+               new_parent_dn = np_dn;
+       }
+
+       /* Build target dn and make sure target entry doesn't exist already. */
+       if (!new_dn.bv_val) {
+               build_new_dn( &new_dn, new_parent_dn, &op->oq_modrdn.rs_newrdn, NULL ); 
+       }
+
+       if (!new_ndn.bv_val) {
+               struct berval bv = {0, NULL};
+               dnNormalize( 0, NULL, NULL, &new_dn, &bv, op->o_tmpmemctx );
+               ber_dupbv( &new_ndn, &bv );
+               /* FIXME: why not call dnNormalize() w/o ctx? */
+               op->o_tmpfree( bv.bv_val, op->o_tmpmemctx );
+       }
+
+       Debug( LDAP_DEBUG_TRACE, LDAP_XSTRING(mdb_modrdn) ": new ndn=%s\n",
+               new_ndn.bv_val, 0, 0 );
+
+       /* Shortcut the search */
+       nei = neip ? neip : eip;
+       rs->sr_err = mdb_cache_find_ndn ( op, ltid, &new_ndn, &nei );
+       if ( nei ) mdb_cache_entryinfo_unlock( nei );
+       switch( rs->sr_err ) {
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               goto retry;
+       case DB_NOTFOUND:
+               break;
+       case 0:
+               /* Allow rename to same DN */
+               if ( nei == ei )
+                       break;
+               rs->sr_err = LDAP_ALREADY_EXISTS;
+               goto return_results;
+       default:
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       assert( op->orr_modlist != NULL );
+
+       if( op->o_preread ) {
+               if( preread_ctrl == NULL ) {
+                       preread_ctrl = &ctrls[num_ctrls++];
+                       ctrls[num_ctrls] = NULL;
+               }
+               if( slap_read_controls( op, rs, e,
+                       &slap_pre_read_bv, preread_ctrl ) )
+               {
+                       Debug( LDAP_DEBUG_TRACE,        
+                               "<=- " LDAP_XSTRING(mdb_modrdn)
+                               ": pre-read failed!\n", 0, 0, 0 );
+                       if ( op->o_preread & SLAP_CONTROL_CRITICAL ) {
+                               /* FIXME: is it correct to abort
+                                * operation if control fails? */
+                               goto return_results;
+                       }
+               }                   
+       }
+
+       /* nested transaction */
+       rs->sr_err = TXN_BEGIN( mdb->bi_dbenv, ltid, &lt2, mdb->bi_db_opflags );
+       rs->sr_text = NULL;
+       if( rs->sr_err != 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modrdn)
+                       ": txn_begin(2) failed: %s (%d)\n",
+                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "internal error";
+               goto return_results;
+       }
+
+       /* delete old DN */
+       rs->sr_err = mdb_dn2id_delete( op, lt2, eip, e );
+       if ( rs->sr_err != 0 ) {
+               Debug(LDAP_DEBUG_TRACE,
+                       "<=- " LDAP_XSTRING(mdb_modrdn)
+                       ": dn2id del failed: %s (%d)\n",
+                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "DN index delete fail";
+               goto return_results;
+       }
+
+       /* copy the entry, then override some fields */
+       dummy = *e;
+       dummy.e_name = new_dn;
+       dummy.e_nname = new_ndn;
+       dummy.e_attrs = NULL;
+
+       /* add new DN */
+       rs->sr_err = mdb_dn2id_add( op, lt2, neip ? neip : eip, &dummy );
+       if ( rs->sr_err != 0 ) {
+               Debug(LDAP_DEBUG_TRACE,
+                       "<=- " LDAP_XSTRING(mdb_modrdn)
+                       ": dn2id add failed: %s (%d)\n",
+                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "DN index add failed";
+               goto return_results;
+       }
+
+       dummy.e_attrs = e->e_attrs;
+
+       /* modify entry */
+       rs->sr_err = mdb_modify_internal( op, lt2, op->orr_modlist, &dummy,
+               &rs->sr_text, textbuf, textlen );
+       if( rs->sr_err != LDAP_SUCCESS ) {
+               Debug(LDAP_DEBUG_TRACE,
+                       "<=- " LDAP_XSTRING(mdb_modrdn)
+                       ": modify failed: %s (%d)\n",
+                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+               if ( ( rs->sr_err == LDAP_INSUFFICIENT_ACCESS ) && opinfo.boi_err ) {
+                       rs->sr_err = opinfo.boi_err;
+               }
+               if ( dummy.e_attrs == e->e_attrs ) dummy.e_attrs = NULL;
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               goto return_results;
+       }
+
+       /* id2entry index */
+       rs->sr_err = mdb_id2entry_update( op->o_bd, lt2, &dummy );
+       if ( rs->sr_err != 0 ) {
+               Debug(LDAP_DEBUG_TRACE,
+                       "<=- " LDAP_XSTRING(mdb_modrdn)
+                       ": id2entry failed: %s (%d)\n",
+                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+               switch( rs->sr_err ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "entry update failed";
+               goto return_results;
+       }
+
+       if ( p_ndn.bv_len != 0 ) {
+               parent_is_glue = is_entry_glue(p);
+               rs->sr_err = mdb_cache_children( op, lt2, p );
+               if ( rs->sr_err != DB_NOTFOUND ) {
+                       switch( rs->sr_err ) {
+                       case DB_LOCK_DEADLOCK:
+                       case DB_LOCK_NOTGRANTED:
+                               goto retry;
+                       case 0:
+                               break;
+                       default:
+                               Debug(LDAP_DEBUG_ARGS,
+                                       "<=- " LDAP_XSTRING(mdb_modrdn)
+                                       ": has_children failed: %s (%d)\n",
+                                       db_strerror(rs->sr_err), rs->sr_err, 0 );
+                               rs->sr_err = LDAP_OTHER;
+                               rs->sr_text = "internal error";
+                               goto return_results;
+                       }
+                       parent_is_leaf = 1;
+               }
+               mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+               p = NULL;
+       }
+
+       if ( TXN_COMMIT( lt2, 0 ) != 0 ) {
+               rs->sr_err = LDAP_OTHER;
+               rs->sr_text = "txn_commit(2) failed";
+               goto return_results;
+       }
+
+       if( op->o_postread ) {
+               if( postread_ctrl == NULL ) {
+                       postread_ctrl = &ctrls[num_ctrls++];
+                       ctrls[num_ctrls] = NULL;
+               }
+               if( slap_read_controls( op, rs, &dummy,
+                       &slap_post_read_bv, postread_ctrl ) )
+               {
+                       Debug( LDAP_DEBUG_TRACE,        
+                               "<=- " LDAP_XSTRING(mdb_modrdn)
+                               ": post-read failed!\n", 0, 0, 0 );
+                       if ( op->o_postread & SLAP_CONTROL_CRITICAL ) {
+                               /* FIXME: is it correct to abort
+                                * operation if control fails? */
+                               goto return_results;
+                       }
+               }                   
+       }
+
+       if( op->o_noop ) {
+               if(( rs->sr_err=TXN_ABORT( ltid )) != 0 ) {
+                       rs->sr_text = "txn_abort (no-op) failed";
+               } else {
+                       rs->sr_err = LDAP_X_NO_OPERATION;
+                       ltid = NULL;
+                       /* Only free attrs if they were dup'd.  */
+                       if ( dummy.e_attrs == e->e_attrs ) dummy.e_attrs = NULL;
+                       goto return_results;
+               }
+
+       } else {
+               rc = mdb_cache_modrdn( mdb, e, &op->orr_nnewrdn, &dummy, neip,
+                       ltid, &lock );
+               switch( rc ) {
+               case DB_LOCK_DEADLOCK:
+               case DB_LOCK_NOTGRANTED:
+                       goto retry;
+               }
+               dummy.e_attrs = NULL;
+               new_dn.bv_val = NULL;
+               new_ndn.bv_val = NULL;
+
+               if(( rs->sr_err=TXN_COMMIT( ltid, 0 )) != 0 ) {
+                       rs->sr_text = "txn_commit failed";
+               } else {
+                       rs->sr_err = LDAP_SUCCESS;
+               }
+       }
+       ltid = NULL;
+       LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+       opinfo.boi_oe.oe_key = NULL;
+       if( rs->sr_err != LDAP_SUCCESS ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_modrdn) ": %s : %s (%d)\n",
+                       rs->sr_text, db_strerror(rs->sr_err), rs->sr_err );
+               rs->sr_err = LDAP_OTHER;
+
+               goto return_results;
+       }
+
+       Debug(LDAP_DEBUG_TRACE,
+               LDAP_XSTRING(mdb_modrdn)
+               ": rdn modified%s id=%08lx dn=\"%s\"\n",
+               op->o_noop ? " (no-op)" : "",
+               dummy.e_id, op->o_req_dn.bv_val );
+       rs->sr_text = NULL;
+       if( num_ctrls ) rs->sr_ctrls = ctrls;
+
+return_results:
+       if ( dummy.e_attrs ) {
+               attrs_free( dummy.e_attrs );
+       }
+       send_ldap_result( op, rs );
+
+       if( rs->sr_err == LDAP_SUCCESS && mdb->bi_txn_cp_kbyte ) {
+               TXN_CHECKPOINT( mdb->bi_dbenv,
+                       mdb->bi_txn_cp_kbyte, mdb->bi_txn_cp_min, 0 );
+       }
+       
+       if ( rs->sr_err == LDAP_SUCCESS && parent_is_glue && parent_is_leaf ) {
+               op->o_delete_glue_parent = 1;
+       }
+
+done:
+       slap_graduate_commit_csn( op );
+
+       if( new_dn.bv_val != NULL ) free( new_dn.bv_val );
+       if( new_ndn.bv_val != NULL ) free( new_ndn.bv_val );
+
+       /* LDAP v3 Support */
+       if( np != NULL ) {
+               /* free new parent and reader lock */
+               mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, np);
+       }
+
+       if( p != NULL ) {
+               /* free parent and reader lock */
+               mdb_unlocked_cache_return_entry_r(&mdb->bi_cache, p);
+       }
+
+       /* free entry */
+       if( e != NULL ) {
+               mdb_unlocked_cache_return_entry_w( &mdb->bi_cache, e);
+       }
+
+       if( ltid != NULL ) {
+               TXN_ABORT( ltid );
+       }
+       if ( opinfo.boi_oe.oe_key ) {
+               LDAP_SLIST_REMOVE( &op->o_extra, &opinfo.boi_oe, OpExtra, oe_next );
+       }
+
+       if( preread_ctrl != NULL && (*preread_ctrl) != NULL ) {
+               slap_sl_free( (*preread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+               slap_sl_free( *preread_ctrl, op->o_tmpmemctx );
+       }
+       if( postread_ctrl != NULL && (*postread_ctrl) != NULL ) {
+               slap_sl_free( (*postread_ctrl)->ldctl_value.bv_val, op->o_tmpmemctx );
+               slap_sl_free( *postread_ctrl, op->o_tmpmemctx );
+       }
+       return rs->sr_err;
+}
diff --git a/servers/slapd/back-mdb/monitor.c b/servers/slapd/back-mdb/monitor.c
new file mode 100644 (file)
index 0000000..f629b21
--- /dev/null
@@ -0,0 +1,725 @@
+/* monitor.c - monitor mdb backend */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+#include <ac/unistd.h>
+#include <ac/stdlib.h>
+#include <ac/errno.h>
+#include <sys/stat.h>
+#include "lutil.h"
+#include "back-mdb.h"
+
+#include "../back-monitor/back-monitor.h"
+
+#include "config.h"
+
+/* Handle for the olmMDBDatabase object class; its address is listed in
+ * s_oc[] so that schema registration can fill it in. */
+static ObjectClass             *oc_olmMDBDatabase;
+
+/* Handles for the monitor attribute types; their addresses are listed in
+ * s_at[] so that schema registration can fill them in. */
+static AttributeDescription    *ad_olmMDBEntryCache,
+       *ad_olmMDBDNCache, *ad_olmMDBIDLCache,
+       *ad_olmDbDirectory;
+
+#ifdef MDB_MONITOR_IDX
+/* Forward declaration: used by the update callback before its definition. */
+static int
+mdb_monitor_idx_entry_add(
+       struct mdb_info *mdb,
+       Entry           *e );
+
+/* Handle for the olmMDBNotIndexed attribute type (only with
+ * MDB_MONITOR_IDX). */
+static AttributeDescription    *ad_olmMDBNotIndexed;
+#endif /* MDB_MONITOR_IDX */
+
+/*
+ * NOTE: there is some confusion in the monitor OID arc;
+ * for now, let's consider:
+ * 
+ * Subsystems monitor attributes       1.3.6.1.4.1.4203.666.1.55.0
+ * Databases monitor attributes                1.3.6.1.4.1.4203.666.1.55.0.1
+ * MDB database monitor attributes     1.3.6.1.4.1.4203.666.1.55.0.1.1
+ *
+ * Subsystems monitor objectclasses    1.3.6.1.4.1.4203.666.3.16.0
+ * Databases monitor objectclasses     1.3.6.1.4.1.4203.666.3.16.0.1
+ * MDB database monitor objectclasses  1.3.6.1.4.1.4203.666.3.16.0.1.1
+ */
+
+/*
+ * OID macros registered by mdb_monitor_initialize(): each { name, oid }
+ * pair is handed to parse_oidm() as argv[1] and argv[2].
+ * The table ends at the entry whose name is NULL.
+ */
+static struct {
+       char                    *name;  /* macro name */
+       char                    *oid;   /* value the macro stands for */
+}              s_oid[] = {
+       { "olmMDBAttributes",                   "olmDatabaseAttributes:1" },
+       { "olmMDBObjectClasses",                "olmDatabaseObjectClasses:1" },
+
+       { NULL }
+};
+
+/*
+ * Attribute types registered by mdb_monitor_initialize(): each desc is
+ * passed to register_at() together with the matching ad pointer.
+ * The same table drives the attribute removal in mdb_monitor_free().
+ * The table ends at the entry whose desc is NULL.
+ */
+static struct {
+       char                    *desc;  /* schema definition string */
+       AttributeDescription    **ad;   /* file-scope handle paired with desc */
+}              s_at[] = {
+       { "( olmMDBAttributes:1 "
+               "NAME ( 'olmMDBEntryCache' ) "
+               "DESC 'Number of items in Entry Cache' "
+               "SUP monitorCounter "
+               "NO-USER-MODIFICATION "
+               "USAGE dSAOperation )",
+               &ad_olmMDBEntryCache },
+
+       { "( olmMDBAttributes:2 "
+               "NAME ( 'olmMDBDNCache' ) "
+               "DESC 'Number of items in DN Cache' "
+               "SUP monitorCounter "
+               "NO-USER-MODIFICATION "
+               "USAGE dSAOperation )",
+               &ad_olmMDBDNCache },
+
+       { "( olmMDBAttributes:3 "
+               "NAME ( 'olmMDBIDLCache' ) "
+               "DESC 'Number of items in IDL Cache' "
+               "SUP monitorCounter "
+               "NO-USER-MODIFICATION "
+               "USAGE dSAOperation )",
+               &ad_olmMDBIDLCache },
+
+       { "( olmMDBAttributes:4 "
+               "NAME ( 'olmDbDirectory' ) "
+               "DESC 'Path name of the directory "
+                       "where the database environment resides' "
+               "SUP monitoredInfo "
+               "NO-USER-MODIFICATION "
+               "USAGE dSAOperation )",
+               &ad_olmDbDirectory },
+
+#ifdef MDB_MONITOR_IDX
+       { "( olmMDBAttributes:5 "
+               "NAME ( 'olmMDBNotIndexed' ) "
+               "DESC 'Missing indexes resulting from candidate selection' "
+               "SUP monitoredInfo "
+               "NO-USER-MODIFICATION "
+               "USAGE dSAOperation )",
+               &ad_olmMDBNotIndexed },
+#endif /* MDB_MONITOR_IDX */
+
+       { NULL }
+};
+
+/*
+ * Object classes registered by mdb_monitor_initialize(): each desc is
+ * passed to register_oc() together with the matching oc pointer.
+ * The table ends at the entry whose desc is NULL.
+ */
+static struct {
+       char            *desc;  /* schema definition string */
+       ObjectClass     **oc;   /* file-scope handle paired with desc */
+}              s_oc[] = {
+       /* augments an existing object, so it must be AUXILIARY
+        * FIXME: derive from some ABSTRACT "monitoredEntity"? */
+       { "( olmMDBObjectClasses:1 "
+               "NAME ( 'olmMDBDatabase' ) "
+               "SUP top AUXILIARY "
+               "MAY ( "
+                       "olmMDBEntryCache "
+                       "$ olmMDBDNCache "
+                       "$ olmMDBIDLCache "
+                       "$ olmDbDirectory "
+#ifdef MDB_MONITOR_IDX
+                       "$ olmMDBNotIndexed "
+#endif /* MDB_MONITOR_IDX */
+                       ") )",
+               &oc_olmMDBDatabase },
+
+       { NULL }
+};
+
+static int
+mdb_monitor_update(
+       Operation       *op,
+       SlapReply       *rs,
+       Entry           *e,
+       void            *priv )
+{
+       struct mdb_info         *mdb = (struct mdb_info *) priv;
+       Attribute               *a;
+
+       char                    buf[ BUFSIZ ];
+       struct berval           bv;
+
+       assert( ad_olmMDBEntryCache != NULL );
+
+       a = attr_find( e->e_attrs, ad_olmMDBEntryCache );
+       assert( a != NULL );
+       bv.bv_val = buf;
+       bv.bv_len = snprintf( buf, sizeof( buf ), "%lu", mdb->bi_cache.c_cursize );
+       ber_bvreplace( &a->a_vals[ 0 ], &bv );
+
+       a = attr_find( e->e_attrs, ad_olmMDBDNCache );
+       assert( a != NULL );
+       bv.bv_len = snprintf( buf, sizeof( buf ), "%lu", mdb->bi_cache.c_eiused );
+       ber_bvreplace( &a->a_vals[ 0 ], &bv );
+
+       a = attr_find( e->e_attrs, ad_olmMDBIDLCache );
+       assert( a != NULL );
+       bv.bv_len = snprintf( buf, sizeof( buf ), "%lu", mdb->bi_idl_cache_size );
+       ber_bvreplace( &a->a_vals[ 0 ], &bv );
+       
+#ifdef MDB_MONITOR_IDX
+       mdb_monitor_idx_entry_add( mdb, e );
+#endif /* MDB_MONITOR_IDX */
+
+       return SLAP_CB_CONTINUE;
+}
+
+#if 0  /* uncomment if required */
+/*
+ * Placeholder "modify" callback, compiled out by the surrounding #if 0.
+ * As written it touches nothing and returns SLAP_CB_CONTINUE.
+ */
+static int
+mdb_monitor_modify(
+       Operation       *op,
+       SlapReply       *rs,
+       Entry           *e,
+       void            *priv )
+{
+       return SLAP_CB_CONTINUE;
+}
+#endif
+
+/*
+ * Monitor "free" callback: take back what back-mdb put on the monitor
+ * entry, i.e. the olmMDBDatabase objectClass value and each attribute
+ * listed in s_at[].
+ *
+ * e    - monitor entry to clean up
+ * priv - private-data slot of the callback; it is set to NULL and the
+ *        old pointer is never dereferenced
+ *
+ * Results of the individual deletions are deliberately ignored.
+ * Always returns SLAP_CB_CONTINUE.
+ */
+static int
+mdb_monitor_free(
+       Entry           *e,
+       void            **priv )
+{
+       Modification    mod = { 0 };
+       struct berval   oc_vals[ 2 ];
+       char            msgbuf[ SLAP_TEXT_BUFLEN ];
+       const char      *msg;
+       int             idx;
+
+       /* NOTE: if slap_shutdown != 0, priv might have already been freed */
+       *priv = NULL;
+
+       /* Remove objectClass: a one-element value list plus terminator */
+       oc_vals[ 0 ] = oc_olmMDBDatabase->soc_cname;
+       BER_BVZERO( &oc_vals[ 1 ] );
+
+       mod.sm_op = LDAP_MOD_DELETE;
+       mod.sm_desc = slap_schema.si_ad_objectClass;
+       mod.sm_values = oc_vals;
+       mod.sm_numvals = 1;
+
+       /* don't care too much about return code... */
+       (void) modify_delete_values( e, &mod, 1, &msg,
+               msgbuf, sizeof( msgbuf ) );
+
+       /* remove attrs (these deletions carry no value list) */
+       mod.sm_values = NULL;
+       mod.sm_numvals = 0;
+
+       idx = 0;
+       while ( s_at[ idx ].desc != NULL ) {
+               mod.sm_desc = *s_at[ idx ].ad;
+
+               /* don't care too much about return code... */
+               (void) modify_delete_values( e, &mod, 1, &msg,
+                       msgbuf, sizeof( msgbuf ) );
+               idx++;
+       }
+
+       return SLAP_CB_CONTINUE;
+}
+
+#define        mdb_monitor_initialize  MDB_SYMBOL(monitor_initialize)
+
+/*
+ * Register the back-mdb monitor schema: the OID macros in s_oid[], the
+ * attribute types in s_at[] and the object class in s_oc[].  Registered
+ * attribute types and object classes are flagged as hidden.
+ *
+ * Called from mdb_monitor_db_init().  Only the first call does any work;
+ * every later call returns the remembered outcome, which is LDAP_SUCCESS
+ * if the first call succeeded and 1 otherwise.
+ *
+ * Return value of the first call:
+ *   LDAP_SUCCESS  everything was registered
+ *   -1            the "monitor" backend is not available
+ *   2             an OID macro could not be added
+ *   3             an attribute type could not be registered
+ *   4             an object class could not be registered
+ */
+static int
+mdb_monitor_initialize( void )
+{
+       int             i, code;
+       /* Zero the whole struct: only argv, argc, fname and lineno are
+        * assigned below, yet the struct is handed to parse_oidm() as a
+        * whole, so no other field may be left indeterminate. */
+       ConfigArgs      c = { 0 };
+       char            *argv[ 3 ];
+
+       static int      mdb_monitor_initialized = 0;
+
+       /* set to 0 when successfully initialized; otherwise, remember failure */
+       static int      mdb_monitor_initialized_failure = 1;
+
+       if ( mdb_monitor_initialized++ ) {
+               return mdb_monitor_initialized_failure;
+       }
+
+       if ( backend_info( "monitor" ) == NULL ) {
+               return -1;
+       }
+
+       /* register schema here */
+
+       /* pseudo "file name" under which these definitions are parsed */
+       argv[ 0 ] = "back-mdb monitor";
+       c.argv = argv;
+       c.argc = 3;
+       c.fname = argv[0];
+
+       for ( i = 0; s_oid[ i ].name; i++ ) {
+               /* the table index stands in for a line number */
+               c.lineno = i;
+               argv[ 1 ] = s_oid[ i ].name;
+               argv[ 2 ] = s_oid[ i ].oid;
+
+               if ( parse_oidm( &c, 0, NULL ) != 0 ) {
+                       Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_monitor_initialize)
+                               ": unable to add "
+                               "objectIdentifier \"%s=%s\"\n",
+                               s_oid[ i ].name, s_oid[ i ].oid, 0 );
+                       return 2;
+               }
+       }
+
+       for ( i = 0; s_at[ i ].desc != NULL; i++ ) {
+               code = register_at( s_at[ i ].desc, s_at[ i ].ad, 1 );
+               if ( code != LDAP_SUCCESS ) {
+                       Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_monitor_initialize)
+                               ": register_at failed for attributeType (%s)\n",
+                               s_at[ i ].desc, 0, 0 );
+                       return 3;
+
+               } else {
+                       (*s_at[ i ].ad)->ad_type->sat_flags |= SLAP_AT_HIDE;
+               }
+       }
+
+       for ( i = 0; s_oc[ i ].desc != NULL; i++ ) {
+               code = register_oc( s_oc[ i ].desc, s_oc[ i ].oc, 1 );
+               if ( code != LDAP_SUCCESS ) {
+                       Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_monitor_initialize)
+                               ": register_oc failed for objectClass (%s)\n",
+                               s_oc[ i ].desc, 0, 0 );
+                       return 4;
+
+               } else {
+                       (*s_oc[ i ].oc)->soc_flags |= SLAP_OC_HIDE;
+               }
+       }
+
+       /* remember success so that later calls report it as well */
+       return ( mdb_monitor_initialized_failure = LDAP_SUCCESS );
+}
+
+/*
+ * call from within mdb_db_init()
+ */
+int
+mdb_monitor_db_init( BackendDB *be )
+{
+       struct mdb_info         *mdb = (struct mdb_info *) be->be_private;
+
+       if ( mdb_monitor_initialize() == LDAP_SUCCESS ) {
+               /* monitoring in back-mdb is on by default */
+               SLAP_DBFLAGS( be ) |= SLAP_DBFLAG_MONITORING;
+       }
+
+#ifdef MDB_MONITOR_IDX
+       mdb->bi_idx = NULL;
+       ldap_pvt_thread_mutex_init( &mdb->bi_idx_mutex );
+#endif /* MDB_MONITOR_IDX */
+
+       return 0;
+}
+
+/*
+ * call from within mdb_db_open()
+ */
+int
+mdb_monitor_db_open( BackendDB *be )
+{
+       struct mdb_info         *mdb = (struct mdb_info *) be->be_private;
+       Attribute               *a, *next;
+       monitor_callback_t      *cb = NULL;
+       int                     rc = 0;
+       BackendInfo             *mi;
+       monitor_extra_t         *mbe;
+       struct berval dummy = BER_BVC("");
+
+       if ( !SLAP_DBMONITORING( be ) ) {
+               return 0;
+       }
+
+       mi = backend_info( "monitor" );
+       if ( !mi || !mi->bi_extra ) {
+               SLAP_DBFLAGS( be ) ^= SLAP_DBFLAG_MONITORING;
+               return 0;
+       }
+       mbe = mi->bi_extra;
+
+       /* don't bother if monitor is not configured */
+       if ( !mbe->is_configured() ) {
+               static int warning = 0;
+
+               if ( warning++ == 0 ) {
+                       Debug( LDAP_DEBUG_ANY, LDAP_XSTRING(mdb_monitor_db_open)
+                               ": monitoring disabled; "
+                               "configure monitor database to enable\n",
+                               0, 0, 0 );
+               }
+
+               return 0;
+       }
+
+       /* alloc as many as required (plus 1 for objectClass) */
+       a = attrs_alloc( 1 + 4 );
+       if ( a == NULL ) {
+               rc = 1;
+               goto cleanup;
+       }
+
+       a->a_desc = slap_schema.si_ad_objectClass;
+       attr_valadd( a, &oc_olmMDBDatabase->soc_cname, NULL, 1 );
+       next = a->a_next;
+
+       {
+               struct berval   bv = BER_BVC( "0" );
+
+               next->a_desc = ad_olmMDBEntryCache;
+               attr_valadd( next, &bv, NULL, 1 );
+               next = next->a_next;
+
+               next->a_desc = ad_olmMDBDNCache;
+               attr_valadd( next, &bv, NULL, 1 );
+               next = next->a_next;
+
+               next->a_desc = ad_olmMDBIDLCache;
+               attr_valadd( next, &bv, NULL, 1 );
+               next = next->a_next;
+       }
+
+       {
+               struct berval   bv, nbv;
+               ber_len_t       pathlen = 0, len = 0;
+               char            path[ MAXPATHLEN ] = { '\0' };
+               char            *fname = mdb->bi_dbenv_home,
+                               *ptr;
+
+               len = strlen( fname );
+               if ( fname[ 0 ] != '/' ) {
+                       /* get full path name */
+                       getcwd( path, sizeof( path ) );
+                       pathlen = strlen( path );
+
+                       if ( fname[ 0 ] == '.' && fname[ 1 ] == '/' ) {
+                               fname += 2;
+                               len -= 2;
+                       }
+               }
+
+               bv.bv_len = pathlen + STRLENOF( "/" ) + len;
+               ptr = bv.bv_val = ch_malloc( bv.bv_len + STRLENOF( "/" ) + 1 );
+               if ( pathlen ) {
+                       ptr = lutil_strncopy( ptr, path, pathlen );
+                       ptr[ 0 ] = '/';
+                       ptr++;
+               }
+               ptr = lutil_strncopy( ptr, fname, len );
+               if ( ptr[ -1 ] != '/' ) {
+                       ptr[ 0 ] = '/';
+                       ptr++;
+               }
+               ptr[ 0 ] = '\0';
+               
+               attr_normalize_one( ad_olmDbDirectory, &bv, &nbv, NULL );
+
+               next->a_desc = ad_olmDbDirectory;
+               next->a_vals = ch_calloc( sizeof( struct berval ), 2 );
+               next->a_vals[ 0 ] = bv;
+               next->a_numvals = 1;
+
+               if ( BER_BVISNULL( &nbv ) ) {
+                       next->a_nvals = next->a_vals;
+
+               } else {
+                       next->a_nvals = ch_calloc( sizeof( struct berval ), 2 );
+                       next->a_nvals[ 0 ] = nbv;
+               }
+
+               next = next->a_next;
+       }
+
+       cb = ch_calloc( sizeof( monitor_callback_t ), 1 );
+       cb->mc_update = mdb_monitor_update;
+#if 0  /* uncomment if required */
+       cb->mc_modify = mdb_monitor_modify;
+#endif
+       cb->mc_free = mdb_monitor_free;
+       cb->mc_private = (void *)mdb;
+
+       /* make sure the database is registered; then add monitor attributes */
+       rc = mbe->register_database( be, &mdb->bi_monitor.bdm_ndn );
+       if ( rc == 0 ) {
+               rc = mbe->register_entry_attrs( &mdb->bi_monitor.bdm_ndn, a, cb,
+                       &dummy, 0, &dummy );
+       }
+
+cleanup:;
+       if ( rc != 0 ) {
+               if ( cb != NULL ) {
+                       ch_free( cb );
+                       cb = NULL;
+               }
+
+               if ( a != NULL ) {
+                       attrs_free( a );
+                       a = NULL;
+               }
+       }
+
+       /* store for cleanup */
+       mdb->bi_monitor.bdm_cb = (void *)cb;
+
+       /* we don't need to keep track of the attributes, because
+        * mdb_monitor_free() takes care of everything */
+       if ( a != NULL ) {
+               attrs_free( a );
+       }
+
+       return rc;
+}
+
+/*
+ * call from within mdb_db_close()
+ *
+ * If a monitor DN was recorded for this database, unregisters the
+ * callback stored in bi_monitor.bdm_cb from back-monitor (when that
+ * backend and its extension table are available) and clears
+ * bi_monitor.  Always returns 0.
+ */
+int
+mdb_monitor_db_close( BackendDB *be )
+{
+       struct mdb_info         *mdb = (struct mdb_info *) be->be_private;
+
+       if ( !BER_BVISNULL( &mdb->bi_monitor.bdm_ndn ) ) {
+               BackendInfo             *mi = backend_info( "monitor" );
+               monitor_extra_t         *mbe;
+
+               /* test the bi_extra pointer itself; the address of the
+                * member is never NULL and would not guard the call below */
+               if ( mi && mi->bi_extra ) {
+                       mbe = mi->bi_extra;
+                       mbe->unregister_entry_callback( &mdb->bi_monitor.bdm_ndn,
+                               (monitor_callback_t *)mdb->bi_monitor.bdm_cb,
+                               NULL, 0, NULL );
+               }
+
+               memset( &mdb->bi_monitor, 0, sizeof( mdb->bi_monitor ) );
+       }
+
+       return 0;
+}
+
+/*
+ * call from within mdb_db_destroy()
+ *
+ * With MDB_MONITOR_IDX, releases the index statistics tree (nodes are
+ * handed to ch_free) and destroys its mutex; otherwise does nothing.
+ * Always returns 0.
+ */
+int
+mdb_monitor_db_destroy( BackendDB *be )
+{
+#ifdef MDB_MONITOR_IDX
+       struct mdb_info         *mdb = (struct mdb_info *) be->be_private;
+
+       avl_free( mdb->bi_idx, ch_free );
+       ldap_pvt_thread_mutex_destroy( &mdb->bi_idx_mutex );
+#endif /* MDB_MONITOR_IDX */
+
+       return 0;
+}
+
+#ifdef MDB_MONITOR_IDX
+
+/* number of counters kept per attribute; matches the labels in idxbv[] */
+#define MDB_MONITOR_IDX_TYPES  (4)
+
+typedef struct monitor_idx_t monitor_idx_t;
+
+/*
+ * Node stored in the mdb->bi_idx AVL tree.
+ * idx_ad is the tree key (compared by pointer, see monitor_idx_cmp());
+ * idx_count[] holds one counter per index type and is incremented by
+ * mdb_monitor_idx_add().
+ */
+struct monitor_idx_t {
+       AttributeDescription    *idx_ad;
+       unsigned long           idx_count[MDB_MONITOR_IDX_TYPES];
+};
+
+/*
+ * Returns the position (0-based, counted from the least significant
+ * bit) of the lowest bit set in bitmask, or the width of slap_mask_t
+ * in bits when no bit is set.
+ */
+static int
+mdb_monitor_bitmask2key( slap_mask_t bitmask )
+{
+       const int       width = 8 * (int)sizeof(slap_mask_t);
+       int             pos = 0;
+
+       while ( pos < width && ( bitmask & 0x1U ) == 0 ) {
+               bitmask >>= 1;
+               pos++;
+       }
+
+       return pos;
+}
+
+/*
+ * Labels written in front of each counter by mdb_monitor_idx_apply().
+ * Entry i labels monitor_idx_t.idx_count[ i ], so the order here must
+ * stay in step with the slot numbering used by mdb_monitor_idx_add().
+ * The BER_BVNULL entry terminates the list.
+ */
+static struct berval idxbv[] = {
+       BER_BVC( "present=" ),
+       BER_BVC( "equality=" ),
+       BER_BVC( "approx=" ),
+       BER_BVC( "substr=" ),
+       BER_BVNULL
+};
+
+/*
+ * Sums the lengths of the idxbv[] labels whose counter in idx is
+ * non-zero, i.e. the label bytes needed to print this node.
+ */
+static ber_len_t
+mdb_monitor_idx2len( monitor_idx_t *idx )
+{
+       ber_len_t       total = 0;
+       int             slot = MDB_MONITOR_IDX_TYPES;
+
+       while ( slot-- > 0 ) {
+               if ( idx->idx_count[ slot ] == 0 ) {
+                       continue;
+               }
+               total += idxbv[ slot ].bv_len;
+       }
+
+       return total;
+}
+
+/*
+ * AVL ordering callback: orders two monitor_idx_t nodes by the pointer
+ * value of their attribute description.
+ */
+static int
+monitor_idx_cmp( const void *p1, const void *p2 )
+{
+       const monitor_idx_t     *lhs = p1;
+       const monitor_idx_t     *rhs = p2;
+
+       return SLAP_PTRCMP( lhs->idx_ad, rhs->idx_ad );
+}
+
+/*
+ * AVL duplicate callback: returns -1 when both nodes refer to the same
+ * attribute description, 0 otherwise.
+ */
+static int
+monitor_idx_dup( void *p1, void *p2 )
+{
+       monitor_idx_t   *lhs = p1;
+       monitor_idx_t   *rhs = p2;
+
+       if ( SLAP_PTRCMP( lhs->idx_ad, rhs->idx_ad ) == 0 ) {
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Counts one occurrence of index type `type` for attribute `desc`.
+ *
+ * The lowest bit set in `type` selects the counter: bit 1 maps to
+ * slot 0, bit 2 to slot 1, and so on.  A mask whose lowest set bit is
+ * bit 0, or one that maps beyond the last slot, is rejected.  The node
+ * for `desc` is created on first use; all tree access happens under
+ * mdb->bi_idx_mutex.
+ *
+ * Returns 0 on success, -1 for an invalid index type or when the new
+ * node could not be inserted.
+ */
+int
+mdb_monitor_idx_add(
+       struct mdb_info         *mdb,
+       AttributeDescription    *desc,
+       slap_mask_t             type )
+{
+       monitor_idx_t           idx_dummy = { 0 },
+                               *idx;
+       int                     rc = 0, key;
+
+       idx_dummy.idx_ad = desc;
+       key = mdb_monitor_bitmask2key( type ) - 1;
+       /* key is -1 when bit 0 is the lowest bit set; without the lower
+        * bound check that would index in front of idx_count[] */
+       if ( key < 0 || key >= MDB_MONITOR_IDX_TYPES ) {
+               /* invalid index type */
+               return -1;
+       }
+
+       ldap_pvt_thread_mutex_lock( &mdb->bi_idx_mutex );
+
+       idx = (monitor_idx_t *)avl_find( mdb->bi_idx,
+               (caddr_t)&idx_dummy, monitor_idx_cmp );
+       if ( idx == NULL ) {
+               /* first hit for this attribute: create its node */
+               idx = (monitor_idx_t *)ch_calloc( sizeof( monitor_idx_t ), 1 );
+               idx->idx_ad = desc;
+               idx->idx_count[ key ] = 1;
+
+               switch ( avl_insert( &mdb->bi_idx, (caddr_t)idx,
+                       monitor_idx_cmp, monitor_idx_dup ) )
+               {
+               case 0:
+                       break;
+
+               default:
+                       /* not inserted, so the node is still ours to free */
+                       ch_free( idx );
+                       rc = -1;
+               }
+
+       } else {
+               idx->idx_count[ key ]++;
+       }
+
+       ldap_pvt_thread_mutex_unlock( &mdb->bi_idx_mutex );
+
+       return rc;
+}
+
+/*
+ * avl_apply() callback: renders one monitor_idx_t node as
+ * "<attr>#<label><count>..." (one "#<label><count>" group per non-zero
+ * counter) and appends the string to the BerVarray that v_valp points
+ * to.  Always returns 0.
+ */
+static int
+mdb_monitor_idx_apply( void *v_idx, void *v_valp )
+{
+       monitor_idx_t   *idx = (monitor_idx_t *)v_idx;
+       BerVarray       *valp = (BerVarray *)v_valp;
+
+       char            digits[ MDB_MONITOR_IDX_TYPES ][ SLAP_TEXT_BUFLEN ];
+       struct berval   val;
+       char            *out;
+       int             slot, used = 0;
+
+       /* attribute name plus the labels of the non-zero counters */
+       val.bv_len = idx->idx_ad->ad_cname.bv_len + mdb_monitor_idx2len( idx );
+
+       /* format every non-zero counter once and account for its digits */
+       for ( slot = 0; slot < MDB_MONITOR_IDX_TYPES; slot++ ) {
+               if ( idx->idx_count[ slot ] != 0 ) {
+                       val.bv_len += snprintf( digits[ slot ],
+                               sizeof( digits[ slot ] ), "%lu",
+                               idx->idx_count[ slot ] );
+                       used++;
+               }
+       }
+
+       /* one '#' separator in front of each group */
+       val.bv_len += used;
+
+       out = val.bv_val = ch_malloc( val.bv_len + 1 );
+       out = lutil_strcopy( out, idx->idx_ad->ad_cname.bv_val );
+       for ( slot = 0; slot < MDB_MONITOR_IDX_TYPES; slot++ ) {
+               if ( idx->idx_count[ slot ] != 0 ) {
+                       *out++ = '#';
+                       out = lutil_strcopy( out, idxbv[ slot ].bv_val );
+                       out = lutil_strcopy( out, digits[ slot ] );
+               }
+       }
+
+       ber_bvarray_add( valp, &val );
+
+       return 0;
+}
+
+/*
+ * Rebuilds the ad_olmMDBNotIndexed values of entry e from the index
+ * statistics tree.  The tree is walked under mdb->bi_idx_mutex; when it
+ * yields at least one value, the values replace those of the existing
+ * attribute, or a new attribute is appended to the entry.  Always
+ * returns 0.
+ */
+static int
+mdb_monitor_idx_entry_add(
+       struct mdb_info *mdb,
+       Entry           *e )
+{
+       BerVarray       collected = NULL;
+       Attribute       *attr = attr_find( e->e_attrs, ad_olmMDBNotIndexed );
+
+       ldap_pvt_thread_mutex_lock( &mdb->bi_idx_mutex );
+       avl_apply( mdb->bi_idx, mdb_monitor_idx_apply,
+               &collected, -1, AVL_INORDER );
+       ldap_pvt_thread_mutex_unlock( &mdb->bi_idx_mutex );
+
+       /* nothing gathered: leave the entry untouched */
+       if ( collected == NULL ) {
+               return 0;
+       }
+
+       if ( attr == NULL ) {
+               /* append a fresh attribute at the end of the list */
+               Attribute       **tail = &e->e_attrs;
+
+               while ( *tail != NULL ) {
+                       tail = &(*tail)->a_next;
+               }
+               *tail = attr_alloc( ad_olmMDBNotIndexed );
+               attr = *tail;
+
+       } else {
+               assert( attr->a_nvals == attr->a_vals );
+
+               ber_bvarray_free( attr->a_vals );
+       }
+
+       attr->a_vals = collected;
+       attr->a_nvals = attr->a_vals;
+
+       return 0;
+}
+
+#endif /* MDB_MONITOR_IDX */
diff --git a/servers/slapd/back-mdb/nextid.c b/servers/slapd/back-mdb/nextid.c
new file mode 100644 (file)
index 0000000..9e95819
--- /dev/null
@@ -0,0 +1,80 @@
+/* nextid.c - keep track of the next id to be given out */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+
+/*
+ * Hands out the next entry ID: increments bi_lastid under
+ * bi_lastid_mutex and stores the new value in *out.  Always returns 0.
+ */
+int mdb_next_id( BackendDB *be, ID *out )
+{
+       struct mdb_info *info = (struct mdb_info *) be->be_private;
+
+       ldap_pvt_thread_mutex_lock( &info->bi_lastid_mutex );
+       info->bi_lastid += 1;
+       *out = info->bi_lastid;
+       ldap_pvt_thread_mutex_unlock( &info->bi_lastid_mutex );
+
+       return 0;
+}
+
+/*
+ * Seeds bi_lastid from the id2entry database: positions a cursor on the
+ * last record and decodes its key.  An empty database (DB_NOTFOUND)
+ * yields ID 0 and success.  Any other failure is logged and returned
+ * without touching bi_lastid.
+ */
+int mdb_last_id( BackendDB *be, DB_TXN *tid )
+{
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       unsigned char idbuf[sizeof(ID)];
+       DBT key, data;
+       DBC *cursor;
+       ID lastid = 0;
+       int rc;
+
+       /* only the key is wanted; the data is fetched as a partial record */
+       DBTzero( &data );
+       data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+
+       DBTzero( &key );
+       key.flags = DB_DBT_USERMEM;
+       key.data = (char *) idbuf;
+       key.ulen = sizeof( idbuf );
+
+       /* Get a read cursor */
+       rc = mdb->bi_id2entry->bdi_db->cursor( mdb->bi_id2entry->bdi_db,
+               tid, &cursor, 0 );
+
+       if (rc == 0) {
+               rc = cursor->c_get(cursor, &key, &data, DB_LAST);
+               cursor->c_close(cursor);
+       }
+
+       if ( rc == DB_NOTFOUND ) {
+               rc = 0;
+
+       } else if ( rc == 0 ) {
+               MDB_DISK2ID( idbuf, &lastid );
+
+       } else {
+               Debug( LDAP_DEBUG_ANY,
+                       "=> mdb_last_id: get failed: %s (%d)\n",
+                       db_strerror(rc), rc, 0 );
+               return rc;
+       }
+
+       mdb->bi_lastid = lastid;
+
+       return rc;
+}
diff --git a/servers/slapd/back-mdb/operational.c b/servers/slapd/back-mdb/operational.c
new file mode 100644 (file)
index 0000000..8985959
--- /dev/null
@@ -0,0 +1,151 @@
+/* operational.c - mdb backend operational attributes function */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+
+#include <ac/string.h>
+#include <ac/socket.h>
+
+#include "slap.h"
+#include "back-mdb.h"
+
+/*
+ * sets *hasSubordinates to LDAP_COMPARE_TRUE/LDAP_COMPARE_FALSE
+ * if the entry has children or not.
+ *
+ * Returns LDAP_SUCCESS (0) when *hasSubordinates was set, LDAP_OTHER
+ * when the entry's private info or a transaction could not be obtained
+ * or the child lookup failed.
+ */
+int
+mdb_hasSubordinates(
+       Operation       *op,
+       Entry           *e,
+       int             *hasSubordinates )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       struct mdb_op_info      *opinfo;
+       OpExtra *oex;
+       DB_TXN          *rtxn;
+       int             rc;
+       int             release = 0;    /* set once e was re-fetched here */
+       
+       assert( e != NULL );
+
+       /* NOTE: this should never happen, but it actually happens
+        * when using back-relay; until we find a better way to
+        * preserve entry's private information while rewriting it,
+        * re-fetch the entry by its normalized DN and give up with
+        * LDAP_OTHER if it still carries no private information.
+        */
+       if ( BEI( e ) == NULL ) {
+               Entry *ee = NULL;
+               rc = be_entry_get_rw( op, &e->e_nname, NULL, NULL, 0, &ee );
+               if ( rc != LDAP_SUCCESS || ee == NULL ) {
+                       rc = LDAP_OTHER;
+                       goto done;
+               }
+               /* from here on e is our own copy and must be released */
+               e = ee;
+               release = 1;
+               if ( BEI( ee ) == NULL ) {
+                       rc = LDAP_OTHER;
+                       goto done;
+               }
+       }
+
+       /* Check for a txn in a parent op, otherwise use reader txn */
+       LDAP_SLIST_FOREACH( oex, &op->o_extra, oe_next ) {
+               if ( oex->oe_key == mdb )
+                       break;
+       }
+       /* oex is NULL here when no extra with our key was found */
+       opinfo = (struct mdb_op_info *) oex;
+       if ( opinfo && opinfo->boi_txn ) {
+               rtxn = opinfo->boi_txn;
+       } else {
+               rc = mdb_reader_get(op, mdb->bi_dbenv, &rtxn);
+               if ( rc ) {
+                       rc = LDAP_OTHER;
+                       goto done;
+               }
+       }
+
+retry:
+       /* FIXME: we can no longer assume the entry's e_private
+        * field is correctly populated; so we need to reacquire
+        * it with reader lock */
+       rc = mdb_cache_children( op, rtxn, e );
+
+       switch( rc ) {
+       /* lock conflicts are retried immediately; NOTE(review): there is
+        * no bound on the number of retries */
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               goto retry;
+
+       case 0:
+               *hasSubordinates = LDAP_COMPARE_TRUE;
+               break;
+
+       case DB_NOTFOUND:
+               *hasSubordinates = LDAP_COMPARE_FALSE;
+               rc = LDAP_SUCCESS;
+               break;
+
+       default:
+               Debug(LDAP_DEBUG_ARGS, 
+                       "<=- " LDAP_XSTRING(mdb_hasSubordinates)
+                       ": has_children failed: %s (%d)\n", 
+                       db_strerror(rc), rc, 0 );
+               rc = LDAP_OTHER;
+       }
+
+done:;
+       /* only release the entry if it was fetched in this function */
+       if ( release && e != NULL ) be_entry_release_r( op, e );
+       return rc;
+}
+
+/*
+ * sets the supported operational attributes (if required)
+ *
+ * Appends hasSubordinates to rs->sr_operational_attrs when it is not
+ * already in that list or in the entry itself, and the client asked for
+ * it (all operational attributes, or listed explicitly).  A failed
+ * lookup is silently skipped.  Always returns LDAP_SUCCESS.
+ */
+int
+mdb_operational(
+       Operation       *op,
+       SlapReply       *rs )
+{
+       Attribute       **tail;
+       int             has_children;
+
+       assert( rs->sr_entry != NULL );
+
+       /* walk to the end of the list; stop early if already present */
+       tail = &rs->sr_operational_attrs;
+       while ( *tail != NULL ) {
+               if ( (*tail)->a_desc == slap_schema.si_ad_hasSubordinates ) {
+                       return LDAP_SUCCESS;
+               }
+               tail = &(*tail)->a_next;
+       }
+
+       /* the entry already carries the attribute */
+       if ( attr_find( rs->sr_entry->e_attrs,
+               slap_schema.si_ad_hasSubordinates ) != NULL )
+       {
+               return LDAP_SUCCESS;
+       }
+
+       /* not requested */
+       if ( !SLAP_OPATTRS( rs->sr_attr_flags ) &&
+               !ad_inlist( slap_schema.si_ad_hasSubordinates, rs->sr_attrs ) )
+       {
+               return LDAP_SUCCESS;
+       }
+
+       if ( mdb_hasSubordinates( op, rs->sr_entry, &has_children )
+               == LDAP_SUCCESS )
+       {
+               *tail = slap_operational_hasSubordinate(
+                       has_children == LDAP_COMPARE_TRUE );
+               assert( *tail != NULL );
+       }
+
+       return LDAP_SUCCESS;
+}
+
diff --git a/servers/slapd/back-mdb/proto-mdb.h b/servers/slapd/back-mdb/proto-mdb.h
new file mode 100644 (file)
index 0000000..f117ff8
--- /dev/null
@@ -0,0 +1,350 @@
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#ifndef _PROTO_MDB_H
+#define _PROTO_MDB_H
+
+LDAP_BEGIN_DECL
+
+#define MDB_UCTYPE     "MDB"
+
+/*
+ * attr.c
+ */
+
+AttrInfo *mdb_attr_mask( struct mdb_info *mdb,
+       AttributeDescription *desc );
+
+void mdb_attr_flush( struct mdb_info *mdb );
+
+int mdb_attr_slot( struct mdb_info *mdb,
+       AttributeDescription *desc, int *insert );
+
+int mdb_attr_index_config LDAP_P(( struct mdb_info *mdb,
+       const char *fname, int lineno,
+       int argc, char **argv, struct config_reply_s *cr ));
+
+void mdb_attr_index_unparse LDAP_P(( struct mdb_info *mdb, BerVarray *bva ));
+void mdb_attr_index_destroy LDAP_P(( struct mdb_info *mdb ));
+void mdb_attr_index_free LDAP_P(( struct mdb_info *mdb,
+       AttributeDescription *ad ));
+
+void mdb_attr_info_free( AttrInfo *ai );
+
+/*
+ * config.c
+ */
+
+int mdb_back_init_cf( BackendInfo *bi );
+
+/*
+ * dbcache.c
+ */
+
+int
+mdb_db_cache(
+    Backend    *be,
+    struct berval *name,
+       MDB_dbi *dbi );
+
+/*
+ * dn2entry.c
+ */
+
+int mdb_dn2entry LDAP_P(( Operation *op, MDB_txn *tid,
+       struct berval *dn, Entry *e, int matched ));
+
+/*
+ * dn2id.c
+ */
+
+int mdb_dn2id(
+       Operation *op,
+       MDB_txn *txn,
+       struct berval *dn,
+       ID *id );
+
+int mdb_dn2id_add(
+       Operation *op,
+       MDB_txn *tid,
+       ID pid,
+       Entry *e );
+
+int mdb_dn2id_delete(
+       Operation *op,
+       MDB_txn *tid,
+       ID pid,
+       Entry *e );
+
+int mdb_dn2id_children(
+       Operation *op,
+       MDB_txn *tid,
+       Entry *e );
+
+int mdb_dn2idl(
+       Operation *op,
+       MDB_txn *txn,
+       struct berval *ndn,
+       ID eid,
+       ID *ids,
+       ID *stack );
+
+int mdb_dn2id_parent(
+       Operation *op,
+       MDB_txn *txn,
+       ID eid,
+       ID *idp );
+
+MDB_cmp_func mdb_dup_compare;
+
+/*
+ * filterindex.c
+ */
+
+int mdb_filter_candidates(
+       Operation *op,
+       MDB_txn *txn,
+       Filter  *f,
+       ID *ids,
+       ID *tmp,
+       ID *stack );
+
+/*
+ * id2entry.c
+ */
+
+int mdb_id2entry_add(
+       BackendDB *be,
+       MDB_txn *tid,
+       Entry *e );
+
+int mdb_id2entry_update(
+       BackendDB *be,
+       MDB_txn *tid,
+       Entry *e );
+
+int mdb_id2entry_delete(
+       BackendDB *be,
+       MDB_txn *tid,
+       Entry *e);
+
+int mdb_id2entry(
+       BackendDB *be,
+       MDB_txn *tid,
+       ID id,
+       Entry **e);
+
+void mdb_entry_free ( Entry *e );
+int mdb_entry_return( Entry *e );
+BI_entry_release_rw mdb_entry_release;
+BI_entry_get_rw mdb_entry_get;
+
+
+/*
+ * idl.c
+ */
+
+unsigned mdb_idl_search( ID *ids, ID id );
+
+int mdb_idl_fetch_key(
+       BackendDB       *be,
+       MDB_txn         *txn,
+       MDB_dbi         dbi,
+       MDB_val         *key,
+       ID                      *ids,
+       MDB_cursor      **saved_cursor,
+       int                     get_flag );
+
+int mdb_idl_insert( ID *ids, ID id );
+
+int mdb_idl_insert_key(
+       BackendDB *be,
+       MDB_txn *txn,
+       MDB_dbi dbi,
+       MDB_val *key,
+       ID id );
+
+int mdb_idl_delete_key(
+       BackendDB *be,
+       MDB_txn *txn,
+       MDB_dbi dbi,
+       MDB_val *key,
+       ID id );
+
+int
+mdb_idl_intersection(
+       ID *a,
+       ID *b );
+
+int
+mdb_idl_union(
+       ID *a,
+       ID *b );
+
+ID mdb_idl_first( ID *ids, ID *cursor );
+ID mdb_idl_next( ID *ids, ID *cursor );
+
+void mdb_idl_sort( ID *ids, ID *tmp );
+int mdb_idl_append( ID *a, ID *b );
+int mdb_idl_append_one( ID *ids, ID id );
+
+
+/*
+ * index.c
+ */
+
+extern AttrInfo *
+mdb_index_mask LDAP_P((
+       Backend *be,
+       AttributeDescription *desc,
+       struct berval *name ));
+
+extern int
+mdb_index_param LDAP_P((
+       Backend *be,
+       AttributeDescription *desc,
+       int ftype,
+       MDB_dbi *dbi,
+       slap_mask_t *mask,
+       struct berval *prefix ));
+
+extern int
+mdb_index_values LDAP_P((
+       Operation *op,
+       MDB_txn *txn,
+       AttributeDescription *desc,
+       BerVarray vals,
+       ID id,
+       int opid ));
+
+extern int
+mdb_index_recset LDAP_P((
+       struct mdb_info *mdb,
+       Attribute *a,
+       AttributeType *type,
+       struct berval *tags,
+       IndexRec *ir ));
+
+extern int
+mdb_index_recrun LDAP_P((
+       Operation *op,
+       struct mdb_info *mdb,
+       IndexRec *ir,
+       ID id,
+       int base ));
+
+/*
+ * Index maintenance for a whole entry; the third argument selects the
+ * operation.  The two macros below fix it to SLAP_INDEX_ADD_OP and
+ * SLAP_INDEX_DELETE_OP respectively.
+ */
+int mdb_index_entry LDAP_P(( Operation *op, MDB_txn *t, int r, Entry *e ));
+
+#define mdb_index_entry_add(op,t,e) \
+       mdb_index_entry((op),(t),SLAP_INDEX_ADD_OP,(e))
+#define mdb_index_entry_del(op,t,e) \
+       mdb_index_entry((op),(t),SLAP_INDEX_DELETE_OP,(e))
+
+/*
+ * key.c
+ */
+
+extern int
+mdb_key_read(
+    Backend    *be,
+       MDB_txn *txn,
+       MDB_dbi dbi,
+    struct berval *k,
+       ID *ids,
+    MDB_cursor **saved_cursor,
+        int get_flags );
+
+extern int
+mdb_key_change(
+    Backend     *be,
+       MDB_txn *txn,
+    MDB_dbi dbi,
+    struct berval *k,
+    ID id,
+    int        op );
+       
+/*
+ * nextid.c
+ */
+
+int mdb_next_id( BackendDB *be, MDB_txn *tid, ID *id );
+
+/*
+ * modify.c
+ */
+
+int mdb_modify_internal(
+       Operation *op,
+       MDB_txn *tid,
+       Modifications *modlist,
+       Entry *e,
+       const char **text,
+       char *textbuf,
+       size_t textlen );
+
+/*
+ * monitor.c
+ */
+
+int mdb_monitor_db_init( BackendDB *be );
+int mdb_monitor_db_open( BackendDB *be );
+int mdb_monitor_db_close( BackendDB *be );
+int mdb_monitor_db_destroy( BackendDB *be );
+
+#ifdef MDB_MONITOR_IDX
+int
+mdb_monitor_idx_add(
+       struct mdb_info         *mdb,
+       AttributeDescription    *desc,
+       slap_mask_t             type );
+#endif /* MDB_MONITOR_IDX */
+
+/*
+ * former external.h
+ */
+
+extern BI_init                         mdb_back_initialize;
+
+extern BI_db_config                    mdb_db_config;
+
+extern BI_op_add                       mdb_add;
+extern BI_op_bind                      mdb_bind;
+extern BI_op_compare                   mdb_compare;
+extern BI_op_delete                    mdb_delete;
+extern BI_op_modify                    mdb_modify;
+extern BI_op_modrdn                    mdb_modrdn;
+extern BI_op_search                    mdb_search;
+extern BI_op_extended                  mdb_extended;
+
+extern BI_chk_referrals                        mdb_referrals;
+
+extern BI_operational                  mdb_operational;
+
+extern BI_has_subordinates             mdb_hasSubordinates;
+
+/* tools.c */
+extern BI_tool_entry_open              mdb_tool_entry_open;
+extern BI_tool_entry_close             mdb_tool_entry_close;
+extern BI_tool_entry_first_x           mdb_tool_entry_first_x;
+extern BI_tool_entry_next              mdb_tool_entry_next;
+extern BI_tool_entry_get               mdb_tool_entry_get;
+extern BI_tool_entry_put               mdb_tool_entry_put;
+extern BI_tool_entry_reindex           mdb_tool_entry_reindex;
+extern BI_tool_dn2id_get               mdb_tool_dn2id_get;
+extern BI_tool_entry_modify            mdb_tool_entry_modify;
+
+LDAP_END_DECL
+
+#endif /* _PROTO_MDB_H */
diff --git a/servers/slapd/back-mdb/referral.c b/servers/slapd/back-mdb/referral.c
new file mode 100644 (file)
index 0000000..242fe3a
--- /dev/null
@@ -0,0 +1,152 @@
+/* referral.c - MDB backend referral handler */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+
+/*
+ * Referral check run before an operation is processed.
+ *
+ * Searches and operations carrying the manageDSAit control are passed
+ * through untouched.  Otherwise the target DN is looked up: if it, or
+ * (when it does not exist) the closest matched entry, is a referral
+ * object, an LDAP_REFERRAL result is sent to the client.
+ *
+ * Returns LDAP_SUCCESS when the operation may proceed, LDAP_REFERRAL
+ * when a referral was sent, LDAP_BUSY or LDAP_OTHER on failure.
+ */
+int
+mdb_referrals( Operation *op, SlapReply *rs )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       Entry *e = NULL;
+       /* must start out NULL: the lookup may leave it untouched on error
+        * and it is tested right after the call */
+       EntryInfo *ei = NULL;
+       int rc = LDAP_SUCCESS;
+
+       DB_TXN          *rtxn;
+       DB_LOCK         lock;
+
+       if( op->o_tag == LDAP_REQ_SEARCH ) {
+               /* let search take care of itself */
+               return rc;
+       }
+
+       if( get_manageDSAit( op ) ) {
+               /* let op take care of DSA management */
+               return rc;
+       }
+
+       rc = mdb_reader_get(op, mdb->bi_dbenv, &rtxn);
+       switch(rc) {
+       case 0:
+               break;
+       default:
+               return LDAP_OTHER;
+       }
+
+dn2entry_retry:
+       /* get entry */
+       rc = mdb_dn2entry( op, rtxn, &op->o_req_ndn, &ei, 1, &lock );
+
+       /* mdb_dn2entry() may legally leave ei == NULL
+        * if rc != 0 and rc != DB_NOTFOUND
+        */
+       if ( ei ) {
+               e = ei->bei_e;
+       }
+
+       switch(rc) {
+       case DB_NOTFOUND:
+       case 0:
+               break;
+       case LDAP_BUSY:
+               rs->sr_text = "ldap server busy";
+               return LDAP_BUSY;
+       case DB_LOCK_DEADLOCK:
+       case DB_LOCK_NOTGRANTED:
+               goto dn2entry_retry;
+       default:
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_referrals)
+                       ": dn2entry failed: %s (%d)\n",
+                       db_strerror(rc), rc, 0 );
+               rs->sr_text = "internal error";
+               return LDAP_OTHER;
+       }
+
+       if ( rc == DB_NOTFOUND ) {
+               /* target does not exist; e, if set, is the matched entry */
+               rc = LDAP_SUCCESS;
+               rs->sr_matched = NULL;
+               if ( e != NULL ) {
+                       Debug( LDAP_DEBUG_TRACE,
+                               LDAP_XSTRING(mdb_referrals)
+                               ": tag=%lu target=\"%s\" matched=\"%s\"\n",
+                               (unsigned long)op->o_tag, op->o_req_dn.bv_val, e->e_name.bv_val );
+
+                       if( is_entry_referral( e ) ) {
+                               BerVarray ref = get_entry_referrals( op, e );
+                               /* stays LDAP_OTHER unless the rewrite below
+                                * produces something to send */
+                               rc = LDAP_OTHER;
+                               rs->sr_ref = referral_rewrite( ref, &e->e_name,
+                                       &op->o_req_dn, LDAP_SCOPE_DEFAULT );
+                               ber_bvarray_free( ref );
+                               if ( rs->sr_ref ) {
+                                       rs->sr_matched = ber_strdup_x(
+                                       e->e_name.bv_val, op->o_tmpmemctx );
+                               }
+                       }
+
+                       mdb_cache_return_entry_r (mdb, e, &lock);
+                       e = NULL;
+               }
+
+               if( rs->sr_ref != NULL ) {
+                       /* send referrals */
+                       rc = rs->sr_err = LDAP_REFERRAL;
+                       send_ldap_result( op, rs );
+                       ber_bvarray_free( rs->sr_ref );
+                       rs->sr_ref = NULL;
+               } else if ( rc != LDAP_SUCCESS ) {
+                       rs->sr_text = rs->sr_matched ? "bad referral object" : NULL;
+               }
+
+               if (rs->sr_matched) {
+                       op->o_tmpfree( (char *)rs->sr_matched, op->o_tmpmemctx );
+                       rs->sr_matched = NULL;
+               }
+               return rc;
+       }
+
+       if ( is_entry_referral( e ) ) {
+               /* entry is a referral */
+               BerVarray refs = get_entry_referrals( op, e );
+               rs->sr_ref = referral_rewrite(
+                       refs, &e->e_name, &op->o_req_dn, LDAP_SCOPE_DEFAULT );
+
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_referrals)
+                       ": tag=%lu target=\"%s\" matched=\"%s\"\n",
+                       (unsigned long)op->o_tag, op->o_req_dn.bv_val, e->e_name.bv_val );
+
+               /* borrowed pointer into the entry; cleared again below
+                * before the entry is returned to the cache */
+               rs->sr_matched = e->e_name.bv_val;
+               if( rs->sr_ref != NULL ) {
+                       rc = rs->sr_err = LDAP_REFERRAL;
+                       send_ldap_result( op, rs );
+                       ber_bvarray_free( rs->sr_ref );
+                       rs->sr_ref = NULL;
+               } else {
+                       rc = LDAP_OTHER;
+                       rs->sr_text = "bad referral object";
+               }
+
+               rs->sr_matched = NULL;
+               ber_bvarray_free( refs );
+       }
+
+       mdb_cache_return_entry_r(mdb, e, &lock);
+       return rc;
+}
diff --git a/servers/slapd/back-mdb/search.c b/servers/slapd/back-mdb/search.c
new file mode 100644 (file)
index 0000000..4cec717
--- /dev/null
@@ -0,0 +1,1383 @@
+/* search.c - search operation */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+
+#include "back-mdb.h"
+#include "idl.h"
+
+/* Forward declarations of file-local helpers that mdb_search() calls. */
+
+/* Used by mdb_search() for LDAP_SCOPE_BASE searches to fill "ids"
+ * for the base entry "e".
+ */
+static int base_candidate(
+       BackendDB       *be,
+       Entry   *e,
+       ID              *ids );
+
+/* Used by mdb_search() for every other scope: fills the candidate
+ * list "ids" and the "scopes" list. mdb_search() retries or fails the
+ * operation when this returns DB_LOCK_DEADLOCK.
+ */
+static int search_candidates(
+       Operation *op,
+       SlapReply *rs,
+       Entry *e,
+       DB_TXN *txn,
+       ID      *ids,
+       ID      *scopes );
+
+/* Deferred parsing of the paged-results cookie; mdb_search() compares
+ * the return value against LDAP_SUCCESS.
+ */
+static int parse_paged_cookie( Operation *op, SlapReply *rs );
+
+/* Sends the paged-results response for the current page. */
+static void send_paged_response( 
+       Operation *op,
+       SlapReply *rs,
+       ID  *lastid,
+       int tentries );
+
+/* Dereference aliases for a single alias entry. Return the final
+ * dereferenced entry on success, NULL on any failure.
+ *
+ * op, txn  - operation and transaction handed to mdb_dn2entry()
+ * rs       - receives the outcome in sr_err/sr_text. They start out as
+ *            LDAP_ALIAS_DEREF_PROBLEM / "maximum deref depth exceeded"
+ *            and are only changed on the paths below that assign them,
+ *            so the "already visited" exit leaves these initial values.
+ * e        - entry to start the walk from
+ * matched  - out: the last entry examined, so broken links can be
+ *            reported
+ * lock     - in/out: used to release the previous entry on each hop,
+ *            then overwritten with the lock of the entry just fetched
+ * tmp      - IDL of the IDs seen during this walk; used for the depth
+ *            limit and for loop detection
+ * visited  - optional IDL of IDs already seen by the enclosing search;
+ *            pass NULL to skip that check
+ *
+ * On success rs->sr_err is LDAP_SUCCESS and the returned entry is still
+ * read-locked under *lock. On every NULL return this function has not
+ * released *matched; the caller decides what to do with it.
+ */
+static Entry * deref_base (
+       Operation *op,
+       SlapReply *rs,
+       Entry *e,
+       Entry **matched,
+       DB_TXN *txn,
+       DB_LOCK *lock,
+       ID      *tmp,
+       ID      *visited )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       struct berval ndn;
+       EntryInfo *ei;
+       DB_LOCK lockr;
+
+       /* Default result; reported as-is if the depth limit is hit. */
+       rs->sr_err = LDAP_ALIAS_DEREF_PROBLEM;
+       rs->sr_text = "maximum deref depth exceeded";
+
+       for (;;) {
+               /* Remember the last entry we looked at, so we can
+                * report broken links
+                */
+               *matched = e;
+
+               /* tmp gains one ID per hop, so its size is the current depth. */
+               if (MDB_IDL_N(tmp) >= op->o_bd->be_max_deref_depth) {
+                       e = NULL;
+                       break;
+               }
+
+               /* If this is part of a subtree or onelevel search,
+                * have we seen this ID before? If so, quit.
+                */
+               if ( visited && mdb_idl_insert( visited, e->e_id ) ) {
+                       e = NULL;
+                       break;
+               }
+
+               /* If we've seen this ID during this deref iteration,
+                * we've hit a loop.
+                */
+               if ( mdb_idl_insert( tmp, e->e_id ) ) {
+                       rs->sr_err = LDAP_ALIAS_PROBLEM;
+                       rs->sr_text = "circular alias";
+                       e = NULL;
+                       break;
+               }
+
+               /* If there was a problem getting the aliasedObjectName,
+                * get_alias_dn will have set the error status.
+                */
+               if ( get_alias_dn(e, &ndn, &rs->sr_err, &rs->sr_text) ) {
+                       e = NULL;
+                       break;
+               }
+
+               /* Fetch the alias target; its lock goes into lockr so the
+                * lock on the current entry (*lock) stays valid until the
+                * hand-over below.
+                */
+               rs->sr_err = mdb_dn2entry( op, txn, &ndn, &ei,
+                       0, &lockr );
+               /* Deadlock: bail out at once, leaving DB_LOCK_DEADLOCK in
+                * rs->sr_err for the caller to act on.
+                */
+               if ( rs->sr_err == DB_LOCK_DEADLOCK )
+                       return NULL;
+
+               if ( ei ) {
+                       e = ei->bei_e;
+               } else {
+                       e = NULL;
+               }
+
+               if (!e) {
+                       rs->sr_err = LDAP_ALIAS_PROBLEM;
+                       rs->sr_text = "aliasedObject not found";
+                       break;
+               }
+
+               /* Free the previous entry, continue to work with the
+                * one we just retrieved.
+                */
+               mdb_cache_return_entry_r( mdb, *matched, lock);
+               *lock = lockr;
+
+               /* We found a regular entry. Return this to the caller. The
+                * entry is still locked for Read.
+                */
+               if (!is_entry_alias(e)) {
+                       rs->sr_err = LDAP_SUCCESS;
+                       rs->sr_text = NULL;
+                       break;
+               }
+       }
+       return e;
+}
+
+/* Look for and dereference all aliases within the search scope. Adds
+ * the dereferenced entries to the "ids" list. Requires "stack" to be
+ * able to hold 8 levels of DB_SIZE IDLs. Of course we're hardcoded to
+ * require a minimum of 8 UM_SIZE IDLs so this is never a problem.
+ *
+ * NOTE(review): the body carves eight MDB_IDL_DB_SIZE regions out of
+ * "stack" (aliases .. subscop2) and additionally passes
+ * subscop2+MDB_IDL_DB_SIZE, i.e. the region after the eighth, to
+ * mdb_dn2idl(). Confirm against the caller that "stack" is large
+ * enough for that extra region.
+ *
+ * op, txn - operation and transaction used for all lookups
+ * rs      - sr_err carries the status of the last step and is also
+ *           the return value
+ * e       - base entry of the scope; the caller keeps ownership of it
+ *           (it is not released here)
+ * ids     - out: candidate IDL; zeroed here, then filled with each
+ *           scope's IDs plus the IDs of dereferenced alias targets
+ * scopes  - IDL that receives the ID of every newly found alias
+ *           target; it is not zeroed here
+ * stack   - scratch space for the work IDLs described above
+ */
+static int search_aliases(
+       Operation *op,
+       SlapReply *rs,
+       Entry *e,
+       DB_TXN *txn,
+       ID *ids,
+       ID *scopes,
+       ID *stack )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       ID *aliases, *curscop, *subscop, *visited, *newsubs, *oldsubs, *tmp;
+       ID cursora, ida, cursoro, ido, *subscop2;
+       Entry *matched, *a;
+       EntryInfo *ei;
+       struct berval bv_alias = BER_BVC( "alias" );
+       AttributeAssertion aa_alias = ATTRIBUTEASSERTION_INIT;
+       Filter  af;
+       DB_LOCK locka, lockr;
+       int first = 1;
+
+       aliases = stack;        /* IDL of all aliases in the database */
+       curscop = aliases + MDB_IDL_DB_SIZE;    /* Aliases in the current scope */
+       subscop = curscop + MDB_IDL_DB_SIZE;    /* The current scope */
+       visited = subscop + MDB_IDL_DB_SIZE;    /* IDs we've seen in this search */
+       newsubs = visited + MDB_IDL_DB_SIZE;    /* New subtrees we've added */
+       oldsubs = newsubs + MDB_IDL_DB_SIZE;    /* Subtrees added previously */
+       tmp = oldsubs + MDB_IDL_DB_SIZE;        /* Scratch space for deref_base() */
+
+       /* A copy of subscop, because subscop gets clobbered by
+        * the mdb_idl_union/intersection routines
+        */
+       subscop2 = tmp + MDB_IDL_DB_SIZE;
+
+       /* Build the filter (objectClass=alias). */
+       af.f_choice = LDAP_FILTER_EQUALITY;
+       af.f_ava = &aa_alias;
+       af.f_av_desc = slap_schema.si_ad_objectClass;
+       af.f_av_value = bv_alias;
+       af.f_next = NULL;
+
+       /* Find all aliases in database */
+       MDB_IDL_ZERO( aliases );
+       rs->sr_err = mdb_filter_candidates( op, txn, &af, aliases,
+               curscop, visited );
+       if (rs->sr_err != LDAP_SUCCESS) {
+               return rs->sr_err;
+       }
+       /* Seed oldsubs by hand with the base entry's ID.
+        * NOTE(review): this presumes slot 0 of an IDL holds the element
+        * count -- confirm against the IDL definition.
+        */
+       oldsubs[0] = 1;
+       oldsubs[1] = e->e_id;
+
+       MDB_IDL_ZERO( ids );
+       MDB_IDL_ZERO( visited );
+       MDB_IDL_ZERO( newsubs );
+
+       cursoro = 0;
+       ido = mdb_idl_first( oldsubs, &cursoro );
+
+       for (;;) {
+               /* Set curscop to only the aliases in the current scope. Start with
+                * all the aliases, obtain the IDL for the current scope, and then
+                * get the intersection of these two IDLs. Add the current scope
+                * to the cumulative list of candidates.
+                */
+               MDB_IDL_CPY( curscop, aliases );
+               rs->sr_err = mdb_dn2idl( op, txn, &e->e_nname, BEI(e), subscop,
+                       subscop2+MDB_IDL_DB_SIZE );
+
+               /* On the first pass "e" belongs to the caller. On later passes
+                * it was fetched under locka at the bottom of this loop and is
+                * no longer needed once its scope IDL has been read.
+                */
+               if (first) {
+                       first = 0;
+               } else {
+                       mdb_cache_return_entry_r (mdb, e, &locka);
+               }
+               if ( rs->sr_err == DB_LOCK_DEADLOCK )
+                       return rs->sr_err;
+
+               MDB_IDL_CPY(subscop2, subscop);
+               /* The intersection status is stored in sr_err but is not
+                * checked before continuing.
+                */
+               rs->sr_err = mdb_idl_intersection(curscop, subscop);
+               mdb_idl_union( ids, subscop2 );
+
+               /* Dereference all of the aliases in the current scope. */
+               cursora = 0;
+               for (ida = mdb_idl_first(curscop, &cursora); ida != NOID;
+                       ida = mdb_idl_next(curscop, &cursora))
+               {
+                       ei = NULL;
+retry1:
+                       rs->sr_err = mdb_cache_find_id(op, txn,
+                               ida, &ei, 0, &lockr );
+                       if (rs->sr_err != LDAP_SUCCESS) {
+                               /* Deadlock aborts the whole scan, a refused lock
+                                * is retried, any other failure skips this ID.
+                                */
+                               if ( rs->sr_err == DB_LOCK_DEADLOCK )
+                                       return rs->sr_err;
+                               if ( rs->sr_err == DB_LOCK_NOTGRANTED )
+                                       goto retry1;
+                               continue;
+                       }
+                       a = ei->bei_e;
+
+                       /* This should only happen if the curscop IDL has maxed out and
+                        * turned into a range that spans IDs indiscriminately
+                        */
+                       if (!is_entry_alias(a)) {
+                               mdb_cache_return_entry_r (mdb, a, &lockr);
+                               continue;
+                       }
+
+                       /* Actually dereference the alias */
+                       MDB_IDL_ZERO(tmp);
+                       a = deref_base( op, rs, a, &matched, txn, &lockr,
+                               tmp, visited );
+                       if (a) {
+                               /* If the target was not already in our current candidates,
+                                * make note of it in the newsubs list. Also
+                                * set it in the scopes list so that mdb_search
+                                * can check it.
+                                */
+                               if (mdb_idl_insert(ids, a->e_id) == 0) {
+                                       mdb_idl_insert(newsubs, a->e_id);
+                                       mdb_idl_insert(scopes, a->e_id);
+                               }
+                               mdb_cache_return_entry_r( mdb, a, &lockr);
+
+                       } else if ( rs->sr_err == DB_LOCK_DEADLOCK ) {
+                               return rs->sr_err;
+                       } else if (matched) {
+                               /* Alias could not be dereferenced, or it deref'd to
+                                * an ID we've already seen. Ignore it.
+                                */
+                               mdb_cache_return_entry_r( mdb, matched, &lockr );
+                               rs->sr_text = NULL;
+                       }
+               }
+               /* If this is a OneLevel search, we're done; oldsubs only had one
+                * ID in it. For a Subtree search, oldsubs may be a list of scope IDs.
+                */
+               if ( op->ors_scope == LDAP_SCOPE_ONELEVEL ) break;
+nextido:
+               ido = mdb_idl_next( oldsubs, &cursoro );
+               
+               /* If we're done processing the old scopes, did we add any new
+                * scopes in this iteration? If so, go back and do those now.
+                */
+               if (ido == NOID) {
+                       if (MDB_IDL_IS_ZERO(newsubs)) break;
+                       MDB_IDL_CPY(oldsubs, newsubs);
+                       MDB_IDL_ZERO(newsubs);
+                       cursoro = 0;
+                       ido = mdb_idl_first( oldsubs, &cursoro );
+               }
+
+               /* Find the entry corresponding to the next scope. If it can't
+                * be found, ignore it and move on. This should never happen;
+                * we should never see the ID of an entry that doesn't exist.
+                * Set the name so that the scope's IDL can be retrieved.
+                */
+               ei = NULL;
+sameido:
+               rs->sr_err = mdb_cache_find_id(op, txn, ido, &ei,
+                       0, &locka );
+               if ( rs->sr_err != LDAP_SUCCESS ) {
+                       /* Same policy as retry1: deadlock aborts, a refused
+                        * lock retries this ID, anything else moves on to the
+                        * next scope ID.
+                        */
+                       if ( rs->sr_err == DB_LOCK_DEADLOCK )
+                               return rs->sr_err;
+                       if ( rs->sr_err == DB_LOCK_NOTGRANTED )
+                               goto sameido;
+                       goto nextido;
+               }
+               /* "e" is now held under locka; the top of the loop releases it. */
+               e = ei->bei_e;
+       }
+       return rs->sr_err;
+}
+
+/* Get the next ID from the DB. Used if the candidate list is
+ * a range and simple iteration hits missing entryIDs.
+ *
+ * mdb    - backend instance; its id2entry database is searched
+ * ltid   - transaction the cursor is opened under
+ * cursor - in: the ID to search after (the probe key is *cursor + 1);
+ *          out: the ID decoded from the key the cursor landed on
+ *
+ * Returns 0 on success, otherwise the nonzero code returned by the
+ * cursor open or by c_get; *cursor is left untouched in that case.
+ */
+static int
+mdb_get_nextid(struct mdb_info *mdb, DB_TXN *ltid, ID *cursor)
+{
+       DBC *curs;
+       /* Berkeley DB requires every DBT field that is not set explicitly
+        * to be zero. Without this, fields such as data.doff (consulted
+        * because DB_DBT_PARTIAL is set) would be passed uninitialized.
+        */
+       DBT key = {0}, data = {0};
+       ID id, nid;
+       int rc;
+
+       /* Probe key: the on-disk form of the ID following *cursor. */
+       id = *cursor + 1;
+       MDB_ID2DISK( id, &nid );
+       rc = mdb->bi_id2entry->bdi_db->cursor(
+               mdb->bi_id2entry->bdi_db, ltid, &curs, mdb->bi_db_opflags );
+       if ( rc )
+               return rc;
+       /* The key lives in caller memory (nid), so c_get writes the key
+        * it actually found back into nid.
+        */
+       key.data = &nid;
+       key.size = key.ulen = sizeof(ID);
+       key.flags = DB_DBT_USERMEM;
+       /* Zero-length partial read: only the key is wanted, not the
+        * entry data.
+        */
+       data.flags = DB_DBT_USERMEM | DB_DBT_PARTIAL;
+       data.dlen = data.ulen = 0;
+       rc = curs->c_get( curs, &key, &data, DB_SET_RANGE );
+       curs->c_close( curs );
+       if ( rc )
+               return rc;
+       MDB_DISK2ID( &nid, cursor );
+       return 0;
+}
+
+int
+mdb_search( Operation *op, SlapReply *rs )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       ID              id, cursor;
+       ID              lastid = NOID;
+       ID              candidates[MDB_IDL_UM_SIZE];
+       ID              scopes[MDB_IDL_DB_SIZE];
+       Entry           *e = NULL, base, *e_root;
+       Entry           *matched = NULL;
+       EntryInfo       *ei;
+       AttributeName   *attrs;
+       struct berval   realbase = BER_BVNULL;
+       slap_mask_t     mask;
+       time_t          stoptime;
+       int             manageDSAit;
+       int             tentries = 0;
+       unsigned        nentries = 0;
+       int             idflag = 0;
+
+       DB_LOCK         lock;
+       struct  mdb_op_info     *opinfo = NULL;
+       DB_TXN                  *ltid = NULL;
+       OpExtra *oex;
+
+       Debug( LDAP_DEBUG_TRACE, "=> " LDAP_XSTRING(mdb_search) "\n", 0, 0, 0);
+       attrs = op->oq_search.rs_attrs;
+
+       LDAP_SLIST_FOREACH( oex, &op->o_extra, oe_next ) {
+               if ( oex->oe_key == mdb )
+                       break;
+       }
+       opinfo = (struct mdb_op_info *) oex;
+
+       manageDSAit = get_manageDSAit( op );
+
+       if ( opinfo && opinfo->boi_txn ) {
+               ltid = opinfo->boi_txn;
+       } else {
+               rs->sr_err = mdb_reader_get( op, mdb->bi_dbenv, &ltid );
+
+               switch(rs->sr_err) {
+               case 0:
+                       break;
+               default:
+                       send_ldap_error( op, rs, LDAP_OTHER, "internal error" );
+                       return rs->sr_err;
+               }
+       }
+
+       e_root = mdb->bi_cache.c_dntree.bei_e;
+       if ( op->o_req_ndn.bv_len == 0 ) {
+               /* DIT root special case */
+               ei = e_root->e_private;
+               rs->sr_err = LDAP_SUCCESS;
+       } else {
+               if ( op->ors_deref & LDAP_DEREF_FINDING ) {
+                       MDB_IDL_ZERO(candidates);
+               }
+dn2entry_retry:
+               /* get entry with reader lock */
+               rs->sr_err = mdb_dn2entry( op, ltid, &op->o_req_ndn, &ei,
+                       1, &lock );
+       }
+
+       switch(rs->sr_err) {
+       case DB_NOTFOUND:
+               matched = ei->bei_e;
+               break;
+       case 0:
+               e = ei->bei_e;
+               break;
+       case DB_LOCK_DEADLOCK:
+               if ( !opinfo ) {
+                       ltid->flags &= ~TXN_DEADLOCK;
+                       goto dn2entry_retry;
+               }
+               opinfo->boi_err = rs->sr_err;
+               /* FALLTHRU */
+       case LDAP_BUSY:
+               send_ldap_error( op, rs, LDAP_BUSY, "ldap server busy" );
+               return LDAP_BUSY;
+       case DB_LOCK_NOTGRANTED:
+               goto dn2entry_retry;
+       default:
+               send_ldap_error( op, rs, LDAP_OTHER, "internal error" );
+               return rs->sr_err;
+       }
+
+       if ( op->ors_deref & LDAP_DEREF_FINDING ) {
+               if ( matched && is_entry_alias( matched )) {
+                       struct berval stub;
+
+                       stub.bv_val = op->o_req_ndn.bv_val;
+                       stub.bv_len = op->o_req_ndn.bv_len - matched->e_nname.bv_len - 1;
+                       e = deref_base( op, rs, matched, &matched, ltid, &lock,
+                               candidates, NULL );
+                       if ( e ) {
+                               build_new_dn( &op->o_req_ndn, &e->e_nname, &stub,
+                                       op->o_tmpmemctx );
+                               mdb_cache_return_entry_r (mdb, e, &lock);
+                               matched = NULL;
+                               goto dn2entry_retry;
+                       }
+               } else if ( e && is_entry_alias( e )) {
+                       e = deref_base( op, rs, e, &matched, ltid, &lock,
+                               candidates, NULL );
+               }
+       }
+
+       if ( e == NULL ) {
+               struct berval matched_dn = BER_BVNULL;
+
+               if ( matched != NULL ) {
+                       BerVarray erefs = NULL;
+
+                       /* return referral only if "disclose"
+                        * is granted on the object */
+                       if ( ! access_allowed( op, matched,
+                                               slap_schema.si_ad_entry,
+                                               NULL, ACL_DISCLOSE, NULL ) )
+                       {
+                               rs->sr_err = LDAP_NO_SUCH_OBJECT;
+
+                       } else {
+                               ber_dupbv( &matched_dn, &matched->e_name );
+
+                               erefs = is_entry_referral( matched )
+                                       ? get_entry_referrals( op, matched )
+                                       : NULL;
+                               if ( rs->sr_err == DB_NOTFOUND )
+                                       rs->sr_err = LDAP_REFERRAL;
+                               rs->sr_matched = matched_dn.bv_val;
+                       }
+
+#ifdef SLAP_ZONE_ALLOC
+                       slap_zn_runlock(mdb->bi_cache.c_zctx, matched);
+#endif
+                       mdb_cache_return_entry_r (mdb, matched, &lock);
+                       matched = NULL;
+
+                       if ( erefs ) {
+                               rs->sr_ref = referral_rewrite( erefs, &matched_dn,
+                                       &op->o_req_dn, op->oq_search.rs_scope );
+                               ber_bvarray_free( erefs );
+                       }
+
+               } else {
+#ifdef SLAP_ZONE_ALLOC
+                       slap_zn_runlock(mdb->bi_cache.c_zctx, matched);
+#endif
+                       rs->sr_ref = referral_rewrite( default_referral,
+                               NULL, &op->o_req_dn, op->oq_search.rs_scope );
+                       rs->sr_err = rs->sr_ref != NULL ? LDAP_REFERRAL : LDAP_NO_SUCH_OBJECT;
+               }
+
+               send_ldap_result( op, rs );
+
+               if ( rs->sr_ref ) {
+                       ber_bvarray_free( rs->sr_ref );
+                       rs->sr_ref = NULL;
+               }
+               if ( !BER_BVISNULL( &matched_dn ) ) {
+                       ber_memfree( matched_dn.bv_val );
+                       rs->sr_matched = NULL;
+               }
+               return rs->sr_err;
+       }
+
+       /* NOTE: __NEW__ "search" access is required
+        * on searchBase object */
+       if ( ! access_allowed_mask( op, e, slap_schema.si_ad_entry,
+                               NULL, ACL_SEARCH, NULL, &mask ) )
+       {
+               if ( !ACL_GRANT( mask, ACL_DISCLOSE ) ) {
+                       rs->sr_err = LDAP_NO_SUCH_OBJECT;
+               } else {
+                       rs->sr_err = LDAP_INSUFFICIENT_ACCESS;
+               }
+
+#ifdef SLAP_ZONE_ALLOC
+               slap_zn_runlock(mdb->bi_cache.c_zctx, e);
+#endif
+               if ( e != e_root ) {
+                       mdb_cache_return_entry_r(mdb, e, &lock);
+               }
+               send_ldap_result( op, rs );
+               return rs->sr_err;
+       }
+
+       if ( !manageDSAit && e != e_root && is_entry_referral( e ) ) {
+               /* entry is a referral, don't allow add */
+               struct berval matched_dn = BER_BVNULL;
+               BerVarray erefs = NULL;
+               
+               ber_dupbv( &matched_dn, &e->e_name );
+               erefs = get_entry_referrals( op, e );
+
+               rs->sr_err = LDAP_REFERRAL;
+
+#ifdef SLAP_ZONE_ALLOC
+               slap_zn_runlock(mdb->bi_cache.c_zctx, e);
+#endif
+               mdb_cache_return_entry_r( mdb, e, &lock );
+               e = NULL;
+
+               if ( erefs ) {
+                       rs->sr_ref = referral_rewrite( erefs, &matched_dn,
+                               &op->o_req_dn, op->oq_search.rs_scope );
+                       ber_bvarray_free( erefs );
+
+                       if ( !rs->sr_ref ) {
+                               rs->sr_text = "bad_referral object";
+                       }
+               }
+
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_search) ": entry is referral\n",
+                       0, 0, 0 );
+
+               rs->sr_matched = matched_dn.bv_val;
+               send_ldap_result( op, rs );
+
+               ber_bvarray_free( rs->sr_ref );
+               rs->sr_ref = NULL;
+               ber_memfree( matched_dn.bv_val );
+               rs->sr_matched = NULL;
+               return 1;
+       }
+
+       if ( get_assert( op ) &&
+               ( test_filter( op, e, get_assertion( op )) != LDAP_COMPARE_TRUE ))
+       {
+               rs->sr_err = LDAP_ASSERTION_FAILED;
+#ifdef SLAP_ZONE_ALLOC
+               slap_zn_runlock(mdb->bi_cache.c_zctx, e);
+#endif
+               if ( e != e_root ) {
+                       mdb_cache_return_entry_r(mdb, e, &lock);
+               }
+               send_ldap_result( op, rs );
+               return 1;
+       }
+
+       /* compute it anyway; root does not use it */
+       stoptime = op->o_time + op->ors_tlimit;
+
+       /* need normalized dn below */
+       ber_dupbv( &realbase, &e->e_nname );
+
+       /* Copy info to base, must free entry before accessing the database
+        * in search_candidates, to avoid deadlocks.
+        */
+       base.e_private = e->e_private;
+       base.e_nname = realbase;
+       base.e_id = e->e_id;
+
+#ifdef SLAP_ZONE_ALLOC
+       slap_zn_runlock(mdb->bi_cache.c_zctx, e);
+#endif
+       if ( e != e_root ) {
+               mdb_cache_return_entry_r(mdb, e, &lock);
+       }
+       e = NULL;
+
+       /* select candidates */
+       if ( op->oq_search.rs_scope == LDAP_SCOPE_BASE ) {
+               rs->sr_err = base_candidate( op->o_bd, &base, candidates );
+
+       } else {
+cand_retry:
+               MDB_IDL_ZERO( candidates );
+               MDB_IDL_ZERO( scopes );
+               rs->sr_err = search_candidates( op, rs, &base,
+                       ltid, candidates, scopes );
+               if ( rs->sr_err == DB_LOCK_DEADLOCK ) {
+                       if ( !opinfo ) {
+                               ltid->flags &= ~TXN_DEADLOCK;
+                               goto cand_retry;
+                       }
+                       opinfo->boi_err = rs->sr_err;
+                       send_ldap_error( op, rs, LDAP_BUSY, "ldap server busy" );
+                       return LDAP_BUSY;
+               }
+       }
+
+       /* start cursor at beginning of candidates.
+        */
+       cursor = 0;
+
+       if ( candidates[0] == 0 ) {
+               Debug( LDAP_DEBUG_TRACE,
+                       LDAP_XSTRING(mdb_search) ": no candidates\n",
+                       0, 0, 0 );
+
+               goto nochange;
+       }
+
+       /* if not root and candidates exceed to-be-checked entries, abort */
+       if ( op->ors_limit      /* isroot == FALSE */ &&
+               op->ors_limit->lms_s_unchecked != -1 &&
+               MDB_IDL_N(candidates) > (unsigned) op->ors_limit->lms_s_unchecked )
+       {
+               rs->sr_err = LDAP_ADMINLIMIT_EXCEEDED;
+               send_ldap_result( op, rs );
+               rs->sr_err = LDAP_SUCCESS;
+               goto done;
+       }
+
+       if ( op->ors_limit == NULL      /* isroot == TRUE */ ||
+               !op->ors_limit->lms_s_pr_hide )
+       {
+               tentries = MDB_IDL_N(candidates);
+       }
+
+       if ( get_pagedresults( op ) > SLAP_CONTROL_IGNORED ) {
+               PagedResultsState *ps = op->o_pagedresults_state;
+               /* deferred cookie parsing */
+               rs->sr_err = parse_paged_cookie( op, rs );
+               if ( rs->sr_err != LDAP_SUCCESS ) {
+                       send_ldap_result( op, rs );
+                       goto done;
+               }
+
+               cursor = (ID) ps->ps_cookie;
+               if ( cursor && ps->ps_size == 0 ) {
+                       rs->sr_err = LDAP_SUCCESS;
+                       rs->sr_text = "search abandoned by pagedResult size=0";
+                       send_ldap_result( op, rs );
+                       goto done;
+               }
+               id = mdb_idl_first( candidates, &cursor );
+               if ( id == NOID ) {
+                       Debug( LDAP_DEBUG_TRACE, 
+                               LDAP_XSTRING(mdb_search)
+                               ": no paged results candidates\n",
+                               0, 0, 0 );
+                       send_paged_response( op, rs, &lastid, 0 );
+
+                       rs->sr_err = LDAP_OTHER;
+                       goto done;
+               }
+               nentries = ps->ps_count;
+               if ( id == (ID)ps->ps_cookie )
+                       id = mdb_idl_next( candidates, &cursor );
+               goto loop_begin;
+       }
+
+       for ( id = mdb_idl_first( candidates, &cursor );
+                 id != NOID ; id = mdb_idl_next( candidates, &cursor ) )
+       {
+               int scopeok;
+
+loop_begin:
+
+               /* check for abandon */
+               if ( op->o_abandon ) {
+                       rs->sr_err = SLAPD_ABANDON;
+                       send_ldap_result( op, rs );
+                       goto done;
+               }
+
+               /* mostly needed by internal searches,
+                * e.g. related to syncrepl, for whom
+                * abandon does not get set... */
+               if ( slapd_shutdown ) {
+                       rs->sr_err = LDAP_UNAVAILABLE;
+                       send_ldap_disconnect( op, rs );
+                       goto done;
+               }
+
+               /* check time limit */
+               if ( op->ors_tlimit != SLAP_NO_LIMIT
+                               && slap_get_time() > stoptime )
+               {
+                       rs->sr_err = LDAP_TIMELIMIT_EXCEEDED;
+                       rs->sr_ref = rs->sr_v2ref;
+                       send_ldap_result( op, rs );
+                       rs->sr_err = LDAP_SUCCESS;
+                       goto done;
+               }
+
+               /* If we inspect more entries than will
+                * fit into the entry cache, stop caching
+                * any subsequent entries
+                */
+               nentries++;
+               if ( nentries > mdb->bi_cache.c_maxsize && !idflag ) {
+                       idflag = ID_NOCACHE;
+               }
+
+fetch_entry_retry:
+               /* get the entry with reader lock */
+               ei = NULL;
+               rs->sr_err = mdb_cache_find_id( op, ltid,
+                       id, &ei, idflag, &lock );
+
+               if (rs->sr_err == LDAP_BUSY) {
+                       rs->sr_text = "ldap server busy";
+                       send_ldap_result( op, rs );
+                       goto done;
+
+               } else if ( rs->sr_err == DB_LOCK_DEADLOCK ) {
+                       if ( !opinfo ) {
+                               ltid->flags &= ~TXN_DEADLOCK;
+                               goto fetch_entry_retry;
+                       }
+txnfail:
+                       opinfo->boi_err = rs->sr_err;
+                       send_ldap_error( op, rs, LDAP_BUSY, "ldap server busy" );
+                       goto done;
+
+               } else if ( rs->sr_err == DB_LOCK_NOTGRANTED )
+               {
+                       goto fetch_entry_retry;
+               } else if ( rs->sr_err == LDAP_OTHER ) {
+                       rs->sr_text = "internal error";
+                       send_ldap_result( op, rs );
+                       goto done;
+               }
+
+               if ( ei && rs->sr_err == LDAP_SUCCESS ) {
+                       e = ei->bei_e;
+               } else {
+                       e = NULL;
+               }
+
+               if ( e == NULL ) {
+                       if( !MDB_IDL_IS_RANGE(candidates) ) {
+                               /* only complain for non-range IDLs */
+                               Debug( LDAP_DEBUG_TRACE,
+                                       LDAP_XSTRING(mdb_search)
+                                       ": candidate %ld not found\n",
+                                       (long) id, 0, 0 );
+                       } else {
+                               /* get the next ID from the DB */
+id_retry:
+                               rs->sr_err = mdb_get_nextid( mdb, ltid, &cursor );
+                               if ( rs->sr_err == DB_NOTFOUND ) {
+                                       break;
+                               } else if ( rs->sr_err == DB_LOCK_DEADLOCK ) {
+                                       if ( opinfo )
+                                               goto txnfail;
+                                       ltid->flags &= ~TXN_DEADLOCK;
+                                       goto id_retry;
+                               } else if ( rs->sr_err == DB_LOCK_NOTGRANTED ) {
+                                       goto id_retry;
+                               }
+                               if ( rs->sr_err ) {
+                                       rs->sr_err = LDAP_OTHER;
+                                       rs->sr_text = "internal error in get_nextid";
+                                       send_ldap_result( op, rs );
+                                       goto done;
+                               }
+                               cursor--;
+                       }
+
+                       goto loop_continue;
+               }
+
+               if ( is_entry_subentry( e ) ) {
+                       if( op->oq_search.rs_scope != LDAP_SCOPE_BASE ) {
+                               if(!get_subentries_visibility( op )) {
+                                       /* only subentries are visible */
+                                       goto loop_continue;
+                               }
+
+                       } else if ( get_subentries( op ) &&
+                               !get_subentries_visibility( op ))
+                       {
+                               /* only subentries are visible */
+                               goto loop_continue;
+                       }
+
+               } else if ( get_subentries_visibility( op )) {
+                       /* only subentries are visible */
+                       goto loop_continue;
+               }
+
+               /* Does this candidate actually satisfy the search scope?
+                *
+                * Note that we don't lock access to the bei_parent pointer.
+                * Since only leaf nodes can be deleted, the parent of any
+                * node will always be a valid node. Also since we have
+                * a Read lock on the data, it cannot be renamed out of the
+                * scope while we are looking at it, and unless we're using
+                * MDB_HIER, its parents cannot be moved either.
+                */
+               scopeok = 0;
+               switch( op->ors_scope ) {
+               case LDAP_SCOPE_BASE:
+                       /* This is always true, yes? */
+                       if ( id == base.e_id ) scopeok = 1;
+                       break;
+
+               case LDAP_SCOPE_ONELEVEL:
+                       if ( ei->bei_parent->bei_id == base.e_id ) scopeok = 1;
+                       break;
+
+#ifdef LDAP_SCOPE_CHILDREN
+               case LDAP_SCOPE_CHILDREN:
+                       if ( id == base.e_id ) break;
+                       /* Fall-thru */
+#endif
+               case LDAP_SCOPE_SUBTREE: {
+                       EntryInfo *tmp;
+                       for ( tmp = BEI(e); tmp; tmp = tmp->bei_parent ) {
+                               if ( tmp->bei_id == base.e_id ) {
+                                       scopeok = 1;
+                                       break;
+                               }
+                       }
+                       } break;
+               }
+
+               /* aliases were already dereferenced in candidate list */
+               if ( op->ors_deref & LDAP_DEREF_SEARCHING ) {
+                       /* but if the search base is an alias, and we didn't
+                        * deref it when finding, return it.
+                        */
+                       if ( is_entry_alias(e) &&
+                               ((op->ors_deref & LDAP_DEREF_FINDING) ||
+                                       !bvmatch(&e->e_nname, &op->o_req_ndn)))
+                       {
+                               goto loop_continue;
+                       }
+
+                       /* scopes is only non-empty for onelevel or subtree */
+                       if ( !scopeok && MDB_IDL_N(scopes) ) {
+                               unsigned x;
+                               if ( op->ors_scope == LDAP_SCOPE_ONELEVEL ) {
+                                       x = mdb_idl_search( scopes, e->e_id );
+                                       if ( scopes[x] == e->e_id ) scopeok = 1;
+                               } else {
+                                       /* subtree, walk up the tree */
+                                       EntryInfo *tmp = BEI(e);
+                                       for (;tmp->bei_parent; tmp=tmp->bei_parent) {
+                                               x = mdb_idl_search( scopes, tmp->bei_id );
+                                               if ( scopes[x] == tmp->bei_id ) {
+                                                       scopeok = 1;
+                                                       break;
+                                               }
+                                       }
+                               }
+                       }
+               }
+
+               /* Not in scope, ignore it */
+               if ( !scopeok )
+               {
+                       Debug( LDAP_DEBUG_TRACE,
+                               LDAP_XSTRING(mdb_search)
+                               ": %ld scope not okay\n",
+                               (long) id, 0, 0 );
+                       goto loop_continue;
+               }
+
+               /*
+                * if it's a referral, add it to the list of referrals. only do
+                * this for non-base searches, and don't check the filter
+                * explicitly here since it's only a candidate anyway.
+                */
+               if ( !manageDSAit && op->oq_search.rs_scope != LDAP_SCOPE_BASE
+                       && is_entry_referral( e ) )
+               {
+                       struct mdb_op_info bois;
+                       struct mdb_lock_info blis;
+                       BerVarray erefs = get_entry_referrals( op, e );
+                       rs->sr_ref = referral_rewrite( erefs, &e->e_name, NULL,
+                               op->oq_search.rs_scope == LDAP_SCOPE_ONELEVEL
+                                       ? LDAP_SCOPE_BASE : LDAP_SCOPE_SUBTREE );
+
+                       /* Must set lockinfo so that entry_release will work */
+                       if (!opinfo) {
+                               bois.boi_oe.oe_key = mdb;
+                               bois.boi_txn = NULL;
+                               bois.boi_err = 0;
+                               bois.boi_acl_cache = op->o_do_not_cache;
+                               bois.boi_flag = BOI_DONTFREE;
+                               bois.boi_locks = &blis;
+                               blis.bli_next = NULL;
+                               LDAP_SLIST_INSERT_HEAD( &op->o_extra, &bois.boi_oe,
+                                       oe_next );
+                       } else {
+                               blis.bli_next = opinfo->boi_locks;
+                               opinfo->boi_locks = &blis;
+                       }
+                       blis.bli_id = e->e_id;
+                       blis.bli_lock = lock;
+                       blis.bli_flag = BLI_DONTFREE;
+
+                       rs->sr_entry = e;
+                       rs->sr_flags = REP_ENTRY_MUSTRELEASE;
+
+                       send_search_reference( op, rs );
+
+                       if ( blis.bli_flag ) {
+#ifdef SLAP_ZONE_ALLOC
+                               slap_zn_runlock(mdb->bi_cache.c_zctx, e);
+#endif
+                               mdb_cache_return_entry_r(mdb, e, &lock);
+                               if ( opinfo ) {
+                                       opinfo->boi_locks = blis.bli_next;
+                               } else {
+                                       LDAP_SLIST_REMOVE( &op->o_extra, &bois.boi_oe,
+                                               OpExtra, oe_next );
+                               }
+                       }
+                       rs->sr_entry = NULL;
+                       e = NULL;
+
+                       ber_bvarray_free( rs->sr_ref );
+                       ber_bvarray_free( erefs );
+                       rs->sr_ref = NULL;
+
+                       goto loop_continue;
+               }
+
+               if ( !manageDSAit && is_entry_glue( e )) {
+                       goto loop_continue;
+               }
+
+               /* if it matches the filter and scope, send it */
+               rs->sr_err = test_filter( op, e, op->oq_search.rs_filter );
+
+               if ( rs->sr_err == LDAP_COMPARE_TRUE ) {
+                       /* check size limit */
+                       if ( get_pagedresults(op) > SLAP_CONTROL_IGNORED ) {
+                               if ( rs->sr_nentries >= ((PagedResultsState *)op->o_pagedresults_state)->ps_size ) {
+#ifdef SLAP_ZONE_ALLOC
+                                       slap_zn_runlock(mdb->bi_cache.c_zctx, e);
+#endif
+                                       mdb_cache_return_entry_r( mdb, e, &lock );
+                                       e = NULL;
+                                       send_paged_response( op, rs, &lastid, tentries );
+                                       goto done;
+                               }
+                               lastid = id;
+                       }
+
+                       if (e) {
+                               struct mdb_op_info bois;
+                               struct mdb_lock_info blis;
+
+                               /* Must set lockinfo so that entry_release will work */
+                               if (!opinfo) {
+                                       bois.boi_oe.oe_key = mdb;
+                                       bois.boi_txn = NULL;
+                                       bois.boi_err = 0;
+                                       bois.boi_acl_cache = op->o_do_not_cache;
+                                       bois.boi_flag = BOI_DONTFREE;
+                                       bois.boi_locks = &blis;
+                                       blis.bli_next = NULL;
+                                       LDAP_SLIST_INSERT_HEAD( &op->o_extra, &bois.boi_oe,
+                                               oe_next );
+                               } else {
+                                       blis.bli_next = opinfo->boi_locks;
+                                       opinfo->boi_locks = &blis;
+                               }
+                               blis.bli_id = e->e_id;
+                               blis.bli_lock = lock;
+                               blis.bli_flag = BLI_DONTFREE;
+
+                               /* safe default */
+                               rs->sr_attrs = op->oq_search.rs_attrs;
+                               rs->sr_operational_attrs = NULL;
+                               rs->sr_ctrls = NULL;
+                               rs->sr_entry = e;
+                               RS_ASSERT( e->e_private != NULL );
+                               rs->sr_flags = REP_ENTRY_MUSTRELEASE;
+                               rs->sr_err = LDAP_SUCCESS;
+                               rs->sr_err = send_search_entry( op, rs );
+                               rs->sr_attrs = NULL;
+                               rs->sr_entry = NULL;
+
+                               /* send_search_entry will usually free it.
+                                * an overlay might leave its own copy here;
+                                * bli_flag will be 0 if lock was already released.
+                                */
+                               if ( blis.bli_flag ) {
+#ifdef SLAP_ZONE_ALLOC
+                                       slap_zn_runlock(mdb->bi_cache.c_zctx, e);
+#endif
+                                       mdb_cache_return_entry_r(mdb, e, &lock);
+                                       if ( opinfo ) {
+                                               opinfo->boi_locks = blis.bli_next;
+                                       } else {
+                                               LDAP_SLIST_REMOVE( &op->o_extra, &bois.boi_oe,
+                                                       OpExtra, oe_next );
+                                       }
+                               }
+                               e = NULL;
+
+                               switch ( rs->sr_err ) {
+                               case LDAP_SUCCESS:      /* entry sent ok */
+                                       break;
+                               default:                /* entry not sent */
+                                       break;
+                               case LDAP_UNAVAILABLE:
+                               case LDAP_SIZELIMIT_EXCEEDED:
+                                       if ( rs->sr_err == LDAP_SIZELIMIT_EXCEEDED ) {
+                                               rs->sr_ref = rs->sr_v2ref;
+                                               send_ldap_result( op, rs );
+                                               rs->sr_err = LDAP_SUCCESS;
+
+                                       } else {
+                                               rs->sr_err = LDAP_OTHER;
+                                       }
+                                       goto done;
+                               }
+                       }
+
+               } else {
+                       Debug( LDAP_DEBUG_TRACE,
+                               LDAP_XSTRING(mdb_search)
+                               ": %ld does not match filter\n",
+                               (long) id, 0, 0 );
+               }
+
+loop_continue:
+               if( e != NULL ) {
+                       /* free reader lock */
+#ifdef SLAP_ZONE_ALLOC
+                       slap_zn_runlock(mdb->bi_cache.c_zctx, e);
+#endif
+                       mdb_cache_return_entry_r( mdb, e , &lock );
+                       RS_ASSERT( rs->sr_entry == NULL );
+                       e = NULL;
+                       rs->sr_entry = NULL;
+               }
+       }
+
+nochange:
+       rs->sr_ctrls = NULL;
+       rs->sr_ref = rs->sr_v2ref;
+       rs->sr_err = (rs->sr_v2ref == NULL) ? LDAP_SUCCESS : LDAP_REFERRAL;
+       rs->sr_rspoid = NULL;
+       if ( get_pagedresults(op) > SLAP_CONTROL_IGNORED ) {
+               send_paged_response( op, rs, NULL, 0 );
+       } else {
+               send_ldap_result( op, rs );
+       }
+
+       rs->sr_err = LDAP_SUCCESS;
+
+done:
+       if( rs->sr_v2ref ) {
+               ber_bvarray_free( rs->sr_v2ref );
+               rs->sr_v2ref = NULL;
+       }
+       if( realbase.bv_val ) ch_free( realbase.bv_val );
+
+       return rs->sr_err;
+}
+
+
+/* Build the candidate list for a search whose only candidate is the
+ * base entry e: ids[0] is set to 1 (presumably the element count of
+ * the ID list) and ids[1] to the entry's ID.  be is not used.
+ * Always returns 0.
+ */
+static int base_candidate(
+       BackendDB       *be,
+       Entry   *e,
+       ID              *ids )
+{
+       const ID base_id = e->e_id;
+
+       Debug(LDAP_DEBUG_ARGS, "base_candidates: base: \"%s\" (0x%08lx)\n",
+               e->e_nname.bv_val, (long) base_id, 0);
+
+       ids[1] = base_id;
+       ids[0] = 1;
+       return 0;
+}
+
+/* Report whether filter f itself is an "objectClass Present" filter,
+ * and record the depth of the filter tree while walking it.
+ *
+ * cur is the depth of f; *max is raised to the deepest level seen.
+ * AND/OR filters are descended into only for the depth count: the
+ * result of the nested calls is discarded, so a presence test buried
+ * inside an AND/OR does not make this function return 1.
+ */
+static int oc_filter(
+       Filter *f,
+       int cur,
+       int *max )
+{
+       Filter *sub;
+
+       assert( f != NULL );
+
+       if( *max < cur ) *max = cur;
+
+       if ( f->f_choice == LDAP_FILTER_PRESENT ) {
+               return f->f_desc == slap_schema.si_ad_objectClass;
+       }
+
+       if ( f->f_choice == LDAP_FILTER_AND || f->f_choice == LDAP_FILTER_OR ) {
+               for ( sub = f->f_and; sub != NULL; sub = sub->f_next ) {
+                       (void) oc_filter( sub, cur + 1, max );
+               }
+       }
+
+       return 0;
+}
+
+/* Destructor passed to ldap_pvt_thread_pool_setkey() by search_stack();
+ * releases the buffer stored under the key.  key itself is not used.
+ */
+static void search_stack_free( void *key, void *data )
+{
+       (void) key;
+       ber_memfree_x( data, NULL );
+}
+
+/* Return the ID-list stack buffer for this operation, allocating it on
+ * first use.  When the operation has a thread context the buffer is
+ * kept under a thread-pool key (keyed on this function's address) with
+ * search_stack_free() as its destructor; otherwise it is cached in
+ * mdb->bi_search_stack.  The buffer holds bi_search_stack_depth lists
+ * of MDB_IDL_UM_SIZE IDs each.
+ */
+static void *search_stack( Operation *op )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       void *stk = NULL;
+
+       /* look up an existing buffer first */
+       if ( op->o_threadctx ) {
+               ldap_pvt_thread_pool_getkey( op->o_threadctx, (void *)search_stack,
+                       &stk, NULL );
+       } else {
+               stk = mdb->bi_search_stack;
+       }
+       if ( stk != NULL ) {
+               return stk;
+       }
+
+       /* none yet: allocate one and remember it in the same place */
+       stk = ch_malloc( mdb->bi_search_stack_depth * MDB_IDL_UM_SIZE
+               * sizeof( ID ) );
+       if ( op->o_threadctx ) {
+               ldap_pvt_thread_pool_setkey( op->o_threadctx, (void *)search_stack,
+                       stk, search_stack_free, NULL, NULL );
+       } else {
+               mdb->bi_search_stack = stk;
+       }
+       return stk;
+}
+
+static int search_candidates(
+       Operation *op,
+       SlapReply *rs,
+       Entry *e,
+       DB_TXN *txn,
+       ID      *ids,
+       ID      *scopes )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+       int rc, depth = 1;
+       Filter          f, rf, xf, nf;
+       ID              *stack;
+       AttributeAssertion aa_ref = ATTRIBUTEASSERTION_INIT;
+       Filter  sf;
+       AttributeAssertion aa_subentry = ATTRIBUTEASSERTION_INIT;
+
+       /*
+        * Compute the candidate ID list for a search based at entry e.
+        * The scope candidates are written to ids (by search_aliases()
+        * when aliases are dereferenced while searching, otherwise by
+        * mdb_dn2idl()) and then narrowed by mdb_filter_candidates().
+        * scopes is only passed on to search_aliases().  Returns the
+        * first non-zero code from those calls, or 0 (LDAP_SUCCESS).
+        *
+        * This routine takes as input a filter (user-filter)
+        * and rewrites it as follows:
+        *      (&(computed-dummy)[(objectClass=subentry)]
+        *              (|[(objectClass=referral)](user-filter)))
+        *
+        * The bracketed clauses are optional; when the referral clause
+        * is not added, the user-filter is linked in directly without
+        * the enclosing OR.  All added filter nodes live on this
+        * function's stack and are linked through f_next.
+        */
+
+       Debug(LDAP_DEBUG_TRACE,
+               "search_candidates: base=\"%s\" (0x%08lx) scope=%d\n",
+               e->e_nname.bv_val, (long) e->e_id, op->oq_search.rs_scope );
+
+       xf.f_or = op->oq_search.rs_filter;
+       xf.f_choice = LDAP_FILTER_OR;
+       xf.f_next = NULL;
+
+       /* If the user's filter uses objectClass=*,
+        * these clauses are redundant.
+        * oc_filter() also stores the depth of the user's filter tree
+        * in depth, which sizes the IDL stack below.
+        */
+       if (!oc_filter(op->oq_search.rs_filter, 1, &depth)
+               && !get_subentries_visibility(op)) {
+               if( !get_manageDSAit(op) && !get_domainScope(op) ) {
+                       /* match referral objects: prepend (objectClass=referral)
+                        * to the OR list, ahead of the user's filter
+                        */
+                       struct berval bv_ref = BER_BVC( "referral" );
+                       rf.f_choice = LDAP_FILTER_EQUALITY;
+                       rf.f_ava = &aa_ref;
+                       rf.f_av_desc = slap_schema.si_ad_objectClass;
+                       rf.f_av_value = bv_ref;
+                       rf.f_next = xf.f_or;
+                       xf.f_or = &rf;
+                       depth++;
+               }
+       }
+
+       f.f_next = NULL;
+       f.f_choice = LDAP_FILTER_AND;
+       f.f_and = &nf;
+       /* Dummy; we compute scope separately now */
+       nf.f_choice = SLAPD_FILTER_COMPUTED;
+       nf.f_result = LDAP_SUCCESS;
+       nf.f_next = ( xf.f_or == op->oq_search.rs_filter )
+               ? op->oq_search.rs_filter : &xf ;       /* use the OR wrapper only if a clause was added to it */
+       /* Filter depth increased again, adding dummy clause */
+       depth++;
+
+       if( get_subentries_visibility( op ) ) {
+               struct berval bv_subentry = BER_BVC( "subentry" );
+               sf.f_choice = LDAP_FILTER_EQUALITY;
+               sf.f_ava = &aa_subentry;
+               sf.f_av_desc = slap_schema.si_ad_objectClass;
+               sf.f_av_value = bv_subentry;
+               sf.f_next = nf.f_next;  /* splice in between the dummy clause and the rest */
+               nf.f_next = &sf;
+       }
+
+       /* Allocate IDL stack, plus 1 more for former tmp.
+        * A filter deeper than the configured bi_search_stack_depth gets
+        * a private allocation (freed below); otherwise the buffer
+        * returned by search_stack() is used and is not freed here.
+        */
+       if ( depth+1 > mdb->bi_search_stack_depth ) {
+               stack = ch_malloc( (depth + 1) * MDB_IDL_UM_SIZE * sizeof( ID ) );
+       } else {
+               stack = search_stack( op );
+       }
+
+       if( op->ors_deref & LDAP_DEREF_SEARCHING ) {
+               rc = search_aliases( op, rs, e, txn, ids, scopes, stack );
+       } else {
+               rc = mdb_dn2idl( op, txn, &e->e_nname, BEI(e), ids, stack );
+       }
+
+       if ( rc == LDAP_SUCCESS ) {
+               rc = mdb_filter_candidates( op, txn, &f, ids,
+                       stack, stack+MDB_IDL_UM_SIZE );
+       }
+
+       if ( depth+1 > mdb->bi_search_stack_depth ) {   /* same test as at allocation: private stack only */
+               ch_free( stack );
+       }
+
+       if( rc ) {
+               Debug(LDAP_DEBUG_TRACE,
+                       "mdb_search_candidates: failed (rc=%d)\n",
+                       rc, NULL, NULL );
+
+       } else {
+               Debug(LDAP_DEBUG_TRACE,
+                       "mdb_search_candidates: id=%ld first=%ld last=%ld\n",
+                       (long) ids[0],
+                       (long) MDB_IDL_FIRST(ids),
+                       (long) MDB_IDL_LAST(ids) );
+       }
+
+       return rc;
+}
+
+/* Check the cookie supplied with a pagedResults request against the
+ * cookie held in the operation's paged-results state.
+ *
+ * Must only be called once the frontend has detected and parsed the
+ * pagedResults control (asserted below).
+ *
+ * Returns LDAP_SUCCESS when the request carries no cookie (the cookie
+ * stored on the connection is then reset to 0) or when the cookie
+ * equals ps_cookie.  A cookie of the wrong size, or one greater than
+ * ps_cookie, yields LDAP_PROTOCOL_ERROR; one smaller than ps_cookie
+ * yields LDAP_UNWILLING_TO_PERFORM.  rs->sr_text is set on failure.
+ */
+static int
+parse_paged_cookie( Operation *op, SlapReply *rs )
+{
+       PagedResultsState *ps = op->o_pagedresults_state;
+       PagedResultsCookie reqcookie;
+
+       assert( get_pagedresults( op ) > SLAP_CONTROL_IGNORED );
+
+       /* no cookie in the request: we're going to use ps_cookie */
+       if ( ps->ps_cookieval.bv_len == 0 ) {
+               op->o_conn->c_pagedresults_state.ps_cookie = 0;
+               return LDAP_SUCCESS;
+       }
+
+       /* cookie decoding/checks are deferred to the backend */
+       if ( ps->ps_cookieval.bv_len != sizeof( reqcookie ) ) {
+               rs->sr_text = "paged results cookie is invalid";
+               return LDAP_PROTOCOL_ERROR;
+       }
+
+       AC_MEMCPY( &reqcookie, ps->ps_cookieval.bv_val, sizeof( reqcookie ));
+
+       if ( reqcookie > ps->ps_cookie ) {
+               rs->sr_text = "paged results cookie is invalid";
+               return LDAP_PROTOCOL_ERROR;
+       }
+
+       if ( reqcookie < ps->ps_cookie ) {
+               rs->sr_text = "paged results cookie is invalid or old";
+               return LDAP_UNWILLING_TO_PERFORM;
+       }
+
+       return LDAP_SUCCESS;
+}
+
+/* Send the result that ends one page of a paged-results search.
+ *
+ * The resume cookie and the running entry count are stored in the
+ * connection's paged-results state, a pagedResults response control
+ * carrying the cookie is added to the reply, and the result is sent
+ * with LDAP_SUCCESS.  If the control value cannot be flattened,
+ * nothing is sent.
+ *
+ * lastid   - ID of the last entry of this page; NULL produces an
+ *            empty cookie and stores a cookie value of 0
+ * tentries - not used; the control always reports a size of 0
+ */
+static void
+send_paged_response( 
+       Operation       *op,
+       SlapReply       *rs,
+       ID              *lastid,
+       int             tentries )
+{
+       BerElementBuffer berbuf;
+       BerElement      *ber = (BerElement *)&berbuf;
+       LDAPControl     *ctrls[2];
+       PagedResultsCookie respcookie;
+       struct berval cookie;
+
+       Debug(LDAP_DEBUG_ARGS,
+               "send_paged_response: lastid=0x%08lx nentries=%d\n", 
+               lastid ? *lastid : 0, rs->sr_nentries, NULL );
+
+       ber_init2( ber, NULL, LBER_USE_DER );
+
+       if ( lastid == NULL ) {
+               respcookie = ( PagedResultsCookie )0;
+               BER_BVSTR( &cookie, "" );
+
+       } else {
+               respcookie = ( PagedResultsCookie )(*lastid);
+               cookie.bv_val = (char *)&respcookie;
+               cookie.bv_len = sizeof( respcookie );
+       }
+
+       op->o_conn->c_pagedresults_state.ps_cookie = respcookie;
+       op->o_conn->c_pagedresults_state.ps_count =
+               ((PagedResultsState *)op->o_pagedresults_state)->ps_count +
+               rs->sr_nentries;
+
+       /* return size of 0 -- no estimate */
+       ber_printf( ber, "{iO}", 0, &cookie );
+
+       ctrls[0] = op->o_tmpalloc( sizeof(LDAPControl), op->o_tmpmemctx );
+       ctrls[1] = NULL;
+
+       /* the control value is flattened without copying (last argument
+        * 0), so the result must go out before the BER buffer is freed
+        */
+       if ( ber_flatten2( ber, &ctrls[0]->ldctl_value, 0 ) != -1 ) {
+               ctrls[0]->ldctl_oid = LDAP_CONTROL_PAGEDRESULTS;
+               ctrls[0]->ldctl_iscritical = 0;
+
+               slap_add_ctrls( op, rs, ctrls );
+               rs->sr_err = LDAP_SUCCESS;
+               send_ldap_result( op, rs );
+       }
+
+       (void) ber_free_buf( ber );
+}
diff --git a/servers/slapd/back-mdb/tools.c b/servers/slapd/back-mdb/tools.c
new file mode 100644 (file)
index 0000000..208afab
--- /dev/null
@@ -0,0 +1,999 @@
+/* tools.c - tools for slap tools */
+/* $OpenLDAP$ */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 2000-2011 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+
+#include "portable.h"
+
+#include <stdio.h>
+#include <ac/string.h>
+#include <ac/errno.h>
+
+#define AVL_INTERNAL
+#include "back-mdb.h"
+#include "idl.h"
+
+static DBC *cursor = NULL;     /* id2entry cursor shared by the tool routines; opened in mdb_tool_entry_open() */
+static DBT key, data;  /* key is bound to nid; data receives header / entry bytes read through cursor */
+static EntryHeader eh; /* header of the entry being read; eh.bv.bv_val is a heap buffer released in mdb_tool_entry_close() */
+static ID nid, previd = NOID;  /* nid: buffer behind key; previd: ID the cursor was last positioned on */
+static char ehbuf[16]; /* scratch space for the partial read of an entry header */
+
+typedef struct dn_id {
+       ID id;
+       struct berval dn;
+} dn_id;
+
+#define        HOLE_SIZE       4096
+static dn_id hbuf[HOLE_SIZE], *holes = hbuf;
+static unsigned nhmax = HOLE_SIZE;
+static unsigned nholes;        /* holes[] entries in use; any left at close are reported as missing entries */
+
+static int index_nattrs;       /* non-zero while mdb_tool_entry_next() still has attrs to index linearly */
+
+static struct berval   *tool_base;     /* the next three are set by mdb_tool_entry_first_x() */
+static int             tool_scope;
+static Filter          *tool_filter;
+static Entry           *tool_next_entry;       /* entry fetched by mdb_tool_entry_next() when a base or filter is set */
+
+static ID mdb_tool_ix_id;
+static Operation *mdb_tool_ix_op;
+static int *mdb_tool_index_threads, mdb_tool_index_tcount;
+static void *mdb_tool_index_rec;       /* allocated in mdb_tool_entry_open(), freed in mdb_tool_entry_close() */
+static struct mdb_info *mdb_tool_info; /* non-NULL while the threaded-index setup from mdb_tool_entry_open() is active */
+static ldap_pvt_thread_mutex_t mdb_tool_index_mutex;
+static ldap_pvt_thread_cond_t mdb_tool_index_cond_main;        /* waited on by mdb_tool_entry_close() */
+static ldap_pvt_thread_cond_t mdb_tool_index_cond_work;        /* broadcast by mdb_tool_entry_close() */
+
+#if DB_VERSION_FULL >= 0x04060000
+#define        USE_TRICKLE     1
+#else
+/* Seems to slow things down too much in Berkeley DB 4.5
+ * (DB_VERSION_FULL below 4.6), so no trickle task there.
+ */
+#undef USE_TRICKLE
+#endif
+
+#ifdef USE_TRICKLE
+static ldap_pvt_thread_mutex_t mdb_tool_trickle_mutex;
+static ldap_pvt_thread_cond_t mdb_tool_trickle_cond;
+static ldap_pvt_thread_cond_t mdb_tool_trickle_cond_end;
+
+static void * mdb_tool_trickle_task( void *ctx, void *ptr );
+static int mdb_tool_trickle_active;
+#endif
+
+static void * mdb_tool_index_task( void *ctx, void *ptr );
+
+static int
+mdb_tool_entry_get_int( BackendDB *be, ID id, Entry **ep );
+
+/* Prepare the tool-mode state for database be.
+ *
+ * Resets the shared key/data DBTs (key is bound to nid), opens the
+ * shared id2entry cursor if it is not already open and, when slapMode
+ * has SLAP_TOOL_QUICK without SLAP_TOOL_READONLY and no setup is
+ * active yet, initializes the indexer synchronization objects and
+ * submits the worker tasks to connection_pool.  mode is not used.
+ *
+ * Returns 0 on success, -1 if the cursor could not be opened.
+ */
+int mdb_tool_entry_open(
+       BackendDB *be, int mode )
+{
+       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+       int worker;
+
+       /* initialize key and data thangs */
+       DBTzero( &key );
+       key.flags = DB_DBT_USERMEM;
+       key.data = &nid;
+       key.ulen = sizeof( nid );
+       key.size = key.ulen;
+
+       DBTzero( &data );
+       data.flags = DB_DBT_USERMEM;
+
+       if ( cursor == NULL
+               && mdb->bi_id2entry->bdi_db->cursor(
+                       mdb->bi_id2entry->bdi_db, mdb->bi_cache.c_txn, &cursor,
+                       mdb->bi_db_opflags ) != 0 )
+       {
+               return -1;
+       }
+
+       /* Threaded slapindex is only set up in quick, writable mode,
+        * and only once until mdb_tool_entry_close() resets it.
+        */
+       if (( slapMode & (SLAP_TOOL_QUICK|SLAP_TOOL_READONLY)) != SLAP_TOOL_QUICK
+               || mdb_tool_info != NULL )
+       {
+               return 0;
+       }
+
+#ifdef USE_TRICKLE
+       ldap_pvt_thread_mutex_init( &mdb_tool_trickle_mutex );
+       ldap_pvt_thread_cond_init( &mdb_tool_trickle_cond );
+       ldap_pvt_thread_cond_init( &mdb_tool_trickle_cond_end );
+       ldap_pvt_thread_pool_submit( &connection_pool, mdb_tool_trickle_task, mdb->bi_dbenv );
+#endif
+
+       ldap_pvt_thread_mutex_init( &mdb_tool_index_mutex );
+       ldap_pvt_thread_cond_init( &mdb_tool_index_cond_main );
+       ldap_pvt_thread_cond_init( &mdb_tool_index_cond_work );
+
+       if ( mdb->bi_nattrs != 0 ) {
+               mdb_tool_index_threads = ch_malloc( slap_tool_thread_max * sizeof( int ));
+               mdb_tool_index_rec = ch_malloc( mdb->bi_nattrs * sizeof( IndexRec ));
+               mdb_tool_index_tcount = slap_tool_thread_max - 1;
+
+               /* workers are numbered from 1; each task receives its own
+                * heap-allocated copy of its number
+                */
+               for ( worker = 1; worker < slap_tool_thread_max; worker++ ) {
+                       int *arg = ch_malloc( sizeof( int ));
+                       *arg = worker;
+                       ldap_pvt_thread_pool_submit( &connection_pool,
+                               mdb_tool_index_task, arg );
+               }
+       }
+
+       mdb_tool_info = mdb;
+       return 0;
+}
+
+/* Tear down the tool-mode state set up by mdb_tool_entry_open().
+ *
+ * If the threaded-index setup is active, slapd_shutdown is raised
+ * while the trickle task (when compiled in) and the index workers are
+ * waited for, then the index buffers are freed.  Afterwards the entry
+ * header buffer is released and the shared cursor is closed.  be is
+ * not used.
+ *
+ * Returns 0, or -1 after listing them on stderr if any holes[] entries
+ * are still outstanding.
+ *
+ * NOTE(review): when mdb_tool_entry_open() started no index workers
+ * (bi_nattrs == 0) the second wait below has nobody visible here to
+ * decrement mdb_tool_index_tcount - confirm against
+ * mdb_tool_index_task().
+ */
+int mdb_tool_entry_close(
+       BackendDB *be )
+{
+       if ( mdb_tool_info ) {
+               slapd_shutdown = 1;
+#ifdef USE_TRICKLE
+               ldap_pvt_thread_mutex_lock( &mdb_tool_trickle_mutex );
+
+               /* trickle thread may not have started yet */
+               while ( !mdb_tool_trickle_active )
+                       ldap_pvt_thread_cond_wait( &mdb_tool_trickle_cond_end,
+                                       &mdb_tool_trickle_mutex );
+
+               ldap_pvt_thread_cond_signal( &mdb_tool_trickle_cond );
+               while ( mdb_tool_trickle_active )
+                       ldap_pvt_thread_cond_wait( &mdb_tool_trickle_cond_end,
+                                       &mdb_tool_trickle_mutex );
+               ldap_pvt_thread_mutex_unlock( &mdb_tool_trickle_mutex );
+#endif
+               ldap_pvt_thread_mutex_lock( &mdb_tool_index_mutex );
+
+               /* There might still be some threads starting */
+               while ( mdb_tool_index_tcount ) {
+                       ldap_pvt_thread_cond_wait( &mdb_tool_index_cond_main,
+                                       &mdb_tool_index_mutex );
+               }
+
+               mdb_tool_index_tcount = slap_tool_thread_max - 1;
+               ldap_pvt_thread_cond_broadcast( &mdb_tool_index_cond_work );
+
+               /* Make sure all threads are stopped */
+               while ( mdb_tool_index_tcount ) {
+                       ldap_pvt_thread_cond_wait( &mdb_tool_index_cond_main,
+                               &mdb_tool_index_mutex );
+               }
+               ldap_pvt_thread_mutex_unlock( &mdb_tool_index_mutex );
+
+               mdb_tool_info = NULL;
+               slapd_shutdown = 0;
+
+               /* Clear the pointers once freed: mdb_tool_entry_open() only
+                * reallocates them when bi_nattrs is non-zero, so a later
+                * open/close cycle could otherwise free them a second time.
+                */
+               ch_free( mdb_tool_index_threads );
+               mdb_tool_index_threads = NULL;
+               ch_free( mdb_tool_index_rec );
+               mdb_tool_index_rec = NULL;
+               mdb_tool_index_tcount = slap_tool_thread_max - 1;
+       }
+
+       if( eh.bv.bv_val ) {
+               ch_free( eh.bv.bv_val );
+               eh.bv.bv_val = NULL;
+       }
+
+       if( cursor ) {
+               cursor->c_close( cursor );
+               cursor = NULL;
+       }
+
+       if( nholes ) {
+               unsigned i;
+               fprintf( stderr, "Error, entries missing!\n");
+               for (i=0; i<nholes; i++) {
+                       /* cast so the argument matches %ld, as the Debug
+                        * calls elsewhere in this file do for IDs
+                        */
+                       fprintf(stderr, "  entry %ld: %s\n",
+                               (long) holes[i].id, holes[i].dn.bv_val);
+               }
+               return -1;
+       }
+
+       return 0;
+}
+
+/* Begin an iteration over the entries of be.  The optional base DN,
+ * scope and filter are stored in the file-scope tool_* variables,
+ * where mdb_tool_entry_next() consults them; the ID returned is
+ * whatever the first mdb_tool_entry_next() call yields.
+ */
+ID
+mdb_tool_entry_first_x(
+       BackendDB *be,
+       struct berval *base,
+       int scope,
+       Filter *f )
+{
+       ID first;
+
+       tool_filter = f;
+       tool_scope = scope;
+       tool_base = base;
+
+       first = mdb_tool_entry_next( be );
+       return first;
+}
+
+ID mdb_tool_entry_next(
+       BackendDB *be )
+{      /* Advance the shared cursor and return the ID of the next
+        * record, or NOID when the cursor read fails or the database
+        * is exhausted.  When tool_base or tool_filter is set, the
+        * entry is also fetched into tool_next_entry and records that
+        * do not qualify are skipped.
+        */
+       int rc;
+       ID id;
+       struct mdb_info *mdb;
+
+       assert( be != NULL );
+       assert( slapMode & SLAP_TOOL_MODE );
+
+       mdb = (struct mdb_info *) be->be_private;
+       assert( mdb != NULL );
+
+next:;
+       /* Get the header: a partial read (DB_DBT_PARTIAL) of at most
+        * sizeof( ehbuf ) bytes of the next record's data.
+        */
+       data.ulen = data.dlen = sizeof( ehbuf );
+       data.data = ehbuf;
+       data.flags |= DB_DBT_PARTIAL;
+       rc = cursor->c_get( cursor, &key, &data, DB_NEXT );
+
+       if( rc ) {
+               /* If we're doing linear indexing and there are more attrs to
+                * index, and we're at the end of the database, start over.
+                * The attr in slot 0 is freed and replaced by the one at
+                * index_nattrs, and the cursor is moved back to the first
+                * record.  Any other failure ends the iteration.
+                */
+               if ( index_nattrs && rc == DB_NOTFOUND ) {
+                       /* optional - do a checkpoint here? */
+                       mdb_attr_info_free( mdb->bi_attrs[0] );
+                       mdb->bi_attrs[0] = mdb->bi_attrs[index_nattrs];
+                       index_nattrs--;
+                       rc = cursor->c_get( cursor, &key, &data, DB_FIRST );
+                       if ( rc ) {
+                               return NOID;
+                       }
+               } else {
+                       return NOID;
+               }
+       }
+
+       MDB_DISK2ID( key.data, &id );   /* presumably converts the stored key to an ID */
+       previd = id;    /* lets mdb_tool_entry_get_int() see the cursor is already on this ID */
+
+       if ( tool_filter || tool_base ) {
+               static Operation op = {0};
+               static Opheader ohdr = {0};
+
+               op.o_hdr = &ohdr;
+               op.o_bd = be;
+               op.o_tmpmemctx = NULL;
+               op.o_tmpmfuncs = &ch_mfuncs;
+
+               if ( tool_next_entry ) {        /* drop the entry left over from the previous call */
+                       mdb_entry_release( &op, tool_next_entry, 0 );
+                       tool_next_entry = NULL;
+               }
+
+               rc = mdb_tool_entry_get_int( be, id, &tool_next_entry );
+               if ( rc == LDAP_NO_SUCH_OBJECT ) {
+                       goto next;
+               }
+
+               assert( tool_next_entry != NULL );      /* NOTE(review): other non-zero rc values are not checked before this */
+
+#ifdef MDB_HIER
+               /* TODO: needed until MDB_HIER is handled accordingly
+                * in mdb_tool_entry_get_int() */
+               if ( tool_base && !dnIsSuffixScope( &tool_next_entry->e_nname, tool_base, tool_scope ) )
+               {
+                       mdb_entry_release( &op, tool_next_entry, 0 );
+                       tool_next_entry = NULL;
+                       goto next;
+               }
+#endif
+
+               if ( tool_filter && test_filter( NULL, tool_next_entry, tool_filter ) != LDAP_COMPARE_TRUE )
+               {
+                       mdb_entry_release( &op, tool_next_entry, 0 );
+                       tool_next_entry = NULL;
+                       goto next;
+               }
+       }
+
+       return id;
+}
+
+/* Look up the entry ID for the normalized DN dn in backend be.
+ *
+ * Returns 0 for an empty DN, NOID when mdb_cache_find_ndn() reports
+ * DB_NOTFOUND or hands back no entry info, and otherwise the ID of
+ * the entry info it returned.
+ */
+ID mdb_tool_dn2id_get(
+       Backend *be,
+       struct berval *dn
+)
+{
+       Operation op = {0};
+       Opheader ohdr = {0};
+       EntryInfo *ei = NULL;
+       ID id = NOID;
+       int rc;
+
+       if ( BER_BVISEMPTY(dn) )
+               return 0;
+
+       /* minimal operation context for the cache lookup */
+       op.o_hdr = &ohdr;
+       op.o_bd = be;
+       op.o_tmpmemctx = NULL;
+       op.o_tmpmfuncs = &ch_mfuncs;
+
+       rc = mdb_cache_find_ndn( &op, 0, dn, &ei );
+       if ( ei ) {
+               /* Take the ID before unlocking the entry info, and only
+                * when there is one: a failed lookup may leave ei NULL.
+                */
+               id = ei->bei_id;
+               mdb_cache_entryinfo_unlock( ei );
+       }
+       if ( rc == DB_NOTFOUND )
+               return NOID;
+
+       return id;
+}
+/* Fetch the entry stored under `id` through the file-level tool cursor.
+ *
+ * On success *ep receives the decoded entry, or the entry cached in
+ * tool_next_entry when a filter/base is active and `id` equals previd.
+ * Returns LDAP_OTHER when a cursor read or the header parse fails,
+ * LDAP_NO_SUCH_OBJECT (non-MDB_HIER builds) when the entry's DN is outside
+ * tool_base/tool_scope, and otherwise the result of entry_decode() or,
+ * under MDB_HIER in read-only tool mode, of mdb_cache_find_parent().
+ * *ep is written only when an entry was actually produced.
+ */
+static int
+mdb_tool_entry_get_int( BackendDB *be, ID id, Entry **ep )
+{
+       Entry *e = NULL;
+       char *dptr;
+       int rc, eoff;
+
+       assert( be != NULL );
+       assert( slapMode & SLAP_TOOL_MODE );
+
+       if ( ( tool_filter || tool_base ) && id == previd && tool_next_entry != NULL ) { /* entry for this id is already cached */
+               *ep = tool_next_entry;
+               tool_next_entry = NULL; /* clear the cache slot once the pointer has been handed out */
+               return LDAP_SUCCESS;
+       }
+
+       if ( id != previd ) { /* not the id recorded in previd:
+                              * position the cursor and read only the header bytes */
+               data.ulen = data.dlen = sizeof( ehbuf ); /* partial read limited to the size of ehbuf */
+               data.data = ehbuf;
+               data.flags |= DB_DBT_PARTIAL;
+
+               MDB_ID2DISK( id, &nid ); /* NOTE(review): presumably key.data points at nid, set up outside this function - confirm */
+               rc = cursor->c_get( cursor, &key, &data, DB_SET );
+               if ( rc ) {
+                       rc = LDAP_OTHER;
+                       goto done;
+               }
+       }
+
+       /* Get the header */
+       dptr = eh.bv.bv_val; /* remember eh's own buffer while eh is pointed at ehbuf */
+       eh.bv.bv_val = ehbuf;
+       eh.bv.bv_len = data.size;
+       rc = entry_header( &eh );
+       eoff = eh.data - eh.bv.bv_val; /* offset of eh.data within ehbuf after entry_header() */
+       eh.bv.bv_val = dptr; /* restore eh's buffer before rc is examined */
+       if ( rc ) {
+               rc = LDAP_OTHER;
+               goto done;
+       }
+
+       /* Get the size */
+       data.flags &= ~DB_DBT_PARTIAL;
+       data.ulen = 0; /* zero-length user buffer: the read below is expected to report DB_BUFFER_SMALL */
+       rc = cursor->c_get( cursor, &key, &data, DB_CURRENT );
+       if ( rc != DB_BUFFER_SMALL ) { /* any other result, success included, is treated as an error */
+               rc = LDAP_OTHER;
+               goto done;
+       }
+
+       /* Allocate a block and retrieve the data */
+       eh.bv.bv_len = eh.nvals * sizeof( struct berval ) + data.size; /* eh.nvals bervals followed by the raw record */
+       eh.bv.bv_val = ch_realloc( eh.bv.bv_val, eh.bv.bv_len );
+       eh.data = eh.bv.bv_val + eh.nvals * sizeof( struct berval );
+       data.data = eh.data; /* the record is read in just past the berval area */
+       data.ulen = data.size;
+
+       /* Skip past already parsed nattr/nvals */
+       eh.data += eoff;
+
+       rc = cursor->c_get( cursor, &key, &data, DB_CURRENT );
+       if ( rc ) {
+               rc = LDAP_OTHER;
+               goto done;
+       }
+
+#ifndef MDB_HIER
+       /* TODO: handle MDB_HIER accordingly */
+       if ( tool_base != NULL ) {
+               struct berval ndn;
+               entry_decode_dn( &eh, NULL, &ndn );
+
+               if ( !dnIsSuffixScope( &ndn, tool_base, tool_scope ) ) {
+                       return LDAP_NO_SUCH_OBJECT; /* DN lies outside tool_base/tool_scope; *ep is not written */
+               }
+       }
+#endif
+
+#ifdef SLAP_ZONE_ALLOC
+       /* FIXME: will add ctx later */
+       rc = entry_decode( &eh, &e, NULL );
+#else
+       rc = entry_decode( &eh, &e );
+#endif
+
+       if( rc == LDAP_SUCCESS ) {
+               e->e_id = id;
+#ifdef MDB_HIER
+               if ( slapMode & SLAP_TOOL_READONLY ) {
+                       struct mdb_info *mdb = (struct mdb_info *) be->be_private;
+                       EntryInfo *ei = NULL;
+                       Operation op = {0};
+                       Opheader ohdr = {0};
+
+                       op.o_hdr = &ohdr;
+                       op.o_bd = be;
+                       op.o_tmpmemctx = NULL;
+                       op.o_tmpmfuncs = &ch_mfuncs;
+
+                       rc = mdb_cache_find_parent( &op, mdb->bi_cache.c_txn, id, &ei ); /* rc now reflects this lookup, not entry_decode() */
+                       if ( rc == LDAP_SUCCESS ) {
+                               mdb_cache_entryinfo_unlock( ei );
+                               e->e_private = ei; /* link entry and EntryInfo only for the mdb_fix_dn() call */
+                               ei->bei_e = e;
+                               mdb_fix_dn( e, 0 );
+                               ei->bei_e = NULL; /* undo the temporary link */
+                               e->e_private = NULL;
+                       }
+               }
+#endif
+       }
+done:
+       if ( e != NULL ) { /* *ep is left untouched when nothing was decoded */
+               *ep = e;
+       }
+
+       return rc;
+}
+
+/* Convenience wrapper around mdb_tool_entry_get_int(): the status code is
+ * discarded and the caller only sees the entry pointer, which stays NULL
+ * when the internal lookup did not store an entry.
+ */
+Entry*
+mdb_tool_entry_get( BackendDB *be, ID id )
+{
+       Entry *found = NULL;
+       int status;
+
+       status = mdb_tool_entry_get_int( be, id, &found );
+       (void)status;
+
+       return found;
+}
+/* Resolve or assign the ID of entry `e` in the dn2id index.
+ *
+ * An empty normalized DN gets e->e_id = 0. When mdb_cache_find_ndn()
+ * reports DB_NOTFOUND, missing ancestors are created first by recursing
+ * with hole=1 on the parent DN (unless the DN is a suffix of the backend),
+ * then a new ID is taken from mdb_next_id() and recorded with
+ * mdb_dn2id_add(); entries created with hole != 0 are appended to holes[].
+ * When the DN is already known and hole == 0, e->e_id takes the existing ID
+ * and a holes[] record with that ID is removed.
+ *
+ * Failure text is formatted into `text`. Returns 0 on success, otherwise
+ * the non-zero code of the call that failed.
+ */
+static int mdb_tool_next_id(
+       Operation *op,
+       DB_TXN *tid,
+       Entry *e,
+       struct berval *text,
+       int hole )
+{
+       struct berval dn = e->e_name;
+       struct berval ndn = e->e_nname;
+       struct berval pdn, npdn;
+       EntryInfo *ei = NULL, eidummy;
+       int rc;
+
+       if (ndn.bv_len == 0) {
+               e->e_id = 0;
+               return 0;
+       }
+
+       rc = mdb_cache_find_ndn( op, tid, &ndn, &ei );
+       if ( ei ) mdb_cache_entryinfo_unlock( ei );
+       if ( rc == DB_NOTFOUND ) {
+               if ( !be_issuffix( op->o_bd, &ndn ) ) {
+                       ID eid = e->e_id;
+                       dnParent( &dn, &pdn );
+                       dnParent( &ndn, &npdn );
+                       e->e_name = pdn; /* temporarily present e as its own parent for the recursive call */
+                       e->e_nname = npdn;
+                       rc = mdb_tool_next_id( op, tid, e, text, 1 );
+                       e->e_name = dn; /* restore the entry's real names */
+                       e->e_nname = ndn;
+                       if ( rc ) {
+                               return rc;
+                       }
+                       /* If parent didn't exist, it was created just now
+                        * and its ID is now in e->e_id. Make sure the current
+                        * entry gets added under the new parent ID.
+                        */
+                       if ( eid != e->e_id ) {
+                               eidummy.bei_id = e->e_id; /* only bei_id of the dummy is filled in */
+                               ei = &eidummy;
+                       }
+               }
+               rc = mdb_next_id( op->o_bd, &e->e_id );
+               if ( rc ) {
+                       snprintf( text->bv_val, text->bv_len,
+                               "next_id failed: %s (%d)",
+                               db_strerror(rc), rc );
+               Debug( LDAP_DEBUG_ANY,
+                       "=> mdb_tool_next_id: %s\n", text->bv_val, 0, 0 );
+                       return rc;
+               }
+               rc = mdb_dn2id_add( op, tid, ei, e );
+               if ( rc ) {
+                       snprintf( text->bv_val, text->bv_len, 
+                               "dn2id_add failed: %s (%d)",
+                               db_strerror(rc), rc );
+               Debug( LDAP_DEBUG_ANY,
+                       "=> mdb_tool_next_id: %s\n", text->bv_val, 0, 0 );
+               } else if ( hole ) {
+                       if ( nholes == nhmax - 1 ) { /* one free slot left: double the capacity */
+                               if ( holes == hbuf ) { /* still on the initial hbuf array: move to a heap copy */
+                                       holes = ch_malloc( nhmax * sizeof(dn_id) * 2 );
+                                       AC_MEMCPY( holes, hbuf, sizeof(hbuf) );
+                               } else {
+                                       holes = ch_realloc( holes, nhmax * sizeof(dn_id) * 2 );
+                               }
+                               nhmax *= 2;
+                       }
+                       ber_dupbv( &holes[nholes].dn, &ndn ); /* the hole record keeps its own copy of the DN */
+                       holes[nholes++].id = e->e_id;
+               }
+       } else if ( !hole ) {
+               unsigned i, j;
+
+               e->e_id = ei->bei_id; /* NOTE(review): ei is dereferenced without a NULL check; relies on mdb_cache_find_ndn() having set it on this path - confirm */
+
+               for ( i=0; i<nholes; i++) {
+                       if ( holes[i].id == e->e_id ) {
+                               free(holes[i].dn.bv_val);
+                               for (j=i;j<nholes;j++) holes[j] = holes[j+1]; /* shift later records down; the last step copies holes[nholes] */
+                               holes[j].id = 0;
+                               nholes--;
+                               break;
+                       } else if ( holes[i].id > e->e_id ) { /* NOTE(review): early exit presumes holes[] is ordered by ascending id - confirm */
+                               break;
+                       }
+               }
+       }
+       return rc;
+}
+/* Add index records for entry `e`.
+ *
+ * Returns 0 at once when mdb->bi_nattrs is zero. In SLAP_TOOL_QUICK mode
+ * the shared IndexRec array mdb_tool_index_rec is filled through
+ * mdb_index_recset(), slots 1..slap_tool_thread_max-1 of
+ * mdb_tool_index_threads[] are marked LDAP_BUSY and the waiters on
+ * mdb_tool_index_cond_work are woken, this thread runs mdb_index_recrun()
+ * with base 0, and the first non-zero slot value (if any) is returned.
+ * Outside QUICK mode the work is delegated to mdb_index_entry_add().
+ */
+static int
+mdb_tool_index_add(
+       Operation *op,
+       DB_TXN *txn,
+       Entry *e )
+{
+       struct mdb_info *mdb = (struct mdb_info *) op->o_bd->be_private;
+
+       if ( !mdb->bi_nattrs )
+               return 0;
+
+       if ( slapMode & SLAP_TOOL_QUICK ) {
+               IndexRec *ir;
+               int i, rc;
+               Attribute *a;
+               
+               ir = mdb_tool_index_rec;
+               memset(ir, 0, mdb->bi_nattrs * sizeof( IndexRec )); /* start from an empty record set for this entry */
+
+               for ( a = e->e_attrs; a != NULL; a = a->a_next ) {
+                       rc = mdb_index_recset( mdb, a, a->a_desc->ad_type, 
+                               &a->a_desc->ad_tags, ir );
+                       if ( rc )
+                               return rc;
+               }
+               mdb_tool_ix_id = e->e_id; /* published through file-level variables read by mdb_tool_index_task() */
+               mdb_tool_ix_op = op;
+               ldap_pvt_thread_mutex_lock( &mdb_tool_index_mutex );
+               /* Wait for all threads to be ready */
+               while ( mdb_tool_index_tcount ) {
+                       ldap_pvt_thread_cond_wait( &mdb_tool_index_cond_main, 
+                               &mdb_tool_index_mutex );
+               }
+               for ( i=1; i<slap_tool_thread_max; i++ )
+                       mdb_tool_index_threads[i] = LDAP_BUSY; /* slot 0 is skipped: base 0 is run by this thread below */
+               mdb_tool_index_tcount = slap_tool_thread_max - 1;
+               ldap_pvt_thread_cond_broadcast( &mdb_tool_index_cond_work );
+               ldap_pvt_thread_mutex_unlock( &mdb_tool_index_mutex );
+               rc = mdb_index_recrun( op, mdb, ir, e->e_id, 0 );
+               if ( rc )
+                       return rc; /* NOTE(review): returns without waiting for the slots marked LDAP_BUSY above */
+               ldap_pvt_thread_mutex_lock( &mdb_tool_index_mutex );
+               for ( i=1; i<slap_tool_thread_max; i++ ) {
+                       if ( mdb_tool_index_threads[i] == LDAP_BUSY ) {
+                               ldap_pvt_thread_cond_wait( &mdb_tool_index_cond_main, 
+                                       &mdb_tool_index_mutex );
+                               i--; /* re-check the same slot after waking up */
+                               continue;
+                       }
+                       if ( mdb_tool_index_threads[i] ) { /* first non-zero slot value becomes the return code */
+                               rc = mdb_tool_index_threads[i];
+                               break;
+                       }
+               }
+               ldap_pvt_thread_mutex_unlock( &mdb_tool_index_mutex );
+               return rc;
+       } else {
+               return mdb_index_entry_add( op, txn, e );
+       }
+}
+
+ID mdb_tool_entry_put(
+       BackendDB *be,
+       Entry *e,
+       struct berval *text )
+{
+       int rc;
+       struct mdb_info *mdb;
+       DB_TXN *tid = NULL;
+       Operation op = {0};
+       Opheader ohdr = {0};
+
+       assert( be != NULL );
+       assert( slapMode & SLAP_TOOL_MODE );
+
+       assert( text != NULL );
+       assert( text->bv_val != NULL );
+       assert( text->bv_val[0] == '\0' );      /* overconservative? */
+
+       Debug( LDAP_DEBUG_TRACE, "=> " LDAP_XSTRING(mdb_tool_entry_put)
+               "( %ld, \"%s\" )\n", (long) e->e_id, e->e_dn, 0 );
+
+       mdb = (struct mdb_info *) be->be_private;
+
+       if (! (slapMode & SLAP_TOOL_QUICK)) {
+       rc = TXN_BEGIN( mdb->bi_dbenv, NULL, &tid, 
+               mdb->bi_db_opflags );
+       if( rc != 0 ) {
+               snprintf( text->bv_val, text->bv_len,
+                       "txn_begin failed: %s (%d)",
+                       db_strerror(rc), rc );
+               Debug( LDAP_DEBUG_ANY,
+                       "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n",
+                        text->bv_val, 0, 0 );
+               return NOID;
+       }
+       }
+
+       op.o_hdr = &ohdr;
+       op.o_bd = be;
+       op.o_tmpmemctx = NULL;
+       op.o_tmpmfuncs = &ch_mfuncs;
+
+       /* add dn2id indices */
+       rc = mdb_tool_next_id( &op, tid, e, text, 0 );
+       if( rc != 0 ) {
+               goto done;
+       }
+
+#ifdef USE_TRICKLE
+       if (( slapMode & SLAP_TOOL_QUICK ) && (( e->e_id & 0xfff ) == 0xfff )) {
+               ldap_pvt_thread_cond_signal( &mdb_tool_trickle_cond );
+       }
+#endif
+
+       if ( !mdb->bi_linear_index )
+               rc = mdb_tool_index_add( &op, tid, e );
+       if( rc != 0 ) {
+               snprintf( text->bv_val, text->bv_len,
+                               "index_entry_add failed: %s (%d)",
+                               rc == LDAP_OTHER ? "Internal error" :
+                               db_strerror(rc), rc );
+               Debug( LDAP_DEBUG_ANY,
+                       "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n",
+                       text->bv_val, 0, 0 );
+               goto done;
+       }
+
+       /* id2entry index */
+       rc = mdb_id2entry_add( be, tid, e );
+       if( rc != 0 ) {
+               snprintf( text->bv_val, text->bv_len,
+                               "id2entry_add failed: %s (%d)",
+                               db_strerror(rc), rc );
+               Debug( LDAP_DEBUG_ANY,
+                       "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n",
+                       text->bv_val, 0, 0 );
+               goto done;
+       }
+
+done:
+       if( rc == 0 ) {
+               if ( !( slapMode & SLAP_TOOL_QUICK )) {
+               rc = TXN_COMMIT( tid, 0 );
+               if( rc != 0 ) {
+                       snprintf( text->bv_val, text->bv_len,
+                                       "txn_commit failed: %s (%d)",
+                                       db_strerror(rc), rc );
+                       Debug( LDAP_DEBUG_ANY,
+                               "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n",
+                               text->bv_val, 0, 0 );
+                       e->e_id = NOID;
+               }
+               }
+
+       } else {
+               if ( !( slapMode & SLAP_TOOL_QUICK )) {
+               TXN_ABORT( tid );
+               snprintf( text->bv_val, text->bv_len,
+                       "txn_aborted! %s (%d)",
+                       rc == LDAP_OTHER ? "Internal error" :
+                       db_strerror(rc), rc );
+               Debug( LDAP_DEBUG_ANY,
+                       "=> " LDAP_XSTRING(mdb_tool_entry_put) ": %s\n",
+                       text->bv_val, 0, 0 );
+               }
+               e->e_id = NOID;
+       }
+
+       return e->e_id;
+}
+
+/* Re-run attribute indexing for the entry stored under `id`.
+ *
+ * `adv`, when non-NULL, is a NULL-terminated list of attribute descriptions
+ * to index: it is sorted by pointer value in place, bi->bi_attrs is
+ * permuted so the matching AttrInfo records come first, and bi->bi_nattrs
+ * is set to the number of listed attributes. Returns 0 when no indexes are
+ * configured or indexing succeeded, -1 when a listed attribute has no
+ * index or the entry cannot be fetched, and otherwise the failing call's
+ * return code.
+ *
+ * Outside SLAP_TOOL_QUICK mode the indexing runs in its own transaction.
+ */
+int mdb_tool_entry_reindex(
+       BackendDB *be,
+       ID id,
+       AttributeDescription **adv )
+{
+       struct mdb_info *bi = (struct mdb_info *) be->be_private;
+       int rc;
+       Entry *e;
+       DB_TXN *tid = NULL;
+       Operation op = {0};
+       Opheader ohdr = {0};
+
+       Debug( LDAP_DEBUG_ARGS,
+               "=> " LDAP_XSTRING(mdb_tool_entry_reindex) "( %ld )\n",
+               (long) id, 0, 0 );
+       assert( tool_base == NULL );
+       assert( tool_filter == NULL );
+
+       /* No indexes configured, nothing to do. Could return an
+        * error here to shortcut things.
+        */
+       if (!bi->bi_attrs) {
+               return 0;
+       }
+
+       /* Check for explicit list of attrs to index */
+       if ( adv ) {
+               int i, j, n;
+
+               if ( bi->bi_attrs[0]->ai_desc != adv[0] ) {
+                       /* count */
+                       for ( n = 0; adv[n]; n++ ) ;
+
+                       /* insertion sort */
+                       for ( i = 0; i < n; i++ ) {
+                               AttributeDescription *ad = adv[i];
+                               for ( j = i-1; j>=0; j--) {
+                                       if ( SLAP_PTRCMP( adv[j], ad ) <= 0 ) break;
+                                       adv[j+1] = adv[j];
+                               }
+                               adv[j+1] = ad;
+                       }
+               }
+
+               /* Bring the AttrInfo for each requested attribute to the
+                * front of bi_attrs, in the same order as adv.
+                */
+               for ( i = 0; adv[i]; i++ ) {
+                       if ( bi->bi_attrs[i]->ai_desc != adv[i] ) {
+                               for ( j = i+1; j < bi->bi_nattrs; j++ ) {
+                                       if ( bi->bi_attrs[j]->ai_desc == adv[i] ) {
+                                               AttrInfo *ai = bi->bi_attrs[i];
+                                               bi->bi_attrs[i] = bi->bi_attrs[j];
+                                               bi->bi_attrs[j] = ai;
+                                               break;
+                                       }
+                               }
+                               if ( j == bi->bi_nattrs ) {
+                                       Debug( LDAP_DEBUG_ANY,
+                                               LDAP_XSTRING(mdb_tool_entry_reindex)
+                                               ": no index configured for %s\n",
+                                               adv[i]->ad_cname.bv_val, 0, 0 );
+                                       return -1;
+                               }
+                       }
+               }
+               bi->bi_nattrs = i;
+       }
+
+       /* Get the first attribute to index */
+       if (bi->bi_linear_index && !index_nattrs) {
+               index_nattrs = bi->bi_nattrs - 1;
+               bi->bi_nattrs = 1;
+       }
+
+       e = mdb_tool_entry_get( be, id );
+
+       if( e == NULL ) {
+               Debug( LDAP_DEBUG_ANY,
+                       LDAP_XSTRING(mdb_tool_entry_reindex)
+                       ": could not locate id=%ld\n",
+                       (long) id, 0, 0 );
+               return -1;
+       }
+
+       /* Populate op before anything can jump to done: the cleanup there
+        * hands &op to mdb_entry_release() even when txn_begin failed.
+        */
+       op.o_hdr = &ohdr;
+       op.o_bd = be;
+       op.o_tmpmemctx = NULL;
+       op.o_tmpmfuncs = &ch_mfuncs;
+
+       if ( !( slapMode & SLAP_TOOL_QUICK ) ) {
+               rc = TXN_BEGIN( bi->bi_dbenv, NULL, &tid, bi->bi_db_opflags );
+               if( rc != 0 ) {
+                       Debug( LDAP_DEBUG_ANY,
+                               "=> " LDAP_XSTRING(mdb_tool_entry_reindex) ": "
+                               "txn_begin failed: %s (%d)\n",
+                               db_strerror(rc), rc, 0 );
+                       goto done;
+               }
+       }
+
+       /*
+        * just (re)add them for now
+        * assume that some other routine (not yet implemented)
+        * will zap index databases
+        *
+        */
+
+       Debug( LDAP_DEBUG_TRACE,
+               "=> " LDAP_XSTRING(mdb_tool_entry_reindex) "( %ld, \"%s\" )\n",
+               (long) id, e->e_dn, 0 );
+
+       rc = mdb_tool_index_add( &op, tid, e );
+
+done:
+       if( rc == 0 ) {
+               if ( !( slapMode & SLAP_TOOL_QUICK ) ) {
+                       rc = TXN_COMMIT( tid, 0 );
+                       if( rc != 0 ) {
+                               Debug( LDAP_DEBUG_ANY,
+                                       "=> " LDAP_XSTRING(mdb_tool_entry_reindex)
+                                       ": txn_commit failed: %s (%d)\n",
+                                       db_strerror(rc), rc, 0 );
+                               e->e_id = NOID;
+                       }
+               }
+
+       } else {
+               /* Abort only a transaction that was actually started; tid
+                * is still NULL when TXN_BEGIN itself failed.
+                */
+               if ( !( slapMode & SLAP_TOOL_QUICK ) && tid != NULL ) {
+                       TXN_ABORT( tid );
+                       Debug( LDAP_DEBUG_ANY,
+                               "=> " LDAP_XSTRING(mdb_tool_entry_reindex)
+                               ": txn_aborted! %s (%d)\n",
+                               db_strerror(rc), rc, 0 );
+               }
+               e->e_id = NOID;
+       }
+       mdb_entry_release( &op, e, 0 );
+
+       return rc;
+}
+
+/* Overwrite the stored copy of entry `e` (which must already have a valid
+ * ID) in id2entry from a slap tool.
+ *
+ * Outside SLAP_TOOL_QUICK mode the file-level tool cursor is closed first
+ * and the update runs in its own transaction, committed on success and
+ * aborted on failure. A description of any failure is formatted into
+ * `text`. Returns the entry's ID, or NOID on failure.
+ */
+ID mdb_tool_entry_modify(
+       BackendDB *be,
+       Entry *e,
+       struct berval *text )
+{
+       int rc;
+       struct mdb_info *mdb;
+       DB_TXN *tid = NULL;
+       Operation op = {0};
+       Opheader ohdr = {0};
+
+       assert( be != NULL );
+       assert( slapMode & SLAP_TOOL_MODE );
+
+       assert( text != NULL );
+       assert( text->bv_val != NULL );
+       assert( text->bv_val[0] == '\0' );      /* overconservative? */
+
+       assert ( e->e_id != NOID );
+
+       Debug( LDAP_DEBUG_TRACE,
+               "=> " LDAP_XSTRING(mdb_tool_entry_modify) "( %ld, \"%s\" )\n",
+               (long) e->e_id, e->e_dn, 0 );
+
+       mdb = (struct mdb_info *) be->be_private;
+
+       if (! (slapMode & SLAP_TOOL_QUICK)) {
+               /* Close the read cursor before starting the transaction. */
+               if( cursor ) {
+                       cursor->c_close( cursor );
+                       cursor = NULL;
+               }
+               rc = TXN_BEGIN( mdb->bi_dbenv, NULL, &tid,
+                       mdb->bi_db_opflags );
+               if( rc != 0 ) {
+                       snprintf( text->bv_val, text->bv_len,
+                               "txn_begin failed: %s (%d)",
+                               db_strerror(rc), rc );
+                       Debug( LDAP_DEBUG_ANY,
+                               "=> " LDAP_XSTRING(mdb_tool_entry_modify) ": %s\n",
+                                text->bv_val, 0, 0 );
+                       return NOID;
+               }
+       }
+
+       op.o_hdr = &ohdr;
+       op.o_bd = be;
+       op.o_tmpmemctx = NULL;
+       op.o_tmpmfuncs = &ch_mfuncs;
+
+       /* id2entry index */
+       rc = mdb_id2entry_update( be, tid, e );
+       if( rc != 0 ) {
+               /* Name the operation that actually failed (update, not add). */
+               snprintf( text->bv_val, text->bv_len,
+                               "id2entry_update failed: %s (%d)",
+                               db_strerror(rc), rc );
+               Debug( LDAP_DEBUG_ANY,
+                       "=> " LDAP_XSTRING(mdb_tool_entry_modify) ": %s\n",
+                       text->bv_val, 0, 0 );
+       }
+
+       if( rc == 0 ) {
+               if (! (slapMode & SLAP_TOOL_QUICK)) {
+                       rc = TXN_COMMIT( tid, 0 );
+                       if( rc != 0 ) {
+                               snprintf( text->bv_val, text->bv_len,
+                                               "txn_commit failed: %s (%d)",
+                                               db_strerror(rc), rc );
+                               Debug( LDAP_DEBUG_ANY,
+                                       "=> " LDAP_XSTRING(mdb_tool_entry_modify) ": "
+                                       "%s\n", text->bv_val, 0, 0 );
+                               e->e_id = NOID;
+                       }
+               }
+
+       } else {
+               if (! (slapMode & SLAP_TOOL_QUICK)) {
+                       TXN_ABORT( tid );
+                       snprintf( text->bv_val, text->bv_len,
+                               "txn_aborted! %s (%d)",
+                               db_strerror(rc), rc );
+                       Debug( LDAP_DEBUG_ANY,
+                               "=> " LDAP_XSTRING(mdb_tool_entry_modify) ": %s\n",
+                               text->bv_val, 0, 0 );
+               }
+               e->e_id = NOID;
+       }
+
+       return e->e_id;
+}
+
+#ifdef USE_TRICKLE
+/* Background task for the tool's trickle thread: announces itself via
+ * mdb_tool_trickle_active / mdb_tool_trickle_cond_end, then calls
+ * memp_trickle( env, 30, ... ) on the environment passed in `ptr` each time
+ * mdb_tool_trickle_cond is signalled, until slapd_shutdown is seen after a
+ * wakeup. `ctx` is unused. Always returns NULL.
+ */
+static void *
+mdb_tool_trickle_task( void *ctx, void *ptr )
+{
+       DB_ENV *dbenv = ptr;
+       int nwritten;
+
+       ldap_pvt_thread_mutex_lock( &mdb_tool_trickle_mutex );
+
+       /* Tell whoever is waiting on cond_end that the task is running. */
+       mdb_tool_trickle_active = 1;
+       ldap_pvt_thread_cond_signal( &mdb_tool_trickle_cond_end );
+
+       /* Sleep first; every wakeup without a pending shutdown trickles. */
+       ldap_pvt_thread_cond_wait( &mdb_tool_trickle_cond,
+               &mdb_tool_trickle_mutex );
+       while ( !slapd_shutdown ) {
+               dbenv->memp_trickle( dbenv, 30, &nwritten );
+               ldap_pvt_thread_cond_wait( &mdb_tool_trickle_cond,
+                       &mdb_tool_trickle_mutex );
+       }
+
+       /* Announce termination the same way. */
+       mdb_tool_trickle_active = 0;
+       ldap_pvt_thread_cond_signal( &mdb_tool_trickle_cond_end );
+
+       ldap_pvt_thread_mutex_unlock( &mdb_tool_trickle_mutex );
+
+       return NULL;
+}
+#endif
+
+/* Worker task for parallel indexing. `ptr` points at a heap-allocated int
+ * holding this worker's slot number; it is read once and freed.
+ *
+ * Each round the worker decrements mdb_tool_index_tcount (signalling
+ * mdb_tool_index_cond_main when it reaches zero), sleeps on
+ * mdb_tool_index_cond_work, and on wakeup either exits (slapd_shutdown
+ * set, with one more decrement/signal) or stores the result of
+ * mdb_index_recrun() for its slot in mdb_tool_index_threads[].
+ * `ctx` is unused. Always returns NULL.
+ */
+static void *
+mdb_tool_index_task( void *ctx, void *ptr )
+{
+       int *slotp = ptr;
+       int slot = *slotp;
+       int stopping;
+
+       free( slotp );
+
+       do {
+               ldap_pvt_thread_mutex_lock( &mdb_tool_index_mutex );
+
+               /* Report readiness; the last worker to do so wakes the main thread. */
+               mdb_tool_index_tcount--;
+               if ( mdb_tool_index_tcount == 0 )
+                       ldap_pvt_thread_cond_signal( &mdb_tool_index_cond_main );
+
+               ldap_pvt_thread_cond_wait( &mdb_tool_index_cond_work,
+                       &mdb_tool_index_mutex );
+
+               stopping = ( slapd_shutdown != 0 );
+               if ( stopping ) {
+                       mdb_tool_index_tcount--;
+                       if ( mdb_tool_index_tcount == 0 )
+                               ldap_pvt_thread_cond_signal( &mdb_tool_index_cond_main );
+               }
+
+               ldap_pvt_thread_mutex_unlock( &mdb_tool_index_mutex );
+
+               /* The indexing itself runs with the mutex released. */
+               if ( !stopping ) {
+                       mdb_tool_index_threads[slot] = mdb_index_recrun( mdb_tool_ix_op,
+                               mdb_tool_info, mdb_tool_index_rec, mdb_tool_ix_id, slot );
+               }
+       } while ( !stopping );
+
+       return NULL;
+}