]>
git.ipfire.org Git - thirdparty/glibc.git/blob - db2/hash/hash.c
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
8 * Copyright (c) 1990, 1993, 1994
9 * Margo Seltzer. All rights reserved.
12 * Copyright (c) 1990, 1993, 1994
13 * The Regents of the University of California. All rights reserved.
15 * This code is derived from software contributed to Berkeley by
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. All advertising materials mentioning features or use of this software
27 * must display the following acknowledgement:
28 * This product includes software developed by the University of
29 * California, Berkeley and its contributors.
30 * 4. Neither the name of the University nor the names of its contributors
31 * may be used to endorse or promote products derived from this software
32 * without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 static const char sccsid
[] = "@(#)hash.c 10.36 (Sleepycat) 1/8/98";
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
73 static int __ham_c_close
__P((DBC
*));
74 static int __ham_c_del
__P((DBC
*, int));
75 static int __ham_c_get
__P((DBC
*, DBT
*, DBT
*, int));
76 static int __ham_c_put
__P((DBC
*, DBT
*, DBT
*, int));
77 static int __ham_c_init
__P((DB
*, DB_TXN
*, DBC
**));
78 static int __ham_cursor
__P((DB
*, DB_TXN
*, DBC
**));
79 static int __ham_delete
__P((DB
*, DB_TXN
*, DBT
*, int));
80 static int __ham_dup_return
__P((HTAB
*, HASH_CURSOR
*, DBT
*, int));
81 static int __ham_get
__P((DB
*, DB_TXN
*, DBT
*, DBT
*, int));
82 static void __ham_init_htab
__P((HTAB
*, u_int
));
83 static int __ham_lookup
__P((HTAB
*,
84 HASH_CURSOR
*, const DBT
*, u_int32_t
, db_lockmode_t
));
85 static int __ham_overwrite
__P((HTAB
*, HASH_CURSOR
*, DBT
*));
86 static int __ham_put
__P((DB
*, DB_TXN
*, DBT
*, DBT
*, int));
87 static int __ham_sync
__P((DB
*, int));
89 /************************** INTERFACE ROUTINES ***************************/
95 * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
98 __ham_open(dbp
, dbinfo
)
105 int file_existed
, ret
;
109 if ((hashp
= (HTAB
*)__db_calloc(1, sizeof(HTAB
))) == NULL
)
113 /* Set the hash function if specified by the user. */
114 if (dbinfo
!= NULL
&& dbinfo
->h_hash
!= NULL
)
115 hashp
->hash
= dbinfo
->h_hash
;
118 * Initialize the remaining fields of the dbp. The type, close and
119 * fd functions are all set in db_open.
121 dbp
->internal
= hashp
;
122 dbp
->cursor
= __ham_cursor
;
123 dbp
->del
= __ham_delete
;
124 dbp
->get
= __ham_get
;
125 dbp
->put
= __ham_put
;
126 dbp
->sync
= __ham_sync
;
128 /* If locking is turned on, lock the meta data page. */
129 if (F_ISSET(dbp
, DB_AM_LOCKING
)) {
130 dbp
->lock
.pgno
= BUCKET_INVALID
;
131 if ((ret
= lock_get(dbenv
->lk_info
, dbp
->locker
,
132 0, &dbp
->lock_dbt
, DB_LOCK_READ
, &hashp
->hlock
)) != 0) {
140 * Now, we can try to read the meta-data page and figure out
141 * if we set up locking and get the meta-data page properly.
142 * If this is a new file, initialize it, and put it back dirty.
144 if ((ret
= __ham_get_page(hashp
->dbp
, 0, (PAGE
**)&hashp
->hdr
)) != 0)
147 /* Initialize the hashp structure */
148 if (hashp
->hdr
->magic
== DB_HASHMAGIC
) {
150 /* File exists, verify the data in the header. */
151 if (hashp
->hash
== NULL
)
153 hashp
->hdr
->version
< 5 ? __ham_func4
: __ham_func5
;
154 if (hashp
->hash(CHARKEY
, sizeof(CHARKEY
)) !=
155 hashp
->hdr
->h_charkey
) {
156 __db_err(hashp
->dbp
->dbenv
,
157 "hash: incompatible hash function");
161 if (F_ISSET(hashp
->hdr
, DB_HASH_DUP
))
162 F_SET(dbp
, DB_AM_DUP
);
165 * File does not exist, we must initialize the header. If
166 * locking is enabled that means getting a write lock first.
169 if (F_ISSET(dbp
, DB_AM_LOCKING
) &&
170 ((ret
= lock_put(dbenv
->lk_info
, hashp
->hlock
)) != 0 ||
171 (ret
= lock_get(dbenv
->lk_info
, dbp
->locker
, 0,
172 &dbp
->lock_dbt
, DB_LOCK_WRITE
, &hashp
->hlock
)) != 0)) {
178 hashp
->hdr
->ffactor
=
179 dbinfo
!= NULL
&& dbinfo
->h_ffactor
? dbinfo
->h_ffactor
: 0;
180 __ham_init_htab(hashp
, dbinfo
!= NULL
? dbinfo
->h_nelem
: 0);
181 if (F_ISSET(dbp
, DB_AM_DUP
))
182 F_SET(hashp
->hdr
, DB_HASH_DUP
);
183 if ((ret
= __ham_dirty_page(hashp
, (PAGE
*)hashp
->hdr
)) != 0)
187 /* Initialize the default cursor. */
188 __ham_c_init(dbp
, NULL
, &curs
);
189 TAILQ_INSERT_TAIL(&dbp
->curs_queue
, curs
, links
);
191 /* Allocate memory for our split buffer. */
192 if ((hashp
->split_buf
= (PAGE
*)__db_malloc(dbp
->pgsize
)) == NULL
) {
197 #ifdef NO_STATISTICS_FOR_DB_ERR
199 "%s%lx\n%s%ld\n%s%ld\n%s%ld\n%s%ld\n%s0x%lx\n%s0x%lx\n%s%ld\n%s%ld\n%s0x%lx",
200 "TABLE POINTER ", (long)hashp
,
201 "BUCKET SIZE ", (long)hashp
->hdr
->pagesize
,
202 "FILL FACTOR ", (long)hashp
->hdr
->ffactor
,
203 "MAX BUCKET ", (long)hashp
->hdr
->max_bucket
,
204 "OVFL POINT ", (long)hashp
->hdr
->ovfl_point
,
205 "LAST FREED ", (long)hashp
->hdr
->last_freed
,
206 "HIGH MASK ", (long)hashp
->hdr
->high_mask
,
207 "LOW MASK ", (long)hashp
->hdr
->low_mask
,
208 "NELEM ", (long)hashp
->hdr
->nelem
,
209 "FLAGS ", (long)hashp
->hdr
->flags
);
212 /* Release the meta data page */
213 (void)__ham_put_page(hashp
->dbp
, (PAGE
*)hashp
->hdr
, 0);
214 if (F_ISSET(dbp
, DB_AM_LOCKING
) &&
215 (ret
= lock_put(dbenv
->lk_info
, hashp
->hlock
)) != 0) {
223 /* Sync the file so that we know that the meta data goes to disk. */
224 if (!file_existed
&& (ret
= dbp
->sync(dbp
, 0)) != 0)
228 out
: (void)__ham_close(dbp
);
233 * PUBLIC: int __ham_close __P((DB *));
242 DEBUG_LWRITE(dbp
, NULL
, "ham_close", NULL
, NULL
, 0);
243 hashp
= (HTAB
*)dbp
->internal
;
246 /* Free the split page. */
247 if (hashp
->split_buf
)
248 FREE(hashp
->split_buf
, dbp
->pgsize
);
250 if (hashp
->hdr
&& (t_ret
= __ham_put_page(hashp
->dbp
,
251 (PAGE
*)hashp
->hdr
, 0)) != 0 && ret
== 0)
253 if (hashp
->hlock
&& (t_ret
= lock_put(hashp
->dbp
->dbenv
->lk_info
,
254 hashp
->hlock
)) != 0 && ret
== 0)
257 FREE(hashp
, sizeof(HTAB
));
258 dbp
->internal
= NULL
;
262 /************************** LOCAL CREATION ROUTINES **********************/
264 * Returns 0 on No Error
267 __ham_init_htab(hashp
, nelem
)
271 int32_t l2
, nbuckets
;
273 hashp
->hdr
->nelem
= 0;
274 hashp
->hdr
->pagesize
= hashp
->dbp
->pgsize
;
275 ZERO_LSN(hashp
->hdr
->lsn
);
276 hashp
->hdr
->magic
= DB_HASHMAGIC
;
277 hashp
->hdr
->version
= DB_HASHVERSION
;
278 if (hashp
->hash
== NULL
)
280 hashp
->hdr
->version
< 5 ? __ham_func4
: __ham_func5
;
281 hashp
->hdr
->h_charkey
= hashp
->hash(CHARKEY
, sizeof(CHARKEY
));
282 if (nelem
!= 0 && hashp
->hdr
->ffactor
!= 0) {
283 nelem
= (nelem
- 1) / hashp
->hdr
->ffactor
+ 1;
284 l2
= __db_log2(nelem
> 2 ? nelem
: 2);
290 hashp
->hdr
->spares
[l2
] = 0;
291 hashp
->hdr
->spares
[l2
+ 1] = 0;
292 hashp
->hdr
->ovfl_point
= l2
;
293 hashp
->hdr
->last_freed
= PGNO_INVALID
;
295 hashp
->hdr
->max_bucket
= hashp
->hdr
->high_mask
= nbuckets
- 1;
296 hashp
->hdr
->low_mask
= (nbuckets
>> 1) - 1;
297 memcpy(hashp
->hdr
->uid
, hashp
->dbp
->lock
.fileid
, DB_FILE_ID_LEN
);
300 /********************** DESTROY/CLOSE ROUTINES ************************/
304 * Write modified pages to disk
311 __ham_sync(dbp
, flags
)
317 DEBUG_LWRITE(dbp
, NULL
, "ham_sync", NULL
, NULL
, flags
);
318 if ((ret
= __db_syncchk(dbp
, flags
)) != 0)
320 if (F_ISSET(dbp
, DB_AM_RDONLY
))
323 if ((ret
= memp_fsync(dbp
->mpf
)) == DB_INCOMPLETE
)
329 /*******************************SEARCH ROUTINES *****************************/
331 * All the access routines return
335 * 1 to indicate an external ERROR (i.e. key not found, etc)
336 * -1 to indicate an internal ERROR (i.e. out of memory, etc)
340 __ham_get(dbp
, txn
, key
, data
, flags
)
353 DEBUG_LREAD(dbp
, txn
, "ham_get", key
, NULL
, flags
);
354 if ((ret
= __db_getchk(dbp
, key
, data
, flags
)) != 0)
358 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
359 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
362 hashp
= (HTAB
*)ldbp
->internal
;
363 SET_LOCKER(ldbp
, txn
);
364 GET_META(ldbp
, hashp
);
365 cp
= TAILQ_FIRST(&ldbp
->curs_queue
);
367 hashp
->hash_accesses
++;
368 hcp
= (HASH_CURSOR
*)TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
369 if ((ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_READ
)) == 0)
370 if (F_ISSET(hcp
, H_OK
))
371 ret
= __ham_dup_return(hashp
, hcp
, data
, DB_FIRST
);
372 else /* Key was not found */
375 if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
377 RELEASE_META(ldbp
, hashp
);
378 if (F_ISSET(dbp
, DB_AM_THREAD
))
379 __db_puthandle(ldbp
);
384 __ham_put(dbp
, txn
, key
, data
, flags
)
398 DEBUG_LWRITE(dbp
, txn
, "ham_put", key
, data
, flags
);
399 if ((ret
= __db_putchk(dbp
, key
, data
,
400 flags
, F_ISSET(dbp
, DB_AM_RDONLY
), F_ISSET(dbp
, DB_AM_DUP
))) != 0)
404 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
405 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
408 hashp
= (HTAB
*)ldbp
->internal
;
409 SET_LOCKER(ldbp
, txn
);
410 GET_META(ldbp
, hashp
);
411 hcp
= TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
413 nbytes
= (ISBIG(hashp
, key
->size
) ? HOFFPAGE_PSIZE
:
414 HKEYDATA_PSIZE(key
->size
)) +
415 (ISBIG(hashp
, data
->size
) ? HOFFPAGE_PSIZE
:
416 HKEYDATA_PSIZE(data
->size
));
418 hashp
->hash_accesses
++;
419 ret
= __ham_lookup(hashp
, hcp
, key
, nbytes
, DB_LOCK_WRITE
);
421 if (ret
== DB_NOTFOUND
) {
423 if (hcp
->seek_found_page
!= PGNO_INVALID
&&
424 hcp
->seek_found_page
!= hcp
->pgno
) {
425 if ((ret
= __ham_item_done(hashp
, hcp
, 0)) != 0)
427 hcp
->pgno
= hcp
->seek_found_page
;
428 hcp
->bndx
= NDX_INVALID
;
431 if (F_ISSET(data
, DB_DBT_PARTIAL
) && data
->doff
!= 0) {
433 * Doing a partial put, but the key does not exist
434 * and we are not beginning the write at 0. We
435 * must create a data item padded up to doff and
436 * then write the new bytes represented by val.
438 ret
= __ham_init_dbt(&tmp_val
, data
->size
+ data
->doff
,
439 &hcp
->big_data
, &hcp
->big_datalen
);
441 memset(tmp_val
.data
, 0, data
->doff
);
442 memcpy((u_int8_t
*)tmp_val
.data
+ data
->doff
,
443 data
->data
, data
->size
);
450 ret
= __ham_add_el(hashp
, hcp
, key
, myval
, H_KEYDATA
);
451 } else if (ret
== 0 && F_ISSET(hcp
, H_OK
)) {
452 if (flags
== DB_NOOVERWRITE
)
454 else if (F_ISSET(ldbp
, DB_AM_DUP
))
455 ret
= __ham_add_dup(hashp
, hcp
, data
, DB_KEYLAST
);
457 ret
= __ham_overwrite(hashp
, hcp
, data
);
460 /* Free up all the cursor pages. */
461 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
463 /* Now check if we have to grow. */
464 out
: if (ret
== 0 && F_ISSET(hcp
, H_EXPAND
)) {
465 ret
= __ham_expand_table(hashp
);
466 F_CLR(hcp
, H_EXPAND
);
469 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
471 RELEASE_META(ldbp
, hashp
);
472 if (F_ISSET(dbp
, DB_AM_THREAD
))
473 __db_puthandle(ldbp
);
478 __ham_cursor(dbp
, txnid
, dbcp
)
485 DEBUG_LWRITE(dbp
, txnid
, "ham_cursor", NULL
, NULL
, 0);
486 if ((ret
= __ham_c_init(dbp
, txnid
, dbcp
)) != 0)
490 TAILQ_INSERT_TAIL(&dbp
->curs_queue
, *dbcp
, links
);
491 DB_THREAD_UNLOCK(dbp
);
496 __ham_c_init(dbp
, txnid
, dbcp
)
502 HASH_CURSOR
*new_curs
;
504 if ((db_curs
= (DBC
*)__db_calloc(sizeof(DBC
), 1)) == NULL
)
508 (HASH_CURSOR
*)__db_calloc(sizeof(struct cursor_t
), 1)) == NULL
) {
509 FREE(db_curs
, sizeof(DBC
));
513 db_curs
->internal
= new_curs
;
514 db_curs
->c_close
= __ham_c_close
;
515 db_curs
->c_del
= __ham_c_del
;
516 db_curs
->c_get
= __ham_c_get
;
517 db_curs
->c_put
= __ham_c_put
;
518 db_curs
->txn
= txnid
;
521 new_curs
->db_cursor
= db_curs
;
522 __ham_item_init(new_curs
);
530 __ham_delete(dbp
, txn
, key
, flags
)
541 DEBUG_LWRITE(dbp
, txn
, "ham_delete", key
, NULL
, flags
);
542 if ((ret
= __db_delchk(dbp
, flags
, F_ISSET(dbp
, DB_AM_RDONLY
))) != 0)
546 if (F_ISSET(dbp
, DB_AM_THREAD
) &&
547 (ret
= __db_gethandle(dbp
, __ham_hdup
, &ldbp
)) != 0)
549 hashp
= (HTAB
*)ldbp
->internal
;
550 SET_LOCKER(ldbp
, txn
);
551 GET_META(ldbp
, hashp
);
552 hcp
= TAILQ_FIRST(&ldbp
->curs_queue
)->internal
;
554 hashp
->hash_accesses
++;
555 if ((ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_WRITE
)) == 0)
556 if (F_ISSET(hcp
, H_OK
))
557 ret
= __ham_del_pair(hashp
, hcp
, 1);
561 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
563 RELEASE_META(ldbp
, hashp
);
564 if (F_ISSET(dbp
, DB_AM_THREAD
))
565 __db_puthandle(ldbp
);
569 /* ****************** CURSORS ********************************** */
571 __ham_c_close(cursor
)
577 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_close", NULL
, NULL
, 0);
579 * If the pagep, dpagep, and lock fields of the cursor are all NULL,
580 * then there really isn't a need to get a handle here. However,
581 * the normal case is that at least one of those fields is non-NULL,
582 * and putting those checks in here would couple the ham_item_done
583 * functionality with cursor close which would be pretty disgusting.
584 * Instead, we pay the overhead here of always getting the handle.
587 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
588 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
591 ret
= __ham_c_iclose(ldbp
, cursor
);
593 if (F_ISSET(ldbp
, DB_AM_THREAD
))
594 __db_puthandle(ldbp
);
600 * Internal cursor close routine; assumes it is being passed the correct
601 * handle, rather than getting and putting a handle.
603 * PUBLIC: int __ham_c_iclose __P((DB *, DBC *));
606 __ham_c_iclose(dbp
, dbc
)
614 hashp
= (HTAB
*)dbp
->internal
;
615 hcp
= (HASH_CURSOR
*)dbc
->internal
;
616 ret
= __ham_item_done(hashp
, hcp
, 0);
619 FREE(hcp
->big_key
, hcp
->big_keylen
);
621 FREE(hcp
->big_data
, hcp
->big_datalen
);
624 * All cursors (except the default ones) are linked off the master.
625 * Therefore, when we close the cursor, we have to remove it from
626 * the master, not the local one.
627 * XXX I am always removing from the master; what about local cursors?
629 DB_THREAD_LOCK(dbc
->dbp
);
630 TAILQ_REMOVE(&dbc
->dbp
->curs_queue
, dbc
, links
);
631 DB_THREAD_UNLOCK(dbc
->dbp
);
633 FREE(hcp
, sizeof(HASH_CURSOR
));
634 FREE(dbc
, sizeof(DBC
));
640 __ham_c_del(cursor
, flags
)
647 HASH_CURSOR save_curs
;
648 db_pgno_t ppgno
, chg_pgno
;
651 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_del", NULL
, NULL
, flags
);
653 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
654 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
656 hashp
= (HTAB
*)ldbp
->internal
;
657 hcp
= (HASH_CURSOR
*)cursor
->internal
;
659 if ((ret
= __db_cdelchk(ldbp
, flags
,
660 F_ISSET(ldbp
, DB_AM_RDONLY
), IS_VALID(hcp
))) != 0)
662 if (F_ISSET(hcp
, H_DELETED
))
663 return (DB_NOTFOUND
);
665 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
666 GET_META(hashp
->dbp
, hashp
);
667 hashp
->hash_accesses
++;
668 if ((ret
= __ham_get_cpage(hashp
, hcp
, DB_LOCK_WRITE
)) != 0)
670 if (F_ISSET(hcp
, H_ISDUP
) && hcp
->dpgno
!= PGNO_INVALID
) {
672 * We are about to remove a duplicate from offpage.
675 * 1. We will remove an item on a page, but there are more
676 * items on that page.
677 * 2. We will remove the last item on a page, but there is a
678 * following page of duplicates.
679 * 3. We will remove the last item on a page, this page was the
680 * last page in a duplicate set, but there were dups before
682 * 4. We will remove the last item on a page, removing the last
684 * In case 1 hcp->dpagep is unchanged.
685 * In case 2 hcp->dpagep comes back pointing to the next dup
687 * In case 3 hcp->dpagep comes back NULL.
688 * In case 4 hcp->dpagep comes back NULL.
690 * Case 4 results in deleting the pair off the master page.
691 * The normal code for doing this knows how to delete the
692 * duplicates, so we will handle this case in the normal code.
694 ppgno
= PREV_PGNO(hcp
->dpagep
);
695 if (ppgno
== PGNO_INVALID
&&
696 NEXT_PGNO(hcp
->dpagep
) == PGNO_INVALID
&&
697 NUM_ENT(hcp
->dpagep
) == 1)
700 /* Remove item from duplicate page. */
701 chg_pgno
= hcp
->dpgno
;
702 if ((ret
= __db_drem(hashp
->dbp
,
703 &hcp
->dpagep
, hcp
->dndx
, __ham_del_page
)) != 0)
706 if (hcp
->dpagep
== NULL
) {
707 if (ppgno
!= PGNO_INVALID
) { /* Case 3 */
709 if ((ret
= __ham_get_cpage(hashp
, hcp
,
712 hcp
->dndx
= NUM_ENT(hcp
->dpagep
);
713 F_SET(hcp
, H_DELETED
);
714 } else { /* Case 4 */
715 ret
= __ham_del_pair(hashp
, hcp
, 1);
716 hcp
->dpgno
= PGNO_INVALID
;
718 * Delpair updated the cursor queue, so we
719 * don't have to do that here.
721 chg_pgno
= PGNO_INVALID
;
723 } else if (PGNO(hcp
->dpagep
) != hcp
->dpgno
) {
724 hcp
->dndx
= 0; /* Case 2 */
725 hcp
->dpgno
= PGNO(hcp
->dpagep
);
726 if (ppgno
== PGNO_INVALID
)
727 memcpy(HOFFDUP_PGNO(P_ENTRY(hcp
->pagep
,
728 H_DATAINDEX(hcp
->bndx
))),
729 &hcp
->dpgno
, sizeof(db_pgno_t
));
730 F_SET(hcp
, H_DELETED
);
732 F_SET(hcp
, H_DELETED
);
733 if (chg_pgno
!= PGNO_INVALID
)
734 __ham_c_update(hcp
, chg_pgno
, 0, 0, 1);
735 } else if (F_ISSET(hcp
, H_ISDUP
)) { /* on page */
736 if (hcp
->dup_off
== 0 && DUP_SIZE(hcp
->dup_len
) ==
737 LEN_HDATA(hcp
->pagep
, hashp
->hdr
->pagesize
, hcp
->bndx
))
738 ret
= __ham_del_pair(hashp
, hcp
, 1);
743 F_SET(&repldbt
, DB_DBT_PARTIAL
);
744 repldbt
.doff
= hcp
->dup_off
;
745 repldbt
.dlen
= DUP_SIZE(hcp
->dup_len
);
747 ret
= __ham_replpair(hashp
, hcp
, &repldbt
, 0);
748 hcp
->dup_tlen
-= DUP_SIZE(hcp
->dup_len
);
749 F_SET(hcp
, H_DELETED
);
750 __ham_c_update(hcp
, hcp
->pgno
,
751 DUP_SIZE(hcp
->dup_len
), 0, 1);
755 /* Not a duplicate */
756 normal
: ret
= __ham_del_pair(hashp
, hcp
, 1);
758 out
: if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
762 RELEASE_META(hashp
->dbp
, hashp
);
763 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
764 __db_puthandle(ldbp
);
769 __ham_c_get(cursor
, key
, data
, flags
)
777 HASH_CURSOR
*hcp
, save_curs
;
778 int get_key
, ret
, t_ret
;
780 DEBUG_LREAD(cursor
->dbp
, cursor
->txn
, "ham_c_get",
781 flags
== DB_SET
|| flags
== DB_SET_RANGE
? key
: NULL
,
784 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
785 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
787 hashp
= (HTAB
*)(ldbp
->internal
);
788 hcp
= (HASH_CURSOR
*)cursor
->internal
;
791 __db_cgetchk(hashp
->dbp
, key
, data
, flags
, IS_VALID(hcp
))) != 0)
794 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
795 GET_META(hashp
->dbp
, hashp
);
796 hashp
->hash_accesses
++;
804 if (hcp
->bucket
!= BUCKET_INVALID
) {
805 ret
= __ham_item_prev(hashp
, hcp
, DB_LOCK_READ
);
810 ret
= __ham_item_last(hashp
, hcp
, DB_LOCK_READ
);
813 ret
= __ham_item_first(hashp
, hcp
, DB_LOCK_READ
);
816 if (hcp
->bucket
== BUCKET_INVALID
)
818 ret
= __ham_item_next(hashp
, hcp
, DB_LOCK_READ
);
822 ret
= __ham_lookup(hashp
, hcp
, key
, 0, DB_LOCK_READ
);
826 if (F_ISSET(hcp
, H_DELETED
)) {
831 ret
= __ham_item(hashp
, hcp
, DB_LOCK_READ
);
836 * Must always enter this loop to do error handling and
837 * check for big key/data pair.
840 if (ret
!= 0 && ret
!= DB_NOTFOUND
)
842 else if (F_ISSET(hcp
, H_OK
)) {
844 if (get_key
&& (ret
= __db_ret(hashp
->dbp
, hcp
->pagep
,
845 H_KEYINDEX(hcp
->bndx
), key
, &hcp
->big_key
,
846 &hcp
->big_keylen
)) != 0)
849 ret
= __ham_dup_return(hashp
, hcp
, data
, flags
);
851 } else if (!F_ISSET(hcp
, H_NOMORE
)) {
857 * Ran out of entries in a bucket; change buckets.
862 ret
= __ham_item_done(hashp
, hcp
, 0);
863 if (hcp
->bucket
== 0) {
868 hcp
->bndx
= NDX_INVALID
;
870 ret
= __ham_item_prev(hashp
,
875 ret
= __ham_item_done(hashp
, hcp
, 0);
876 hcp
->bndx
= NDX_INVALID
;
878 hcp
->pgno
= PGNO_INVALID
;
880 if (hcp
->bucket
> hashp
->hdr
->max_bucket
) {
885 ret
= __ham_item_next(hashp
,
895 out1
: if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
899 RELEASE_META(hashp
->dbp
, hashp
);
900 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
901 __db_puthandle(ldbp
);
906 __ham_c_put(cursor
, key
, data
, flags
)
914 HASH_CURSOR
*hcp
, save_curs
;
918 DEBUG_LWRITE(cursor
->dbp
, cursor
->txn
, "ham_c_put",
919 flags
== DB_KEYFIRST
|| flags
== DB_KEYLAST
? key
: NULL
,
922 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
) &&
923 (ret
= __db_gethandle(cursor
->dbp
, __ham_hdup
, &ldbp
)) != 0)
925 hashp
= (HTAB
*)(ldbp
->internal
);
926 hcp
= (HASH_CURSOR
*)cursor
->internal
;
929 if ((ret
= __db_cputchk(hashp
->dbp
, key
, data
, flags
,
930 F_ISSET(ldbp
, DB_AM_RDONLY
), IS_VALID(hcp
))) != 0)
932 if (F_ISSET(hcp
, H_DELETED
))
933 return (DB_NOTFOUND
);
935 SET_LOCKER(hashp
->dbp
, cursor
->txn
);
936 GET_META(hashp
->dbp
, hashp
);
942 nbytes
= (ISBIG(hashp
, key
->size
) ? HOFFPAGE_PSIZE
:
943 HKEYDATA_PSIZE(key
->size
)) +
944 (ISBIG(hashp
, data
->size
) ? HOFFPAGE_PSIZE
:
945 HKEYDATA_PSIZE(data
->size
));
946 ret
= __ham_lookup(hashp
, hcp
, key
, nbytes
, DB_LOCK_WRITE
);
951 ret
= __ham_item(hashp
, hcp
, DB_LOCK_WRITE
);
956 if (flags
== DB_CURRENT
&& !F_ISSET(ldbp
, DB_AM_DUP
))
957 ret
= __ham_overwrite(hashp
, hcp
, data
);
959 ret
= __ham_add_dup(hashp
, hcp
, data
, flags
);
962 if (ret
== 0 && F_ISSET(hcp
, H_EXPAND
)) {
963 ret
= __ham_expand_table(hashp
);
964 F_CLR(hcp
, H_EXPAND
);
967 if ((t_ret
= __ham_item_done(hashp
, hcp
, ret
== 0)) != 0 && ret
== 0)
971 RELEASE_META(hashp
->dbp
, hashp
);
972 if (F_ISSET(cursor
->dbp
, DB_AM_THREAD
))
973 __db_puthandle(ldbp
);
977 /********************************* UTILITIES ************************/
980 * __ham_expand_table --
982 * PUBLIC: int __ham_expand_table __P((HTAB *));
985 __ham_expand_table(hashp
)
989 u_int32_t old_bucket
, new_bucket
, spare_ndx
;
993 DIRTY_META(hashp
, ret
);
998 * If the split point is about to increase, make sure that we
999 * have enough extra pages. The calculation here is weird.
1000 * We'd like to do this after we've upped max_bucket, but it's
1001 * too late then because we've logged the meta-data split. What
1002 * we'll do between then and now is increment max bucket and then
1003 * see what the log of one greater than that is; here we have to
1004 * look at the log of max + 2. VERY NASTY STUFF.
1006 if (__db_log2(hashp
->hdr
->max_bucket
+ 2) > hashp
->hdr
->ovfl_point
) {
1008 * We are about to shift the split point. Make sure that
1009 * if the next doubling is going to be big (more than 8
1010 * pages), we have some extra pages around.
1012 if (hashp
->hdr
->max_bucket
+ 1 >= 8 &&
1013 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
] <
1014 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
- 1] +
1015 hashp
->hdr
->ovfl_point
+ 1)
1016 __ham_init_ovflpages(hashp
);
1019 /* Now we can log the meta-data split. */
1020 if (DB_LOGGING(hashp
->dbp
)) {
1021 if ((ret
= __ham_splitmeta_log(hashp
->dbp
->dbenv
->lg_info
,
1022 (DB_TXN
*)hashp
->dbp
->txn
, &new_lsn
, 0,
1023 hashp
->dbp
->log_fileid
,
1024 hashp
->hdr
->max_bucket
, hashp
->hdr
->ovfl_point
,
1025 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
],
1026 &hashp
->hdr
->lsn
)) != 0)
1029 hashp
->hdr
->lsn
= new_lsn
;
1032 hashp
->hash_expansions
++;
1033 new_bucket
= ++hashp
->hdr
->max_bucket
;
1034 old_bucket
= (hashp
->hdr
->max_bucket
& hashp
->hdr
->low_mask
);
1037 * If the split point is increasing, copy the current contents
1038 * of the spare split bucket to the next bucket.
1040 spare_ndx
= __db_log2(hashp
->hdr
->max_bucket
+ 1);
1041 if (spare_ndx
> hashp
->hdr
->ovfl_point
) {
1042 hashp
->hdr
->spares
[spare_ndx
] =
1043 hashp
->hdr
->spares
[hashp
->hdr
->ovfl_point
];
1044 hashp
->hdr
->ovfl_point
= spare_ndx
;
1047 if (new_bucket
> hashp
->hdr
->high_mask
) {
1048 /* Starting a new doubling */
1049 hashp
->hdr
->low_mask
= hashp
->hdr
->high_mask
;
1050 hashp
->hdr
->high_mask
= new_bucket
| hashp
->hdr
->low_mask
;
1053 if (BUCKET_TO_PAGE(hashp
, new_bucket
) > MAX_PAGES(hashp
)) {
1054 __db_err(hashp
->dbp
->dbenv
,
1055 "hash: Cannot allocate new bucket. Pages exhausted.");
1059 /* Relocate records to the new bucket */
1060 return (__ham_split_page(hashp
, old_bucket
, new_bucket
));
1064 * PUBLIC: u_int32_t __ham_call_hash __P((HTAB *, u_int8_t *, int32_t));
1067 __ham_call_hash(hashp
, k
, len
)
1072 u_int32_t n
, bucket
;
1074 n
= (u_int32_t
)hashp
->hash(k
, len
);
1075 bucket
= n
& hashp
->hdr
->high_mask
;
1076 if (bucket
> hashp
->hdr
->max_bucket
)
1077 bucket
= bucket
& hashp
->hdr
->low_mask
;
1082 * Check for duplicates, and call __db_ret appropriately. Release
1083 * everything held by the cursor.
1086 __ham_dup_return(hashp
, hcp
, val
, flags
)
1093 DBT
*myval
, tmp_val
;
1100 /* Check for duplicate and return the first one. */
1101 ndx
= H_DATAINDEX(hcp
->bndx
);
1102 type
= HPAGE_TYPE(hcp
->pagep
, ndx
);
1107 * There are 3 cases:
1108 * 1. We are not in duplicate, simply call db_ret.
1109 * 2. We are looking at keys and stumbled onto a duplicate.
1110 * 3. We are in the middle of a duplicate set. (ISDUP set)
1114 * Here we check for the case where we just stumbled onto a
1115 * duplicate. In this case, we do initialization and then
1116 * let the normal duplicate code handle it.
1118 if (!F_ISSET(hcp
, H_ISDUP
))
1119 if (type
== H_DUPLICATE
) {
1120 F_SET(hcp
, H_ISDUP
);
1121 hcp
->dup_tlen
= LEN_HDATA(hcp
->pagep
,
1122 hashp
->hdr
->pagesize
, hcp
->bndx
);
1123 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
1124 if (flags
== DB_LAST
|| flags
== DB_PREV
) {
1129 HKEYDATA_DATA(hk
) + hcp
->dup_off
,
1131 hcp
->dup_off
+= DUP_SIZE(len
);
1133 } while (hcp
->dup_off
< hcp
->dup_tlen
);
1134 hcp
->dup_off
-= DUP_SIZE(len
);
1138 HKEYDATA_DATA(hk
), sizeof(db_indx_t
));
1143 } else if (type
== H_OFFDUP
) {
1144 F_SET(hcp
, H_ISDUP
);
1145 memcpy(&pgno
, HOFFDUP_PGNO(P_ENTRY(hcp
->pagep
, ndx
)),
1147 if (flags
== DB_LAST
|| flags
== DB_PREV
) {
1148 indx
= (int)hcp
->dndx
;
1149 if ((ret
= __db_dend(hashp
->dbp
,
1150 pgno
, &hcp
->dpagep
)) != 0)
1152 hcp
->dpgno
= PGNO(hcp
->dpagep
);
1153 hcp
->dndx
= NUM_ENT(hcp
->dpagep
) - 1;
1154 } else if ((ret
= __ham_next_cpage(hashp
,
1155 hcp
, pgno
, 0, H_ISDUP
)) != 0)
1161 * Now, everything is initialized, grab a duplicate if
1164 if (F_ISSET(hcp
, H_ISDUP
))
1165 if (hcp
->dpgno
!= PGNO_INVALID
) {
1170 * Copy the DBT in case we are retrieving into
1171 * user memory and we need the parameters for
1174 memcpy(&tmp_val
, val
, sizeof(*val
));
1175 F_SET(&tmp_val
, DB_DBT_PARTIAL
);
1176 tmp_val
.dlen
= hcp
->dup_len
;
1177 tmp_val
.doff
= hcp
->dup_off
+ sizeof(db_indx_t
);
1183 * Finally, if we had a duplicate, pp, ndx, and myval should be
1184 * set appropriately.
1186 if ((ret
= __db_ret(hashp
->dbp
, pp
, ndx
, myval
, &hcp
->big_data
,
1187 &hcp
->big_datalen
)) != 0)
1191 * In case we sent a temporary off to db_ret, set the real
1194 val
->data
= myval
->data
;
1195 val
->size
= myval
->size
;
1201 __ham_overwrite(hashp
, hcp
, nval
)
1206 DBT
*myval
, tmp_val
;
1209 if (F_ISSET(hashp
->dbp
, DB_AM_DUP
))
1210 return (__ham_add_dup(hashp
, hcp
, nval
, DB_KEYLAST
));
1211 else if (!F_ISSET(nval
, DB_DBT_PARTIAL
)) {
1213 memcpy(&tmp_val
, nval
, sizeof(*nval
));
1214 F_SET(&tmp_val
, DB_DBT_PARTIAL
);
1216 hk
= H_PAIRDATA(hcp
->pagep
, hcp
->bndx
);
1217 if (HPAGE_PTYPE(hk
) == H_OFFPAGE
)
1218 memcpy(&tmp_val
.dlen
,
1219 HOFFPAGE_TLEN(hk
), sizeof(u_int32_t
));
1221 tmp_val
.dlen
= LEN_HDATA(hcp
->pagep
,
1222 hashp
->hdr
->pagesize
,hcp
->bndx
);
1224 } else /* Regular partial put */
1227 return (__ham_replpair(hashp
, hcp
, myval
, 0));
1231 * Given a key and a cursor, sets the cursor to the page/ndx on which
1232 * the key resides. If the key is found, the cursor H_OK flag is set
1233 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1234 * If the key is not found, the H_OK flag is not set. If the sought
1235 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1236 * are set indicating where an add might take place. If it is 0,
1237 * non of the cursor pointer field are valid.
1240 __ham_lookup(hashp
, hcp
, key
, sought
, mode
)
1249 int match
, ret
, t_ret
;
1253 * Set up cursor so that we're looking for space to add an item
1254 * as we cycle through the pages looking for the key.
1256 if ((ret
= __ham_item_reset(hashp
, hcp
)) != 0)
1258 hcp
->seek_size
= sought
;
1260 hcp
->bucket
= __ham_call_hash(hashp
, (u_int8_t
*)key
->data
, key
->size
);
1262 if ((ret
= __ham_item_next(hashp
, hcp
, mode
)) != 0)
1265 if (F_ISSET(hcp
, H_NOMORE
))
1268 hk
= H_PAIRKEY(hcp
->pagep
, hcp
->bndx
);
1269 switch (HPAGE_PTYPE(hk
)) {
1271 memcpy(&tlen
, HOFFPAGE_TLEN(hk
), sizeof(u_int32_t
));
1272 if (tlen
== key
->size
) {
1274 HOFFPAGE_PGNO(hk
), sizeof(db_pgno_t
));
1275 match
= __db_moff(hashp
->dbp
, key
, pgno
);
1283 if (key
->size
== LEN_HKEY(hcp
->pagep
,
1284 hashp
->hdr
->pagesize
, hcp
->bndx
) &&
1286 HKEYDATA_DATA(hk
), key
->size
) == 0) {
1294 * These are errors because keys are never
1295 * duplicated, only data items are.
1297 return (__db_pgfmt(hashp
->dbp
, PGNO(hcp
->pagep
)));
1299 hashp
->hash_collisions
++;
1303 * Item was not found, adjust cursor properly.
1309 if ((t_ret
= __ham_item_done(hashp
, hcp
, 0)) != 0 && ret
== 0)
1315 * Initialize a dbt using some possibly already allocated storage
1317 * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1320 __ham_init_dbt(dbt
, size
, bufp
, sizep
)
1326 memset(dbt
, 0, sizeof(*dbt
));
1327 if (*sizep
< size
) {
1328 if ((*bufp
= (void *)(*bufp
== NULL
?
1329 __db_malloc(size
) : __db_realloc(*bufp
, size
))) == NULL
) {
1341 * Adjust the cursor after an insert or delete. The cursor passed is
1342 * the one that was operated upon; we just need to check any of the
1345 * len indicates the length of the item added/deleted
1346 * add indicates if the item indicated by the cursor has just been
1347 * added (add == 1) or deleted (add == 0).
1348 * dup indicates if the addition occurred into a duplicate set.
1350 * PUBLIC: void __ham_c_update
1351 * PUBLIC: __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1354 __ham_c_update(hcp
, chg_pgno
, len
, add
, is_dup
)
1366 * Regular adds are always at the end of a given page, so we never
1367 * have to adjust anyone's cursor after a regular add.
1373 * Determine if a page was deleted. If this is a regular update
1374 * (i.e., not is_dup) then the deleted page's number will be that in
1375 * chg_pgno, and the pgno in the cursor will be different. If this
1376 * was an onpage-duplicate, then the same conditions apply. If this
1377 * was an off-page duplicate, then we need to verify if hcp->dpgno
1378 * is the same (no delete) or different (delete) than chg_pgno.
1380 if (!is_dup
|| hcp
->dpgno
== PGNO_INVALID
)
1382 chg_pgno
!= PGNO_INVALID
&& chg_pgno
!= hcp
->pgno
;
1385 chg_pgno
!= PGNO_INVALID
&& chg_pgno
!= hcp
->dpgno
;
1387 hp
= hcp
->db_cursor
->dbp
->master
->internal
;
1388 DB_THREAD_LOCK(hp
->dbp
);
1390 for (cp
= TAILQ_FIRST(&hp
->dbp
->curs_queue
); cp
!= NULL
;
1391 cp
= TAILQ_NEXT(cp
, links
)) {
1392 if (cp
->internal
== hcp
)
1395 lcp
= (HASH_CURSOR
*)cp
->internal
;
1397 if (!is_dup
&& lcp
->pgno
!= chg_pgno
)
1401 if (F_ISSET(hcp
, H_DELETED
) && lcp
->pgno
!= chg_pgno
)
1403 if (!F_ISSET(hcp
, H_DELETED
) && lcp
->dpgno
!= chg_pgno
)
1409 lcp
->dpgno
= hcp
->dpgno
;
1410 lcp
->dndx
= hcp
->dndx
;
1412 lcp
->pgno
= hcp
->pgno
;
1413 lcp
->bndx
= hcp
->bndx
;
1414 lcp
->bucket
= hcp
->bucket
;
1416 F_CLR(lcp
, H_ISDUP
);
1420 if (!is_dup
&& lcp
->bndx
> hcp
->bndx
)
1422 else if (!is_dup
&& lcp
->bndx
== hcp
->bndx
)
1423 F_SET(lcp
, H_DELETED
);
1424 else if (is_dup
&& lcp
->bndx
== hcp
->bndx
) {
1425 /* Assign dpgno in case there was page conversion. */
1426 lcp
->dpgno
= hcp
->dpgno
;
1427 if (add
&& lcp
->dndx
>= hcp
->dndx
)
1429 else if (!add
&& lcp
->dndx
> hcp
->dndx
)
1431 else if (!add
&& lcp
->dndx
== hcp
->dndx
)
1432 F_SET(lcp
, H_DELETED
);
1434 /* Now adjust on-page information. */
1435 if (lcp
->dpgno
== PGNO_INVALID
)
1437 lcp
->dup_tlen
+= len
;
1438 if (lcp
->dndx
> hcp
->dndx
)
1439 lcp
->dup_off
+= len
;
1441 lcp
->dup_tlen
-= len
;
1442 if (lcp
->dndx
> hcp
->dndx
)
1443 lcp
->dup_off
-= len
;
1447 DB_THREAD_UNLOCK(hp
->dbp
);
1452 * This function gets called when we create a duplicate handle for a
1453 * threaded DB. It should create the private part of the DB structure.
1454 * PUBLIC: int __ham_hdup __P((DB *, DB *));
1457 __ham_hdup(orig
, new)
1464 if ((hashp
= (HTAB
*)__db_malloc(sizeof(HTAB
))) == NULL
)
1467 new->internal
= hashp
;
1472 hashp
->hash
= ((HTAB
*)orig
->internal
)->hash
;
1473 if ((hashp
->split_buf
= (PAGE
*)__db_malloc(orig
->pgsize
)) == NULL
)
1475 hashp
->local_errno
= 0;
1476 hashp
->hash_accesses
= 0;
1477 hashp
->hash_collisions
= 0;
1478 hashp
->hash_expansions
= 0;
1479 hashp
->hash_overflows
= 0;
1480 hashp
->hash_bigpages
= 0;
1481 /* Initialize the cursor queue. */
1482 ret
= __ham_c_init(new, NULL
, &curs
);
1483 TAILQ_INSERT_TAIL(&new->curs_queue
, curs
, links
);