]> git.ipfire.org Git - thirdparty/gcc.git/blob - libiberty/hashtab.c
Add commentary.
[thirdparty/gcc.git] / libiberty / hashtab.c
1 /* An expandable hash tables datatype.
2 Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by Vladimir Makarov (vmakarov@cygnus.com).
4
5 This file is part of the libiberty library.
6 Libiberty is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public
8 License as published by the Free Software Foundation; either
9 version 2 of the License, or (at your option) any later version.
10
11 Libiberty is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
15
16 You should have received a copy of the GNU Library General Public
17 License along with libiberty; see the file COPYING.LIB. If
18 not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21 /* This package implements basic hash table functionality. It is possible
22 to search for an entry, create an entry and destroy an entry.
23
24 Elements in the table are generic pointers.
25
26 The size of the table is not fixed; if the occupancy of the table
27 grows too high the hash table will be expanded.
28
29 The abstract data implementation is based on generalized Algorithm D
30 from Knuth's book "The art of computer programming". Hash table is
31 expanded by creation of new hash table and transferring elements from
32 the old table to the new table. */
33
34 #ifdef HAVE_CONFIG_H
35 #include "config.h"
36 #endif
37
38 #include <sys/types.h>
39
40 #ifdef HAVE_STDLIB_H
41 #include <stdlib.h>
42 #endif
43
44 #ifdef HAVE_STRING_H
45 #include <string.h>
46 #endif
47
48 #include <stdio.h>
49
50 #include "libiberty.h"
51 #include "hashtab.h"
52
53 /* This macro defines reserved value for empty table entry. */
54
55 #define EMPTY_ENTRY ((PTR) 0)
56
57 /* This macro defines reserved value for table entry which contained
58 a deleted element. */
59
60 #define DELETED_ENTRY ((PTR) 1)
61
62 static unsigned long higher_prime_number PARAMS ((unsigned long));
63 static hashval_t hash_pointer PARAMS ((const void *));
64 static int eq_pointer PARAMS ((const void *, const void *));
65 static int htab_expand PARAMS ((htab_t));
66 static PTR *find_empty_slot_for_expand PARAMS ((htab_t, hashval_t));
67
68 /* At some point, we could make these be NULL, and modify the
69 hash-table routines to handle NULL specially; that would avoid
70 function-call overhead for the common case of hashing pointers. */
71 htab_hash htab_hash_pointer = hash_pointer;
72 htab_eq htab_eq_pointer = eq_pointer;
73
74 /* The following function returns a nearest prime number which is
75 greater than N, and near a power of two. */
76
77 static unsigned long
78 higher_prime_number (n)
79 unsigned long n;
80 {
81 /* These are primes that are near, but slightly smaller than, a
82 power of two. */
83 static unsigned long primes[] = {
84 (unsigned long) 2,
85 (unsigned long) 7,
86 (unsigned long) 13,
87 (unsigned long) 31,
88 (unsigned long) 61,
89 (unsigned long) 127,
90 (unsigned long) 251,
91 (unsigned long) 509,
92 (unsigned long) 1021,
93 (unsigned long) 2039,
94 (unsigned long) 4093,
95 (unsigned long) 8191,
96 (unsigned long) 16381,
97 (unsigned long) 32749,
98 (unsigned long) 65521,
99 (unsigned long) 131071,
100 (unsigned long) 262139,
101 (unsigned long) 524287,
102 (unsigned long) 1048573,
103 (unsigned long) 2097143,
104 (unsigned long) 4194301,
105 (unsigned long) 8388593,
106 (unsigned long) 16777213,
107 (unsigned long) 33554393,
108 (unsigned long) 67108859,
109 (unsigned long) 134217689,
110 (unsigned long) 268435399,
111 (unsigned long) 536870909,
112 (unsigned long) 1073741789,
113 (unsigned long) 2147483647,
114 /* 4294967291L */
115 ((unsigned long) 2147483647) + ((unsigned long) 2147483644),
116 };
117
118 unsigned long* low = &primes[0];
119 unsigned long* high = &primes[sizeof(primes) / sizeof(primes[0])];
120
121 while (low != high)
122 {
123 unsigned long* mid = low + (high - low) / 2;
124 if (n > *mid)
125 low = mid + 1;
126 else
127 high = mid;
128 }
129
130 /* If we've run out of primes, abort. */
131 if (n > *low)
132 {
133 fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
134 abort ();
135 }
136
137 return *low;
138 }
139
140 /* Returns a hash code for P. */
141
142 static hashval_t
143 hash_pointer (p)
144 const PTR p;
145 {
146 return (hashval_t) ((long)p >> 3);
147 }
148
149 /* Returns non-zero if P1 and P2 are equal. */
150
151 static int
152 eq_pointer (p1, p2)
153 const PTR p1;
154 const PTR p2;
155 {
156 return p1 == p2;
157 }
158
159 /* This function creates table with length slightly longer than given
160 source length. Created hash table is initiated as empty (all the
161 hash table entries are EMPTY_ENTRY). The function returns the
162 created hash table. Memory allocation must not fail. */
163
164 htab_t
165 htab_create (size, hash_f, eq_f, del_f)
166 size_t size;
167 htab_hash hash_f;
168 htab_eq eq_f;
169 htab_del del_f;
170 {
171 htab_t result;
172
173 size = higher_prime_number (size);
174 result = (htab_t) xcalloc (1, sizeof (struct htab));
175 result->entries = (PTR *) xcalloc (size, sizeof (PTR));
176 result->size = size;
177 result->hash_f = hash_f;
178 result->eq_f = eq_f;
179 result->del_f = del_f;
180 result->return_allocation_failure = 0;
181 return result;
182 }
183
184 /* This function creates table with length slightly longer than given
185 source length. The created hash table is initiated as empty (all the
186 hash table entries are EMPTY_ENTRY). The function returns the created
187 hash table. Memory allocation may fail; it may return NULL. */
188
189 htab_t
190 htab_try_create (size, hash_f, eq_f, del_f)
191 size_t size;
192 htab_hash hash_f;
193 htab_eq eq_f;
194 htab_del del_f;
195 {
196 htab_t result;
197
198 size = higher_prime_number (size);
199 result = (htab_t) calloc (1, sizeof (struct htab));
200 if (result == NULL)
201 return NULL;
202
203 result->entries = (PTR *) calloc (size, sizeof (PTR));
204 if (result->entries == NULL)
205 {
206 free (result);
207 return NULL;
208 }
209
210 result->size = size;
211 result->hash_f = hash_f;
212 result->eq_f = eq_f;
213 result->del_f = del_f;
214 result->return_allocation_failure = 1;
215 return result;
216 }
217
218 /* This function frees all memory allocated for given hash table.
219 Naturally the hash table must already exist. */
220
221 void
222 htab_delete (htab)
223 htab_t htab;
224 {
225 int i;
226
227 if (htab->del_f)
228 for (i = htab->size - 1; i >= 0; i--)
229 if (htab->entries[i] != EMPTY_ENTRY
230 && htab->entries[i] != DELETED_ENTRY)
231 (*htab->del_f) (htab->entries[i]);
232
233 free (htab->entries);
234 free (htab);
235 }
236
237 /* This function clears all entries in the given hash table. */
238
239 void
240 htab_empty (htab)
241 htab_t htab;
242 {
243 int i;
244
245 if (htab->del_f)
246 for (i = htab->size - 1; i >= 0; i--)
247 if (htab->entries[i] != EMPTY_ENTRY
248 && htab->entries[i] != DELETED_ENTRY)
249 (*htab->del_f) (htab->entries[i]);
250
251 memset (htab->entries, 0, htab->size * sizeof (PTR));
252 }
253
254 /* Similar to htab_find_slot, but without several unwanted side effects:
255 - Does not call htab->eq_f when it finds an existing entry.
256 - Does not change the count of elements/searches/collisions in the
257 hash table.
258 This function also assumes there are no deleted entries in the table.
259 HASH is the hash value for the element to be inserted. */
260
261 static PTR *
262 find_empty_slot_for_expand (htab, hash)
263 htab_t htab;
264 hashval_t hash;
265 {
266 size_t size = htab->size;
267 hashval_t hash2 = 1 + hash % (size - 2);
268 unsigned int index = hash % size;
269
270 for (;;)
271 {
272 PTR *slot = htab->entries + index;
273
274 if (*slot == EMPTY_ENTRY)
275 return slot;
276 else if (*slot == DELETED_ENTRY)
277 abort ();
278
279 index += hash2;
280 if (index >= size)
281 index -= size;
282 }
283 }
284
285 /* The following function changes size of memory allocated for the
286 entries and repeatedly inserts the table elements. The occupancy
287 of the table after the call will be about 50%. Naturally the hash
288 table must already exist. Remember also that the place of the
289 table entries is changed. If memory allocation failures are allowed,
290 this function will return zero, indicating that the table could not be
291 expanded. If all goes well, it will return a non-zero value. */
292
293 static int
294 htab_expand (htab)
295 htab_t htab;
296 {
297 PTR *oentries;
298 PTR *olimit;
299 PTR *p;
300
301 oentries = htab->entries;
302 olimit = oentries + htab->size;
303
304 htab->size = higher_prime_number (htab->size * 2);
305
306 if (htab->return_allocation_failure)
307 {
308 PTR *nentries = (PTR *) calloc (htab->size, sizeof (PTR *));
309 if (nentries == NULL)
310 return 0;
311 htab->entries = nentries;
312 }
313 else
314 htab->entries = (PTR *) xcalloc (htab->size, sizeof (PTR *));
315
316 htab->n_elements -= htab->n_deleted;
317 htab->n_deleted = 0;
318
319 p = oentries;
320 do
321 {
322 PTR x = *p;
323
324 if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
325 {
326 PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
327
328 *q = x;
329 }
330
331 p++;
332 }
333 while (p < olimit);
334
335 free (oentries);
336 return 1;
337 }
338
339 /* This function searches for a hash table entry equal to the given
340 element. It cannot be used to insert or delete an element. */
341
342 PTR
343 htab_find_with_hash (htab, element, hash)
344 htab_t htab;
345 const PTR element;
346 hashval_t hash;
347 {
348 unsigned int index;
349 hashval_t hash2;
350 size_t size;
351 PTR entry;
352
353 htab->searches++;
354 size = htab->size;
355 index = hash % size;
356
357 entry = htab->entries[index];
358 if (entry == EMPTY_ENTRY
359 || (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
360 return entry;
361
362 hash2 = 1 + hash % (size - 2);
363
364 for (;;)
365 {
366 htab->collisions++;
367 index += hash2;
368 if (index >= size)
369 index -= size;
370
371 entry = htab->entries[index];
372 if (entry == EMPTY_ENTRY
373 || (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
374 return entry;
375 }
376 }
377
378 /* Like htab_find_slot_with_hash, but compute the hash value from the
379 element. */
380
381 PTR
382 htab_find (htab, element)
383 htab_t htab;
384 const PTR element;
385 {
386 return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
387 }
388
389 /* This function searches for a hash table slot containing an entry
390 equal to the given element. To delete an entry, call this with
391 INSERT = 0, then call htab_clear_slot on the slot returned (possibly
392 after doing some checks). To insert an entry, call this with
393 INSERT = 1, then write the value you want into the returned slot.
394 When inserting an entry, NULL may be returned if memory allocation
395 fails. */
396
397 PTR *
398 htab_find_slot_with_hash (htab, element, hash, insert)
399 htab_t htab;
400 const PTR element;
401 hashval_t hash;
402 enum insert_option insert;
403 {
404 PTR *first_deleted_slot;
405 unsigned int index;
406 hashval_t hash2;
407 size_t size;
408
409 if (insert == INSERT && htab->size * 3 <= htab->n_elements * 4
410 && htab_expand (htab) == 0)
411 return NULL;
412
413 size = htab->size;
414 hash2 = 1 + hash % (size - 2);
415 index = hash % size;
416
417 htab->searches++;
418 first_deleted_slot = NULL;
419
420 for (;;)
421 {
422 PTR entry = htab->entries[index];
423 if (entry == EMPTY_ENTRY)
424 {
425 if (insert == NO_INSERT)
426 return NULL;
427
428 htab->n_elements++;
429
430 if (first_deleted_slot)
431 {
432 *first_deleted_slot = EMPTY_ENTRY;
433 return first_deleted_slot;
434 }
435
436 return &htab->entries[index];
437 }
438
439 if (entry == DELETED_ENTRY)
440 {
441 if (!first_deleted_slot)
442 first_deleted_slot = &htab->entries[index];
443 }
444 else if ((*htab->eq_f) (entry, element))
445 return &htab->entries[index];
446
447 htab->collisions++;
448 index += hash2;
449 if (index >= size)
450 index -= size;
451 }
452 }
453
454 /* Like htab_find_slot_with_hash, but compute the hash value from the
455 element. */
456
457 PTR *
458 htab_find_slot (htab, element, insert)
459 htab_t htab;
460 const PTR element;
461 enum insert_option insert;
462 {
463 return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
464 insert);
465 }
466
467 /* This function deletes an element with the given value from hash
468 table. If there is no matching element in the hash table, this
469 function does nothing. */
470
471 void
472 htab_remove_elt (htab, element)
473 htab_t htab;
474 PTR element;
475 {
476 PTR *slot;
477
478 slot = htab_find_slot (htab, element, NO_INSERT);
479 if (*slot == EMPTY_ENTRY)
480 return;
481
482 if (htab->del_f)
483 (*htab->del_f) (*slot);
484
485 *slot = DELETED_ENTRY;
486 htab->n_deleted++;
487 }
488
489 /* This function clears a specified slot in a hash table. It is
490 useful when you've already done the lookup and don't want to do it
491 again. */
492
493 void
494 htab_clear_slot (htab, slot)
495 htab_t htab;
496 PTR *slot;
497 {
498 if (slot < htab->entries || slot >= htab->entries + htab->size
499 || *slot == EMPTY_ENTRY || *slot == DELETED_ENTRY)
500 abort ();
501
502 if (htab->del_f)
503 (*htab->del_f) (*slot);
504
505 *slot = DELETED_ENTRY;
506 htab->n_deleted++;
507 }
508
509 /* This function scans over the entire hash table calling
510 CALLBACK for each live entry. If CALLBACK returns false,
511 the iteration stops. INFO is passed as CALLBACK's second
512 argument. */
513
514 void
515 htab_traverse (htab, callback, info)
516 htab_t htab;
517 htab_trav callback;
518 PTR info;
519 {
520 PTR *slot = htab->entries;
521 PTR *limit = slot + htab->size;
522
523 do
524 {
525 PTR x = *slot;
526
527 if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
528 if (!(*callback) (slot, info))
529 break;
530 }
531 while (++slot < limit);
532 }
533
534 /* Return the current size of given hash table. */
535
536 size_t
537 htab_size (htab)
538 htab_t htab;
539 {
540 return htab->size;
541 }
542
543 /* Return the current number of elements in given hash table. */
544
545 size_t
546 htab_elements (htab)
547 htab_t htab;
548 {
549 return htab->n_elements - htab->n_deleted;
550 }
551
552 /* Return the fraction of fixed collisions during all work with given
553 hash table. */
554
555 double
556 htab_collisions (htab)
557 htab_t htab;
558 {
559 if (htab->searches == 0)
560 return 0.0;
561
562 return (double) htab->collisions / (double) htab->searches;
563 }
564
565 /* Hash P as a null-terminated string.
566
567 Copied from gcc/hashtable.c. Zack had the following to say with respect
568 to applicability, though note that unlike hashtable.c, this hash table
569 implementation re-hashes rather than chain buckets.
570
571 http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html
572 From: Zack Weinberg <zackw@panix.com>
573 Date: Fri, 17 Aug 2001 02:15:56 -0400
574
575 I got it by extracting all the identifiers from all the source code
576 I had lying around in mid-1999, and testing many recurrences of
577 the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either
578 prime numbers or the appropriate identity. This was the best one.
579 I don't remember exactly what constituted "best", except I was
580 looking at bucket-length distributions mostly.
581
582 So it should be very good at hashing identifiers, but might not be
583 as good at arbitrary strings.
584
585 I'll add that it thoroughly trounces the hash functions recommended
586 for this use at http://burtleburtle.net/bob/hash/index.html, both
587 on speed and bucket distribution. I haven't tried it against the
588 function they just started using for Perl's hashes. */
589
590 hashval_t
591 htab_hash_string (p)
592 const PTR p;
593 {
594 const unsigned char *str = (const unsigned char *) p;
595 hashval_t r = 0;
596 unsigned char c;
597
598 while ((c = *str++) != 0)
599 r = r * 67 + c - 113;
600
601 return r;
602 }