]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blob - gdb/bcache.c
*** empty log message ***
[thirdparty/binutils-gdb.git] / gdb / bcache.c
1 /* Implement a cached obstack.
2 Written by Fred Fish <fnf@cygnus.com>
3 Rewritten by Jim Blandy <jimb@cygnus.com>
4
5 Copyright (C) 1999, 2000, 2002, 2003, 2007 Free Software Foundation, Inc.
6
7 This file is part of GDB.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21
22 #include "defs.h"
23 #include "gdb_obstack.h"
24 #include "bcache.h"
25 #include "gdb_string.h" /* For memcpy declaration */
26 #include "gdb_assert.h"
27
28 #include <stddef.h>
29 #include <stdlib.h>
30
31 /* The type used to hold a single bcache string. The user data is
32 stored in d.data. Since it can be any type, it needs to have the
33 same alignment as the most strict alignment of any type on the host
34 machine. I don't know of any really correct way to do this in
35 stock ANSI C, so just do it the same way obstack.h does. */
36
37 struct bstring
38 {
39 /* Hash chain. */
40 struct bstring *next;
41 /* Assume the data length is no more than 64k. */
42 unsigned short length;
43 /* The half hash hack. This contains the upper 16 bits of the hash
44 value and is used as a pre-check when comparing two strings and
45 avoids the need to do length or memcmp calls. It proves to be
46 roughly 100% effective. */
47 unsigned short half_hash;
48
49 union
50 {
51 char data[1];
52 double dummy;
53 }
54 d;
55 };
56
57
58 /* The structure for a bcache itself. The bcache is initialized, in
59 bcache_xmalloc(), by filling it with zeros and then setting the
60 corresponding obstack's malloc() and free() methods. */
61
62 struct bcache
63 {
64 /* All the bstrings are allocated here. */
65 struct obstack cache;
66
67 /* How many hash buckets we're using. */
68 unsigned int num_buckets;
69
70 /* Hash buckets. This table is allocated using malloc, so when we
71 grow the table we can return the old table to the system. */
72 struct bstring **bucket;
73
74 /* Statistics. */
75 unsigned long unique_count; /* number of unique strings */
76 long total_count; /* total number of strings cached, including dups */
77 long unique_size; /* size of unique strings, in bytes */
78 long total_size; /* total number of bytes cached, including dups */
79 long structure_size; /* total size of bcache, including infrastructure */
80 /* Number of times that the hash table is expanded and hence
81 re-built, and the corresponding number of times that a string is
82 [re]hashed as part of entering it into the expanded table. The
83 total number of hashes can be computed by adding TOTAL_COUNT to
84 expand_hash_count. */
85 unsigned long expand_count;
86 unsigned long expand_hash_count;
87 /* Number of times that the half-hash compare hit (compare the upper
88 16 bits of hash values) hit, but the corresponding combined
89 length/data compare missed. */
90 unsigned long half_hash_miss_count;
91 };
92
93 /* The old hash function was stolen from SDBM. This is what DB 3.0 uses now,
94 * and is better than the old one.
95 */
96 \f
97 unsigned long
98 hash(const void *addr, int length)
99 {
100 const unsigned char *k, *e;
101 unsigned long h;
102
103 k = (const unsigned char *)addr;
104 e = k+length;
105 for (h=0; k< e;++k)
106 {
107 h *=16777619;
108 h ^= *k;
109 }
110 return (h);
111 }
112 \f
113 /* Growing the bcache's hash table. */
114
115 /* If the average chain length grows beyond this, then we want to
116 resize our hash table. */
117 #define CHAIN_LENGTH_THRESHOLD (5)
118
119 static void
120 expand_hash_table (struct bcache *bcache)
121 {
122 /* A table of good hash table sizes. Whenever we grow, we pick the
123 next larger size from this table. sizes[i] is close to 1 << (i+10),
124 so we roughly double the table size each time. After we fall off
125 the end of this table, we just double. Don't laugh --- there have
126 been executables sighted with a gigabyte of debug info. */
127 static unsigned long sizes[] = {
128 1021, 2053, 4099, 8191, 16381, 32771,
129 65537, 131071, 262144, 524287, 1048573, 2097143,
130 4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
131 268435459, 536870923, 1073741827, 2147483659UL
132 };
133 unsigned int new_num_buckets;
134 struct bstring **new_buckets;
135 unsigned int i;
136
137 /* Count the stats. Every unique item needs to be re-hashed and
138 re-entered. */
139 bcache->expand_count++;
140 bcache->expand_hash_count += bcache->unique_count;
141
142 /* Find the next size. */
143 new_num_buckets = bcache->num_buckets * 2;
144 for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
145 if (sizes[i] > bcache->num_buckets)
146 {
147 new_num_buckets = sizes[i];
148 break;
149 }
150
151 /* Allocate the new table. */
152 {
153 size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
154 new_buckets = (struct bstring **) xmalloc (new_size);
155 memset (new_buckets, 0, new_size);
156
157 bcache->structure_size -= (bcache->num_buckets
158 * sizeof (bcache->bucket[0]));
159 bcache->structure_size += new_size;
160 }
161
162 /* Rehash all existing strings. */
163 for (i = 0; i < bcache->num_buckets; i++)
164 {
165 struct bstring *s, *next;
166
167 for (s = bcache->bucket[i]; s; s = next)
168 {
169 struct bstring **new_bucket;
170 next = s->next;
171
172 new_bucket = &new_buckets[(hash (&s->d.data, s->length)
173 % new_num_buckets)];
174 s->next = *new_bucket;
175 *new_bucket = s;
176 }
177 }
178
179 /* Plug in the new table. */
180 if (bcache->bucket)
181 xfree (bcache->bucket);
182 bcache->bucket = new_buckets;
183 bcache->num_buckets = new_num_buckets;
184 }
185
186 \f
187 /* Looking up things in the bcache. */
188
189 /* The number of bytes needed to allocate a struct bstring whose data
190 is N bytes long. */
191 #define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
192
193 /* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has
194 never seen those bytes before, add a copy of them to BCACHE. In
195 either case, return a pointer to BCACHE's copy of that string. */
196 static void *
197 bcache_data (const void *addr, int length, struct bcache *bcache)
198 {
199 unsigned long full_hash;
200 unsigned short half_hash;
201 int hash_index;
202 struct bstring *s;
203
204 /* If our average chain length is too high, expand the hash table. */
205 if (bcache->unique_count >= bcache->num_buckets * CHAIN_LENGTH_THRESHOLD)
206 expand_hash_table (bcache);
207
208 bcache->total_count++;
209 bcache->total_size += length;
210
211 full_hash = hash (addr, length);
212 half_hash = (full_hash >> 16);
213 hash_index = full_hash % bcache->num_buckets;
214
215 /* Search the hash bucket for a string identical to the caller's.
216 As a short-circuit first compare the upper part of each hash
217 values. */
218 for (s = bcache->bucket[hash_index]; s; s = s->next)
219 {
220 if (s->half_hash == half_hash)
221 {
222 if (s->length == length
223 && ! memcmp (&s->d.data, addr, length))
224 return &s->d.data;
225 else
226 bcache->half_hash_miss_count++;
227 }
228 }
229
230 /* The user's string isn't in the list. Insert it after *ps. */
231 {
232 struct bstring *new
233 = obstack_alloc (&bcache->cache, BSTRING_SIZE (length));
234 memcpy (&new->d.data, addr, length);
235 new->length = length;
236 new->next = bcache->bucket[hash_index];
237 new->half_hash = half_hash;
238 bcache->bucket[hash_index] = new;
239
240 bcache->unique_count++;
241 bcache->unique_size += length;
242 bcache->structure_size += BSTRING_SIZE (length);
243
244 return &new->d.data;
245 }
246 }
247
248 void *
249 deprecated_bcache (const void *addr, int length, struct bcache *bcache)
250 {
251 return bcache_data (addr, length, bcache);
252 }
253
254 const void *
255 bcache (const void *addr, int length, struct bcache *bcache)
256 {
257 return bcache_data (addr, length, bcache);
258 }
259 \f
260 /* Allocating and freeing bcaches. */
261
262 struct bcache *
263 bcache_xmalloc (void)
264 {
265 /* Allocate the bcache pre-zeroed. */
266 struct bcache *b = XCALLOC (1, struct bcache);
267 /* We could use obstack_specify_allocation here instead, but
268 gdb_obstack.h specifies the allocation/deallocation
269 functions. */
270 obstack_init (&b->cache);
271 return b;
272 }
273
274 /* Free all the storage associated with BCACHE. */
275 void
276 bcache_xfree (struct bcache *bcache)
277 {
278 if (bcache == NULL)
279 return;
280 obstack_free (&bcache->cache, 0);
281 xfree (bcache->bucket);
282 xfree (bcache);
283 }
284
285
286 \f
287 /* Printing statistics. */
288
289 static int
290 compare_ints (const void *ap, const void *bp)
291 {
292 /* Because we know we're comparing two ints which are positive,
293 there's no danger of overflow here. */
294 return * (int *) ap - * (int *) bp;
295 }
296
297
298 static void
299 print_percentage (int portion, int total)
300 {
301 if (total == 0)
302 /* i18n: Like "Percentage of duplicates, by count: (not applicable)" */
303 printf_filtered (_("(not applicable)\n"));
304 else
305 printf_filtered ("%3d%%\n", (int) (portion * 100.0 / total));
306 }
307
308
309 /* Print statistics on BCACHE's memory usage and efficacity at
310 eliminating duplication. NAME should describe the kind of data
311 BCACHE holds. Statistics are printed using `printf_filtered' and
312 its ilk. */
313 void
314 print_bcache_statistics (struct bcache *c, char *type)
315 {
316 int occupied_buckets;
317 int max_chain_length;
318 int median_chain_length;
319 int max_entry_size;
320 int median_entry_size;
321
322 /* Count the number of occupied buckets, tally the various string
323 lengths, and measure chain lengths. */
324 {
325 unsigned int b;
326 int *chain_length = XCALLOC (c->num_buckets + 1, int);
327 int *entry_size = XCALLOC (c->unique_count + 1, int);
328 int stringi = 0;
329
330 occupied_buckets = 0;
331
332 for (b = 0; b < c->num_buckets; b++)
333 {
334 struct bstring *s = c->bucket[b];
335
336 chain_length[b] = 0;
337
338 if (s)
339 {
340 occupied_buckets++;
341
342 while (s)
343 {
344 gdb_assert (b < c->num_buckets);
345 chain_length[b]++;
346 gdb_assert (stringi < c->unique_count);
347 entry_size[stringi++] = s->length;
348 s = s->next;
349 }
350 }
351 }
352
353 /* To compute the median, we need the set of chain lengths sorted. */
354 qsort (chain_length, c->num_buckets, sizeof (chain_length[0]),
355 compare_ints);
356 qsort (entry_size, c->unique_count, sizeof (entry_size[0]),
357 compare_ints);
358
359 if (c->num_buckets > 0)
360 {
361 max_chain_length = chain_length[c->num_buckets - 1];
362 median_chain_length = chain_length[c->num_buckets / 2];
363 }
364 else
365 {
366 max_chain_length = 0;
367 median_chain_length = 0;
368 }
369 if (c->unique_count > 0)
370 {
371 max_entry_size = entry_size[c->unique_count - 1];
372 median_entry_size = entry_size[c->unique_count / 2];
373 }
374 else
375 {
376 max_entry_size = 0;
377 median_entry_size = 0;
378 }
379
380 xfree (chain_length);
381 xfree (entry_size);
382 }
383
384 printf_filtered (_(" Cached '%s' statistics:\n"), type);
385 printf_filtered (_(" Total object count: %ld\n"), c->total_count);
386 printf_filtered (_(" Unique object count: %lu\n"), c->unique_count);
387 printf_filtered (_(" Percentage of duplicates, by count: "));
388 print_percentage (c->total_count - c->unique_count, c->total_count);
389 printf_filtered ("\n");
390
391 printf_filtered (_(" Total object size: %ld\n"), c->total_size);
392 printf_filtered (_(" Unique object size: %ld\n"), c->unique_size);
393 printf_filtered (_(" Percentage of duplicates, by size: "));
394 print_percentage (c->total_size - c->unique_size, c->total_size);
395 printf_filtered ("\n");
396
397 printf_filtered (_(" Max entry size: %d\n"), max_entry_size);
398 printf_filtered (_(" Average entry size: "));
399 if (c->unique_count > 0)
400 printf_filtered ("%ld\n", c->unique_size / c->unique_count);
401 else
402 /* i18n: "Average entry size: (not applicable)" */
403 printf_filtered (_("(not applicable)\n"));
404 printf_filtered (_(" Median entry size: %d\n"), median_entry_size);
405 printf_filtered ("\n");
406
407 printf_filtered (_(" Total memory used by bcache, including overhead: %ld\n"),
408 c->structure_size);
409 printf_filtered (_(" Percentage memory overhead: "));
410 print_percentage (c->structure_size - c->unique_size, c->unique_size);
411 printf_filtered (_(" Net memory savings: "));
412 print_percentage (c->total_size - c->structure_size, c->total_size);
413 printf_filtered ("\n");
414
415 printf_filtered (_(" Hash table size: %3d\n"), c->num_buckets);
416 printf_filtered (_(" Hash table expands: %lu\n"),
417 c->expand_count);
418 printf_filtered (_(" Hash table hashes: %lu\n"),
419 c->total_count + c->expand_hash_count);
420 printf_filtered (_(" Half hash misses: %lu\n"),
421 c->half_hash_miss_count);
422 printf_filtered (_(" Hash table population: "));
423 print_percentage (occupied_buckets, c->num_buckets);
424 printf_filtered (_(" Median hash chain length: %3d\n"),
425 median_chain_length);
426 printf_filtered (_(" Average hash chain length: "));
427 if (c->num_buckets > 0)
428 printf_filtered ("%3lu\n", c->unique_count / c->num_buckets);
429 else
430 /* i18n: "Average hash chain length: (not applicable)" */
431 printf_filtered (_("(not applicable)\n"));
432 printf_filtered (_(" Maximum hash chain length: %3d\n"), max_chain_length);
433 printf_filtered ("\n");
434 }
435
436 int
437 bcache_memory_used (struct bcache *bcache)
438 {
439 return obstack_memory_used (&bcache->cache);
440 }