/* Implement a cached obstack.
   Written by Fred Fish <fnf@cygnus.com>
   Rewritten by Jim Blandy <jimb@cygnus.com>

   Copyright (C) 1999-2019 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "defs.h"
#include "gdb_obstack.h"
#include "bcache.h"

#include <algorithm>

/* The type used to hold a single bcache string.  The user data is
   stored in d.data.  Since it can be any type, it needs to have the
   same alignment as the most strict alignment of any type on the host
   machine.  I don't know of any really correct way to do this in
   stock ANSI C, so just do it the same way obstack.h does.  */

struct bstring
{
  /* Hash chain.  */
  struct bstring *next;
  /* Assume the data length is no more than 64k.  */
  unsigned short length;
  /* The half hash hack.  This contains the upper 16 bits of the hash
     value and is used as a pre-check when comparing two strings and
     avoids the need to do length or memcmp calls.  It proves to be
     roughly 100% effective.  */
  unsigned short half_hash;

  union
  {
    char data[1];
    double dummy;
  }
  d;
};

/* The old hash function was stolen from SDBM.  This is what DB 3.0
   uses now, and is better than the old one.  */
\f
unsigned long
hash (const void *addr, int length)
{
  return hash_continue (addr, length, 0);
}

/* Continue the calculation of the hash H at the given address.  */

unsigned long
hash_continue (const void *addr, int length, unsigned long h)
{
  const unsigned char *k, *e;

  k = (const unsigned char *) addr;
  e = k + length;
  for (; k < e; ++k)
    {
      h *= 16777619;
      h ^= *k;
    }
  return (h);
}
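
/* Illustrative note (not part of the original source): both routines
   are a multiply/xor byte hash in the style of FNV-1, using the
   32-bit FNV prime 16777619 but a zero starting value.  Because the
   loop is a plain left-to-right fold over the bytes, hash_continue
   lets callers hash data that arrives in pieces: feeding the result
   of hash ("foo", 3) back in as H via hash_continue ("bar", 3, h)
   yields the same value as hash ("foobar", 6).  */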
\f
/* Growing the bcache's hash table.  */

/* If the average chain length grows beyond this, then we want to
   resize our hash table.  */
#define CHAIN_LENGTH_THRESHOLD (5)

void
bcache::expand_hash_table ()
{
  /* A table of good hash table sizes.  Whenever we grow, we pick the
     next larger size from this table.  sizes[i] is close to 1 << (i+10),
     so we roughly double the table size each time.  After we fall off
     the end of this table, we just double.  Don't laugh --- there have
     been executables sighted with a gigabyte of debug info.  */
  static unsigned long sizes[] = {
    1021, 2053, 4099, 8191, 16381, 32771,
    65537, 131071, 262144, 524287, 1048573, 2097143,
    4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
    268435459, 536870923, 1073741827, 2147483659UL
  };
  unsigned int new_num_buckets;
  struct bstring **new_buckets;
  unsigned int i;

  /* Count the stats.  Every unique item needs to be re-hashed and
     re-entered.  */
  m_expand_count++;
  m_expand_hash_count += m_unique_count;

  /* Find the next size.  */
  new_num_buckets = m_num_buckets * 2;
  for (i = 0; i < (sizeof (sizes) / sizeof (sizes[0])); i++)
    if (sizes[i] > m_num_buckets)
      {
        new_num_buckets = sizes[i];
        break;
      }

  /* Allocate the new table.  */
  {
    size_t new_size = new_num_buckets * sizeof (new_buckets[0]);

    new_buckets = (struct bstring **) xmalloc (new_size);
    memset (new_buckets, 0, new_size);

    m_structure_size -= m_num_buckets * sizeof (m_bucket[0]);
    m_structure_size += new_size;
  }

  /* Rehash all existing strings.  */
  for (i = 0; i < m_num_buckets; i++)
    {
      struct bstring *s, *next;

      for (s = m_bucket[i]; s; s = next)
        {
          struct bstring **new_bucket;
          next = s->next;

          new_bucket = &new_buckets[(m_hash_function (&s->d.data, s->length)
                                     % new_num_buckets)];
          s->next = *new_bucket;
          *new_bucket = s;
        }
    }

  /* Plug in the new table.  */
  xfree (m_bucket);
  m_bucket = new_buckets;
  m_num_buckets = new_num_buckets;
}
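
/* Worked example of the policy above (illustrative, not in the
   original source): with CHAIN_LENGTH_THRESHOLD of 5, once a
   1021-bucket table holds 5 * 1021 = 5105 unique entries, the next
   call to bcache::insert (where the check lives) triggers an
   expansion, and expand_hash_table picks 2053, the first entry in
   SIZES larger than 1021.  */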

\f
/* Looking up things in the bcache.  */

/* The number of bytes needed to allocate a struct bstring whose data
   is N bytes long.  */
#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
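
/* Size illustration (an assumption about a typical LP64 host, not
   text from the original file): with 8-byte pointers and 8-byte
   alignment for the double in the union, d.data starts at offset 16,
   so BSTRING_SIZE (10) evaluates to 26 bytes; the obstack may round
   the actual allocation up to its own alignment.  */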

/* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
   never seen those bytes before, add a copy of them to BCACHE.  In
   either case, return a pointer to BCACHE's copy of that string.  If
   the optional output ADDED is not NULL, set *ADDED to 1 for a newly
   added entry or to 0 when an existing entry is returned.  */

const void *
bcache::insert (const void *addr, int length, int *added)
{
  unsigned long full_hash;
  unsigned short half_hash;
  int hash_index;
  struct bstring *s;

  if (added)
    *added = 0;

  /* Lazily initialize the obstack.  This can save quite a bit of
     memory in some cases.  */
  if (m_total_count == 0)
    {
      /* We could use obstack_specify_allocation here instead, but
         gdb_obstack.h specifies the allocation/deallocation
         functions.  */
      obstack_init (&m_cache);
    }

  /* If our average chain length is too high, expand the hash table.  */
  if (m_unique_count >= m_num_buckets * CHAIN_LENGTH_THRESHOLD)
    expand_hash_table ();

  m_total_count++;
  m_total_size += length;

  full_hash = m_hash_function (addr, length);

  half_hash = (full_hash >> 16);
  hash_index = full_hash % m_num_buckets;

  /* Search the hash m_bucket for a string identical to the caller's.
     As a short-circuit, first compare the upper part of each hash
     value.  */
  for (s = m_bucket[hash_index]; s; s = s->next)
    {
      if (s->half_hash == half_hash)
        {
          if (s->length == length
              && m_compare_function (&s->d.data, addr, length))
            return &s->d.data;
          else
            m_half_hash_miss_count++;
        }
    }

  /* The user's string isn't in the list.  Insert it at the head of
     its hash chain.  */
  {
    struct bstring *newobj
      = (struct bstring *) obstack_alloc (&m_cache,
                                          BSTRING_SIZE (length));

    memcpy (&newobj->d.data, addr, length);
    newobj->length = length;
    newobj->next = m_bucket[hash_index];
    newobj->half_hash = half_hash;
    m_bucket[hash_index] = newobj;

    m_unique_count++;
    m_unique_size += length;
    m_structure_size += BSTRING_SIZE (length);

    if (added)
      *added = 1;

    return &newobj->d.data;
  }
}
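
/* Usage sketch (illustrative only; the class interface is declared in
   bcache.h, and the assumption here is that bcache's default
   constructor selects the built-in hash and compare functions): a
   hypothetical caller interns byte strings by routing them through
   insert () and keeping only the returned pointer, e.g.

       struct bcache names;
       int added;
       const char *canon
         = (const char *) names.insert (str, strlen (str) + 1, &added);

   Equal byte strings always come back as the same stored copy, so
   interned data can subsequently be compared by pointer.  */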
\f

/* Compare the byte string at ADDR1 of length LENGTH to the
   string at ADDR2.  Return 1 if they are equal.  */

int
bcache::compare (const void *addr1, const void *addr2, int length)
{
  return memcmp (addr1, addr2, length) == 0;
}

/* Free all the storage associated with BCACHE.  */
bcache::~bcache ()
{
  /* Only free the obstack if we actually initialized it.  */
  if (m_total_count > 0)
    obstack_free (&m_cache, 0);
  xfree (m_bucket);
}


\f
/* Printing statistics.  */

static void
print_percentage (int portion, int total)
{
  if (total == 0)
    /* i18n: Like "Percentage of duplicates, by count: (not applicable)".  */
    printf_filtered (_("(not applicable)\n"));
  else
    printf_filtered ("%3d%%\n", (int) (portion * 100.0 / total));
}


/* Print statistics on BCACHE's memory usage and efficacy at
   eliminating duplication.  TYPE should describe the kind of data
   BCACHE holds.  Statistics are printed using `printf_filtered' and
   its ilk.  */
void
bcache::print_statistics (const char *type)
{
  int occupied_buckets;
  int max_chain_length;
  int median_chain_length;
  int max_entry_size;
  int median_entry_size;

  /* Count the number of occupied buckets, tally the various string
     lengths, and measure chain lengths.  */
  {
    unsigned int b;
    int *chain_length = XCNEWVEC (int, m_num_buckets + 1);
    int *entry_size = XCNEWVEC (int, m_unique_count + 1);
    int stringi = 0;

    occupied_buckets = 0;

    for (b = 0; b < m_num_buckets; b++)
      {
        struct bstring *s = m_bucket[b];

        chain_length[b] = 0;

        if (s)
          {
            occupied_buckets++;

            while (s)
              {
                gdb_assert (b < m_num_buckets);
                chain_length[b]++;
                gdb_assert (stringi < m_unique_count);
                entry_size[stringi++] = s->length;
                s = s->next;
              }
          }
      }

    /* To compute the median, we need the set of chain lengths
       sorted.  */
    std::sort (chain_length, chain_length + m_num_buckets);
    std::sort (entry_size, entry_size + m_unique_count);

    if (m_num_buckets > 0)
      {
        max_chain_length = chain_length[m_num_buckets - 1];
        median_chain_length = chain_length[m_num_buckets / 2];
      }
    else
      {
        max_chain_length = 0;
        median_chain_length = 0;
      }
    if (m_unique_count > 0)
      {
        max_entry_size = entry_size[m_unique_count - 1];
        median_entry_size = entry_size[m_unique_count / 2];
      }
    else
      {
        max_entry_size = 0;
        median_entry_size = 0;
      }

    xfree (chain_length);
    xfree (entry_size);
  }

  printf_filtered (_("  Cached '%s' statistics:\n"), type);
  printf_filtered (_("    Total object count:  %ld\n"), m_total_count);
  printf_filtered (_("    Unique object count: %lu\n"), m_unique_count);
  printf_filtered (_("    Percentage of duplicates, by count: "));
  print_percentage (m_total_count - m_unique_count, m_total_count);
  printf_filtered ("\n");

  printf_filtered (_("    Total object size:  %ld\n"), m_total_size);
  printf_filtered (_("    Unique object size: %ld\n"), m_unique_size);
  printf_filtered (_("    Percentage of duplicates, by size:  "));
  print_percentage (m_total_size - m_unique_size, m_total_size);
  printf_filtered ("\n");

  printf_filtered (_("    Max entry size:     %d\n"), max_entry_size);
  printf_filtered (_("    Average entry size: "));
  if (m_unique_count > 0)
    printf_filtered ("%ld\n", m_unique_size / m_unique_count);
  else
    /* i18n: "Average entry size: (not applicable)".  */
    printf_filtered (_("(not applicable)\n"));
  printf_filtered (_("    Median entry size:  %d\n"), median_entry_size);
  printf_filtered ("\n");

  printf_filtered (_("    \
Total memory used by bcache, including overhead: %ld\n"),
                   m_structure_size);
  printf_filtered (_("    Percentage memory overhead: "));
  print_percentage (m_structure_size - m_unique_size, m_unique_size);
  printf_filtered (_("    Net memory savings:         "));
  print_percentage (m_total_size - m_structure_size, m_total_size);
  printf_filtered ("\n");

  printf_filtered (_("    Hash table size:           %3d\n"),
                   m_num_buckets);
  printf_filtered (_("    Hash table expands:        %lu\n"),
                   m_expand_count);
  printf_filtered (_("    Hash table hashes:         %lu\n"),
                   m_total_count + m_expand_hash_count);
  printf_filtered (_("    Half hash misses:          %lu\n"),
                   m_half_hash_miss_count);
  printf_filtered (_("    Hash table population:     "));
  print_percentage (occupied_buckets, m_num_buckets);
  printf_filtered (_("    Median hash chain length:  %3d\n"),
                   median_chain_length);
  printf_filtered (_("    Average hash chain length: "));
  if (m_num_buckets > 0)
    printf_filtered ("%3lu\n", m_unique_count / m_num_buckets);
  else
    /* i18n: "Average hash chain length: (not applicable)".  */
    printf_filtered (_("(not applicable)\n"));
  printf_filtered (_("    Maximum hash chain length: %3d\n"),
                   max_chain_length);
  printf_filtered ("\n");
}

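/* Return the number of bytes currently used by the cache's obstack,
   or 0 if the obstack was never initialized (insert () sets it up
   lazily on the first insertion).  */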
int
bcache::memory_used ()
{
  if (m_total_count == 0)
    return 0;
  return obstack_memory_used (&m_cache);
}