gdb/bcache.c

   1 /* Implement a cached obstack.
   2    Written by Fred Fish <fnf@cygnus.com>
   3    Rewritten by Jim Blandy <jimb@cygnus.com>
   4
   5    Copyright (C) 1999-2024 Free Software Foundation, Inc.
   6
   7    This file is part of GDB.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  21
  22 #include "gdbsupport/gdb_obstack.h"
  23 #include "bcache.h"
  24
  25 #include <algorithm>
  26
  27 namespace gdb {
  28
  29 /* The type used to hold a single bcache string.  The user data is
  30    stored in d.data.  Since it can be any type, it needs to have the
  31    same alignment as the most strict alignment of any type on the host
  32    machine.  I don't know of any really correct way to do this in
  33    stock ANSI C, so just do it the same way obstack.h does.  */
  34
  35 struct bstring
  36 {
  37   /* Hash chain.  */
  38   struct bstring *next;
  39   /* Assume the data length is no more than 64k.  */
  40   unsigned short length;
  41   /* The half hash hack.  This contains the upper 16 bits of the hash
  42      value and is used as a pre-check when comparing two strings and
  43      avoids the need to do length or memcmp calls.  It proves to be
  44      roughly 100% effective.  */
  45   unsigned short half_hash;
  46
  47   union
  48   {
  49     char data[1];
  50     double dummy;
  51   }
  52   d;
  53 };
  54
  55 \f
  56 /* Growing the bcache's hash table.  */
  57
/* If the average chain length grows beyond this, then we want to
   resize our hash table.  bcache::insert expands the table once the
   number of unique entries reaches this many times the number of
   buckets.  */
#define CHAIN_LENGTH_THRESHOLD (5)
  61
  62 void
  63 bcache::expand_hash_table ()
  64 {
  65   /* A table of good hash table sizes.  Whenever we grow, we pick the
  66      next larger size from this table.  sizes[i] is close to 1 << (i+10),
  67      so we roughly double the table size each time.  After we fall off
  68      the end of this table, we just double.  Don't laugh --- there have
  69      been executables sighted with a gigabyte of debug info.  */
  70   static const unsigned long sizes[] = {
  71     1021, 2053, 4099, 8191, 16381, 32771,
  72     65537, 131071, 262144, 524287, 1048573, 2097143,
  73     4194301, 8388617, 16777213, 33554467, 67108859, 134217757,
  74     268435459, 536870923, 1073741827, 2147483659UL
  75   };
  76   unsigned int new_num_buckets;
  77   struct bstring **new_buckets;
  78   unsigned int i;
  79
  80   /* Count the stats.  Every unique item needs to be re-hashed and
  81      re-entered.  */
  82   m_expand_count++;
  83   m_expand_hash_count += m_unique_count;
  84
  85   /* Find the next size.  */
  86   new_num_buckets = m_num_buckets * 2;
  87   for (unsigned long a_size : sizes)
  88     if (a_size > m_num_buckets)
  89       {
  90         new_num_buckets = a_size;
  91         break;
  92       }
  93
  94   /* Allocate the new table.  */
  95   {
  96     size_t new_size = new_num_buckets * sizeof (new_buckets[0]);
  97
  98     new_buckets = (struct bstring **) xmalloc (new_size);
  99     memset (new_buckets, 0, new_size);
 100
 101     m_structure_size -= m_num_buckets * sizeof (m_bucket[0]);
 102     m_structure_size += new_size;
 103   }
 104
 105   /* Rehash all existing strings.  */
 106   for (i = 0; i < m_num_buckets; i++)
 107     {
 108       struct bstring *s, *next;
 109
 110       for (s = m_bucket[i]; s; s = next)
 111         {
 112           struct bstring **new_bucket;
 113           next = s->next;
 114
 115           new_bucket = &new_buckets[(this->hash (&s->d.data, s->length)
 116                                      % new_num_buckets)];
 117           s->next = *new_bucket;
 118           *new_bucket = s;
 119         }
 120     }
 121
 122   /* Plug in the new table.  */
 123   xfree (m_bucket);
 124   m_bucket = new_buckets;
 125   m_num_buckets = new_num_buckets;
 126 }
 127
 128 \f
 129 /* Looking up things in the bcache.  */
 130
/* The number of bytes needed to allocate a struct bstring whose data
   is N bytes long: everything that precedes d.data in the struct,
   plus the N data bytes themselves.  */
#define BSTRING_SIZE(n) (offsetof (struct bstring, d.data) + (n))
 134
 135 /* Find a copy of the LENGTH bytes at ADDR in BCACHE.  If BCACHE has
 136    never seen those bytes before, add a copy of them to BCACHE.  In
 137    either case, return a pointer to BCACHE's copy of that string.  If
 138    optional ADDED is not NULL, return 1 in case of new entry or 0 if
 139    returning an old entry.  */
 140
 141 const void *
 142 bcache::insert (const void *addr, int length, bool *added)
 143 {
 144   unsigned long full_hash;
 145   unsigned short half_hash;
 146   int hash_index;
 147   struct bstring *s;
 148
 149   if (added != nullptr)
 150     *added = false;
 151
 152   /* Lazily initialize the obstack.  This can save quite a bit of
 153      memory in some cases.  */
 154   if (m_total_count == 0)
 155     {
 156       /* We could use obstack_specify_allocation here instead, but
 157          gdb_obstack.h specifies the allocation/deallocation
 158          functions.  */
 159       obstack_init (&m_cache);
 160     }
 161
 162   /* If our average chain length is too high, expand the hash table.  */
 163   if (m_unique_count >= m_num_buckets * CHAIN_LENGTH_THRESHOLD)
 164     expand_hash_table ();
 165
 166   m_total_count++;
 167   m_total_size += length;
 168
 169   full_hash = this->hash (addr, length);
 170
 171   half_hash = (full_hash >> 16);
 172   hash_index = full_hash % m_num_buckets;
 173
 174   /* Search the hash m_bucket for a string identical to the caller's.
 175      As a short-circuit first compare the upper part of each hash
 176      values.  */
 177   for (s = m_bucket[hash_index]; s; s = s->next)
 178     {
 179       if (s->half_hash == half_hash)
 180         {
 181           if (s->length == length
 182               && this->compare (&s->d.data, addr, length))
 183             return &s->d.data;
 184           else
 185             m_half_hash_miss_count++;
 186         }
 187     }
 188
 189   /* The user's string isn't in the list.  Insert it after *ps.  */
 190   {
 191     struct bstring *newobj
 192       = (struct bstring *) obstack_alloc (&m_cache,
 193                                           BSTRING_SIZE (length));
 194
 195     memcpy (&newobj->d.data, addr, length);
 196     newobj->length = length;
 197     newobj->next = m_bucket[hash_index];
 198     newobj->half_hash = half_hash;
 199     m_bucket[hash_index] = newobj;
 200
 201     m_unique_count++;
 202     m_unique_size += length;
 203     m_structure_size += BSTRING_SIZE (length);
 204
 205     if (added != nullptr)
 206       *added = true;
 207
 208     return &newobj->d.data;
 209   }
 210 }
 211 \f
 212
 213 /* See bcache.h.  */
 214
 215 unsigned long
 216 bcache::hash (const void *addr, int length)
 217 {
 218   return fast_hash (addr, length, 0);
 219 }
 220
 221 /* See bcache.h.  */
 222
 223 int
 224 bcache::compare (const void *left, const void *right, int length)
 225 {
 226   return memcmp (left, right, length) == 0;
 227 }
 228
 229 /* Free all the storage associated with BCACHE.  */
 230 bcache::~bcache ()
 231 {
 232   /* Only free the obstack if we actually initialized it.  */
 233   if (m_total_count > 0)
 234     obstack_free (&m_cache, 0);
 235   xfree (m_bucket);
 236 }
 237
 238
 239 \f
 240 /* Printing statistics.  */
 241
 242 static void
 243 print_percentage (int portion, int total)
 244 {
 245   if (total == 0)
 246     /* i18n: Like "Percentage of duplicates, by count: (not applicable)".  */
 247     gdb_printf (_("(not applicable)\n"));
 248   else
 249     gdb_printf ("%3d%%\n", (int) (portion * 100.0 / total));
 250 }
 251
 252
 253 /* Print statistics on BCACHE's memory usage and efficacity at
 254    eliminating duplication.  NAME should describe the kind of data
 255    BCACHE holds.  Statistics are printed using `gdb_printf' and
 256    its ilk.  */
 257 void
 258 bcache::print_statistics (const char *type)
 259 {
 260   int occupied_buckets;
 261   int max_chain_length;
 262   int median_chain_length;
 263   int max_entry_size;
 264   int median_entry_size;
 265
 266   /* Count the number of occupied buckets, tally the various string
 267      lengths, and measure chain lengths.  */
 268   {
 269     unsigned int b;
 270     int *chain_length = XCNEWVEC (int, m_num_buckets + 1);
 271     int *entry_size = XCNEWVEC (int, m_unique_count + 1);
 272     int stringi = 0;
 273
 274     occupied_buckets = 0;
 275
 276     for (b = 0; b < m_num_buckets; b++)
 277       {
 278         struct bstring *s = m_bucket[b];
 279
 280         chain_length[b] = 0;
 281
 282         if (s)
 283           {
 284             occupied_buckets++;
 285
 286             while (s)
 287               {
 288                 gdb_assert (b < m_num_buckets);
 289                 chain_length[b]++;
 290                 gdb_assert (stringi < m_unique_count);
 291                 entry_size[stringi++] = s->length;
 292                 s = s->next;
 293               }
 294           }
 295       }
 296
 297     /* To compute the median, we need the set of chain lengths
 298        sorted.  */
 299     std::sort (chain_length, chain_length + m_num_buckets);
 300     std::sort (entry_size, entry_size + m_unique_count);
 301
 302     if (m_num_buckets > 0)
 303       {
 304         max_chain_length = chain_length[m_num_buckets - 1];
 305         median_chain_length = chain_length[m_num_buckets / 2];
 306       }
 307     else
 308       {
 309         max_chain_length = 0;
 310         median_chain_length = 0;
 311       }
 312     if (m_unique_count > 0)
 313       {
 314         max_entry_size = entry_size[m_unique_count - 1];
 315         median_entry_size = entry_size[m_unique_count / 2];
 316       }
 317     else
 318       {
 319         max_entry_size = 0;
 320         median_entry_size = 0;
 321       }
 322
 323     xfree (chain_length);
 324     xfree (entry_size);
 325   }
 326
 327   gdb_printf (_("  M_Cached '%s' statistics:\n"), type);
 328   gdb_printf (_("    Total object count:  %ld\n"), m_total_count);
 329   gdb_printf (_("    Unique object count: %lu\n"), m_unique_count);
 330   gdb_printf (_("    Percentage of duplicates, by count: "));
 331   print_percentage (m_total_count - m_unique_count, m_total_count);
 332   gdb_printf ("\n");
 333
 334   gdb_printf (_("    Total object size:   %ld\n"), m_total_size);
 335   gdb_printf (_("    Unique object size:  %ld\n"), m_unique_size);
 336   gdb_printf (_("    Percentage of duplicates, by size:  "));
 337   print_percentage (m_total_size - m_unique_size, m_total_size);
 338   gdb_printf ("\n");
 339
 340   gdb_printf (_("    Max entry size:     %d\n"), max_entry_size);
 341   gdb_printf (_("    Average entry size: "));
 342   if (m_unique_count > 0)
 343     gdb_printf ("%ld\n", m_unique_size / m_unique_count);
 344   else
 345     /* i18n: "Average entry size: (not applicable)".  */
 346     gdb_printf (_("(not applicable)\n"));
 347   gdb_printf (_("    Median entry size:  %d\n"), median_entry_size);
 348   gdb_printf ("\n");
 349
 350   gdb_printf (_("    \
 351 Total memory used by bcache, including overhead: %ld\n"),
 352               m_structure_size);
 353   gdb_printf (_("    Percentage memory overhead: "));
 354   print_percentage (m_structure_size - m_unique_size, m_unique_size);
 355   gdb_printf (_("    Net memory savings:         "));
 356   print_percentage (m_total_size - m_structure_size, m_total_size);
 357   gdb_printf ("\n");
 358
 359   gdb_printf (_("    Hash table size:           %3d\n"),
 360               m_num_buckets);
 361   gdb_printf (_("    Hash table expands:        %lu\n"),
 362               m_expand_count);
 363   gdb_printf (_("    Hash table hashes:         %lu\n"),
 364               m_total_count + m_expand_hash_count);
 365   gdb_printf (_("    Half hash misses:          %lu\n"),
 366               m_half_hash_miss_count);
 367   gdb_printf (_("    Hash table population:     "));
 368   print_percentage (occupied_buckets, m_num_buckets);
 369   gdb_printf (_("    Median hash chain length:  %3d\n"),
 370               median_chain_length);
 371   gdb_printf (_("    Average hash chain length: "));
 372   if (m_num_buckets > 0)
 373     gdb_printf ("%3lu\n", m_unique_count / m_num_buckets);
 374   else
 375     /* i18n: "Average hash chain length: (not applicable)".  */
 376     gdb_printf (_("(not applicable)\n"));
 377   gdb_printf (_("    Maximum hash chain length: %3d\n"),
 378               max_chain_length);
 379   gdb_printf ("\n");
 380 }
 381
 382 int
 383 bcache::memory_used ()
 384 {
 385   if (m_total_count == 0)
 386     return 0;
 387   return obstack_memory_used (&m_cache);
 388 }
 389
 390 } /* namespace gdb */