]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - gdb/dwarf2/index-write.c
Change how DWARF index writer finds address map
[thirdparty/binutils-gdb.git] / gdb / dwarf2 / index-write.c
CommitLineData
cd4fb1b2
SM
1/* DWARF index writing support for GDB.
2
3666a048 3 Copyright (C) 1994-2021 Free Software Foundation, Inc.
cd4fb1b2
SM
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20#include "defs.h"
21
82ca8957 22#include "dwarf2/index-write.h"
23baa4cc 23
cd4fb1b2
SM
24#include "addrmap.h"
25#include "cli/cli-decode.h"
268a13a5
TT
26#include "gdbsupport/byte-vector.h"
27#include "gdbsupport/filestuff.h"
28#include "gdbsupport/gdb_unlinker.h"
29#include "gdbsupport/pathstuff.h"
30#include "gdbsupport/scoped_fd.h"
cd4fb1b2 31#include "complaints.h"
82ca8957 32#include "dwarf2/index-common.h"
cd4fb1b2 33#include "dwarf2.h"
82ca8957 34#include "dwarf2/read.h"
9fda78b6 35#include "dwarf2/dwz.h"
cd4fb1b2
SM
36#include "gdb/gdb-index.h"
37#include "gdbcmd.h"
38#include "objfiles.h"
39#include "psympriv.h"
3b00ef10 40#include "ada-lang.h"
cd4fb1b2 41
4de283e4
TT
42#include <algorithm>
43#include <cmath>
159ed7d9 44#include <forward_list>
4de283e4
TT
45#include <set>
46#include <unordered_map>
47#include <unordered_set>
48
cd4fb1b2
SM
/* Checked wrapper around GDB_INDEX_SYMBOL_STATIC_SET_VALUE: asserts
   that VALUE is either 0 or 1 before storing it into CU_INDEX.  */
#define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(cu_index, value) \
  do { \
    gdb_assert ((unsigned int) (value) <= 1); \
    GDB_INDEX_SYMBOL_STATIC_SET_VALUE((cu_index), (value)); \
  } while (0)

/* Checked wrapper around GDB_INDEX_SYMBOL_KIND_SET_VALUE: asserts
   that VALUE lies in the range GDB_INDEX_SYMBOL_KIND_TYPE ..
   GDB_INDEX_SYMBOL_KIND_OTHER before storing it into CU_INDEX.  */
#define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(cu_index, value) \
  do { \
    gdb_assert ((value) >= GDB_INDEX_SYMBOL_KIND_TYPE \
                && (value) <= GDB_INDEX_SYMBOL_KIND_OTHER); \
    GDB_INDEX_SYMBOL_KIND_SET_VALUE((cu_index), (value)); \
  } while (0)

/* Checked wrapper around GDB_INDEX_CU_SET_VALUE: asserts that VALUE
   has no bits set outside GDB_INDEX_CU_MASK, i.e. that it fits in the
   bits allotted to the CU number, before storing it into CU_INDEX.  */
#define DW2_GDB_INDEX_CU_SET_VALUE(cu_index, value) \
  do { \
    gdb_assert (((value) & ~GDB_INDEX_CU_MASK) == 0); \
    GDB_INDEX_CU_SET_VALUE((cu_index), (value)); \
  } while (0)
70
71/* The "save gdb-index" command. */
72
73/* Write SIZE bytes from the buffer pointed to by DATA to FILE, with
74 error checking. */
75
76static void
77file_write (FILE *file, const void *data, size_t size)
78{
79 if (fwrite (data, 1, size, file) != size)
80 error (_("couldn't data write to file"));
81}
82
/* Dump the elements of VEC to FILE as one contiguous run of raw
   bytes.  An empty vector writes nothing.  Failures are reported by
   the pointer/size overload of file_write.  */

template<typename Elem, typename Alloc>
static void
file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
{
  if (vec.empty ())
    return;

  const size_t byte_count = vec.size () * sizeof (Elem);
  file_write (file, vec.data (), byte_count);
}
92
93/* In-memory buffer to prepare data to be written later to a file. */
94class data_buf
95{
96public:
97 /* Copy DATA to the end of the buffer. */
98 template<typename T>
99 void append_data (const T &data)
100 {
101 std::copy (reinterpret_cast<const gdb_byte *> (&data),
102 reinterpret_cast<const gdb_byte *> (&data + 1),
103 grow (sizeof (data)));
104 }
105
106 /* Copy CSTR (a zero-terminated string) to the end of buffer. The
107 terminating zero is appended too. */
108 void append_cstr0 (const char *cstr)
109 {
110 const size_t size = strlen (cstr) + 1;
111 std::copy (cstr, cstr + size, grow (size));
112 }
113
114 /* Store INPUT as ULEB128 to the end of buffer. */
115 void append_unsigned_leb128 (ULONGEST input)
116 {
117 for (;;)
118 {
119 gdb_byte output = input & 0x7f;
120 input >>= 7;
121 if (input)
122 output |= 0x80;
123 append_data (output);
124 if (input == 0)
125 break;
126 }
127 }
128
129 /* Accept a host-format integer in VAL and append it to the buffer
130 as a target-format integer which is LEN bytes long. */
131 void append_uint (size_t len, bfd_endian byte_order, ULONGEST val)
132 {
133 ::store_unsigned_integer (grow (len), len, byte_order, val);
134 }
135
136 /* Return the size of the buffer. */
137 size_t size () const
138 {
139 return m_vec.size ();
140 }
141
142 /* Return true iff the buffer is empty. */
143 bool empty () const
144 {
145 return m_vec.empty ();
146 }
147
148 /* Write the buffer to FILE. */
149 void file_write (FILE *file) const
150 {
151 ::file_write (file, m_vec);
152 }
153
154private:
155 /* Grow SIZE bytes at the end of the buffer. Returns a pointer to
156 the start of the new block. */
157 gdb_byte *grow (size_t size)
158 {
159 m_vec.resize (m_vec.size () + size);
b4be9bfd 160 return &*(m_vec.end () - size);
cd4fb1b2
SM
161 }
162
163 gdb::byte_vector m_vec;
164};
165
166/* An entry in the symbol table. */
167struct symtab_index_entry
168{
169 /* The name of the symbol. */
170 const char *name;
171 /* The offset of the name in the constant pool. */
172 offset_type index_offset;
173 /* A sorted vector of the indices of all the CUs that hold an object
174 of this name. */
175 std::vector<offset_type> cu_indices;
176};
177
178/* The symbol table. This is a power-of-2-sized hash table. */
179struct mapped_symtab
180{
181 mapped_symtab ()
182 {
183 data.resize (1024);
184 }
185
186 offset_type n_elements = 0;
187 std::vector<symtab_index_entry> data;
7ab96794
TV
188
189 /* Temporary storage for Ada names. */
190 auto_obstack m_string_obstack;
cd4fb1b2
SM
191};
192
193/* Find a slot in SYMTAB for the symbol NAME. Returns a reference to
194 the slot.
195
196 Function is used only during write_hash_table so no index format backward
197 compatibility is needed. */
198
199static symtab_index_entry &
200find_slot (struct mapped_symtab *symtab, const char *name)
201{
202 offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
203
204 index = hash & (symtab->data.size () - 1);
205 step = ((hash * 17) & (symtab->data.size () - 1)) | 1;
206
207 for (;;)
208 {
209 if (symtab->data[index].name == NULL
210 || strcmp (name, symtab->data[index].name) == 0)
211 return symtab->data[index];
212 index = (index + step) & (symtab->data.size () - 1);
213 }
214}
215
216/* Expand SYMTAB's hash table. */
217
218static void
219hash_expand (struct mapped_symtab *symtab)
220{
221 auto old_entries = std::move (symtab->data);
222
223 symtab->data.clear ();
224 symtab->data.resize (old_entries.size () * 2);
225
226 for (auto &it : old_entries)
227 if (it.name != NULL)
228 {
229 auto &ref = find_slot (symtab, it.name);
230 ref = std::move (it);
231 }
232}
233
234/* Add an entry to SYMTAB. NAME is the name of the symbol.
235 CU_INDEX is the index of the CU in which the symbol appears.
236 IS_STATIC is one if the symbol is static, otherwise zero (global). */
237
238static void
239add_index_entry (struct mapped_symtab *symtab, const char *name,
240 int is_static, gdb_index_symbol_kind kind,
241 offset_type cu_index)
242{
243 offset_type cu_index_and_attrs;
244
245 ++symtab->n_elements;
246 if (4 * symtab->n_elements / 3 >= symtab->data.size ())
247 hash_expand (symtab);
248
249 symtab_index_entry &slot = find_slot (symtab, name);
250 if (slot.name == NULL)
251 {
252 slot.name = name;
253 /* index_offset is set later. */
254 }
255
256 cu_index_and_attrs = 0;
257 DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index);
258 DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static);
259 DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind);
260
261 /* We don't want to record an index value twice as we want to avoid the
262 duplication.
263 We process all global symbols and then all static symbols
264 (which would allow us to avoid the duplication by only having to check
265 the last entry pushed), but a symbol could have multiple kinds in one CU.
266 To keep things simple we don't worry about the duplication here and
85102364 267 sort and uniquify the list after we've processed all symbols. */
cd4fb1b2
SM
268 slot.cu_indices.push_back (cu_index_and_attrs);
269}
270
271/* Sort and remove duplicates of all symbols' cu_indices lists. */
272
273static void
274uniquify_cu_indices (struct mapped_symtab *symtab)
275{
276 for (auto &entry : symtab->data)
277 {
278 if (entry.name != NULL && !entry.cu_indices.empty ())
279 {
280 auto &cu_indices = entry.cu_indices;
281 std::sort (cu_indices.begin (), cu_indices.end ());
282 auto from = std::unique (cu_indices.begin (), cu_indices.end ());
283 cu_indices.erase (from, cu_indices.end ());
284 }
285 }
286}
287
/* Wrapper that lets a 'const char *' serve as a container key.  The
   object stores nothing but the pointer (it does not own or copy the
   characters), yet equality is decided by comparing the pointed-to
   strings, not the pointer values.  */
class c_str_view
{
public:
  /* Deliberately not explicit: callers pass plain C strings where a
     c_str_view key is expected.  */
  c_str_view (const char *cstr)
    : m_cstr (cstr)
  {}

  /* Return the underlying C string.  Note, the returned string is
     only a reference with lifetime of this object.  */
  const char *c_str () const
  {
    return m_cstr;
  }

  /* Two views are equal when their strings have the same contents.  */
  bool operator== (const c_str_view &other) const
  {
    const int order = strcmp (c_str (), other.c_str ());
    return order == 0;
  }

private:
  friend class c_str_view_hasher;
  const char *const m_cstr;
};
314
315/* A std::unordered_map::hasher for c_str_view that uses the right
316 hash function for strings in a mapped index. */
317class c_str_view_hasher
318{
319public:
320 size_t operator () (const c_str_view &x) const
321 {
322 return mapped_index_string_hash (INT_MAX, x.m_cstr);
323 }
324};
325
/* Hash functor for std::vector<T> keys in std::unordered_map.  The
   hash is computed with iterative_hash over the raw bytes of the
   vector's contiguous element storage.  */
template<typename T>
class vector_hasher
{
public:
  size_t operator () (const std::vector<T> &key) const
  {
    const size_t byte_count = sizeof (T) * key.size ();
    return iterative_hash (key.data (), byte_count, 0);
  }
};
337
338/* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with
339 constant pool entries going into the data buffer CPOOL. */
340
341static void
342write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
343{
344 {
345 /* Elements are sorted vectors of the indices of all the CUs that
346 hold an object of this name. */
347 std::unordered_map<std::vector<offset_type>, offset_type,
348 vector_hasher<offset_type>>
349 symbol_hash_table;
350
351 /* We add all the index vectors to the constant pool first, to
352 ensure alignment is ok. */
353 for (symtab_index_entry &entry : symtab->data)
354 {
355 if (entry.name == NULL)
356 continue;
357 gdb_assert (entry.index_offset == 0);
358
359 /* Finding before inserting is faster than always trying to
360 insert, because inserting always allocates a node, does the
361 lookup, and then destroys the new node if another node
362 already had the same key. C++17 try_emplace will avoid
363 this. */
364 const auto found
365 = symbol_hash_table.find (entry.cu_indices);
366 if (found != symbol_hash_table.end ())
367 {
368 entry.index_offset = found->second;
369 continue;
370 }
371
372 symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
373 entry.index_offset = cpool.size ();
374 cpool.append_data (MAYBE_SWAP (entry.cu_indices.size ()));
375 for (const auto index : entry.cu_indices)
376 cpool.append_data (MAYBE_SWAP (index));
377 }
378 }
379
380 /* Now write out the hash table. */
381 std::unordered_map<c_str_view, offset_type, c_str_view_hasher> str_table;
382 for (const auto &entry : symtab->data)
383 {
384 offset_type str_off, vec_off;
385
386 if (entry.name != NULL)
387 {
388 const auto insertpair = str_table.emplace (entry.name, cpool.size ());
389 if (insertpair.second)
390 cpool.append_cstr0 (entry.name);
391 str_off = insertpair.first->second;
392 vec_off = entry.index_offset;
393 }
394 else
395 {
396 /* While 0 is a valid constant pool index, it is not valid
397 to have 0 for both offsets. */
398 str_off = 0;
399 vec_off = 0;
400 }
401
402 output.append_data (MAYBE_SWAP (str_off));
403 output.append_data (MAYBE_SWAP (vec_off));
404 }
405}
406
edfe0a0c 407typedef std::unordered_map<partial_symtab *, unsigned int> psym_index_map;
cd4fb1b2
SM
408
409/* Helper struct for building the address table. */
410struct addrmap_index_data
411{
412 addrmap_index_data (data_buf &addr_vec_, psym_index_map &cu_index_htab_)
413 : addr_vec (addr_vec_), cu_index_htab (cu_index_htab_)
414 {}
415
cd4fb1b2
SM
416 data_buf &addr_vec;
417 psym_index_map &cu_index_htab;
418
419 /* Non-zero if the previous_* fields are valid.
420 We can't write an entry until we see the next entry (since it is only then
421 that we know the end of the entry). */
422 int previous_valid;
423 /* Index of the CU in the table of all CUs in the index file. */
424 unsigned int previous_cu_index;
425 /* Start address of the CU. */
426 CORE_ADDR previous_cu_start;
427};
428
429/* Write an address entry to ADDR_VEC. */
430
431static void
79cc99f6 432add_address_entry (data_buf &addr_vec,
cd4fb1b2
SM
433 CORE_ADDR start, CORE_ADDR end, unsigned int cu_index)
434{
79748972
TT
435 addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
436 addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
cd4fb1b2
SM
437 addr_vec.append_data (MAYBE_SWAP (cu_index));
438}
439
440/* Worker function for traversing an addrmap to build the address table. */
441
442static int
443add_address_entry_worker (void *datap, CORE_ADDR start_addr, void *obj)
444{
445 struct addrmap_index_data *data = (struct addrmap_index_data *) datap;
edfe0a0c 446 partial_symtab *pst = (partial_symtab *) obj;
cd4fb1b2
SM
447
448 if (data->previous_valid)
79cc99f6 449 add_address_entry (data->addr_vec,
cd4fb1b2
SM
450 data->previous_cu_start, start_addr,
451 data->previous_cu_index);
452
453 data->previous_cu_start = start_addr;
454 if (pst != NULL)
455 {
456 const auto it = data->cu_index_htab.find (pst);
457 gdb_assert (it != data->cu_index_htab.cend ());
458 data->previous_cu_index = it->second;
459 data->previous_valid = 1;
460 }
461 else
462 data->previous_valid = 0;
463
464 return 0;
465}
466
79cc99f6 467/* Write PER_BFD's address map to ADDR_VEC.
cd4fb1b2
SM
468 CU_INDEX_HTAB is used to map addrmap entries to their CU indices
469 in the index file. */
470
471static void
79cc99f6 472write_address_map (dwarf2_per_bfd *per_bfd, data_buf &addr_vec,
cd4fb1b2
SM
473 psym_index_map &cu_index_htab)
474{
475 struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab);
476
477 /* When writing the address table, we have to cope with the fact that
478 the addrmap iterator only provides the start of a region; we have to
479 wait until the next invocation to get the start of the next region. */
480
cd4fb1b2
SM
481 addrmap_index_data.previous_valid = 0;
482
79cc99f6 483 addrmap_foreach (per_bfd->partial_symtabs->psymtabs_addrmap,
d320c2b5 484 add_address_entry_worker, &addrmap_index_data);
cd4fb1b2
SM
485
486 /* It's highly unlikely the last entry (end address = 0xff...ff)
487 is valid, but we should still handle it.
488 The end address is recorded as the start of the next region, but that
489 doesn't work here. To cope we pass 0xff...ff, this is a rare situation
490 anyway. */
491 if (addrmap_index_data.previous_valid)
79cc99f6 492 add_address_entry (addr_vec,
cd4fb1b2
SM
493 addrmap_index_data.previous_cu_start, (CORE_ADDR) -1,
494 addrmap_index_data.previous_cu_index);
495}
496
497/* Return the symbol kind of PSYM. */
498
499static gdb_index_symbol_kind
500symbol_kind (struct partial_symbol *psym)
501{
8a6d4234
TT
502 domain_enum domain = psym->domain;
503 enum address_class aclass = psym->aclass;
cd4fb1b2
SM
504
505 switch (domain)
506 {
507 case VAR_DOMAIN:
508 switch (aclass)
509 {
510 case LOC_BLOCK:
511 return GDB_INDEX_SYMBOL_KIND_FUNCTION;
512 case LOC_TYPEDEF:
513 return GDB_INDEX_SYMBOL_KIND_TYPE;
514 case LOC_COMPUTED:
515 case LOC_CONST_BYTES:
516 case LOC_OPTIMIZED_OUT:
517 case LOC_STATIC:
518 return GDB_INDEX_SYMBOL_KIND_VARIABLE;
519 case LOC_CONST:
520 /* Note: It's currently impossible to recognize psyms as enum values
521 short of reading the type info. For now punt. */
522 return GDB_INDEX_SYMBOL_KIND_VARIABLE;
523 default:
524 /* There are other LOC_FOO values that one might want to classify
525 as variables, but dwarf2read.c doesn't currently use them. */
526 return GDB_INDEX_SYMBOL_KIND_OTHER;
527 }
528 case STRUCT_DOMAIN:
529 return GDB_INDEX_SYMBOL_KIND_TYPE;
530 default:
531 return GDB_INDEX_SYMBOL_KIND_OTHER;
532 }
533}
534
535/* Add a list of partial symbols to SYMTAB. */
536
537static void
538write_psymbols (struct mapped_symtab *symtab,
539 std::unordered_set<partial_symbol *> &psyms_seen,
932539d7 540 const std::vector<partial_symbol *> &symbols,
cd4fb1b2
SM
541 offset_type cu_index,
542 int is_static)
543{
932539d7 544 for (partial_symbol *psym : symbols)
cd4fb1b2 545 {
7ab96794 546 const char *name = psym->ginfo.search_name ();
cd4fb1b2 547
c1b5c1eb 548 if (psym->ginfo.language () == language_ada)
7ab96794
TV
549 {
550 /* We want to ensure that the Ada main function's name appears
551 verbatim in the index. However, this name will be of the
552 form "_ada_mumble", and will be rewritten by ada_decode.
553 So, recognize it specially here and add it to the index by
554 hand. */
555 if (strcmp (main_name (), name) == 0)
556 {
557 gdb_index_symbol_kind kind = symbol_kind (psym);
558
559 add_index_entry (symtab, name, is_static, kind, cu_index);
560 }
561
562 /* In order for the index to work when read back into gdb, it
563 has to supply a funny form of the name: it should be the
564 encoded name, with any suffixes stripped. Using the
565 ordinary encoded name will not work properly with the
566 searching logic in find_name_components_bounds; nor will
567 using the decoded name. Furthermore, an Ada "verbatim"
568 name (of the form "<MumBle>") must be entered without the
569 angle brackets. Note that the current index is unusual,
570 see PR symtab/24820 for details. */
571 std::string decoded = ada_decode (name);
572 if (decoded[0] == '<')
573 name = (char *) obstack_copy0 (&symtab->m_string_obstack,
574 decoded.c_str () + 1,
575 decoded.length () - 2);
576 else
577 name = obstack_strdup (&symtab->m_string_obstack,
578 ada_encode (decoded.c_str ()));
579 }
cd4fb1b2
SM
580
581 /* Only add a given psymbol once. */
582 if (psyms_seen.insert (psym).second)
583 {
584 gdb_index_symbol_kind kind = symbol_kind (psym);
585
7ab96794 586 add_index_entry (symtab, name, is_static, kind, cu_index);
cd4fb1b2
SM
587 }
588 }
589}
590
591/* A helper struct used when iterating over debug_types. */
592struct signatured_type_index_data
593{
594 signatured_type_index_data (data_buf &types_list_,
dda83cd7 595 std::unordered_set<partial_symbol *> &psyms_seen_)
cd4fb1b2
SM
596 : types_list (types_list_), psyms_seen (psyms_seen_)
597 {}
598
599 struct objfile *objfile;
600 struct mapped_symtab *symtab;
601 data_buf &types_list;
602 std::unordered_set<partial_symbol *> &psyms_seen;
603 int cu_index;
604};
605
606/* A helper function that writes a single signatured_type to an
607 obstack. */
608
609static int
610write_one_signatured_type (void **slot, void *d)
611{
612 struct signatured_type_index_data *info
613 = (struct signatured_type_index_data *) d;
614 struct signatured_type *entry = (struct signatured_type *) *slot;
edfe0a0c 615 partial_symtab *psymtab = entry->per_cu.v.psymtab;
cd4fb1b2 616
2bd3e4b8
TV
617 if (psymtab == nullptr)
618 {
619 /* We can end up here when processing a skeleton CU referring to a
620 .dwo file that hasn't been found. There's not much we can do in
621 such a case, so skip this CU. */
622 return 1;
623 }
624
932539d7
TT
625 write_psymbols (info->symtab, info->psyms_seen,
626 psymtab->global_psymbols, info->cu_index,
cd4fb1b2 627 0);
932539d7
TT
628 write_psymbols (info->symtab, info->psyms_seen,
629 psymtab->static_psymbols, info->cu_index,
cd4fb1b2
SM
630 1);
631
632 info->types_list.append_uint (8, BFD_ENDIAN_LITTLE,
633 to_underlying (entry->per_cu.sect_off));
634 info->types_list.append_uint (8, BFD_ENDIAN_LITTLE,
635 to_underlying (entry->type_offset_in_tu));
636 info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, entry->signature);
637
638 ++info->cu_index;
639
640 return 1;
641}
642
643/* Recurse into all "included" dependencies and count their symbols as
644 if they appeared in this psymtab. */
645
646static void
edfe0a0c 647recursively_count_psymbols (partial_symtab *psymtab,
cd4fb1b2
SM
648 size_t &psyms_seen)
649{
650 for (int i = 0; i < psymtab->number_of_dependencies; ++i)
651 if (psymtab->dependencies[i]->user != NULL)
edfe0a0c 652 recursively_count_psymbols (psymtab->dependencies[i],
cd4fb1b2
SM
653 psyms_seen);
654
932539d7
TT
655 psyms_seen += psymtab->global_psymbols.size ();
656 psyms_seen += psymtab->static_psymbols.size ();
cd4fb1b2
SM
657}
658
659/* Recurse into all "included" dependencies and write their symbols as
660 if they appeared in this psymtab. */
661
662static void
663recursively_write_psymbols (struct objfile *objfile,
edfe0a0c 664 partial_symtab *psymtab,
cd4fb1b2
SM
665 struct mapped_symtab *symtab,
666 std::unordered_set<partial_symbol *> &psyms_seen,
667 offset_type cu_index)
668{
669 int i;
670
671 for (i = 0; i < psymtab->number_of_dependencies; ++i)
672 if (psymtab->dependencies[i]->user != NULL)
891813be 673 recursively_write_psymbols (objfile,
edfe0a0c 674 psymtab->dependencies[i],
cd4fb1b2
SM
675 symtab, psyms_seen, cu_index);
676
932539d7
TT
677 write_psymbols (symtab, psyms_seen,
678 psymtab->global_psymbols, cu_index,
cd4fb1b2 679 0);
932539d7
TT
680 write_psymbols (symtab, psyms_seen,
681 psymtab->static_psymbols, cu_index,
cd4fb1b2
SM
682 1);
683}
684
685/* DWARF-5 .debug_names builder. */
686class debug_names
687{
688public:
976ca316 689 debug_names (dwarf2_per_objfile *per_objfile, bool is_dwarf64,
cd4fb1b2
SM
690 bfd_endian dwarf5_byte_order)
691 : m_dwarf5_byte_order (dwarf5_byte_order),
692 m_dwarf32 (dwarf5_byte_order),
693 m_dwarf64 (dwarf5_byte_order),
694 m_dwarf (is_dwarf64
695 ? static_cast<dwarf &> (m_dwarf64)
696 : static_cast<dwarf &> (m_dwarf32)),
697 m_name_table_string_offs (m_dwarf.name_table_string_offs),
698 m_name_table_entry_offs (m_dwarf.name_table_entry_offs),
976ca316 699 m_debugstrlookup (per_objfile)
cd4fb1b2
SM
700 {}
701
702 int dwarf5_offset_size () const
703 {
704 const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64;
705 return dwarf5_is_dwarf64 ? 8 : 4;
706 }
707
708 /* Is this symbol from DW_TAG_compile_unit or DW_TAG_type_unit? */
709 enum class unit_kind { cu, tu };
710
711 /* Insert one symbol. */
712 void insert (const partial_symbol *psym, int cu_index, bool is_static,
713 unit_kind kind)
714 {
715 const int dwarf_tag = psymbol_tag (psym);
716 if (dwarf_tag == 0)
717 return;
c9d95fa3 718 const char *name = psym->ginfo.search_name ();
3b00ef10 719
c1b5c1eb 720 if (psym->ginfo.language () == language_ada)
3b00ef10
TT
721 {
722 /* We want to ensure that the Ada main function's name appears
723 verbatim in the index. However, this name will be of the
724 form "_ada_mumble", and will be rewritten by ada_decode.
725 So, recognize it specially here and add it to the index by
726 hand. */
727 if (strcmp (main_name (), name) == 0)
728 {
729 const auto insertpair
730 = m_name_to_value_set.emplace (c_str_view (name),
731 std::set<symbol_value> ());
732 std::set<symbol_value> &value_set = insertpair.first->second;
733 value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static,
734 kind));
735 }
736
737 /* In order for the index to work when read back into gdb, it
738 has to supply a funny form of the name: it should be the
739 encoded name, with any suffixes stripped. Using the
740 ordinary encoded name will not work properly with the
741 searching logic in find_name_components_bounds; nor will
742 using the decoded name. Furthermore, an Ada "verbatim"
743 name (of the form "<MumBle>") must be entered without the
744 angle brackets. Note that the current index is unusual,
745 see PR symtab/24820 for details. */
f945dedf 746 std::string decoded = ada_decode (name);
3b00ef10
TT
747 if (decoded[0] == '<')
748 name = (char *) obstack_copy0 (&m_string_obstack,
f945dedf
CB
749 decoded.c_str () + 1,
750 decoded.length () - 2);
3b00ef10 751 else
f945dedf
CB
752 name = obstack_strdup (&m_string_obstack,
753 ada_encode (decoded.c_str ()));
3b00ef10
TT
754 }
755
cd4fb1b2
SM
756 const auto insertpair
757 = m_name_to_value_set.emplace (c_str_view (name),
758 std::set<symbol_value> ());
759 std::set<symbol_value> &value_set = insertpair.first->second;
760 value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static, kind));
761 }
762
763 /* Build all the tables. All symbols must be already inserted.
764 This function does not call file_write, caller has to do it
765 afterwards. */
766 void build ()
767 {
768 /* Verify the build method has not been called twice. */
769 gdb_assert (m_abbrev_table.empty ());
770 const size_t name_count = m_name_to_value_set.size ();
771 m_bucket_table.resize
772 (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3))));
773 m_hash_table.reserve (name_count);
774 m_name_table_string_offs.reserve (name_count);
775 m_name_table_entry_offs.reserve (name_count);
776
777 /* Map each hash of symbol to its name and value. */
778 struct hash_it_pair
779 {
780 uint32_t hash;
781 decltype (m_name_to_value_set)::const_iterator it;
782 };
783 std::vector<std::forward_list<hash_it_pair>> bucket_hash;
784 bucket_hash.resize (m_bucket_table.size ());
785 for (decltype (m_name_to_value_set)::const_iterator it
786 = m_name_to_value_set.cbegin ();
787 it != m_name_to_value_set.cend ();
788 ++it)
789 {
790 const char *const name = it->first.c_str ();
791 const uint32_t hash = dwarf5_djb_hash (name);
792 hash_it_pair hashitpair;
793 hashitpair.hash = hash;
794 hashitpair.it = it;
795 auto &slot = bucket_hash[hash % bucket_hash.size()];
796 slot.push_front (std::move (hashitpair));
797 }
798 for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix)
799 {
800 const std::forward_list<hash_it_pair> &hashitlist
801 = bucket_hash[bucket_ix];
802 if (hashitlist.empty ())
803 continue;
804 uint32_t &bucket_slot = m_bucket_table[bucket_ix];
805 /* The hashes array is indexed starting at 1. */
806 store_unsigned_integer (reinterpret_cast<gdb_byte *> (&bucket_slot),
807 sizeof (bucket_slot), m_dwarf5_byte_order,
808 m_hash_table.size () + 1);
809 for (const hash_it_pair &hashitpair : hashitlist)
810 {
811 m_hash_table.push_back (0);
812 store_unsigned_integer (reinterpret_cast<gdb_byte *>
813 (&m_hash_table.back ()),
814 sizeof (m_hash_table.back ()),
815 m_dwarf5_byte_order, hashitpair.hash);
816 const c_str_view &name = hashitpair.it->first;
817 const std::set<symbol_value> &value_set = hashitpair.it->second;
818 m_name_table_string_offs.push_back_reorder
819 (m_debugstrlookup.lookup (name.c_str ()));
820 m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ());
821 gdb_assert (!value_set.empty ());
822 for (const symbol_value &value : value_set)
823 {
824 int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag,
825 value.is_static,
826 value.kind)];
827 if (idx == 0)
828 {
829 idx = m_idx_next++;
830 m_abbrev_table.append_unsigned_leb128 (idx);
831 m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag);
832 m_abbrev_table.append_unsigned_leb128
833 (value.kind == unit_kind::cu ? DW_IDX_compile_unit
834 : DW_IDX_type_unit);
835 m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata);
836 m_abbrev_table.append_unsigned_leb128 (value.is_static
837 ? DW_IDX_GNU_internal
838 : DW_IDX_GNU_external);
839 m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present);
840
841 /* Terminate attributes list. */
842 m_abbrev_table.append_unsigned_leb128 (0);
843 m_abbrev_table.append_unsigned_leb128 (0);
844 }
845
846 m_entry_pool.append_unsigned_leb128 (idx);
847 m_entry_pool.append_unsigned_leb128 (value.cu_index);
848 }
849
850 /* Terminate the list of CUs. */
851 m_entry_pool.append_unsigned_leb128 (0);
852 }
853 }
854 gdb_assert (m_hash_table.size () == name_count);
855
856 /* Terminate tags list. */
857 m_abbrev_table.append_unsigned_leb128 (0);
858 }
859
860 /* Return .debug_names bucket count. This must be called only after
861 calling the build method. */
862 uint32_t bucket_count () const
863 {
864 /* Verify the build method has been already called. */
865 gdb_assert (!m_abbrev_table.empty ());
866 const uint32_t retval = m_bucket_table.size ();
867
868 /* Check for overflow. */
869 gdb_assert (retval == m_bucket_table.size ());
870 return retval;
871 }
872
873 /* Return .debug_names names count. This must be called only after
874 calling the build method. */
875 uint32_t name_count () const
876 {
877 /* Verify the build method has been already called. */
878 gdb_assert (!m_abbrev_table.empty ());
879 const uint32_t retval = m_hash_table.size ();
880
881 /* Check for overflow. */
882 gdb_assert (retval == m_hash_table.size ());
883 return retval;
884 }
885
886 /* Return number of bytes of .debug_names abbreviation table. This
887 must be called only after calling the build method. */
888 uint32_t abbrev_table_bytes () const
889 {
890 gdb_assert (!m_abbrev_table.empty ());
891 return m_abbrev_table.size ();
892 }
893
894 /* Recurse into all "included" dependencies and store their symbols
895 as if they appeared in this psymtab. */
896 void recursively_write_psymbols
897 (struct objfile *objfile,
edfe0a0c 898 partial_symtab *psymtab,
cd4fb1b2
SM
899 std::unordered_set<partial_symbol *> &psyms_seen,
900 int cu_index)
901 {
902 for (int i = 0; i < psymtab->number_of_dependencies; ++i)
903 if (psymtab->dependencies[i]->user != NULL)
891813be 904 recursively_write_psymbols
edfe0a0c 905 (objfile, psymtab->dependencies[i], psyms_seen, cu_index);
cd4fb1b2 906
932539d7
TT
907 write_psymbols (psyms_seen, psymtab->global_psymbols,
908 cu_index, false, unit_kind::cu);
909 write_psymbols (psyms_seen, psymtab->static_psymbols,
910 cu_index, true, unit_kind::cu);
cd4fb1b2
SM
911 }
912
913 /* Return number of bytes the .debug_names section will have. This
914 must be called only after calling the build method. */
915 size_t bytes () const
916 {
917 /* Verify the build method has been already called. */
918 gdb_assert (!m_abbrev_table.empty ());
919 size_t expected_bytes = 0;
920 expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]);
921 expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]);
922 expected_bytes += m_name_table_string_offs.bytes ();
923 expected_bytes += m_name_table_entry_offs.bytes ();
924 expected_bytes += m_abbrev_table.size ();
925 expected_bytes += m_entry_pool.size ();
926 return expected_bytes;
927 }
928
929 /* Write .debug_names to FILE_NAMES and .debug_str addition to
930 FILE_STR. This must be called only after calling the build
931 method. */
932 void file_write (FILE *file_names, FILE *file_str) const
933 {
934 /* Verify the build method has been already called. */
935 gdb_assert (!m_abbrev_table.empty ());
936 ::file_write (file_names, m_bucket_table);
937 ::file_write (file_names, m_hash_table);
938 m_name_table_string_offs.file_write (file_names);
939 m_name_table_entry_offs.file_write (file_names);
940 m_abbrev_table.file_write (file_names);
941 m_entry_pool.file_write (file_names);
942 m_debugstrlookup.file_write (file_str);
943 }
944
945 /* A helper user data for write_one_signatured_type. */
946 class write_one_signatured_type_data
947 {
948 public:
949 write_one_signatured_type_data (debug_names &nametable_,
dda83cd7 950 signatured_type_index_data &&info_)
cd4fb1b2
SM
951 : nametable (nametable_), info (std::move (info_))
952 {}
953 debug_names &nametable;
954 struct signatured_type_index_data info;
955 };
956
957 /* A helper function to pass write_one_signatured_type to
958 htab_traverse_noresize. */
959 static int
960 write_one_signatured_type (void **slot, void *d)
961 {
962 write_one_signatured_type_data *data = (write_one_signatured_type_data *) d;
963 struct signatured_type_index_data *info = &data->info;
964 struct signatured_type *entry = (struct signatured_type *) *slot;
965
966 data->nametable.write_one_signatured_type (entry, info);
967
968 return 1;
969 }
970
971private:
972
973 /* Storage for symbol names mapping them to their .debug_str section
974 offsets. */
975 class debug_str_lookup
976 {
977 public:
978
30baf67b 979 /* Object constructor to be called for current DWARF2_PER_OBJFILE.
cd4fb1b2 980 All .debug_str section strings are automatically stored. */
976ca316
SM
981 debug_str_lookup (dwarf2_per_objfile *per_objfile)
982 : m_abfd (per_objfile->objfile->obfd),
983 m_per_objfile (per_objfile)
cd4fb1b2 984 {
976ca316
SM
985 per_objfile->per_bfd->str.read (per_objfile->objfile);
986 if (per_objfile->per_bfd->str.buffer == NULL)
cd4fb1b2 987 return;
976ca316
SM
988 for (const gdb_byte *data = per_objfile->per_bfd->str.buffer;
989 data < (per_objfile->per_bfd->str.buffer
990 + per_objfile->per_bfd->str.size);)
cd4fb1b2
SM
991 {
992 const char *const s = reinterpret_cast<const char *> (data);
993 const auto insertpair
994 = m_str_table.emplace (c_str_view (s),
976ca316 995 data - per_objfile->per_bfd->str.buffer);
cd4fb1b2 996 if (!insertpair.second)
b98664d3 997 complaint (_("Duplicate string \"%s\" in "
cd4fb1b2
SM
998 ".debug_str section [in module %s]"),
999 s, bfd_get_filename (m_abfd));
1000 data += strlen (s) + 1;
1001 }
1002 }
1003
1004 /* Return offset of symbol name S in the .debug_str section. Add
1005 such symbol to the section's end if it does not exist there
1006 yet. */
1007 size_t lookup (const char *s)
1008 {
1009 const auto it = m_str_table.find (c_str_view (s));
1010 if (it != m_str_table.end ())
1011 return it->second;
976ca316 1012 const size_t offset = (m_per_objfile->per_bfd->str.size
cd4fb1b2
SM
1013 + m_str_add_buf.size ());
1014 m_str_table.emplace (c_str_view (s), offset);
1015 m_str_add_buf.append_cstr0 (s);
1016 return offset;
1017 }
1018
1019 /* Append the end of the .debug_str section to FILE. */
1020 void file_write (FILE *file) const
1021 {
1022 m_str_add_buf.file_write (file);
1023 }
1024
1025 private:
1026 std::unordered_map<c_str_view, size_t, c_str_view_hasher> m_str_table;
1027 bfd *const m_abfd;
976ca316 1028 dwarf2_per_objfile *m_per_objfile;
cd4fb1b2
SM
1029
1030 /* Data to add at the end of .debug_str for new needed symbol names. */
1031 data_buf m_str_add_buf;
1032 };
1033
1034 /* Container to map used DWARF tags to their .debug_names abbreviation
1035 tags. */
1036 class index_key
1037 {
1038 public:
1039 index_key (int dwarf_tag_, bool is_static_, unit_kind kind_)
1040 : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_)
1041 {
1042 }
1043
1044 bool
1045 operator== (const index_key &other) const
1046 {
1047 return (dwarf_tag == other.dwarf_tag && is_static == other.is_static
1048 && kind == other.kind);
1049 }
1050
1051 const int dwarf_tag;
1052 const bool is_static;
1053 const unit_kind kind;
1054 };
1055
1056 /* Provide std::unordered_map::hasher for index_key. */
1057 class index_key_hasher
1058 {
1059 public:
1060 size_t
1061 operator () (const index_key &key) const
1062 {
1063 return (std::hash<int>() (key.dwarf_tag) << 1) | key.is_static;
1064 }
1065 };
1066
1067 /* Parameters of one symbol entry. */
1068 class symbol_value
1069 {
1070 public:
1071 const int dwarf_tag, cu_index;
1072 const bool is_static;
1073 const unit_kind kind;
1074
1075 symbol_value (int dwarf_tag_, int cu_index_, bool is_static_,
1076 unit_kind kind_)
1077 : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_),
dda83cd7 1078 kind (kind_)
cd4fb1b2
SM
1079 {}
1080
1081 bool
1082 operator< (const symbol_value &other) const
1083 {
1084#define X(n) \
1085 do \
1086 { \
1087 if (n < other.n) \
1088 return true; \
1089 if (n > other.n) \
1090 return false; \
1091 } \
1092 while (0)
1093 X (dwarf_tag);
1094 X (is_static);
1095 X (kind);
1096 X (cu_index);
1097#undef X
1098 return false;
1099 }
1100 };
1101
1102 /* Abstract base class to unify DWARF-32 and DWARF-64 name table
1103 output. */
1104 class offset_vec
1105 {
1106 protected:
1107 const bfd_endian dwarf5_byte_order;
1108 public:
1109 explicit offset_vec (bfd_endian dwarf5_byte_order_)
1110 : dwarf5_byte_order (dwarf5_byte_order_)
1111 {}
1112
1113 /* Call std::vector::reserve for NELEM elements. */
1114 virtual void reserve (size_t nelem) = 0;
1115
1116 /* Call std::vector::push_back with store_unsigned_integer byte
1117 reordering for ELEM. */
1118 virtual void push_back_reorder (size_t elem) = 0;
1119
1120 /* Return expected output size in bytes. */
1121 virtual size_t bytes () const = 0;
1122
1123 /* Write name table to FILE. */
1124 virtual void file_write (FILE *file) const = 0;
1125 };
1126
1127 /* Template to unify DWARF-32 and DWARF-64 output. */
1128 template<typename OffsetSize>
1129 class offset_vec_tmpl : public offset_vec
1130 {
1131 public:
1132 explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_)
1133 : offset_vec (dwarf5_byte_order_)
1134 {}
1135
1136 /* Implement offset_vec::reserve. */
1137 void reserve (size_t nelem) override
1138 {
1139 m_vec.reserve (nelem);
1140 }
1141
1142 /* Implement offset_vec::push_back_reorder. */
1143 void push_back_reorder (size_t elem) override
1144 {
1145 m_vec.push_back (elem);
1146 /* Check for overflow. */
1147 gdb_assert (m_vec.back () == elem);
1148 store_unsigned_integer (reinterpret_cast<gdb_byte *> (&m_vec.back ()),
1149 sizeof (m_vec.back ()), dwarf5_byte_order, elem);
1150 }
1151
1152 /* Implement offset_vec::bytes. */
1153 size_t bytes () const override
1154 {
1155 return m_vec.size () * sizeof (m_vec[0]);
1156 }
1157
1158 /* Implement offset_vec::file_write. */
1159 void file_write (FILE *file) const override
1160 {
1161 ::file_write (file, m_vec);
1162 }
1163
1164 private:
1165 std::vector<OffsetSize> m_vec;
1166 };
1167
1168 /* Base class to unify DWARF-32 and DWARF-64 .debug_names output
1169 respecting name table width. */
1170 class dwarf
1171 {
1172 public:
1173 offset_vec &name_table_string_offs, &name_table_entry_offs;
1174
1175 dwarf (offset_vec &name_table_string_offs_,
1176 offset_vec &name_table_entry_offs_)
1177 : name_table_string_offs (name_table_string_offs_),
1178 name_table_entry_offs (name_table_entry_offs_)
1179 {
1180 }
1181 };
1182
1183 /* Template to unify DWARF-32 and DWARF-64 .debug_names output
1184 respecting name table width. */
1185 template<typename OffsetSize>
1186 class dwarf_tmpl : public dwarf
1187 {
1188 public:
1189 explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_)
1190 : dwarf (m_name_table_string_offs, m_name_table_entry_offs),
1191 m_name_table_string_offs (dwarf5_byte_order_),
1192 m_name_table_entry_offs (dwarf5_byte_order_)
1193 {}
1194
1195 private:
1196 offset_vec_tmpl<OffsetSize> m_name_table_string_offs;
1197 offset_vec_tmpl<OffsetSize> m_name_table_entry_offs;
1198 };
1199
1200 /* Try to reconstruct original DWARF tag for given partial_symbol.
1201 This function is not DWARF-5 compliant but it is sufficient for
1202 GDB as a DWARF-5 index consumer. */
1203 static int psymbol_tag (const struct partial_symbol *psym)
1204 {
8a6d4234
TT
1205 domain_enum domain = psym->domain;
1206 enum address_class aclass = psym->aclass;
cd4fb1b2
SM
1207
1208 switch (domain)
1209 {
1210 case VAR_DOMAIN:
1211 switch (aclass)
1212 {
1213 case LOC_BLOCK:
1214 return DW_TAG_subprogram;
1215 case LOC_TYPEDEF:
1216 return DW_TAG_typedef;
1217 case LOC_COMPUTED:
1218 case LOC_CONST_BYTES:
1219 case LOC_OPTIMIZED_OUT:
1220 case LOC_STATIC:
1221 return DW_TAG_variable;
1222 case LOC_CONST:
1223 /* Note: It's currently impossible to recognize psyms as enum values
1224 short of reading the type info. For now punt. */
1225 return DW_TAG_variable;
1226 default:
1227 /* There are other LOC_FOO values that one might want to classify
1228 as variables, but dwarf2read.c doesn't currently use them. */
1229 return DW_TAG_variable;
1230 }
1231 case STRUCT_DOMAIN:
1232 return DW_TAG_structure_type;
7666722f
TV
1233 case MODULE_DOMAIN:
1234 return DW_TAG_module;
cd4fb1b2
SM
1235 default:
1236 return 0;
1237 }
1238 }
1239
1240 /* Call insert for all partial symbols and mark them in PSYMS_SEEN. */
1241 void write_psymbols (std::unordered_set<partial_symbol *> &psyms_seen,
932539d7
TT
1242 const std::vector<partial_symbol *> &symbols,
1243 int cu_index, bool is_static, unit_kind kind)
cd4fb1b2 1244 {
932539d7 1245 for (partial_symbol *psym : symbols)
cd4fb1b2 1246 {
cd4fb1b2
SM
1247 /* Only add a given psymbol once. */
1248 if (psyms_seen.insert (psym).second)
1249 insert (psym, cu_index, is_static, kind);
1250 }
1251 }
1252
1253 /* A helper function that writes a single signatured_type
1254 to a debug_names. */
1255 void
1256 write_one_signatured_type (struct signatured_type *entry,
1257 struct signatured_type_index_data *info)
1258 {
edfe0a0c 1259 partial_symtab *psymtab = entry->per_cu.v.psymtab;
cd4fb1b2 1260
932539d7
TT
1261 write_psymbols (info->psyms_seen, psymtab->global_psymbols,
1262 info->cu_index, false, unit_kind::tu);
1263 write_psymbols (info->psyms_seen, psymtab->static_psymbols,
1264 info->cu_index, true, unit_kind::tu);
cd4fb1b2
SM
1265
1266 info->types_list.append_uint (dwarf5_offset_size (), m_dwarf5_byte_order,
1267 to_underlying (entry->per_cu.sect_off));
1268
1269 ++info->cu_index;
1270 }
1271
1272 /* Store value of each symbol. */
1273 std::unordered_map<c_str_view, std::set<symbol_value>, c_str_view_hasher>
1274 m_name_to_value_set;
1275
1276 /* Tables of DWARF-5 .debug_names. They are in object file byte
1277 order. */
1278 std::vector<uint32_t> m_bucket_table;
1279 std::vector<uint32_t> m_hash_table;
1280
1281 const bfd_endian m_dwarf5_byte_order;
1282 dwarf_tmpl<uint32_t> m_dwarf32;
1283 dwarf_tmpl<uint64_t> m_dwarf64;
1284 dwarf &m_dwarf;
1285 offset_vec &m_name_table_string_offs, &m_name_table_entry_offs;
1286 debug_str_lookup m_debugstrlookup;
1287
1288 /* Map each used .debug_names abbreviation tag parameter to its
1289 index value. */
1290 std::unordered_map<index_key, int, index_key_hasher> m_indexkey_to_idx;
1291
1292 /* Next unused .debug_names abbreviation tag for
1293 m_indexkey_to_idx. */
1294 int m_idx_next = 1;
1295
1296 /* .debug_names abbreviation table. */
1297 data_buf m_abbrev_table;
1298
1299 /* .debug_names entry pool. */
1300 data_buf m_entry_pool;
3b00ef10
TT
1301
1302 /* Temporary storage for Ada names. */
1303 auto_obstack m_string_obstack;
cd4fb1b2
SM
1304};
1305
1306/* Return iff any of the needed offsets does not fit into 32-bit
1307 .debug_names section. */
1308
1309static bool
976ca316 1310check_dwarf64_offsets (dwarf2_per_objfile *per_objfile)
cd4fb1b2 1311{
976ca316 1312 for (dwarf2_per_cu_data *per_cu : per_objfile->per_bfd->all_comp_units)
cd4fb1b2 1313 {
b76e467d 1314 if (to_underlying (per_cu->sect_off) >= (static_cast<uint64_t> (1) << 32))
cd4fb1b2
SM
1315 return true;
1316 }
976ca316 1317 for (const signatured_type *sigtype : per_objfile->per_bfd->all_type_units)
cd4fb1b2 1318 {
b2bdb8cf 1319 const dwarf2_per_cu_data &per_cu = sigtype->per_cu;
cd4fb1b2
SM
1320
1321 if (to_underlying (per_cu.sect_off) >= (static_cast<uint64_t> (1) << 32))
1322 return true;
1323 }
1324 return false;
1325}
1326
1327/* The psyms_seen set is potentially going to be largish (~40k
1328 elements when indexing a -g3 build of GDB itself). Estimate the
1329 number of elements in order to avoid too many rehashes, which
1330 require rebuilding buckets and thus many trips to
1331 malloc/free. */
1332
1333static size_t
976ca316 1334psyms_seen_size (dwarf2_per_objfile *per_objfile)
cd4fb1b2
SM
1335{
1336 size_t psyms_count = 0;
976ca316 1337 for (dwarf2_per_cu_data *per_cu : per_objfile->per_bfd->all_comp_units)
cd4fb1b2 1338 {
edfe0a0c 1339 partial_symtab *psymtab = per_cu->v.psymtab;
cd4fb1b2
SM
1340
1341 if (psymtab != NULL && psymtab->user == NULL)
1342 recursively_count_psymbols (psymtab, psyms_count);
1343 }
1344 /* Generating an index for gdb itself shows a ratio of
1345 TOTAL_SEEN_SYMS/UNIQUE_SYMS or ~5. 4 seems like a good bet. */
1346 return psyms_count / 4;
1347}
1348
c4973306
SM
1349/* Assert that FILE's size is EXPECTED_SIZE. Assumes file's seek
1350 position is at the end of the file. */
cd4fb1b2 1351
c4973306
SM
1352static void
1353assert_file_size (FILE *file, size_t expected_size)
1354{
1355 const auto file_size = ftell (file);
1356 if (file_size == -1)
1357 perror_with_name (("ftell"));
1358 gdb_assert (file_size == expected_size);
1359}
1360
1361/* Write a gdb index file to OUT_FILE from all the sections passed as
1362 arguments. */
1363
1364static void
1365write_gdbindex_1 (FILE *out_file,
1366 const data_buf &cu_list,
1367 const data_buf &types_cu_list,
1368 const data_buf &addr_vec,
1369 const data_buf &symtab_vec,
1370 const data_buf &constant_pool)
1371{
1372 data_buf contents;
1373 const offset_type size_of_header = 6 * sizeof (offset_type);
1374 offset_type total_len = size_of_header;
1375
1376 /* The version number. */
1377 contents.append_data (MAYBE_SWAP (8));
1378
1379 /* The offset of the CU list from the start of the file. */
1380 contents.append_data (MAYBE_SWAP (total_len));
1381 total_len += cu_list.size ();
1382
1383 /* The offset of the types CU list from the start of the file. */
1384 contents.append_data (MAYBE_SWAP (total_len));
1385 total_len += types_cu_list.size ();
1386
1387 /* The offset of the address table from the start of the file. */
1388 contents.append_data (MAYBE_SWAP (total_len));
1389 total_len += addr_vec.size ();
1390
1391 /* The offset of the symbol table from the start of the file. */
1392 contents.append_data (MAYBE_SWAP (total_len));
1393 total_len += symtab_vec.size ();
1394
1395 /* The offset of the constant pool from the start of the file. */
1396 contents.append_data (MAYBE_SWAP (total_len));
1397 total_len += constant_pool.size ();
1398
1399 gdb_assert (contents.size () == size_of_header);
1400
1401 contents.file_write (out_file);
1402 cu_list.file_write (out_file);
1403 types_cu_list.file_write (out_file);
1404 addr_vec.file_write (out_file);
1405 symtab_vec.file_write (out_file);
1406 constant_pool.file_write (out_file);
1407
1408 assert_file_size (out_file, total_len);
1409}
1410
1411/* Write contents of a .gdb_index section for OBJFILE into OUT_FILE.
1412 If OBJFILE has an associated dwz file, write contents of a .gdb_index
1413 section for that dwz file into DWZ_OUT_FILE. If OBJFILE does not have an
1414 associated dwz file, DWZ_OUT_FILE must be NULL. */
1415
1416static void
976ca316 1417write_gdbindex (dwarf2_per_objfile *per_objfile, FILE *out_file,
c4973306 1418 FILE *dwz_out_file)
cd4fb1b2 1419{
976ca316 1420 struct objfile *objfile = per_objfile->objfile;
cd4fb1b2 1421 mapped_symtab symtab;
c4973306
SM
1422 data_buf objfile_cu_list;
1423 data_buf dwz_cu_list;
cd4fb1b2
SM
1424
1425 /* While we're scanning CU's create a table that maps a psymtab pointer
1426 (which is what addrmap records) to its index (which is what is recorded
1427 in the index file). This will later be needed to write the address
1428 table. */
1429 psym_index_map cu_index_htab;
976ca316 1430 cu_index_htab.reserve (per_objfile->per_bfd->all_comp_units.size ());
cd4fb1b2
SM
1431
1432 /* The CU list is already sorted, so we don't need to do additional
1433 work here. Also, the debug_types entries do not appear in
1434 all_comp_units, but only in their own hash table. */
1435
1436 std::unordered_set<partial_symbol *> psyms_seen
976ca316
SM
1437 (psyms_seen_size (per_objfile));
1438 for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
cd4fb1b2 1439 {
976ca316 1440 dwarf2_per_cu_data *per_cu = per_objfile->per_bfd->all_comp_units[i];
edfe0a0c 1441 partial_symtab *psymtab = per_cu->v.psymtab;
cd4fb1b2 1442
efba5c23
TV
1443 if (psymtab != NULL)
1444 {
1445 if (psymtab->user == NULL)
1446 recursively_write_psymbols (objfile, psymtab, &symtab,
1447 psyms_seen, i);
1448
1449 const auto insertpair = cu_index_htab.emplace (psymtab, i);
1450 gdb_assert (insertpair.second);
1451 }
cd4fb1b2 1452
c4973306
SM
1453 /* The all_comp_units list contains CUs read from the objfile as well as
1454 from the eventual dwz file. We need to place the entry in the
1455 corresponding index. */
1456 data_buf &cu_list = per_cu->is_dwz ? dwz_cu_list : objfile_cu_list;
cd4fb1b2
SM
1457 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1458 to_underlying (per_cu->sect_off));
1459 cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length);
1460 }
1461
1462 /* Dump the address map. */
1463 data_buf addr_vec;
79cc99f6 1464 write_address_map (per_objfile->per_bfd, addr_vec, cu_index_htab);
cd4fb1b2
SM
1465
1466 /* Write out the .debug_type entries, if any. */
1467 data_buf types_cu_list;
976ca316 1468 if (per_objfile->per_bfd->signatured_types)
cd4fb1b2
SM
1469 {
1470 signatured_type_index_data sig_data (types_cu_list,
1471 psyms_seen);
1472
1473 sig_data.objfile = objfile;
1474 sig_data.symtab = &symtab;
976ca316
SM
1475 sig_data.cu_index = per_objfile->per_bfd->all_comp_units.size ();
1476 htab_traverse_noresize (per_objfile->per_bfd->signatured_types.get (),
cd4fb1b2
SM
1477 write_one_signatured_type, &sig_data);
1478 }
1479
1480 /* Now that we've processed all symbols we can shrink their cu_indices
1481 lists. */
1482 uniquify_cu_indices (&symtab);
1483
1484 data_buf symtab_vec, constant_pool;
1485 write_hash_table (&symtab, symtab_vec, constant_pool);
1486
c4973306
SM
1487 write_gdbindex_1(out_file, objfile_cu_list, types_cu_list, addr_vec,
1488 symtab_vec, constant_pool);
cd4fb1b2 1489
c4973306
SM
1490 if (dwz_out_file != NULL)
1491 write_gdbindex_1 (dwz_out_file, dwz_cu_list, {}, {}, {}, {});
1492 else
1493 gdb_assert (dwz_cu_list.empty ());
cd4fb1b2
SM
1494}
1495
1496/* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension. */
1497static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 };
1498
1499/* Write a new .debug_names section for OBJFILE into OUT_FILE, write
1500 needed addition to .debug_str section to OUT_FILE_STR. Return how
1501 many bytes were expected to be written into OUT_FILE. */
1502
c4973306 1503static void
976ca316 1504write_debug_names (dwarf2_per_objfile *per_objfile,
cd4fb1b2
SM
1505 FILE *out_file, FILE *out_file_str)
1506{
976ca316
SM
1507 const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (per_objfile);
1508 struct objfile *objfile = per_objfile->objfile;
cd4fb1b2 1509 const enum bfd_endian dwarf5_byte_order
08feed99 1510 = gdbarch_byte_order (objfile->arch ());
cd4fb1b2
SM
1511
1512 /* The CU list is already sorted, so we don't need to do additional
1513 work here. Also, the debug_types entries do not appear in
1514 all_comp_units, but only in their own hash table. */
1515 data_buf cu_list;
976ca316 1516 debug_names nametable (per_objfile, dwarf5_is_dwarf64, dwarf5_byte_order);
cd4fb1b2 1517 std::unordered_set<partial_symbol *>
976ca316
SM
1518 psyms_seen (psyms_seen_size (per_objfile));
1519 for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
cd4fb1b2 1520 {
976ca316 1521 const dwarf2_per_cu_data *per_cu = per_objfile->per_bfd->all_comp_units[i];
edfe0a0c 1522 partial_symtab *psymtab = per_cu->v.psymtab;
cd4fb1b2
SM
1523
1524 /* CU of a shared file from 'dwz -m' may be unused by this main
1525 file. It may be referenced from a local scope but in such
1526 case it does not need to be present in .debug_names. */
1527 if (psymtab == NULL)
1528 continue;
1529
1530 if (psymtab->user == NULL)
1531 nametable.recursively_write_psymbols (objfile, psymtab, psyms_seen, i);
1532
1533 cu_list.append_uint (nametable.dwarf5_offset_size (), dwarf5_byte_order,
1534 to_underlying (per_cu->sect_off));
1535 }
1536
1537 /* Write out the .debug_type entries, if any. */
1538 data_buf types_cu_list;
976ca316 1539 if (per_objfile->per_bfd->signatured_types)
cd4fb1b2
SM
1540 {
1541 debug_names::write_one_signatured_type_data sig_data (nametable,
1542 signatured_type_index_data (types_cu_list, psyms_seen));
1543
1544 sig_data.info.objfile = objfile;
1545 /* It is used only for gdb_index. */
1546 sig_data.info.symtab = nullptr;
1547 sig_data.info.cu_index = 0;
976ca316 1548 htab_traverse_noresize (per_objfile->per_bfd->signatured_types.get (),
cd4fb1b2
SM
1549 debug_names::write_one_signatured_type,
1550 &sig_data);
1551 }
1552
1553 nametable.build ();
1554
1555 /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC. */
1556
1557 const offset_type bytes_of_header
1558 = ((dwarf5_is_dwarf64 ? 12 : 4)
1559 + 2 + 2 + 7 * 4
1560 + sizeof (dwarf5_gdb_augmentation));
1561 size_t expected_bytes = 0;
1562 expected_bytes += bytes_of_header;
1563 expected_bytes += cu_list.size ();
1564 expected_bytes += types_cu_list.size ();
1565 expected_bytes += nametable.bytes ();
1566 data_buf header;
1567
1568 if (!dwarf5_is_dwarf64)
1569 {
1570 const uint64_t size64 = expected_bytes - 4;
1571 gdb_assert (size64 < 0xfffffff0);
1572 header.append_uint (4, dwarf5_byte_order, size64);
1573 }
1574 else
1575 {
1576 header.append_uint (4, dwarf5_byte_order, 0xffffffff);
1577 header.append_uint (8, dwarf5_byte_order, expected_bytes - 12);
1578 }
1579
1580 /* The version number. */
1581 header.append_uint (2, dwarf5_byte_order, 5);
1582
1583 /* Padding. */
1584 header.append_uint (2, dwarf5_byte_order, 0);
1585
1586 /* comp_unit_count - The number of CUs in the CU list. */
b76e467d 1587 header.append_uint (4, dwarf5_byte_order,
976ca316 1588 per_objfile->per_bfd->all_comp_units.size ());
cd4fb1b2
SM
1589
1590 /* local_type_unit_count - The number of TUs in the local TU
1591 list. */
b2bdb8cf 1592 header.append_uint (4, dwarf5_byte_order,
976ca316 1593 per_objfile->per_bfd->all_type_units.size ());
cd4fb1b2
SM
1594
1595 /* foreign_type_unit_count - The number of TUs in the foreign TU
1596 list. */
1597 header.append_uint (4, dwarf5_byte_order, 0);
1598
1599 /* bucket_count - The number of hash buckets in the hash lookup
1600 table. */
1601 header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ());
1602
1603 /* name_count - The number of unique names in the index. */
1604 header.append_uint (4, dwarf5_byte_order, nametable.name_count ());
1605
1606 /* abbrev_table_size - The size in bytes of the abbreviations
1607 table. */
1608 header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ());
1609
1610 /* augmentation_string_size - The size in bytes of the augmentation
1611 string. This value is rounded up to a multiple of 4. */
1612 static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
1613 header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
1614 header.append_data (dwarf5_gdb_augmentation);
1615
1616 gdb_assert (header.size () == bytes_of_header);
1617
1618 header.file_write (out_file);
1619 cu_list.file_write (out_file);
1620 types_cu_list.file_write (out_file);
1621 nametable.file_write (out_file, out_file_str);
1622
c4973306 1623 assert_file_size (out_file, expected_bytes);
cd4fb1b2
SM
1624}
1625
c4973306 1626/* This represents an index file being written (work-in-progress).
cd4fb1b2 1627
c4973306
SM
1628 The data is initially written to a temporary file. When the finalize method
1629 is called, the file is closed and moved to its final location.
1630
1631 On failure (if this object is being destroyed with having called finalize),
1632 the temporary file is closed and deleted. */
1633
1634struct index_wip_file
cd4fb1b2 1635{
c4973306
SM
1636 index_wip_file (const char *dir, const char *basename,
1637 const char *suffix)
1638 {
1639 filename = (std::string (dir) + SLASH_STRING + basename
1640 + suffix);
1641
1642 filename_temp = make_temp_filename (filename);
1643
1644 scoped_fd out_file_fd (gdb_mkostemp_cloexec (filename_temp.data (),
1645 O_BINARY));
1646 if (out_file_fd.get () == -1)
1647 perror_with_name (("mkstemp"));
1648
1649 out_file = out_file_fd.to_file ("wb");
1650
1651 if (out_file == nullptr)
1652 error (_("Can't open `%s' for writing"), filename_temp.data ());
1653
1654 unlink_file.emplace (filename_temp.data ());
1655 }
1656
1657 void finalize ()
1658 {
1659 /* We want to keep the file. */
1660 unlink_file->keep ();
1661
1662 /* Close and move the str file in place. */
1663 unlink_file.reset ();
1664 if (rename (filename_temp.data (), filename.c_str ()) != 0)
1665 perror_with_name (("rename"));
1666 }
1667
1668 std::string filename;
1669 gdb::char_vector filename_temp;
1670
1671 /* Order matters here; we want FILE to be closed before
1672 FILENAME_TEMP is unlinked, because on MS-Windows one cannot
1673 delete a file that is still open. So, we wrap the unlinker in an
1674 optional and emplace it once we know the file name. */
1675 gdb::optional<gdb::unlinker> unlink_file;
1676
1677 gdb_file_up out_file;
1678};
cd4fb1b2 1679
87d6a7aa 1680/* See dwarf-index-write.h. */
cd4fb1b2 1681
87d6a7aa 1682void
976ca316
SM
1683write_psymtabs_to_index (dwarf2_per_objfile *per_objfile, const char *dir,
1684 const char *basename, const char *dwz_basename,
cd4fb1b2
SM
1685 dw_index_kind index_kind)
1686{
79cc99f6 1687 dwarf2_per_bfd *per_bfd = per_objfile->per_bfd;
976ca316 1688 struct objfile *objfile = per_objfile->objfile;
cd4fb1b2 1689
976ca316 1690 if (per_objfile->per_bfd->using_index)
cd4fb1b2
SM
1691 error (_("Cannot use an index to create the index"));
1692
976ca316 1693 if (per_objfile->per_bfd->types.size () > 1)
cd4fb1b2
SM
1694 error (_("Cannot make an index when the file has multiple .debug_types sections"));
1695
79cc99f6
TT
1696 if (!per_bfd->partial_symtabs->psymtabs
1697 || !per_bfd->partial_symtabs->psymtabs_addrmap)
cd4fb1b2
SM
1698 return;
1699
1700 struct stat st;
1701 if (stat (objfile_name (objfile), &st) < 0)
1702 perror_with_name (objfile_name (objfile));
1703
c4973306
SM
1704 const char *index_suffix = (index_kind == dw_index_kind::DEBUG_NAMES
1705 ? INDEX5_SUFFIX : INDEX4_SUFFIX);
cd4fb1b2 1706
c4973306
SM
1707 index_wip_file objfile_index_wip (dir, basename, index_suffix);
1708 gdb::optional<index_wip_file> dwz_index_wip;
cd4fb1b2 1709
c4973306
SM
1710 if (dwz_basename != NULL)
1711 dwz_index_wip.emplace (dir, dwz_basename, index_suffix);
cd4fb1b2
SM
1712
1713 if (index_kind == dw_index_kind::DEBUG_NAMES)
1714 {
c4973306
SM
1715 index_wip_file str_wip_file (dir, basename, DEBUG_STR_SUFFIX);
1716
976ca316 1717 write_debug_names (per_objfile, objfile_index_wip.out_file.get (),
c4973306
SM
1718 str_wip_file.out_file.get ());
1719
1720 str_wip_file.finalize ();
cd4fb1b2
SM
1721 }
1722 else
976ca316 1723 write_gdbindex (per_objfile, objfile_index_wip.out_file.get (),
c4973306
SM
1724 (dwz_index_wip.has_value ()
1725 ? dwz_index_wip->out_file.get () : NULL));
cd4fb1b2 1726
c4973306 1727 objfile_index_wip.finalize ();
87d6a7aa 1728
c4973306
SM
1729 if (dwz_index_wip.has_value ())
1730 dwz_index_wip->finalize ();
cd4fb1b2
SM
1731}
1732
1733/* Implementation of the `save gdb-index' command.
1734
1735 Note that the .gdb_index file format used by this command is
1736 documented in the GDB manual. Any changes here must be documented
1737 there. */
1738
1739static void
1740save_gdb_index_command (const char *arg, int from_tty)
1741{
cd4fb1b2
SM
1742 const char dwarf5space[] = "-dwarf-5 ";
1743 dw_index_kind index_kind = dw_index_kind::GDB_INDEX;
1744
1745 if (!arg)
1746 arg = "";
1747
1748 arg = skip_spaces (arg);
1749 if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0)
1750 {
1751 index_kind = dw_index_kind::DEBUG_NAMES;
1752 arg += strlen (dwarf5space);
1753 arg = skip_spaces (arg);
1754 }
1755
1756 if (!*arg)
1757 error (_("usage: save gdb-index [-dwarf-5] DIRECTORY"));
1758
2030c079 1759 for (objfile *objfile : current_program_space->objfiles ())
aed57c53
TT
1760 {
1761 struct stat st;
cd4fb1b2 1762
aed57c53
TT
1763 /* If the objfile does not correspond to an actual file, skip it. */
1764 if (stat (objfile_name (objfile), &st) < 0)
1765 continue;
cd4fb1b2 1766
976ca316 1767 dwarf2_per_objfile *per_objfile = get_dwarf2_per_objfile (objfile);
cd4fb1b2 1768
976ca316 1769 if (per_objfile != NULL)
aed57c53 1770 {
a70b8144 1771 try
aed57c53
TT
1772 {
1773 const char *basename = lbasename (objfile_name (objfile));
976ca316 1774 const dwz_file *dwz = dwarf2_get_dwz_file (per_objfile->per_bfd);
c4973306
SM
1775 const char *dwz_basename = NULL;
1776
1777 if (dwz != NULL)
1778 dwz_basename = lbasename (dwz->filename ());
1779
976ca316
SM
1780 write_psymtabs_to_index (per_objfile, arg, basename, dwz_basename,
1781 index_kind);
aed57c53 1782 }
230d2906 1783 catch (const gdb_exception_error &except)
aed57c53
TT
1784 {
1785 exception_fprintf (gdb_stderr, except,
1786 _("Error while writing index for `%s': "),
1787 objfile_name (objfile));
1788 }
aed57c53 1789 }
cd4fb1b2 1790
aed57c53 1791 }
cd4fb1b2
SM
1792}
1793
6c265988 1794void _initialize_dwarf_index_write ();
cd4fb1b2
SM
1795void
1796_initialize_dwarf_index_write ()
1797{
1798 cmd_list_element *c = add_cmd ("gdb-index", class_files,
1799 save_gdb_index_command, _("\
1800Save a gdb-index file.\n\
1801Usage: save gdb-index [-dwarf-5] DIRECTORY\n\
1802\n\
1803No options create one file with .gdb-index extension for pre-DWARF-5\n\
1804compatible .gdb_index section. With -dwarf-5 creates two files with\n\
1805extension .debug_names and .debug_str for DWARF-5 .debug_names section."),
1806 &save_cmdlist);
1807 set_cmd_completer (c, filename_completer);
1808}