]> git.ipfire.org Git - thirdparty/binutils-gdb.git/blame - gdb/dwarf2/index-write.c
Automatic Copyright Year update after running gdb/copyright.py
[thirdparty/binutils-gdb.git] / gdb / dwarf2 / index-write.c
CommitLineData
cd4fb1b2
SM
1/* DWARF index writing support for GDB.
2
4a94e368 3 Copyright (C) 1994-2022 Free Software Foundation, Inc.
cd4fb1b2
SM
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20#include "defs.h"
21
82ca8957 22#include "dwarf2/index-write.h"
23baa4cc 23
cd4fb1b2
SM
24#include "addrmap.h"
25#include "cli/cli-decode.h"
268a13a5
TT
26#include "gdbsupport/byte-vector.h"
27#include "gdbsupport/filestuff.h"
28#include "gdbsupport/gdb_unlinker.h"
29#include "gdbsupport/pathstuff.h"
30#include "gdbsupport/scoped_fd.h"
cd4fb1b2 31#include "complaints.h"
82ca8957 32#include "dwarf2/index-common.h"
cd4fb1b2 33#include "dwarf2.h"
82ca8957 34#include "dwarf2/read.h"
9fda78b6 35#include "dwarf2/dwz.h"
cd4fb1b2
SM
36#include "gdb/gdb-index.h"
37#include "gdbcmd.h"
38#include "objfiles.h"
39#include "psympriv.h"
3b00ef10 40#include "ada-lang.h"
cd4fb1b2 41
4de283e4
TT
42#include <algorithm>
43#include <cmath>
159ed7d9 44#include <forward_list>
4de283e4
TT
45#include <set>
46#include <unordered_map>
47#include <unordered_set>
48
cd4fb1b2
SM
/* Checked wrapper around GDB_INDEX_SYMBOL_STATIC_SET_VALUE: asserts
   that VAL is 0 or 1 before storing it into PACKED.  */
#define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(packed, val) \
  do { \
    gdb_assert ((unsigned int) (val) <= 1); \
    GDB_INDEX_SYMBOL_STATIC_SET_VALUE((packed), (val)); \
  } while (0)

/* Checked wrapper around GDB_INDEX_SYMBOL_KIND_SET_VALUE: asserts that
   VAL lies between GDB_INDEX_SYMBOL_KIND_TYPE and
   GDB_INDEX_SYMBOL_KIND_OTHER (inclusive) before storing it into
   PACKED.  */
#define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(packed, val) \
  do { \
    gdb_assert ((val) >= GDB_INDEX_SYMBOL_KIND_TYPE \
                && (val) <= GDB_INDEX_SYMBOL_KIND_OTHER); \
    GDB_INDEX_SYMBOL_KIND_SET_VALUE((packed), (val)); \
  } while (0)

/* Checked wrapper around GDB_INDEX_CU_SET_VALUE: asserts that VAL has
   no bits set outside GDB_INDEX_CU_MASK, i.e. that the CU number fits
   in the bits allotted to it, before storing it into PACKED.  */
#define DW2_GDB_INDEX_CU_SET_VALUE(packed, val) \
  do { \
    gdb_assert (((val) & ~GDB_INDEX_CU_MASK) == 0); \
    GDB_INDEX_CU_SET_VALUE((packed), (val)); \
  } while (0)
70
71/* The "save gdb-index" command. */
72
73/* Write SIZE bytes from the buffer pointed to by DATA to FILE, with
74 error checking. */
75
76static void
77file_write (FILE *file, const void *data, size_t size)
78{
79 if (fwrite (data, 1, size, file) != size)
80 error (_("couldn't data write to file"));
81}
82
/* Write every element of VEC to FILE as raw bytes, with error
   checking.  An empty vector results in no write at all.  */

template<typename Elem, typename Alloc>
static void
file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
{
  if (vec.empty ())
    return;

  const size_t byte_count = vec.size () * sizeof (Elem);
  file_write (file, vec.data (), byte_count);
}
92
93/* In-memory buffer to prepare data to be written later to a file. */
94class data_buf
95{
96public:
42c2c694
TT
97 /* Copy ARRAY to the end of the buffer. */
98 void append_array (gdb::array_view<const gdb_byte> array)
cd4fb1b2 99 {
42c2c694 100 std::copy (array.begin (), array.end (), grow (array.size ()));
cd4fb1b2
SM
101 }
102
103 /* Copy CSTR (a zero-terminated string) to the end of buffer. The
104 terminating zero is appended too. */
105 void append_cstr0 (const char *cstr)
106 {
107 const size_t size = strlen (cstr) + 1;
108 std::copy (cstr, cstr + size, grow (size));
109 }
110
111 /* Store INPUT as ULEB128 to the end of buffer. */
112 void append_unsigned_leb128 (ULONGEST input)
113 {
114 for (;;)
115 {
116 gdb_byte output = input & 0x7f;
117 input >>= 7;
118 if (input)
119 output |= 0x80;
42c2c694 120 m_vec.push_back (output);
cd4fb1b2
SM
121 if (input == 0)
122 break;
123 }
124 }
125
126 /* Accept a host-format integer in VAL and append it to the buffer
127 as a target-format integer which is LEN bytes long. */
128 void append_uint (size_t len, bfd_endian byte_order, ULONGEST val)
129 {
130 ::store_unsigned_integer (grow (len), len, byte_order, val);
131 }
132
42c2c694
TT
133 /* Copy VALUE to the end of the buffer, little-endian. */
134 void append_offset (offset_type value)
135 {
136 append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value);
137 }
138
cd4fb1b2
SM
139 /* Return the size of the buffer. */
140 size_t size () const
141 {
142 return m_vec.size ();
143 }
144
145 /* Return true iff the buffer is empty. */
146 bool empty () const
147 {
148 return m_vec.empty ();
149 }
150
151 /* Write the buffer to FILE. */
152 void file_write (FILE *file) const
153 {
154 ::file_write (file, m_vec);
155 }
156
157private:
158 /* Grow SIZE bytes at the end of the buffer. Returns a pointer to
159 the start of the new block. */
160 gdb_byte *grow (size_t size)
161 {
162 m_vec.resize (m_vec.size () + size);
b4be9bfd 163 return &*(m_vec.end () - size);
cd4fb1b2
SM
164 }
165
166 gdb::byte_vector m_vec;
167};
168
169/* An entry in the symbol table. */
170struct symtab_index_entry
171{
172 /* The name of the symbol. */
173 const char *name;
174 /* The offset of the name in the constant pool. */
175 offset_type index_offset;
176 /* A sorted vector of the indices of all the CUs that hold an object
177 of this name. */
178 std::vector<offset_type> cu_indices;
179};
180
181/* The symbol table. This is a power-of-2-sized hash table. */
182struct mapped_symtab
183{
184 mapped_symtab ()
185 {
186 data.resize (1024);
187 }
188
189 offset_type n_elements = 0;
190 std::vector<symtab_index_entry> data;
7ab96794
TV
191
192 /* Temporary storage for Ada names. */
193 auto_obstack m_string_obstack;
cd4fb1b2
SM
194};
195
196/* Find a slot in SYMTAB for the symbol NAME. Returns a reference to
197 the slot.
198
199 Function is used only during write_hash_table so no index format backward
200 compatibility is needed. */
201
202static symtab_index_entry &
203find_slot (struct mapped_symtab *symtab, const char *name)
204{
205 offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
206
207 index = hash & (symtab->data.size () - 1);
208 step = ((hash * 17) & (symtab->data.size () - 1)) | 1;
209
210 for (;;)
211 {
212 if (symtab->data[index].name == NULL
213 || strcmp (name, symtab->data[index].name) == 0)
214 return symtab->data[index];
215 index = (index + step) & (symtab->data.size () - 1);
216 }
217}
218
219/* Expand SYMTAB's hash table. */
220
221static void
222hash_expand (struct mapped_symtab *symtab)
223{
224 auto old_entries = std::move (symtab->data);
225
226 symtab->data.clear ();
227 symtab->data.resize (old_entries.size () * 2);
228
229 for (auto &it : old_entries)
230 if (it.name != NULL)
231 {
232 auto &ref = find_slot (symtab, it.name);
233 ref = std::move (it);
234 }
235}
236
237/* Add an entry to SYMTAB. NAME is the name of the symbol.
238 CU_INDEX is the index of the CU in which the symbol appears.
239 IS_STATIC is one if the symbol is static, otherwise zero (global). */
240
241static void
242add_index_entry (struct mapped_symtab *symtab, const char *name,
243 int is_static, gdb_index_symbol_kind kind,
244 offset_type cu_index)
245{
246 offset_type cu_index_and_attrs;
247
248 ++symtab->n_elements;
249 if (4 * symtab->n_elements / 3 >= symtab->data.size ())
250 hash_expand (symtab);
251
252 symtab_index_entry &slot = find_slot (symtab, name);
253 if (slot.name == NULL)
254 {
255 slot.name = name;
256 /* index_offset is set later. */
257 }
258
259 cu_index_and_attrs = 0;
260 DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index);
261 DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static);
262 DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind);
263
264 /* We don't want to record an index value twice as we want to avoid the
265 duplication.
266 We process all global symbols and then all static symbols
267 (which would allow us to avoid the duplication by only having to check
268 the last entry pushed), but a symbol could have multiple kinds in one CU.
269 To keep things simple we don't worry about the duplication here and
85102364 270 sort and uniquify the list after we've processed all symbols. */
cd4fb1b2
SM
271 slot.cu_indices.push_back (cu_index_and_attrs);
272}
273
274/* Sort and remove duplicates of all symbols' cu_indices lists. */
275
276static void
277uniquify_cu_indices (struct mapped_symtab *symtab)
278{
279 for (auto &entry : symtab->data)
280 {
281 if (entry.name != NULL && !entry.cu_indices.empty ())
282 {
283 auto &cu_indices = entry.cu_indices;
284 std::sort (cu_indices.begin (), cu_indices.end ());
285 auto from = std::unique (cu_indices.begin (), cu_indices.end ());
286 cu_indices.erase (from, cu_indices.end ());
287 }
288 }
289}
290
/* Wrapper that lets a 'const char *' serve as a container key.  The
   object stores nothing but the pointer, yet equality compares the
   characters of the two strings rather than the pointer values.  */
class c_str_view
{
public:
  c_str_view (const char *cstr)
    : m_cstr (cstr)
  {}

  /* True when both views refer to strings with equal contents.  */
  bool operator== (const c_str_view &other) const
  {
    const int order = strcmp (m_cstr, other.m_cstr);
    return order == 0;
  }

  /* Return the underlying C string.  Note, the returned string is
     only a reference with lifetime of this object.  */
  const char *c_str () const
  {
    return m_cstr;
  }

private:
  friend class c_str_view_hasher;

  /* The wrapped pointer; not owned.  */
  const char *const m_cstr;
};
317
318/* A std::unordered_map::hasher for c_str_view that uses the right
319 hash function for strings in a mapped index. */
320class c_str_view_hasher
321{
322public:
323 size_t operator () (const c_str_view &x) const
324 {
325 return mapped_index_string_hash (INT_MAX, x.m_cstr);
326 }
327};
328
/* Hash functor for std::vector<T> keys in std::unordered_map.  The
   hash is computed by iterative_hash over the raw bytes of the
   vector's elements.  */
template<typename T>
class vector_hasher
{
public:
  size_t operator () (const std::vector<T> &key) const
  {
    const size_t byte_count = sizeof (T) * key.size ();
    return iterative_hash (key.data (), byte_count, 0);
  }
};
340
341/* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with
342 constant pool entries going into the data buffer CPOOL. */
343
344static void
345write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
346{
347 {
348 /* Elements are sorted vectors of the indices of all the CUs that
349 hold an object of this name. */
350 std::unordered_map<std::vector<offset_type>, offset_type,
351 vector_hasher<offset_type>>
352 symbol_hash_table;
353
354 /* We add all the index vectors to the constant pool first, to
355 ensure alignment is ok. */
356 for (symtab_index_entry &entry : symtab->data)
357 {
358 if (entry.name == NULL)
359 continue;
360 gdb_assert (entry.index_offset == 0);
361
362 /* Finding before inserting is faster than always trying to
363 insert, because inserting always allocates a node, does the
364 lookup, and then destroys the new node if another node
365 already had the same key. C++17 try_emplace will avoid
366 this. */
367 const auto found
368 = symbol_hash_table.find (entry.cu_indices);
369 if (found != symbol_hash_table.end ())
370 {
371 entry.index_offset = found->second;
372 continue;
373 }
374
375 symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
376 entry.index_offset = cpool.size ();
42c2c694 377 cpool.append_offset (entry.cu_indices.size ());
cd4fb1b2 378 for (const auto index : entry.cu_indices)
42c2c694 379 cpool.append_offset (index);
cd4fb1b2
SM
380 }
381 }
382
383 /* Now write out the hash table. */
384 std::unordered_map<c_str_view, offset_type, c_str_view_hasher> str_table;
385 for (const auto &entry : symtab->data)
386 {
387 offset_type str_off, vec_off;
388
389 if (entry.name != NULL)
390 {
391 const auto insertpair = str_table.emplace (entry.name, cpool.size ());
392 if (insertpair.second)
393 cpool.append_cstr0 (entry.name);
394 str_off = insertpair.first->second;
395 vec_off = entry.index_offset;
396 }
397 else
398 {
399 /* While 0 is a valid constant pool index, it is not valid
400 to have 0 for both offsets. */
401 str_off = 0;
402 vec_off = 0;
403 }
404
42c2c694
TT
405 output.append_offset (str_off);
406 output.append_offset (vec_off);
cd4fb1b2
SM
407 }
408}
409
edfe0a0c 410typedef std::unordered_map<partial_symtab *, unsigned int> psym_index_map;
cd4fb1b2
SM
411
412/* Helper struct for building the address table. */
413struct addrmap_index_data
414{
415 addrmap_index_data (data_buf &addr_vec_, psym_index_map &cu_index_htab_)
416 : addr_vec (addr_vec_), cu_index_htab (cu_index_htab_)
417 {}
418
cd4fb1b2
SM
419 data_buf &addr_vec;
420 psym_index_map &cu_index_htab;
421
50a6759f
TT
422 int operator() (CORE_ADDR start_addr, void *obj);
423
870c2204 424 /* True if the previous_* fields are valid.
cd4fb1b2
SM
425 We can't write an entry until we see the next entry (since it is only then
426 that we know the end of the entry). */
870c2204 427 bool previous_valid = false;
cd4fb1b2 428 /* Index of the CU in the table of all CUs in the index file. */
50a6759f 429 unsigned int previous_cu_index = 0;
cd4fb1b2 430 /* Start address of the CU. */
50a6759f 431 CORE_ADDR previous_cu_start = 0;
cd4fb1b2
SM
432};
433
434/* Write an address entry to ADDR_VEC. */
435
436static void
79cc99f6 437add_address_entry (data_buf &addr_vec,
cd4fb1b2
SM
438 CORE_ADDR start, CORE_ADDR end, unsigned int cu_index)
439{
79748972
TT
440 addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
441 addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
42c2c694 442 addr_vec.append_offset (cu_index);
cd4fb1b2
SM
443}
444
445/* Worker function for traversing an addrmap to build the address table. */
446
50a6759f
TT
447int
448addrmap_index_data::operator() (CORE_ADDR start_addr, void *obj)
cd4fb1b2 449{
edfe0a0c 450 partial_symtab *pst = (partial_symtab *) obj;
cd4fb1b2 451
50a6759f
TT
452 if (previous_valid)
453 add_address_entry (addr_vec,
454 previous_cu_start, start_addr,
455 previous_cu_index);
cd4fb1b2 456
50a6759f 457 previous_cu_start = start_addr;
cd4fb1b2
SM
458 if (pst != NULL)
459 {
50a6759f
TT
460 const auto it = cu_index_htab.find (pst);
461 gdb_assert (it != cu_index_htab.cend ());
462 previous_cu_index = it->second;
870c2204 463 previous_valid = true;
cd4fb1b2
SM
464 }
465 else
870c2204 466 previous_valid = false;
cd4fb1b2
SM
467
468 return 0;
469}
470
79cc99f6 471/* Write PER_BFD's address map to ADDR_VEC.
cd4fb1b2
SM
472 CU_INDEX_HTAB is used to map addrmap entries to their CU indices
473 in the index file. */
474
475static void
79cc99f6 476write_address_map (dwarf2_per_bfd *per_bfd, data_buf &addr_vec,
cd4fb1b2
SM
477 psym_index_map &cu_index_htab)
478{
479 struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab);
480
79cc99f6 481 addrmap_foreach (per_bfd->partial_symtabs->psymtabs_addrmap,
50a6759f 482 addrmap_index_data);
cd4fb1b2
SM
483
484 /* It's highly unlikely the last entry (end address = 0xff...ff)
485 is valid, but we should still handle it.
486 The end address is recorded as the start of the next region, but that
487 doesn't work here. To cope we pass 0xff...ff, this is a rare situation
488 anyway. */
489 if (addrmap_index_data.previous_valid)
79cc99f6 490 add_address_entry (addr_vec,
cd4fb1b2
SM
491 addrmap_index_data.previous_cu_start, (CORE_ADDR) -1,
492 addrmap_index_data.previous_cu_index);
493}
494
495/* Return the symbol kind of PSYM. */
496
497static gdb_index_symbol_kind
498symbol_kind (struct partial_symbol *psym)
499{
8a6d4234
TT
500 domain_enum domain = psym->domain;
501 enum address_class aclass = psym->aclass;
cd4fb1b2
SM
502
503 switch (domain)
504 {
505 case VAR_DOMAIN:
506 switch (aclass)
507 {
508 case LOC_BLOCK:
509 return GDB_INDEX_SYMBOL_KIND_FUNCTION;
510 case LOC_TYPEDEF:
511 return GDB_INDEX_SYMBOL_KIND_TYPE;
512 case LOC_COMPUTED:
513 case LOC_CONST_BYTES:
514 case LOC_OPTIMIZED_OUT:
515 case LOC_STATIC:
516 return GDB_INDEX_SYMBOL_KIND_VARIABLE;
517 case LOC_CONST:
518 /* Note: It's currently impossible to recognize psyms as enum values
519 short of reading the type info. For now punt. */
520 return GDB_INDEX_SYMBOL_KIND_VARIABLE;
521 default:
522 /* There are other LOC_FOO values that one might want to classify
523 as variables, but dwarf2read.c doesn't currently use them. */
524 return GDB_INDEX_SYMBOL_KIND_OTHER;
525 }
526 case STRUCT_DOMAIN:
527 return GDB_INDEX_SYMBOL_KIND_TYPE;
528 default:
529 return GDB_INDEX_SYMBOL_KIND_OTHER;
530 }
531}
532
533/* Add a list of partial symbols to SYMTAB. */
534
535static void
536write_psymbols (struct mapped_symtab *symtab,
537 std::unordered_set<partial_symbol *> &psyms_seen,
932539d7 538 const std::vector<partial_symbol *> &symbols,
cd4fb1b2
SM
539 offset_type cu_index,
540 int is_static)
541{
932539d7 542 for (partial_symbol *psym : symbols)
cd4fb1b2 543 {
7ab96794 544 const char *name = psym->ginfo.search_name ();
cd4fb1b2 545
c1b5c1eb 546 if (psym->ginfo.language () == language_ada)
7ab96794
TV
547 {
548 /* We want to ensure that the Ada main function's name appears
549 verbatim in the index. However, this name will be of the
550 form "_ada_mumble", and will be rewritten by ada_decode.
551 So, recognize it specially here and add it to the index by
552 hand. */
553 if (strcmp (main_name (), name) == 0)
554 {
555 gdb_index_symbol_kind kind = symbol_kind (psym);
556
557 add_index_entry (symtab, name, is_static, kind, cu_index);
558 }
559
560 /* In order for the index to work when read back into gdb, it
561 has to supply a funny form of the name: it should be the
562 encoded name, with any suffixes stripped. Using the
563 ordinary encoded name will not work properly with the
564 searching logic in find_name_components_bounds; nor will
565 using the decoded name. Furthermore, an Ada "verbatim"
566 name (of the form "<MumBle>") must be entered without the
567 angle brackets. Note that the current index is unusual,
568 see PR symtab/24820 for details. */
569 std::string decoded = ada_decode (name);
570 if (decoded[0] == '<')
571 name = (char *) obstack_copy0 (&symtab->m_string_obstack,
572 decoded.c_str () + 1,
573 decoded.length () - 2);
574 else
575 name = obstack_strdup (&symtab->m_string_obstack,
576 ada_encode (decoded.c_str ()));
577 }
cd4fb1b2
SM
578
579 /* Only add a given psymbol once. */
580 if (psyms_seen.insert (psym).second)
581 {
582 gdb_index_symbol_kind kind = symbol_kind (psym);
583
7ab96794 584 add_index_entry (symtab, name, is_static, kind, cu_index);
cd4fb1b2
SM
585 }
586 }
587}
588
cd4fb1b2
SM
589/* Recurse into all "included" dependencies and count their symbols as
590 if they appeared in this psymtab. */
591
592static void
edfe0a0c 593recursively_count_psymbols (partial_symtab *psymtab,
cd4fb1b2
SM
594 size_t &psyms_seen)
595{
596 for (int i = 0; i < psymtab->number_of_dependencies; ++i)
597 if (psymtab->dependencies[i]->user != NULL)
edfe0a0c 598 recursively_count_psymbols (psymtab->dependencies[i],
cd4fb1b2
SM
599 psyms_seen);
600
932539d7
TT
601 psyms_seen += psymtab->global_psymbols.size ();
602 psyms_seen += psymtab->static_psymbols.size ();
cd4fb1b2
SM
603}
604
605/* Recurse into all "included" dependencies and write their symbols as
606 if they appeared in this psymtab. */
607
608static void
609recursively_write_psymbols (struct objfile *objfile,
edfe0a0c 610 partial_symtab *psymtab,
cd4fb1b2
SM
611 struct mapped_symtab *symtab,
612 std::unordered_set<partial_symbol *> &psyms_seen,
613 offset_type cu_index)
614{
615 int i;
616
617 for (i = 0; i < psymtab->number_of_dependencies; ++i)
618 if (psymtab->dependencies[i]->user != NULL)
891813be 619 recursively_write_psymbols (objfile,
edfe0a0c 620 psymtab->dependencies[i],
cd4fb1b2
SM
621 symtab, psyms_seen, cu_index);
622
932539d7
TT
623 write_psymbols (symtab, psyms_seen,
624 psymtab->global_psymbols, cu_index,
cd4fb1b2 625 0);
932539d7
TT
626 write_psymbols (symtab, psyms_seen,
627 psymtab->static_psymbols, cu_index,
cd4fb1b2
SM
628 1);
629}
630
631/* DWARF-5 .debug_names builder. */
632class debug_names
633{
634public:
976ca316 635 debug_names (dwarf2_per_objfile *per_objfile, bool is_dwarf64,
cd4fb1b2
SM
636 bfd_endian dwarf5_byte_order)
637 : m_dwarf5_byte_order (dwarf5_byte_order),
638 m_dwarf32 (dwarf5_byte_order),
639 m_dwarf64 (dwarf5_byte_order),
640 m_dwarf (is_dwarf64
641 ? static_cast<dwarf &> (m_dwarf64)
642 : static_cast<dwarf &> (m_dwarf32)),
643 m_name_table_string_offs (m_dwarf.name_table_string_offs),
644 m_name_table_entry_offs (m_dwarf.name_table_entry_offs),
976ca316 645 m_debugstrlookup (per_objfile)
cd4fb1b2
SM
646 {}
647
648 int dwarf5_offset_size () const
649 {
650 const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64;
651 return dwarf5_is_dwarf64 ? 8 : 4;
652 }
653
/* Tells whether a symbol comes from a DW_TAG_compile_unit (cu) or
   from a DW_TAG_type_unit (tu).  */
enum class unit_kind
{
  cu,
  tu
};
656
657 /* Insert one symbol. */
658 void insert (const partial_symbol *psym, int cu_index, bool is_static,
659 unit_kind kind)
660 {
661 const int dwarf_tag = psymbol_tag (psym);
662 if (dwarf_tag == 0)
663 return;
c9d95fa3 664 const char *name = psym->ginfo.search_name ();
3b00ef10 665
c1b5c1eb 666 if (psym->ginfo.language () == language_ada)
3b00ef10
TT
667 {
668 /* We want to ensure that the Ada main function's name appears
669 verbatim in the index. However, this name will be of the
670 form "_ada_mumble", and will be rewritten by ada_decode.
671 So, recognize it specially here and add it to the index by
672 hand. */
673 if (strcmp (main_name (), name) == 0)
674 {
675 const auto insertpair
676 = m_name_to_value_set.emplace (c_str_view (name),
677 std::set<symbol_value> ());
678 std::set<symbol_value> &value_set = insertpair.first->second;
679 value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static,
680 kind));
681 }
682
683 /* In order for the index to work when read back into gdb, it
684 has to supply a funny form of the name: it should be the
685 encoded name, with any suffixes stripped. Using the
686 ordinary encoded name will not work properly with the
687 searching logic in find_name_components_bounds; nor will
688 using the decoded name. Furthermore, an Ada "verbatim"
689 name (of the form "<MumBle>") must be entered without the
690 angle brackets. Note that the current index is unusual,
691 see PR symtab/24820 for details. */
f945dedf 692 std::string decoded = ada_decode (name);
3b00ef10
TT
693 if (decoded[0] == '<')
694 name = (char *) obstack_copy0 (&m_string_obstack,
f945dedf
CB
695 decoded.c_str () + 1,
696 decoded.length () - 2);
3b00ef10 697 else
f945dedf
CB
698 name = obstack_strdup (&m_string_obstack,
699 ada_encode (decoded.c_str ()));
3b00ef10
TT
700 }
701
cd4fb1b2
SM
702 const auto insertpair
703 = m_name_to_value_set.emplace (c_str_view (name),
704 std::set<symbol_value> ());
705 std::set<symbol_value> &value_set = insertpair.first->second;
706 value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static, kind));
707 }
708
709 /* Build all the tables. All symbols must be already inserted.
710 This function does not call file_write, caller has to do it
711 afterwards. */
712 void build ()
713 {
714 /* Verify the build method has not be called twice. */
715 gdb_assert (m_abbrev_table.empty ());
716 const size_t name_count = m_name_to_value_set.size ();
717 m_bucket_table.resize
718 (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3))));
719 m_hash_table.reserve (name_count);
720 m_name_table_string_offs.reserve (name_count);
721 m_name_table_entry_offs.reserve (name_count);
722
723 /* Map each hash of symbol to its name and value. */
724 struct hash_it_pair
725 {
726 uint32_t hash;
727 decltype (m_name_to_value_set)::const_iterator it;
728 };
729 std::vector<std::forward_list<hash_it_pair>> bucket_hash;
730 bucket_hash.resize (m_bucket_table.size ());
731 for (decltype (m_name_to_value_set)::const_iterator it
732 = m_name_to_value_set.cbegin ();
733 it != m_name_to_value_set.cend ();
734 ++it)
735 {
736 const char *const name = it->first.c_str ();
737 const uint32_t hash = dwarf5_djb_hash (name);
738 hash_it_pair hashitpair;
739 hashitpair.hash = hash;
740 hashitpair.it = it;
741 auto &slot = bucket_hash[hash % bucket_hash.size()];
742 slot.push_front (std::move (hashitpair));
743 }
744 for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix)
745 {
746 const std::forward_list<hash_it_pair> &hashitlist
747 = bucket_hash[bucket_ix];
748 if (hashitlist.empty ())
749 continue;
750 uint32_t &bucket_slot = m_bucket_table[bucket_ix];
751 /* The hashes array is indexed starting at 1. */
752 store_unsigned_integer (reinterpret_cast<gdb_byte *> (&bucket_slot),
753 sizeof (bucket_slot), m_dwarf5_byte_order,
754 m_hash_table.size () + 1);
755 for (const hash_it_pair &hashitpair : hashitlist)
756 {
757 m_hash_table.push_back (0);
758 store_unsigned_integer (reinterpret_cast<gdb_byte *>
759 (&m_hash_table.back ()),
760 sizeof (m_hash_table.back ()),
761 m_dwarf5_byte_order, hashitpair.hash);
762 const c_str_view &name = hashitpair.it->first;
763 const std::set<symbol_value> &value_set = hashitpair.it->second;
764 m_name_table_string_offs.push_back_reorder
765 (m_debugstrlookup.lookup (name.c_str ()));
766 m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ());
767 gdb_assert (!value_set.empty ());
768 for (const symbol_value &value : value_set)
769 {
770 int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag,
771 value.is_static,
772 value.kind)];
773 if (idx == 0)
774 {
775 idx = m_idx_next++;
776 m_abbrev_table.append_unsigned_leb128 (idx);
777 m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag);
778 m_abbrev_table.append_unsigned_leb128
779 (value.kind == unit_kind::cu ? DW_IDX_compile_unit
780 : DW_IDX_type_unit);
781 m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata);
782 m_abbrev_table.append_unsigned_leb128 (value.is_static
783 ? DW_IDX_GNU_internal
784 : DW_IDX_GNU_external);
785 m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present);
786
787 /* Terminate attributes list. */
788 m_abbrev_table.append_unsigned_leb128 (0);
789 m_abbrev_table.append_unsigned_leb128 (0);
790 }
791
792 m_entry_pool.append_unsigned_leb128 (idx);
793 m_entry_pool.append_unsigned_leb128 (value.cu_index);
794 }
795
796 /* Terminate the list of CUs. */
797 m_entry_pool.append_unsigned_leb128 (0);
798 }
799 }
800 gdb_assert (m_hash_table.size () == name_count);
801
802 /* Terminate tags list. */
803 m_abbrev_table.append_unsigned_leb128 (0);
804 }
805
806 /* Return .debug_names bucket count. This must be called only after
807 calling the build method. */
808 uint32_t bucket_count () const
809 {
810 /* Verify the build method has been already called. */
811 gdb_assert (!m_abbrev_table.empty ());
812 const uint32_t retval = m_bucket_table.size ();
813
814 /* Check for overflow. */
815 gdb_assert (retval == m_bucket_table.size ());
816 return retval;
817 }
818
819 /* Return .debug_names names count. This must be called only after
820 calling the build method. */
821 uint32_t name_count () const
822 {
823 /* Verify the build method has been already called. */
824 gdb_assert (!m_abbrev_table.empty ());
825 const uint32_t retval = m_hash_table.size ();
826
827 /* Check for overflow. */
828 gdb_assert (retval == m_hash_table.size ());
829 return retval;
830 }
831
832 /* Return number of bytes of .debug_names abbreviation table. This
833 must be called only after calling the build method. */
834 uint32_t abbrev_table_bytes () const
835 {
836 gdb_assert (!m_abbrev_table.empty ());
837 return m_abbrev_table.size ();
838 }
839
840 /* Recurse into all "included" dependencies and store their symbols
841 as if they appeared in this psymtab. */
842 void recursively_write_psymbols
843 (struct objfile *objfile,
edfe0a0c 844 partial_symtab *psymtab,
cd4fb1b2
SM
845 std::unordered_set<partial_symbol *> &psyms_seen,
846 int cu_index)
847 {
848 for (int i = 0; i < psymtab->number_of_dependencies; ++i)
849 if (psymtab->dependencies[i]->user != NULL)
891813be 850 recursively_write_psymbols
edfe0a0c 851 (objfile, psymtab->dependencies[i], psyms_seen, cu_index);
cd4fb1b2 852
932539d7
TT
853 write_psymbols (psyms_seen, psymtab->global_psymbols,
854 cu_index, false, unit_kind::cu);
855 write_psymbols (psyms_seen, psymtab->static_psymbols,
856 cu_index, true, unit_kind::cu);
cd4fb1b2
SM
857 }
858
859 /* Return number of bytes the .debug_names section will have. This
860 must be called only after calling the build method. */
861 size_t bytes () const
862 {
863 /* Verify the build method has been already called. */
864 gdb_assert (!m_abbrev_table.empty ());
865 size_t expected_bytes = 0;
866 expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]);
867 expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]);
868 expected_bytes += m_name_table_string_offs.bytes ();
869 expected_bytes += m_name_table_entry_offs.bytes ();
870 expected_bytes += m_abbrev_table.size ();
871 expected_bytes += m_entry_pool.size ();
872 return expected_bytes;
873 }
874
875 /* Write .debug_names to FILE_NAMES and .debug_str addition to
876 FILE_STR. This must be called only after calling the build
877 method. */
878 void file_write (FILE *file_names, FILE *file_str) const
879 {
880 /* Verify the build method has been already called. */
881 gdb_assert (!m_abbrev_table.empty ());
882 ::file_write (file_names, m_bucket_table);
883 ::file_write (file_names, m_hash_table);
884 m_name_table_string_offs.file_write (file_names);
885 m_name_table_entry_offs.file_write (file_names);
886 m_abbrev_table.file_write (file_names);
887 m_entry_pool.file_write (file_names);
888 m_debugstrlookup.file_write (file_str);
889 }
890
cd4fb1b2
SM
891private:
892
893 /* Storage for symbol names mapping them to their .debug_str section
894 offsets. */
895 class debug_str_lookup
896 {
897 public:
898
30baf67b 899 /* Object constructor to be called for current DWARF2_PER_OBJFILE.
cd4fb1b2 900 All .debug_str section strings are automatically stored. */
976ca316
SM
901 debug_str_lookup (dwarf2_per_objfile *per_objfile)
902 : m_abfd (per_objfile->objfile->obfd),
903 m_per_objfile (per_objfile)
cd4fb1b2 904 {
976ca316
SM
905 per_objfile->per_bfd->str.read (per_objfile->objfile);
906 if (per_objfile->per_bfd->str.buffer == NULL)
cd4fb1b2 907 return;
976ca316
SM
908 for (const gdb_byte *data = per_objfile->per_bfd->str.buffer;
909 data < (per_objfile->per_bfd->str.buffer
910 + per_objfile->per_bfd->str.size);)
cd4fb1b2
SM
911 {
912 const char *const s = reinterpret_cast<const char *> (data);
913 const auto insertpair
914 = m_str_table.emplace (c_str_view (s),
976ca316 915 data - per_objfile->per_bfd->str.buffer);
cd4fb1b2 916 if (!insertpair.second)
b98664d3 917 complaint (_("Duplicate string \"%s\" in "
cd4fb1b2
SM
918 ".debug_str section [in module %s]"),
919 s, bfd_get_filename (m_abfd));
920 data += strlen (s) + 1;
921 }
922 }
923
924 /* Return offset of symbol name S in the .debug_str section. Add
925 such symbol to the section's end if it does not exist there
926 yet. */
927 size_t lookup (const char *s)
928 {
929 const auto it = m_str_table.find (c_str_view (s));
930 if (it != m_str_table.end ())
931 return it->second;
976ca316 932 const size_t offset = (m_per_objfile->per_bfd->str.size
cd4fb1b2
SM
933 + m_str_add_buf.size ());
934 m_str_table.emplace (c_str_view (s), offset);
935 m_str_add_buf.append_cstr0 (s);
936 return offset;
937 }
938
939 /* Append the end of the .debug_str section to FILE. */
940 void file_write (FILE *file) const
941 {
942 m_str_add_buf.file_write (file);
943 }
944
945 private:
946 std::unordered_map<c_str_view, size_t, c_str_view_hasher> m_str_table;
947 bfd *const m_abfd;
976ca316 948 dwarf2_per_objfile *m_per_objfile;
cd4fb1b2
SM
949
950 /* Data to add at the end of .debug_str for new needed symbol names. */
951 data_buf m_str_add_buf;
952 };
953
954 /* Container to map used DWARF tags to their .debug_names abbreviation
955 tags. */
956 class index_key
957 {
958 public:
959 index_key (int dwarf_tag_, bool is_static_, unit_kind kind_)
960 : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_)
961 {
962 }
963
964 bool
965 operator== (const index_key &other) const
966 {
967 return (dwarf_tag == other.dwarf_tag && is_static == other.is_static
968 && kind == other.kind);
969 }
970
971 const int dwarf_tag;
972 const bool is_static;
973 const unit_kind kind;
974 };
975
976 /* Provide std::unordered_map::hasher for index_key. */
977 class index_key_hasher
978 {
979 public:
980 size_t
981 operator () (const index_key &key) const
982 {
983 return (std::hash<int>() (key.dwarf_tag) << 1) | key.is_static;
984 }
985 };
986
987 /* Parameters of one symbol entry. */
988 class symbol_value
989 {
990 public:
991 const int dwarf_tag, cu_index;
992 const bool is_static;
993 const unit_kind kind;
994
995 symbol_value (int dwarf_tag_, int cu_index_, bool is_static_,
996 unit_kind kind_)
997 : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_),
dda83cd7 998 kind (kind_)
cd4fb1b2
SM
999 {}
1000
1001 bool
1002 operator< (const symbol_value &other) const
1003 {
1004#define X(n) \
1005 do \
1006 { \
1007 if (n < other.n) \
1008 return true; \
1009 if (n > other.n) \
1010 return false; \
1011 } \
1012 while (0)
1013 X (dwarf_tag);
1014 X (is_static);
1015 X (kind);
1016 X (cu_index);
1017#undef X
1018 return false;
1019 }
1020 };
1021
1022 /* Abstract base class to unify DWARF-32 and DWARF-64 name table
1023 output. */
1024 class offset_vec
1025 {
1026 protected:
1027 const bfd_endian dwarf5_byte_order;
1028 public:
1029 explicit offset_vec (bfd_endian dwarf5_byte_order_)
1030 : dwarf5_byte_order (dwarf5_byte_order_)
1031 {}
1032
1033 /* Call std::vector::reserve for NELEM elements. */
1034 virtual void reserve (size_t nelem) = 0;
1035
1036 /* Call std::vector::push_back with store_unsigned_integer byte
1037 reordering for ELEM. */
1038 virtual void push_back_reorder (size_t elem) = 0;
1039
1040 /* Return expected output size in bytes. */
1041 virtual size_t bytes () const = 0;
1042
1043 /* Write name table to FILE. */
1044 virtual void file_write (FILE *file) const = 0;
1045 };
1046
1047 /* Template to unify DWARF-32 and DWARF-64 output. */
1048 template<typename OffsetSize>
1049 class offset_vec_tmpl : public offset_vec
1050 {
1051 public:
1052 explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_)
1053 : offset_vec (dwarf5_byte_order_)
1054 {}
1055
1056 /* Implement offset_vec::reserve. */
1057 void reserve (size_t nelem) override
1058 {
1059 m_vec.reserve (nelem);
1060 }
1061
1062 /* Implement offset_vec::push_back_reorder. */
1063 void push_back_reorder (size_t elem) override
1064 {
1065 m_vec.push_back (elem);
1066 /* Check for overflow. */
1067 gdb_assert (m_vec.back () == elem);
1068 store_unsigned_integer (reinterpret_cast<gdb_byte *> (&m_vec.back ()),
1069 sizeof (m_vec.back ()), dwarf5_byte_order, elem);
1070 }
1071
1072 /* Implement offset_vec::bytes. */
1073 size_t bytes () const override
1074 {
1075 return m_vec.size () * sizeof (m_vec[0]);
1076 }
1077
1078 /* Implement offset_vec::file_write. */
1079 void file_write (FILE *file) const override
1080 {
1081 ::file_write (file, m_vec);
1082 }
1083
1084 private:
1085 std::vector<OffsetSize> m_vec;
1086 };
1087
1088 /* Base class to unify DWARF-32 and DWARF-64 .debug_names output
1089 respecting name table width. */
1090 class dwarf
1091 {
1092 public:
1093 offset_vec &name_table_string_offs, &name_table_entry_offs;
1094
1095 dwarf (offset_vec &name_table_string_offs_,
1096 offset_vec &name_table_entry_offs_)
1097 : name_table_string_offs (name_table_string_offs_),
1098 name_table_entry_offs (name_table_entry_offs_)
1099 {
1100 }
1101 };
1102
1103 /* Template to unify DWARF-32 and DWARF-64 .debug_names output
1104 respecting name table width. */
1105 template<typename OffsetSize>
1106 class dwarf_tmpl : public dwarf
1107 {
1108 public:
1109 explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_)
1110 : dwarf (m_name_table_string_offs, m_name_table_entry_offs),
1111 m_name_table_string_offs (dwarf5_byte_order_),
1112 m_name_table_entry_offs (dwarf5_byte_order_)
1113 {}
1114
1115 private:
1116 offset_vec_tmpl<OffsetSize> m_name_table_string_offs;
1117 offset_vec_tmpl<OffsetSize> m_name_table_entry_offs;
1118 };
1119
1120 /* Try to reconstruct original DWARF tag for given partial_symbol.
1121 This function is not DWARF-5 compliant but it is sufficient for
1122 GDB as a DWARF-5 index consumer. */
1123 static int psymbol_tag (const struct partial_symbol *psym)
1124 {
8a6d4234
TT
1125 domain_enum domain = psym->domain;
1126 enum address_class aclass = psym->aclass;
cd4fb1b2
SM
1127
1128 switch (domain)
1129 {
1130 case VAR_DOMAIN:
1131 switch (aclass)
1132 {
1133 case LOC_BLOCK:
1134 return DW_TAG_subprogram;
1135 case LOC_TYPEDEF:
1136 return DW_TAG_typedef;
1137 case LOC_COMPUTED:
1138 case LOC_CONST_BYTES:
1139 case LOC_OPTIMIZED_OUT:
1140 case LOC_STATIC:
1141 return DW_TAG_variable;
1142 case LOC_CONST:
1143 /* Note: It's currently impossible to recognize psyms as enum values
1144 short of reading the type info. For now punt. */
1145 return DW_TAG_variable;
1146 default:
1147 /* There are other LOC_FOO values that one might want to classify
1148 as variables, but dwarf2read.c doesn't currently use them. */
1149 return DW_TAG_variable;
1150 }
1151 case STRUCT_DOMAIN:
1152 return DW_TAG_structure_type;
7666722f
TV
1153 case MODULE_DOMAIN:
1154 return DW_TAG_module;
cd4fb1b2
SM
1155 default:
1156 return 0;
1157 }
1158 }
1159
1160 /* Call insert for all partial symbols and mark them in PSYMS_SEEN. */
1161 void write_psymbols (std::unordered_set<partial_symbol *> &psyms_seen,
932539d7
TT
1162 const std::vector<partial_symbol *> &symbols,
1163 int cu_index, bool is_static, unit_kind kind)
cd4fb1b2 1164 {
932539d7 1165 for (partial_symbol *psym : symbols)
cd4fb1b2 1166 {
cd4fb1b2
SM
1167 /* Only add a given psymbol once. */
1168 if (psyms_seen.insert (psym).second)
1169 insert (psym, cu_index, is_static, kind);
1170 }
1171 }
1172
cd4fb1b2
SM
1173 /* Store value of each symbol. */
1174 std::unordered_map<c_str_view, std::set<symbol_value>, c_str_view_hasher>
1175 m_name_to_value_set;
1176
1177 /* Tables of DWARF-5 .debug_names. They are in object file byte
1178 order. */
1179 std::vector<uint32_t> m_bucket_table;
1180 std::vector<uint32_t> m_hash_table;
1181
1182 const bfd_endian m_dwarf5_byte_order;
1183 dwarf_tmpl<uint32_t> m_dwarf32;
1184 dwarf_tmpl<uint64_t> m_dwarf64;
1185 dwarf &m_dwarf;
1186 offset_vec &m_name_table_string_offs, &m_name_table_entry_offs;
1187 debug_str_lookup m_debugstrlookup;
1188
1189 /* Map each used .debug_names abbreviation tag parameter to its
1190 index value. */
1191 std::unordered_map<index_key, int, index_key_hasher> m_indexkey_to_idx;
1192
1193 /* Next unused .debug_names abbreviation tag for
1194 m_indexkey_to_idx. */
1195 int m_idx_next = 1;
1196
1197 /* .debug_names abbreviation table. */
1198 data_buf m_abbrev_table;
1199
1200 /* .debug_names entry pool. */
1201 data_buf m_entry_pool;
3b00ef10
TT
1202
1203 /* Temporary storage for Ada names. */
1204 auto_obstack m_string_obstack;
cd4fb1b2
SM
1205};
1206
1207/* Return iff any of the needed offsets does not fit into 32-bit
1208 .debug_names section. */
1209
1210static bool
976ca316 1211check_dwarf64_offsets (dwarf2_per_objfile *per_objfile)
cd4fb1b2 1212{
0d305d5c 1213 for (const auto &per_cu : per_objfile->per_bfd->all_comp_units)
cd4fb1b2 1214 {
0d305d5c
TT
1215 if (to_underlying (per_cu->sect_off)
1216 >= (static_cast<uint64_t> (1) << 32))
cd4fb1b2
SM
1217 return true;
1218 }
cd4fb1b2
SM
1219 return false;
1220}
1221
1222/* The psyms_seen set is potentially going to be largish (~40k
1223 elements when indexing a -g3 build of GDB itself). Estimate the
1224 number of elements in order to avoid too many rehashes, which
1225 require rebuilding buckets and thus many trips to
1226 malloc/free. */
1227
1228static size_t
976ca316 1229psyms_seen_size (dwarf2_per_objfile *per_objfile)
cd4fb1b2
SM
1230{
1231 size_t psyms_count = 0;
0d305d5c 1232 for (const auto &per_cu : per_objfile->per_bfd->all_comp_units)
cd4fb1b2 1233 {
edfe0a0c 1234 partial_symtab *psymtab = per_cu->v.psymtab;
cd4fb1b2
SM
1235
1236 if (psymtab != NULL && psymtab->user == NULL)
1237 recursively_count_psymbols (psymtab, psyms_count);
1238 }
1239 /* Generating an index for gdb itself shows a ratio of
1240 TOTAL_SEEN_SYMS/UNIQUE_SYMS or ~5. 4 seems like a good bet. */
1241 return psyms_count / 4;
1242}
1243
c4973306
SM
1244/* Assert that FILE's size is EXPECTED_SIZE. Assumes file's seek
1245 position is at the end of the file. */
cd4fb1b2 1246
c4973306
SM
1247static void
1248assert_file_size (FILE *file, size_t expected_size)
1249{
1250 const auto file_size = ftell (file);
1251 if (file_size == -1)
1252 perror_with_name (("ftell"));
1253 gdb_assert (file_size == expected_size);
1254}
1255
1256/* Write a gdb index file to OUT_FILE from all the sections passed as
1257 arguments. */
1258
1259static void
1260write_gdbindex_1 (FILE *out_file,
1261 const data_buf &cu_list,
1262 const data_buf &types_cu_list,
1263 const data_buf &addr_vec,
1264 const data_buf &symtab_vec,
1265 const data_buf &constant_pool)
1266{
1267 data_buf contents;
1268 const offset_type size_of_header = 6 * sizeof (offset_type);
1269 offset_type total_len = size_of_header;
1270
1271 /* The version number. */
42c2c694 1272 contents.append_offset (8);
c4973306
SM
1273
1274 /* The offset of the CU list from the start of the file. */
42c2c694 1275 contents.append_offset (total_len);
c4973306
SM
1276 total_len += cu_list.size ();
1277
1278 /* The offset of the types CU list from the start of the file. */
42c2c694 1279 contents.append_offset (total_len);
c4973306
SM
1280 total_len += types_cu_list.size ();
1281
1282 /* The offset of the address table from the start of the file. */
42c2c694 1283 contents.append_offset (total_len);
c4973306
SM
1284 total_len += addr_vec.size ();
1285
1286 /* The offset of the symbol table from the start of the file. */
42c2c694 1287 contents.append_offset (total_len);
c4973306
SM
1288 total_len += symtab_vec.size ();
1289
1290 /* The offset of the constant pool from the start of the file. */
42c2c694 1291 contents.append_offset (total_len);
c4973306
SM
1292 total_len += constant_pool.size ();
1293
1294 gdb_assert (contents.size () == size_of_header);
1295
1296 contents.file_write (out_file);
1297 cu_list.file_write (out_file);
1298 types_cu_list.file_write (out_file);
1299 addr_vec.file_write (out_file);
1300 symtab_vec.file_write (out_file);
1301 constant_pool.file_write (out_file);
1302
1303 assert_file_size (out_file, total_len);
1304}
1305
1306/* Write contents of a .gdb_index section for OBJFILE into OUT_FILE.
1307 If OBJFILE has an associated dwz file, write contents of a .gdb_index
1308 section for that dwz file into DWZ_OUT_FILE. If OBJFILE does not have an
1309 associated dwz file, DWZ_OUT_FILE must be NULL. */
1310
1311static void
976ca316 1312write_gdbindex (dwarf2_per_objfile *per_objfile, FILE *out_file,
c4973306 1313 FILE *dwz_out_file)
cd4fb1b2 1314{
976ca316 1315 struct objfile *objfile = per_objfile->objfile;
cd4fb1b2 1316 mapped_symtab symtab;
c4973306
SM
1317 data_buf objfile_cu_list;
1318 data_buf dwz_cu_list;
cd4fb1b2
SM
1319
1320 /* While we're scanning CU's create a table that maps a psymtab pointer
1321 (which is what addrmap records) to its index (which is what is recorded
1322 in the index file). This will later be needed to write the address
1323 table. */
1324 psym_index_map cu_index_htab;
976ca316 1325 cu_index_htab.reserve (per_objfile->per_bfd->all_comp_units.size ());
cd4fb1b2 1326
844a72ef
TT
1327 /* Store out the .debug_type CUs, if any. */
1328 data_buf types_cu_list;
1329
cd4fb1b2
SM
1330 /* The CU list is already sorted, so we don't need to do additional
1331 work here. Also, the debug_types entries do not appear in
1332 all_comp_units, but only in their own hash table. */
1333
1334 std::unordered_set<partial_symbol *> psyms_seen
976ca316 1335 (psyms_seen_size (per_objfile));
91eea9cc 1336 int counter = 0;
844a72ef 1337 int types_counter = 0;
976ca316 1338 for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
cd4fb1b2 1339 {
0d305d5c
TT
1340 dwarf2_per_cu_data *per_cu
1341 = per_objfile->per_bfd->all_comp_units[i].get ();
edfe0a0c 1342 partial_symtab *psymtab = per_cu->v.psymtab;
cd4fb1b2 1343
844a72ef
TT
1344 int &this_counter = per_cu->is_debug_types ? types_counter : counter;
1345
efba5c23
TV
1346 if (psymtab != NULL)
1347 {
1348 if (psymtab->user == NULL)
1349 recursively_write_psymbols (objfile, psymtab, &symtab,
844a72ef 1350 psyms_seen, this_counter);
efba5c23 1351
844a72ef
TT
1352 const auto insertpair = cu_index_htab.emplace (psymtab,
1353 this_counter);
efba5c23
TV
1354 gdb_assert (insertpair.second);
1355 }
cd4fb1b2 1356
c4973306
SM
1357 /* The all_comp_units list contains CUs read from the objfile as well as
1358 from the eventual dwz file. We need to place the entry in the
1359 corresponding index. */
844a72ef
TT
1360 data_buf &cu_list = (per_cu->is_debug_types
1361 ? types_cu_list
1362 : per_cu->is_dwz ? dwz_cu_list : objfile_cu_list);
cd4fb1b2
SM
1363 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1364 to_underlying (per_cu->sect_off));
844a72ef
TT
1365 if (per_cu->is_debug_types)
1366 {
1367 signatured_type *sig_type = (signatured_type *) per_cu;
1368 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1369 to_underlying (sig_type->type_offset_in_tu));
1370 cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1371 sig_type->signature);
1372 }
1373 else
1374 cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length);
1375
1376 ++this_counter;
cd4fb1b2
SM
1377 }
1378
1379 /* Dump the address map. */
1380 data_buf addr_vec;
79cc99f6 1381 write_address_map (per_objfile->per_bfd, addr_vec, cu_index_htab);
cd4fb1b2 1382
cd4fb1b2
SM
1383 /* Now that we've processed all symbols we can shrink their cu_indices
1384 lists. */
1385 uniquify_cu_indices (&symtab);
1386
1387 data_buf symtab_vec, constant_pool;
34daac4b
TV
1388 if (symtab.n_elements == 0)
1389 symtab.data.resize (0);
1390
cd4fb1b2
SM
1391 write_hash_table (&symtab, symtab_vec, constant_pool);
1392
c4973306
SM
1393 write_gdbindex_1(out_file, objfile_cu_list, types_cu_list, addr_vec,
1394 symtab_vec, constant_pool);
cd4fb1b2 1395
c4973306
SM
1396 if (dwz_out_file != NULL)
1397 write_gdbindex_1 (dwz_out_file, dwz_cu_list, {}, {}, {}, {});
1398 else
1399 gdb_assert (dwz_cu_list.empty ());
cd4fb1b2
SM
1400}
1401
1402/* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension. */
1403static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 };
1404
1405/* Write a new .debug_names section for OBJFILE into OUT_FILE, write
1406 needed addition to .debug_str section to OUT_FILE_STR. Return how
1407 many bytes were expected to be written into OUT_FILE. */
1408
c4973306 1409static void
976ca316 1410write_debug_names (dwarf2_per_objfile *per_objfile,
cd4fb1b2
SM
1411 FILE *out_file, FILE *out_file_str)
1412{
976ca316
SM
1413 const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (per_objfile);
1414 struct objfile *objfile = per_objfile->objfile;
cd4fb1b2 1415 const enum bfd_endian dwarf5_byte_order
08feed99 1416 = gdbarch_byte_order (objfile->arch ());
cd4fb1b2
SM
1417
1418 /* The CU list is already sorted, so we don't need to do additional
1419 work here. Also, the debug_types entries do not appear in
1420 all_comp_units, but only in their own hash table. */
1421 data_buf cu_list;
09e2fb72 1422 data_buf types_cu_list;
976ca316 1423 debug_names nametable (per_objfile, dwarf5_is_dwarf64, dwarf5_byte_order);
cd4fb1b2 1424 std::unordered_set<partial_symbol *>
976ca316 1425 psyms_seen (psyms_seen_size (per_objfile));
91eea9cc 1426 int counter = 0;
09e2fb72 1427 int types_counter = 0;
976ca316 1428 for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
cd4fb1b2 1429 {
0d305d5c
TT
1430 const dwarf2_per_cu_data *per_cu
1431 = per_objfile->per_bfd->all_comp_units[i].get ();
edfe0a0c 1432 partial_symtab *psymtab = per_cu->v.psymtab;
cd4fb1b2 1433
09e2fb72
TT
1434 int &this_counter = per_cu->is_debug_types ? types_counter : counter;
1435 data_buf &this_list = per_cu->is_debug_types ? types_cu_list : cu_list;
1436
2762d288 1437 if (psymtab != nullptr && psymtab->user == nullptr)
91eea9cc 1438 nametable.recursively_write_psymbols (objfile, psymtab, psyms_seen,
09e2fb72 1439 this_counter);
cd4fb1b2 1440
09e2fb72
TT
1441 this_list.append_uint (nametable.dwarf5_offset_size (),
1442 dwarf5_byte_order,
1443 to_underlying (per_cu->sect_off));
1444 ++this_counter;
cd4fb1b2
SM
1445 }
1446
2762d288
TV
1447 /* Verify that all units are represented. */
1448 gdb_assert (counter == (per_objfile->per_bfd->all_comp_units.size ()
1449 - per_objfile->per_bfd->tu_stats.nr_tus));
1450 gdb_assert (types_counter == per_objfile->per_bfd->tu_stats.nr_tus);
1451
cd4fb1b2
SM
1452 nametable.build ();
1453
1454 /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC. */
1455
1456 const offset_type bytes_of_header
1457 = ((dwarf5_is_dwarf64 ? 12 : 4)
1458 + 2 + 2 + 7 * 4
1459 + sizeof (dwarf5_gdb_augmentation));
1460 size_t expected_bytes = 0;
1461 expected_bytes += bytes_of_header;
1462 expected_bytes += cu_list.size ();
1463 expected_bytes += types_cu_list.size ();
1464 expected_bytes += nametable.bytes ();
1465 data_buf header;
1466
1467 if (!dwarf5_is_dwarf64)
1468 {
1469 const uint64_t size64 = expected_bytes - 4;
1470 gdb_assert (size64 < 0xfffffff0);
1471 header.append_uint (4, dwarf5_byte_order, size64);
1472 }
1473 else
1474 {
1475 header.append_uint (4, dwarf5_byte_order, 0xffffffff);
1476 header.append_uint (8, dwarf5_byte_order, expected_bytes - 12);
1477 }
1478
1479 /* The version number. */
1480 header.append_uint (2, dwarf5_byte_order, 5);
1481
1482 /* Padding. */
1483 header.append_uint (2, dwarf5_byte_order, 0);
1484
1485 /* comp_unit_count - The number of CUs in the CU list. */
2762d288 1486 header.append_uint (4, dwarf5_byte_order, counter);
cd4fb1b2
SM
1487
1488 /* local_type_unit_count - The number of TUs in the local TU
1489 list. */
2762d288 1490 header.append_uint (4, dwarf5_byte_order, types_counter);
cd4fb1b2
SM
1491
1492 /* foreign_type_unit_count - The number of TUs in the foreign TU
1493 list. */
1494 header.append_uint (4, dwarf5_byte_order, 0);
1495
1496 /* bucket_count - The number of hash buckets in the hash lookup
1497 table. */
1498 header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ());
1499
1500 /* name_count - The number of unique names in the index. */
1501 header.append_uint (4, dwarf5_byte_order, nametable.name_count ());
1502
1503 /* abbrev_table_size - The size in bytes of the abbreviations
1504 table. */
1505 header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ());
1506
1507 /* augmentation_string_size - The size in bytes of the augmentation
1508 string. This value is rounded up to a multiple of 4. */
1509 static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
1510 header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
42c2c694 1511 header.append_array (dwarf5_gdb_augmentation);
cd4fb1b2
SM
1512
1513 gdb_assert (header.size () == bytes_of_header);
1514
1515 header.file_write (out_file);
1516 cu_list.file_write (out_file);
1517 types_cu_list.file_write (out_file);
1518 nametable.file_write (out_file, out_file_str);
1519
c4973306 1520 assert_file_size (out_file, expected_bytes);
cd4fb1b2
SM
1521}
1522
c4973306 1523/* This represents an index file being written (work-in-progress).
cd4fb1b2 1524
c4973306
SM
1525 The data is initially written to a temporary file. When the finalize method
1526 is called, the file is closed and moved to its final location.
1527
1528 On failure (if this object is being destroyed with having called finalize),
1529 the temporary file is closed and deleted. */
1530
1531struct index_wip_file
cd4fb1b2 1532{
c4973306
SM
1533 index_wip_file (const char *dir, const char *basename,
1534 const char *suffix)
1535 {
1536 filename = (std::string (dir) + SLASH_STRING + basename
24b21115 1537 + suffix);
c4973306
SM
1538
1539 filename_temp = make_temp_filename (filename);
1540
2fed9db4
SM
1541 scoped_fd out_file_fd = gdb_mkostemp_cloexec (filename_temp.data (),
1542 O_BINARY);
c4973306
SM
1543 if (out_file_fd.get () == -1)
1544 perror_with_name (("mkstemp"));
1545
1546 out_file = out_file_fd.to_file ("wb");
1547
1548 if (out_file == nullptr)
1549 error (_("Can't open `%s' for writing"), filename_temp.data ());
1550
1551 unlink_file.emplace (filename_temp.data ());
1552 }
1553
1554 void finalize ()
1555 {
1556 /* We want to keep the file. */
1557 unlink_file->keep ();
1558
1559 /* Close and move the str file in place. */
1560 unlink_file.reset ();
1561 if (rename (filename_temp.data (), filename.c_str ()) != 0)
1562 perror_with_name (("rename"));
1563 }
1564
1565 std::string filename;
1566 gdb::char_vector filename_temp;
1567
1568 /* Order matters here; we want FILE to be closed before
1569 FILENAME_TEMP is unlinked, because on MS-Windows one cannot
1570 delete a file that is still open. So, we wrap the unlinker in an
1571 optional and emplace it once we know the file name. */
1572 gdb::optional<gdb::unlinker> unlink_file;
1573
1574 gdb_file_up out_file;
1575};
cd4fb1b2 1576
87d6a7aa 1577/* See dwarf-index-write.h. */
cd4fb1b2 1578
87d6a7aa 1579void
976ca316
SM
1580write_psymtabs_to_index (dwarf2_per_objfile *per_objfile, const char *dir,
1581 const char *basename, const char *dwz_basename,
cd4fb1b2
SM
1582 dw_index_kind index_kind)
1583{
79cc99f6 1584 dwarf2_per_bfd *per_bfd = per_objfile->per_bfd;
976ca316 1585 struct objfile *objfile = per_objfile->objfile;
cd4fb1b2 1586
976ca316 1587 if (per_objfile->per_bfd->using_index)
cd4fb1b2
SM
1588 error (_("Cannot use an index to create the index"));
1589
976ca316 1590 if (per_objfile->per_bfd->types.size () > 1)
cd4fb1b2
SM
1591 error (_("Cannot make an index when the file has multiple .debug_types sections"));
1592
da314dd3
TT
1593 if (per_bfd->partial_symtabs == nullptr
1594 || !per_bfd->partial_symtabs->psymtabs
79cc99f6 1595 || !per_bfd->partial_symtabs->psymtabs_addrmap)
cd4fb1b2
SM
1596 return;
1597
1598 struct stat st;
1599 if (stat (objfile_name (objfile), &st) < 0)
1600 perror_with_name (objfile_name (objfile));
1601
c4973306
SM
1602 const char *index_suffix = (index_kind == dw_index_kind::DEBUG_NAMES
1603 ? INDEX5_SUFFIX : INDEX4_SUFFIX);
cd4fb1b2 1604
c4973306
SM
1605 index_wip_file objfile_index_wip (dir, basename, index_suffix);
1606 gdb::optional<index_wip_file> dwz_index_wip;
cd4fb1b2 1607
c4973306
SM
1608 if (dwz_basename != NULL)
1609 dwz_index_wip.emplace (dir, dwz_basename, index_suffix);
cd4fb1b2
SM
1610
1611 if (index_kind == dw_index_kind::DEBUG_NAMES)
1612 {
c4973306
SM
1613 index_wip_file str_wip_file (dir, basename, DEBUG_STR_SUFFIX);
1614
976ca316 1615 write_debug_names (per_objfile, objfile_index_wip.out_file.get (),
c4973306
SM
1616 str_wip_file.out_file.get ());
1617
1618 str_wip_file.finalize ();
cd4fb1b2
SM
1619 }
1620 else
976ca316 1621 write_gdbindex (per_objfile, objfile_index_wip.out_file.get (),
c4973306
SM
1622 (dwz_index_wip.has_value ()
1623 ? dwz_index_wip->out_file.get () : NULL));
cd4fb1b2 1624
c4973306 1625 objfile_index_wip.finalize ();
87d6a7aa 1626
c4973306
SM
1627 if (dwz_index_wip.has_value ())
1628 dwz_index_wip->finalize ();
cd4fb1b2
SM
1629}
1630
1631/* Implementation of the `save gdb-index' command.
1632
1633 Note that the .gdb_index file format used by this command is
1634 documented in the GDB manual. Any changes here must be documented
1635 there. */
1636
1637static void
1638save_gdb_index_command (const char *arg, int from_tty)
1639{
cd4fb1b2
SM
1640 const char dwarf5space[] = "-dwarf-5 ";
1641 dw_index_kind index_kind = dw_index_kind::GDB_INDEX;
1642
1643 if (!arg)
1644 arg = "";
1645
1646 arg = skip_spaces (arg);
1647 if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0)
1648 {
1649 index_kind = dw_index_kind::DEBUG_NAMES;
1650 arg += strlen (dwarf5space);
1651 arg = skip_spaces (arg);
1652 }
1653
1654 if (!*arg)
1655 error (_("usage: save gdb-index [-dwarf-5] DIRECTORY"));
1656
2030c079 1657 for (objfile *objfile : current_program_space->objfiles ())
aed57c53
TT
1658 {
1659 struct stat st;
cd4fb1b2 1660
aed57c53
TT
1661 /* If the objfile does not correspond to an actual file, skip it. */
1662 if (stat (objfile_name (objfile), &st) < 0)
1663 continue;
cd4fb1b2 1664
976ca316 1665 dwarf2_per_objfile *per_objfile = get_dwarf2_per_objfile (objfile);
cd4fb1b2 1666
976ca316 1667 if (per_objfile != NULL)
aed57c53 1668 {
a70b8144 1669 try
aed57c53
TT
1670 {
1671 const char *basename = lbasename (objfile_name (objfile));
976ca316 1672 const dwz_file *dwz = dwarf2_get_dwz_file (per_objfile->per_bfd);
c4973306
SM
1673 const char *dwz_basename = NULL;
1674
1675 if (dwz != NULL)
1676 dwz_basename = lbasename (dwz->filename ());
1677
976ca316
SM
1678 write_psymtabs_to_index (per_objfile, arg, basename, dwz_basename,
1679 index_kind);
aed57c53 1680 }
230d2906 1681 catch (const gdb_exception_error &except)
aed57c53
TT
1682 {
1683 exception_fprintf (gdb_stderr, except,
1684 _("Error while writing index for `%s': "),
1685 objfile_name (objfile));
1686 }
aed57c53 1687 }
cd4fb1b2 1688
aed57c53 1689 }
cd4fb1b2
SM
1690}
1691
6c265988 1692void _initialize_dwarf_index_write ();
cd4fb1b2
SM
1693void
1694_initialize_dwarf_index_write ()
1695{
1696 cmd_list_element *c = add_cmd ("gdb-index", class_files,
1697 save_gdb_index_command, _("\
1698Save a gdb-index file.\n\
1699Usage: save gdb-index [-dwarf-5] DIRECTORY\n\
1700\n\
1701No options create one file with .gdb-index extension for pre-DWARF-5\n\
1702compatible .gdb_index section. With -dwarf-5 creates two files with\n\
1703extension .debug_names and .debug_str for DWARF-5 .debug_names section."),
1704 &save_cmdlist);
1705 set_cmd_completer (c, filename_completer);
1706}