From ae2a50a9ae15c6a7fdb1f6255bb9bf45ad2a67d4 Mon Sep 17 00:00:00 2001 From: Simon Marchi Date: Mon, 10 Mar 2025 14:55:12 -0400 Subject: [PATCH] attempt to revamp to the CU/TU list Change-Id: I1c8214413583d540c10c9a2322ef2a21f8bb54e7 --- gdb/dwarf2/cooked-indexer.c | 44 ++- gdb/dwarf2/cooked-indexer.h | 5 +- gdb/dwarf2/index-write.c | 29 +- gdb/dwarf2/read-debug-names.c | 52 ++-- gdb/dwarf2/read-gdb-index.c | 36 ++- gdb/dwarf2/read.c | 292 ++++++++++-------- gdb/dwarf2/read.h | 36 +-- gdb/dwarf2/section.h | 10 + gdb/testsuite/gdb.base/maint.exp | 4 +- .../dw-form-ref-addr-with-type-units.exp | 108 +++++++ gdb/testsuite/gdb.dwarf2/varval.exp | 2 +- 11 files changed, 394 insertions(+), 224 deletions(-) create mode 100644 gdb/testsuite/gdb.dwarf2/dw-form-ref-addr-with-type-units.exp diff --git a/gdb/dwarf2/cooked-indexer.c b/gdb/dwarf2/cooked-indexer.c index 18c9c3b2459..e343bbe85c6 100644 --- a/gdb/dwarf2/cooked-indexer.c +++ b/gdb/dwarf2/cooked-indexer.c @@ -84,18 +84,17 @@ tag_can_have_linkage_name (enum dwarf_tag tag) cutu_reader * cooked_indexer::ensure_cu_exists (cutu_reader *reader, dwarf2_per_objfile *per_objfile, - sect_offset sect_off, bool is_dwz, - bool for_scanning) + const dwarf2_section_info §ion, + sect_offset sect_off, bool for_scanning) { /* Lookups for type unit references are always in the CU, and cross-CU references will crash. */ - if (reader->cu ()->per_cu->is_dwz == is_dwz + if (reader->cu ()->per_cu->section == §ion && reader->cu ()->header.offset_in_cu_p (sect_off)) return reader; dwarf2_per_cu *per_cu - = dwarf2_find_containing_comp_unit (sect_off, is_dwz, - per_objfile->per_bfd); + = dwarf2_find_containing_unit (section, sect_off, per_objfile->per_bfd); /* When scanning, we only want to visit a given CU a single time. Doing this check here avoids self-imports as well. 
*/ @@ -148,10 +147,8 @@ cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, bool *is_enum_class, bool for_specification) { - bool origin_is_dwz = false; bool is_declaration = false; - sect_offset origin_offset {}; - + std::optional origin; std::optional low_pc; std::optional high_pc; bool high_pc_relative = false; @@ -218,8 +215,8 @@ cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, case DW_AT_specification: case DW_AT_abstract_origin: case DW_AT_extension: - origin_offset = attr.get_ref_die_offset (); - origin_is_dwz = attr.form == DW_FORM_GNU_ref_alt; + origin = { &get_section_for_ref (attr, reader->cu ()->per_cu), + attr.get_ref_die_offset () }; break; case DW_AT_external: @@ -308,20 +305,20 @@ cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, || (*linkage_name == nullptr && tag_can_have_linkage_name (abbrev->tag)) || (*parent_entry == nullptr && m_language != language_c)) - && origin_offset != sect_offset (0)) + && origin.has_value ()) { cutu_reader *new_reader - = ensure_cu_exists (reader, reader->cu ()->per_objfile, origin_offset, - origin_is_dwz, false); + = ensure_cu_exists (reader, reader->cu ()->per_objfile, + *origin->section, origin->offset, false); if (new_reader == nullptr) error (_(DWARF_ERROR_PREFIX "cannot follow reference to DIE at %s" " [in module %s]"), - sect_offset_str (origin_offset), + sect_offset_str (origin->offset), bfd_get_filename (reader->abfd ())); const gdb_byte *new_info_ptr - = (new_reader->buffer () + to_underlying (origin_offset)); + = (new_reader->buffer () + to_underlying (origin->offset)); if (*parent_entry == nullptr) { @@ -345,7 +342,7 @@ cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, if (new_abbrev == nullptr) error (_(DWARF_ERROR_PREFIX "Unexpected null DIE at offset %s [in module %s]"), - sect_offset_str (origin_offset), + sect_offset_str (origin->offset), bfd_get_filename (new_reader->abfd ())); new_info_ptr += bytes_read; @@ -409,8 +406,7 @@ 
cooked_indexer::index_imported_unit (cutu_reader *reader, const gdb_byte *info_ptr, const abbrev_info *abbrev) { - sect_offset sect_off {}; - bool is_dwz = false; + std::optional target; for (int i = 0; i < abbrev->num_attrs; ++i) { @@ -419,20 +415,18 @@ cooked_indexer::index_imported_unit (cutu_reader *reader, info_ptr = reader->read_attribute (&attr, &abbrev->attrs[i], info_ptr); if (attr.name == DW_AT_import) - { - sect_off = attr.get_ref_die_offset (); - is_dwz = (attr.form == DW_FORM_GNU_ref_alt - || reader->cu ()->per_cu->is_dwz); - } + target = { &get_section_for_ref (attr, reader->cu ()->per_cu), + attr.get_ref_die_offset () }; } /* Did not find DW_AT_import. */ - if (sect_off == sect_offset (0)) + if (!target.has_value ()) return info_ptr; dwarf2_per_objfile *per_objfile = reader->cu ()->per_objfile; cutu_reader *new_reader - = ensure_cu_exists (reader, per_objfile, sect_off, is_dwz, true); + = ensure_cu_exists (reader, per_objfile, *target->section, target->offset, + true); if (new_reader != nullptr) { index_dies (new_reader, new_reader->info_ptr (), nullptr, false); diff --git a/gdb/dwarf2/cooked-indexer.h b/gdb/dwarf2/cooked-indexer.h index a33f5d18c47..3db6e8b3666 100644 --- a/gdb/dwarf2/cooked-indexer.h +++ b/gdb/dwarf2/cooked-indexer.h @@ -56,9 +56,8 @@ private: up just a single DIE. */ cutu_reader *ensure_cu_exists (cutu_reader *reader, dwarf2_per_objfile *per_objfile, - sect_offset sect_off, - bool is_dwz, - bool for_scanning); + const dwarf2_section_info §ion, + sect_offset sect_off, bool for_scanning); /* Index DIEs in the READER starting at INFO_PTR. PARENT is the entry for the enclosing scope (nullptr at top level). FULLY diff --git a/gdb/dwarf2/index-write.c b/gdb/dwarf2/index-write.c index 8fb59318538..ddb8517a018 100644 --- a/gdb/dwarf2/index-write.c +++ b/gdb/dwarf2/index-write.c @@ -1313,13 +1313,28 @@ write_gdbindex (dwarf2_per_bfd *per_bfd, cooked_index *table, /* Store out the .debug_type CUs, if any. 
*/ data_buf types_cu_list; - /* The CU list is already sorted, so we don't need to do additional - work here. */ + /* dwarf_per_bfd::all_units is not necessarily sorted as needed in .gdb_index, + so sort it here. */ + std::vector units; + + for (const auto &per_cu : per_bfd->all_units) + units.emplace_back (per_cu.get ()); + + std::sort (units.begin (), units.end (), + [] (const dwarf2_per_cu *a, const dwarf2_per_cu *b) + { + /* Comp units go before type units. */ + if (a->is_debug_types != b->is_debug_types) + return a->is_debug_types < b->is_debug_types; + + /* Then, sort by section offset. */ + return a->sect_off < b->sect_off; + }); int counter = 0; - for (const dwarf2_per_cu_up &per_cu : per_bfd->all_units) + for (const dwarf2_per_cu *per_cu : units) { - const auto insertpair = cu_index_htab.emplace (per_cu.get (), counter); + const auto insertpair = cu_index_htab.emplace (per_cu, counter); gdb_assert (insertpair.second); /* See enhancement PR symtab/30838. */ @@ -1335,7 +1350,7 @@ write_gdbindex (dwarf2_per_bfd *per_bfd, cooked_index *table, to_underlying (per_cu->sect_off)); if (per_cu->is_debug_types) { - signatured_type *sig_type = (signatured_type *) per_cu.get (); + const signatured_type *sig_type = (const signatured_type *) per_cu; cu_list.append_uint (8, BFD_ENDIAN_LITTLE, to_underlying (sig_type->type_offset_in_tu)); cu_list.append_uint (8, BFD_ENDIAN_LITTLE, @@ -1411,8 +1426,8 @@ write_debug_names (dwarf2_per_bfd *per_bfd, cooked_index *table, } /* Verify that all units are represented. 
*/ - gdb_assert (counter == per_bfd->all_comp_units.size ()); - gdb_assert (types_counter == per_bfd->all_type_units.size ()); + gdb_assert (counter == per_bfd->num_comp_units); + gdb_assert (types_counter == per_bfd->num_type_units); for (const cooked_index_entry *entry : table->all_entries ()) nametable.insert (entry); diff --git a/gdb/dwarf2/read-debug-names.c b/gdb/dwarf2/read-debug-names.c index 1d32b378936..b7d771d0986 100644 --- a/gdb/dwarf2/read-debug-names.c +++ b/gdb/dwarf2/read-debug-names.c @@ -95,6 +95,14 @@ struct mapped_debug_names_reader const gdb_byte *name_table_entry_offs_reordered = nullptr; const gdb_byte *entry_pool = nullptr; + /* The compilation units table, as found in this index. The CU indices in + index entries can index directly into this. */ + std::vector comp_units_table; + + /* The type units table, as found in this index. The TU indices in index + entries can index directly into this. */ + std::vector type_units_table; + struct index_val { ULONGEST dwarf_tag; @@ -464,36 +472,36 @@ build_and_check_tu_list_from_debug_names (dwarf2_per_objfile *per_objfile, { struct objfile *objfile = per_objfile->objfile; dwarf2_per_bfd *per_bfd = per_objfile->per_bfd; - int nr_cus = per_bfd->all_comp_units.size (); - int nr_cus_tus = per_bfd->all_units.size (); section->read (objfile); - uint32_t j = nr_cus; for (uint32_t i = 0; i < map.tu_count; ++i) { + /* Read one entry from the TU list. */ sect_offset sect_off = (sect_offset) (extract_unsigned_integer (map.tu_table_reordered + i * map.offset_size, map.offset_size, map.dwarf5_byte_order)); - bool found = false; - for (; j < nr_cus_tus; j++) - if (per_bfd->get_cu (j)->sect_off == sect_off) - { - found = true; - break; - } - if (!found) + /* Find the matching dwarf2_per_cu. 
*/ + auto found + = std::find_if (per_bfd->all_units.begin (), per_bfd->all_units.end (), + [sect_off] (const dwarf2_per_cu_up &unit) { + return unit->sect_off == sect_off + && unit->is_debug_types; + }); + + if (found == per_bfd->all_units.end ()) { warning (_("Section .debug_names has incorrect entry in TU table," " ignoring .debug_names.")); return false; } - map.type_units.emplace_back (per_bfd->get_cu (j)); + map.type_units.emplace_back (found); } + return true; } @@ -720,7 +728,6 @@ build_and_check_cu_list_from_debug_names (dwarf2_per_bfd *per_bfd, dwarf2_section_info §ion, bool is_dwz) { - if (map.cu_count != per_bfd->num_comp_units) { warning (_("Section .debug_names has incorrect number of CUs in CU table," @@ -736,18 +743,17 @@ build_and_check_cu_list_from_debug_names (dwarf2_per_bfd *per_bfd, (map.cu_table_reordered + i * map.offset_size, map.offset_size, map.dwarf5_byte_order)); - dwarf2_per_cu *found = nullptr; /* Find the matching dwarf2_per_cu. */ - for (auto &unit : per_bfd->all_units) - if (unit->sect_off == sect_off && !unit->is_debug_types - && unit->is_dwz == is_dwz) - { - found = unit.get (); - break; - } - - if (found == nullptr) + auto found + = std::find_if (per_bfd->all_units.begin (), per_bfd->all_units.end (), + [is_dwz, sect_off] (const dwarf2_per_cu_up &unit) { + return unit->sect_off == sect_off + && !unit->is_debug_types + && unit->is_dwz == is_dwz; + }); + + if (found == per_bfd->all_units.end ()) { warning (_("Section .debug_names has incorrect entry in CU table," " ignoring .debug_names.")); diff --git a/gdb/dwarf2/read-gdb-index.c b/gdb/dwarf2/read-gdb-index.c index f6c73d0c98a..907aecb270a 100644 --- a/gdb/dwarf2/read-gdb-index.c +++ b/gdb/dwarf2/read-gdb-index.c @@ -117,6 +117,10 @@ struct mapped_gdb_index : public dwarf_scanner_base /* Index data format version. */ int version = 0; + /* Compile units followed by type units, in the order as found in the + index. Indices found in index entries can index directly in into this. 
*/ + std::vector units; + /* The address table data. */ gdb::array_view address_table; @@ -1106,17 +1110,16 @@ dw2_expand_marked_cus (dwarf2_per_objfile *per_objfile, offset_type idx, } /* Don't crash on bad data. */ - if (cu_index >= per_objfile->per_bfd->all_units.size ()) + if (cu_index >= index.units.size ()) { complaint (_(".gdb_index entry has bad CU index" " [in module %s]"), objfile_name (per_objfile->objfile)); continue; } - dwarf2_per_cu *per_cu = per_objfile->per_bfd->get_cu (cu_index); - - if (!dw2_expand_symtabs_matching_one (per_cu, per_objfile, file_matcher, - expansion_notify, lang_matcher)) + if (!dw2_expand_symtabs_matching_one (index.units[cu_index], per_objfile, + file_matcher, expansion_notify, + lang_matcher)) return false; } @@ -1313,7 +1316,7 @@ static void create_cus_from_gdb_index_list (dwarf2_per_bfd *per_bfd, const gdb_byte *cu_list, offset_type n_elements, struct dwarf2_section_info *section, - int is_dwz) + int is_dwz, std::vector &units) { for (offset_type i = 0; i < n_elements; i += 2) { @@ -1328,6 +1331,7 @@ create_cus_from_gdb_index_list (dwarf2_per_bfd *per_bfd, sect_off, length, is_dwz)); + units.emplace_back (per_bfd->all_units.back ().get ()); } } @@ -1337,20 +1341,21 @@ create_cus_from_gdb_index_list (dwarf2_per_bfd *per_bfd, static void create_cus_from_gdb_index (dwarf2_per_bfd *per_bfd, const gdb_byte *cu_list, offset_type cu_list_elements, + std::vector &units, const gdb_byte *dwz_list, offset_type dwz_elements) { gdb_assert (per_bfd->all_units.empty ()); per_bfd->all_units.reserve ((cu_list_elements + dwz_elements) / 2); create_cus_from_gdb_index_list (per_bfd, cu_list, cu_list_elements, - &per_bfd->infos[0], 0); + &per_bfd->infos[0], 0, units); if (dwz_elements == 0) return; dwz_file *dwz = per_bfd->get_dwz_file (); create_cus_from_gdb_index_list (per_bfd, dwz_list, dwz_elements, - &dwz->info, 1); + &dwz->info, 1, units); } /* Create the signatured type hash table from the index. 
*/ @@ -1358,7 +1363,8 @@ create_cus_from_gdb_index (dwarf2_per_bfd *per_bfd, static void create_signatured_type_table_from_gdb_index (dwarf2_per_bfd *per_bfd, struct dwarf2_section_info *section, - const gdb_byte *bytes, offset_type elements) + const gdb_byte *bytes, offset_type elements, + std::vector &units) { signatured_type_set sig_types_hash; @@ -1382,6 +1388,7 @@ create_signatured_type_table_from_gdb_index sig_type->type_offset_in_tu = type_offset_in_tu; sig_types_hash.emplace (sig_type.get ()); + units.emplace_back (sig_type.get ()); per_bfd->all_units.emplace_back (sig_type.release ()); } @@ -1419,14 +1426,14 @@ create_addrmap_from_gdb_index (dwarf2_per_objfile *per_objfile, continue; } - if (cu_index >= per_bfd->all_units.size ()) + if (cu_index >= index->units.size ()) { complaint (_(".gdb_index address table has invalid CU number %u"), (unsigned) cu_index); continue; } - mutable_map.set_empty (lo, hi - 1, per_bfd->get_cu (cu_index)); + mutable_map.set_empty (lo, hi - 1, index->units[cu_index]); } index->index_addrmap @@ -1528,8 +1535,8 @@ dwarf2_read_gdb_index } } - create_cus_from_gdb_index (per_bfd, cu_list, cu_list_elements, dwz_list, - dwz_list_elements); + create_cus_from_gdb_index (per_bfd, cu_list, cu_list_elements, map->units, + dwz_list, dwz_list_elements); if (types_list_elements) { @@ -1548,7 +1555,8 @@ dwarf2_read_gdb_index : &per_bfd->infos[0]); create_signatured_type_table_from_gdb_index (per_bfd, section, types_list, - types_list_elements); + types_list_elements, + map->units); } finalize_all_units (per_bfd); diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c index 5c26f87562d..f74f05f4025 100644 --- a/gdb/dwarf2/read.c +++ b/gdb/dwarf2/read.c @@ -1949,25 +1949,23 @@ dwarf2_base_index_functions::print_stats (struct objfile *objfile, return; dwarf2_per_objfile *per_objfile = get_dwarf2_per_objfile (objfile); - int total = per_objfile->per_bfd->all_units.size (); - int count = 0; + unsigned int read_count = 0; + unsigned int unread_count = 
0; - for (int i = 0; i < total; ++i) - { - dwarf2_per_cu *per_cu = per_objfile->per_bfd->get_cu (i); + for (auto &per_cu : per_objfile->per_bfd->all_units) + if (per_objfile->symtab_set_p (&*per_cu)) + ++read_count; + else + ++unread_count; - if (!per_objfile->symtab_set_p (per_cu)) - ++count; - } - gdb_printf (_(" Number of read CUs: %d\n"), total - count); - gdb_printf (_(" Number of unread CUs: %d\n"), count); + gdb_printf (_(" Number of read units: %u\n"), read_count); + gdb_printf (_(" Number of unread units: %u\n"), unread_count); } void dwarf2_base_index_functions::expand_all_symtabs (struct objfile *objfile) { dwarf2_per_objfile *per_objfile = get_dwarf2_per_objfile (objfile); - int total_units = per_objfile->per_bfd->all_units.size (); for (const dwarf2_per_cu_up &per_cu : per_objfile->per_bfd->all_units) { @@ -3561,9 +3559,6 @@ build_type_psymtabs (dwarf2_per_objfile *per_objfile, /* It's up to the caller to not call us multiple times. */ gdb_assert (per_objfile->per_bfd->type_unit_groups == NULL); - if (per_objfile->per_bfd->all_type_units.size () == 0) - return; - /* TUs typically share abbrev tables, and there can be way more TUs than abbrev tables. Sort by abbrev table to reduce the number of times we read each abbrev table in. @@ -3588,7 +3583,7 @@ build_type_psymtabs (dwarf2_per_objfile *per_objfile, /* Sort in a separate table to maintain the order of all_units for .gdb_index: TU indices directly index all_type_units. */ std::vector sorted_by_abbrev; - sorted_by_abbrev.reserve (per_objfile->per_bfd->all_type_units.size ()); + sorted_by_abbrev.reserve (per_objfile->per_bfd->num_type_units); for (const auto &cu : per_objfile->per_bfd->all_units) if (cu->is_debug_types) @@ -3937,16 +3932,51 @@ read_comp_units_from_section (dwarf2_per_objfile *per_objfile, per_objfile->per_bfd->all_units.push_back (std::move (this_cu)); } } +/* "less than" function used to both sort and bisect units in the + `dwarf2_per_bfd::all_units` vector. 
Return true if the LHS CU comes before + (is "less" than) the section and offset in RHS. + + For simplicity, sort sections by their pointer. This is not ideal, because + it can cause the behavior to change across runs, making some bugs harder to + investigate. Instead, sections could be sorted by their properties, but it + is important that two different sections never compare equal. -/* Initialize the views on all_units. */ + LENGTH_REQUIRED indicates whether the length of the units is required + to be set already. When this functions gets called to sort units, + the length of the units may not be known yet (for example, when readin + .gdb_index). But this doesn't affect the outcome when sorting. On the + other hand, when called in the context of looking up a unit by section + offset, the length is required in order to know if the offset falls within + the section or not. */ + +template +static bool +all_units_less_than (const dwarf2_per_cu &lhs, + const section_and_offset &rhs) +{ + if (lhs.section != rhs.section) + return lhs.section < rhs.section; + + + /* Compare the end of the unit'srange, so that std::lower_bound finds the + unit we are looking for, not the one after. */ + return lhs.sect_off + lhs.length (length_required) - 1 < rhs.offset; +} void finalize_all_units (dwarf2_per_bfd *per_bfd) { - gdb::array_view tmp = per_bfd->all_units; - per_bfd->all_comp_units = tmp.slice (0, per_bfd->num_comp_units); - per_bfd->all_type_units - = tmp.slice (per_bfd->num_comp_units, per_bfd->num_type_units); + /* Ensure that the all_units vector is in the expected order for + dwarf2_find_containing_unit to be able to perform a binary search. + + Sort first by section (using the pointer of the section as the key) and + then by the offset within the section. 
*/ + std:: + sort (per_bfd->all_units.begin (), per_bfd->all_units.end (), + [] (const dwarf2_per_cu_up &a, const dwarf2_per_cu_up &b) + { + return all_units_less_than (*a, { b->section, b->sect_off }); + }); } /* See read.h. */ @@ -5215,6 +5245,19 @@ process_full_type_unit (dwarf2_cu *cu, cu->reset_builder (); } +/* See read.h. */ + +dwarf2_section_info & +get_section_for_ref (const attribute &attr, dwarf2_per_cu *per_cu) +{ + gdb_assert (attr.form_is_ref ()); + + if (attr.form == DW_FORM_GNU_ref_alt) + return per_cu->per_bfd->get_dwz_file (true)->info; + + return *per_cu->section; +} + /* Process an imported unit DIE. */ static void @@ -5234,12 +5277,12 @@ process_imported_unit_die (struct die_info *die, struct dwarf2_cu *cu) attr = dwarf2_attr (die, DW_AT_import, cu); if (attr != NULL) { + const dwarf2_section_info §ion + = get_section_for_ref (*attr, cu->per_cu); sect_offset sect_off = attr->get_ref_die_offset (); - bool is_dwz = (attr->form == DW_FORM_GNU_ref_alt || cu->per_cu->is_dwz); dwarf2_per_objfile *per_objfile = cu->per_objfile; dwarf2_per_cu *per_cu - = dwarf2_find_containing_comp_unit (sect_off, is_dwz, - per_objfile->per_bfd); + = dwarf2_find_containing_unit (section, sect_off, per_objfile->per_bfd); /* We're importing a C++ compilation unit with tag DW_TAG_compile_unit into another compilation unit, at root level. Regard this as a hint, @@ -15515,6 +15558,7 @@ cutu_reader::read_attribute_value (attribute *attr, unsigned form, } /* Super hack. 
*/ + // RLY NEEDED?,get_section_for_ref should return the dwz section if the ref source if in the dwz if (m_cu->per_cu->is_dwz && attr->form_is_ref ()) attr->form = DW_FORM_GNU_ref_alt; @@ -17856,9 +17900,11 @@ lookup_die_type (struct die_info *die, const struct attribute *attr, if (attr->form == DW_FORM_GNU_ref_alt) { + dwarf2_per_bfd *per_bfd = per_objfile->per_bfd; + const dwarf2_section_info §ion = per_bfd->get_dwz_file ()->info; sect_offset sect_off = attr->get_ref_die_offset (); dwarf2_per_cu *per_cu - = dwarf2_find_containing_comp_unit (sect_off, 1, per_objfile->per_bfd); + = dwarf2_find_containing_unit (section, sect_off, per_bfd); this_type = get_die_type_at_offset (sect_off, per_cu, per_objfile); } @@ -18561,42 +18607,41 @@ follow_die_ref_or_sig (struct die_info *src_die, const struct attribute *attr, return die; } -/* Follow reference OFFSET. - On entry *REF_CU is the CU of the source die referencing OFFSET. +/* Follow reference TARGET. + On entry *REF_CU is the CU of the source die referencing TARGET. On exit *REF_CU is the CU of the result. - Returns NULL if OFFSET is invalid. */ + Returns NULL if TARGET is invalid. 
*/ -static struct die_info * -follow_die_offset (sect_offset sect_off, int offset_in_dwz, - struct dwarf2_cu **ref_cu) +static die_info * +follow_die_offset (const section_and_offset &target, dwarf2_cu **ref_cu) { - struct dwarf2_cu *target_cu, *cu = *ref_cu; - dwarf2_per_objfile *per_objfile = cu->per_objfile; + struct dwarf2_cu *source_cu = *ref_cu; + dwarf2_per_objfile *per_objfile = source_cu->per_objfile; - gdb_assert (cu->per_cu != NULL); + gdb_assert (source_cu->per_cu != NULL); - target_cu = cu; + dwarf2_cu *target_cu = source_cu; dwarf_read_debug_printf_v ("source CU offset: %s, target offset: %s, " "source CU contains target offset: %d", - sect_offset_str (cu->per_cu->sect_off), + sect_offset_str (source_cu->per_cu->sect_off), sect_offset_str (sect_off), - cu->header.offset_in_cu_p (sect_off)); + source_cu->header.offset_in_cu_p (sect_off)); - if (cu->per_cu->is_debug_types) + if (source_cu->per_cu->is_debug_types) { /* .debug_types CUs cannot reference anything outside their CU. If they need to, they have to reference a signatured type via DW_FORM_ref_sig8. */ - if (!cu->header.offset_in_cu_p (sect_off)) + if (!source_cu->header.offset_in_cu_p (sect_off)) return NULL; } - else if (offset_in_dwz != cu->per_cu->is_dwz - || !cu->header.offset_in_cu_p (sect_off)) + else if (§ion != source_cu->per_cu->section + || !source_cu->header.offset_in_cu_p (sect_off)) { dwarf2_per_cu *per_cu - = dwarf2_find_containing_comp_unit (sect_off, offset_in_dwz, - per_objfile->per_bfd); + = dwarf2_find_containing_unit (section, sect_off, + per_objfile->per_bfd); dwarf_read_debug_printf_v ("target CU offset: %s, " "target CU DIEs loaded: %d", @@ -18608,10 +18653,11 @@ follow_die_offset (sect_offset sect_off, int offset_in_dwz, Even if maybe_queue_comp_unit doesn't require us to load the CU's DIEs, it doesn't mean they are currently loaded. Since we require them to be loaded, we must check for ourselves. 
*/ - if (maybe_queue_comp_unit (cu, per_cu, per_objfile, cu->lang ()) + if (maybe_queue_comp_unit (source_cu, per_cu, per_objfile, + source_cu->lang ()) || per_objfile->get_cu (per_cu) == nullptr) load_full_comp_unit (per_cu, per_objfile, per_objfile->get_cu (per_cu), - false, cu->lang ()); + false, source_cu->lang ()); target_cu = per_objfile->get_cu (per_cu); if (target_cu == nullptr) @@ -18621,10 +18667,10 @@ follow_die_offset (sect_offset sect_off, int offset_in_dwz, sect_offset_str (sect_off), objfile_name (per_objfile->objfile)); } - else if (cu->dies == NULL) + else if (source_cu->dies == NULL) { /* We're loading full DIEs during partial symbol reading. */ - load_full_comp_unit (cu->per_cu, per_objfile, cu, false, + load_full_comp_unit (source_cu->per_cu, per_objfile, source_cu, false, language_minimal); } @@ -18643,24 +18689,24 @@ follow_die_ref (struct die_info *src_die, const struct attribute *attr, struct dwarf2_cu **ref_cu) { sect_offset sect_off = attr->get_ref_die_offset (); - struct dwarf2_cu *cu = *ref_cu; - struct die_info *die; + struct dwarf2_cu *source_cu = *ref_cu; - if (attr->form != DW_FORM_GNU_ref_alt && src_die->sect_off == sect_off) + if (attr->form == DW_FORM_GNU_ref_alt && src_die->sect_off == sect_off) { /* Self-reference, we're done. 
*/ return src_die; } - die = follow_die_offset (sect_off, - (attr->form == DW_FORM_GNU_ref_alt - || cu->per_cu->is_dwz), - ref_cu); - if (!die) + const dwarf2_section_info §ion + = get_section_for_ref (*attr, source_cu->per_cu); + + die_info *die = follow_die_offset (section, sect_off, + ref_cu); + if (die == nullptr) error (_(DWARF_ERROR_PREFIX "Cannot find DIE at %s referenced from DIE at %s [in module %s]"), sect_offset_str (sect_off), sect_offset_str (src_die->sect_off), - objfile_name (cu->per_objfile->objfile)); + objfile_name (source_cu->per_objfile->objfile)); return die; } @@ -18673,7 +18719,6 @@ dwarf2_fetch_die_loc_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, gdb::function_view get_frame_pc, bool resolve_abstract_p) { - struct die_info *die; struct attribute *attr; struct dwarf2_locexpr_baton retval; struct objfile *objfile = per_objfile->objfile; @@ -18691,8 +18736,8 @@ dwarf2_fetch_die_loc_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, sect_offset_str (sect_off), objfile_name (objfile)); } - die = follow_die_offset (sect_off, per_cu->is_dwz, &cu); - if (!die) + die_info *die = follow_die_offset (*per_cu->section, sect_off, &cu); + if (die == nullptr) error (_(DWARF_ERROR_PREFIX "Cannot find DIE at %s referenced [in module %s]"), sect_offset_str (sect_off), objfile_name (objfile)); @@ -18709,7 +18754,7 @@ dwarf2_fetch_die_loc_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, { struct dwarf2_cu *cand_cu = cu; struct die_info *cand - = follow_die_offset (cand_off, per_cu->is_dwz, &cand_cu); + = follow_die_offset (*per_cu->section, cand_off, &cand_cu); if (!cand || !cand->parent || cand->parent->tag != DW_TAG_subprogram) @@ -18832,7 +18877,7 @@ dwarf2_fetch_constant_bytes (sect_offset sect_off, sect_offset_str (sect_off), objfile_name (objfile)); } - die = follow_die_offset (sect_off, per_cu->is_dwz, &cu); + die = follow_die_offset (*per_cu->section, sect_off, &cu); if (!die) error (_(DWARF_ERROR_PREFIX "Cannot find DIE at %s 
referenced [in module %s]"), @@ -18958,7 +19003,7 @@ dwarf2_fetch_die_type_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, if (cu == nullptr) return nullptr; - die = follow_die_offset (sect_off, per_cu->is_dwz, &cu); + die = follow_die_offset (*per_cu->section, sect_off, &cu); if (!die) return NULL; @@ -19714,63 +19759,45 @@ dwarf2_per_cu::ensure_lang (dwarf2_per_objfile *per_objfile) occur in any CU. This is separate so that it can be unit tested. */ -static int -dwarf2_find_containing_comp_unit - (sect_offset sect_off, - unsigned int offset_in_dwz, - const std::vector &all_units) +static dwarf2_per_cu * +dwarf2_find_containing_unit (section_and_offset const dwarf2_section_info §ion, + sect_offset sect_off, + const std::vector &all_units) { - int low, high; + auto it = std::lower_bound (all_units.begin (), all_units.end (), + section_and_offset { §ion, sect_off }, + [] (const dwarf2_per_cu_up &per_cu, + const section_and_offset &key) + { + return all_units_less_than + (*per_cu, key); + }); - low = 0; - high = all_units.size () - 1; - while (high > low) - { - int mid = low + (high - low) / 2; - dwarf2_per_cu *mid_cu = all_units[mid].get (); + if (it == all_units.begin ()) + return sect_off >= (*it)->sect_off ? it->get () : nullptr; - if (mid_cu->is_dwz > offset_in_dwz - || (mid_cu->is_dwz == offset_in_dwz - && mid_cu->sect_off + mid_cu->length () > sect_off)) - high = mid; - else - low = mid + 1; - } - gdb_assert (low == high); - return low; + if (it == all_units.end ()) + return nullptr; + + return it->get (); } /* See read.h. 
*/ dwarf2_per_cu * -dwarf2_find_containing_comp_unit (sect_offset sect_off, - unsigned int offset_in_dwz, - dwarf2_per_bfd *per_bfd) +dwarf2_find_containing_unit (section_and_offset const dwarf2_section_info §ion, + sect_offset sect_off, dwarf2_per_bfd *per_bfd) { - int low = dwarf2_find_containing_comp_unit - (sect_off, offset_in_dwz, per_bfd->all_units); - dwarf2_per_cu *this_cu = per_bfd->all_units[low].get (); + dwarf2_per_cu *per_cu + = dwarf2_find_containing_unit (section, sect_off, per_bfd->all_units); - if (this_cu->is_dwz != offset_in_dwz || this_cu->sect_off > sect_off) - { - if (low == 0 || this_cu->is_dwz != offset_in_dwz) - error (_(DWARF_ERROR_PREFIX - "could not find CU containing offset %s [in module %s]"), - sect_offset_str (sect_off), - per_bfd->filename ()); + if (per_cu == nullptr) + error (_(DWARF_ERROR_PREFIX + "could not find compile or type unit containing offset %s " + "[in module %s]"), + sect_offset_str (sect_off), per_bfd->filename ()); - gdb_assert (per_bfd->all_units[low-1]->sect_off - <= sect_off); - return per_bfd->all_units[low - 1].get (); - } - else - { - if (low == per_bfd->all_units.size () - 1 - && sect_off >= this_cu->sect_off + this_cu->length ()) - error (_("invalid dwarf2 offset %s"), sect_offset_str (sect_off)); - gdb_assert (sect_off < this_cu->sect_off + this_cu->length ()); - return this_cu; - } + return per_cu; } #if GDB_SELF_TEST @@ -19782,31 +19809,36 @@ static void run_test () { char dummy_per_bfd; - char dummy_section; + auto &main_section = *reinterpret_cast (0x4000); + auto &dwz_section = *reinterpret_cast(0x5000); - const auto create_dummy_per_cu = [&] (sect_offset sect_off, + const auto create_dummy_per_cu = [&] (dwarf2_section_info §ion, + sect_offset sect_off, unsigned int length, bool is_dwz) { auto per_bfd = reinterpret_cast (&dummy_per_bfd); - auto section = reinterpret_cast (&dummy_section); - return dwarf2_per_cu_up (new dwarf2_per_cu (per_bfd, section, sect_off, + return dwarf2_per_cu_up (new 
dwarf2_per_cu (per_bfd, §ion, sect_off, length, is_dwz)); }; /* Units in the main file. */ - dwarf2_per_cu_up one = create_dummy_per_cu (sect_offset (0), 5, false); + dwarf2_per_cu_up one + = create_dummy_per_cu (main_section, sect_offset (0), 5, false); dwarf2_per_cu *one_ptr = one.get (); dwarf2_per_cu_up two - = create_dummy_per_cu (sect_offset (one->length ()), 7, false); + = create_dummy_per_cu (main_section, sect_offset (one->length ()), 7, + false); dwarf2_per_cu *two_ptr = two.get (); /* Units in the supplementary (dwz) file. */ - dwarf2_per_cu_up three = create_dummy_per_cu (sect_offset (0), 5, true); + dwarf2_per_cu_up three + = create_dummy_per_cu (dwz_section, sect_offset (0), 5, true); dwarf2_per_cu *three_ptr = three.get (); dwarf2_per_cu_up four - = create_dummy_per_cu (sect_offset (three->length ()), 7, true); + = create_dummy_per_cu (dwz_section, sect_offset (three->length ()), 7, + true); dwarf2_per_cu *four_ptr = four.get (); std::vector units; @@ -19815,21 +19847,21 @@ run_test () units.push_back (std::move (three)); units.push_back (std::move (four)); - int result; - - result = dwarf2_find_containing_comp_unit (sect_offset (0), 0, units); - SELF_CHECK (units[result].get () == one_ptr); - result = dwarf2_find_containing_comp_unit (sect_offset (3), 0, units); - SELF_CHECK (units[result].get () == one_ptr); - result = dwarf2_find_containing_comp_unit (sect_offset (5), 0, units); - SELF_CHECK (units[result].get () == two_ptr); - - result = dwarf2_find_containing_comp_unit (sect_offset (0), 1, units); - SELF_CHECK (units[result].get () == three_ptr); - result = dwarf2_find_containing_comp_unit (sect_offset (3), 1, units); - SELF_CHECK (units[result].get () == three_ptr); - result = dwarf2_find_containing_comp_unit (sect_offset (5), 1, units); - SELF_CHECK (units[result].get () == four_ptr); + dwarf2_per_cu *result; + + result = dwarf2_find_containing_unit (main_section, sect_offset (0), units); + SELF_CHECK (result == one_ptr); + result = 
dwarf2_find_containing_unit (main_section, sect_offset (3), units); + SELF_CHECK (result == one_ptr); + result = dwarf2_find_containing_unit (main_section, sect_offset (5), units); + SELF_CHECK (result == two_ptr); + + result = dwarf2_find_containing_unit (dwz_section, sect_offset (0), units); + SELF_CHECK (result == three_ptr); + result = dwarf2_find_containing_unit (dwz_section, sect_offset (3), units); + SELF_CHECK (result == three_ptr); + result = dwarf2_find_containing_unit (dwz_section, sect_offset (5), units); + SELF_CHECK (result == four_ptr); } } diff --git a/gdb/dwarf2/read.h b/gdb/dwarf2/read.h index 45296633f9c..4961c4dd1c8 100644 --- a/gdb/dwarf2/read.h +++ b/gdb/dwarf2/read.h @@ -269,10 +269,10 @@ public: int ref_addr_size () const; /* Return length of this CU. */ - unsigned int length () const + unsigned int length (bool require = true) const { /* Make sure it's set already. */ - gdb_assert (m_length != 0); + gdb_assert (!require || m_length != 0); return m_length; } @@ -494,8 +494,8 @@ struct dwarf2_per_bfd const char *filename () const { return bfd_get_filename (this->obfd); } - /* Return the CU given its index. */ - dwarf2_per_cu *get_cu (int index) const + /* Return the unit given its index. */ + dwarf2_per_cu *get_unit (int index) const { return this->all_units[index].get (); } @@ -580,15 +580,10 @@ public: std::vector types; - /* Table of all the compilation units. This is used to locate + /* Table of all the compilation and type units. This is used to locate the target compilation unit of a particular reference. */ std::vector all_units; - /* The all_units vector contains both CUs and TUs. Provide views on the - vector that are limited to either the CU part or the TU part. 
*/ - gdb::array_view all_comp_units; - gdb::array_view all_type_units; - unsigned int num_comp_units = 0; unsigned int num_type_units = 0; @@ -676,7 +671,7 @@ public: dwarf2_per_cu *operator* () const { - return m_per_bfd->get_cu (m_index); + return m_per_bfd->get_unit (m_index); } bool operator== (const all_units_iterator &other) const @@ -1179,7 +1174,7 @@ extern void dw_expand_symtabs_matching_file_matcher extern const char *read_indirect_string_at_offset (dwarf2_per_objfile *per_objfile, LONGEST str_offset); -/* Initialize the views on all_units. */ +/* Finalize the all_units vector. */ extern void finalize_all_units (dwarf2_per_bfd *per_bfd); @@ -1228,14 +1223,12 @@ extern pc_bounds_kind dwarf2_get_pc_bounds (die_info *die, dwarf2_cu *cu, addrmap_mutable *map, void *datum); -/* Locate the .debug_info compilation unit from CU's objfile which contains - the DIE at OFFSET. Raises an error on failure. */ +/* Locate the unit in PER_BFD which contains the DIE at offset SECT_OFF in + SECTION. Raises an error on failure. */ -extern dwarf2_per_cu *dwarf2_find_containing_comp_unit (sect_offset sect_off, - unsigned int - offset_in_dwz, - dwarf2_per_bfd - *per_bfd); +extern dwarf2_per_cu *dwarf2_find_containing_unit + (const dwarf2_section_info &section, sect_offset sect_off, + dwarf2_per_bfd *per_bfd); /* Decode simple location descriptions. @@ -1278,4 +1271,9 @@ extern int dwarf2_ranges_read (unsigned offset, unrelocated_addr *low_return, extern file_and_directory &find_file_and_directory (die_info *die, dwarf2_cu *cu); +/* Return the section that ATTR, an attribute with ref form, references. */ + +extern dwarf2_section_info &get_section_for_ref (const attribute &attr, + dwarf2_per_cu *per_cu); + #endif /* GDB_DWARF2_READ_H */ diff --git a/gdb/dwarf2/section.h b/gdb/dwarf2/section.h index 09bddd4fd47..9b0a408e9f7 100644 --- a/gdb/dwarf2/section.h +++ b/gdb/dwarf2/section.h @@ -43,6 +43,8 @@ the real section this "virtual" section is contained in, and BUFFER,SIZE describe the virtual section.
*/ +#include "dwarf2/types.h" + struct dwarf2_section_info { /* Return the name of this section. */ @@ -125,4 +127,12 @@ struct dwarf2_section_info bool is_virtual; }; +/* A pair-like structure to represent an offset into a section. */ + +struct section_and_offset +{ + const dwarf2_section_info *section; + sect_offset offset; +}; + #endif /* GDB_DWARF2_SECTION_H */ diff --git a/gdb/testsuite/gdb.base/maint.exp b/gdb/testsuite/gdb.base/maint.exp index c77612d845b..708ae3a12ce 100644 --- a/gdb/testsuite/gdb.base/maint.exp +++ b/gdb/testsuite/gdb.base/maint.exp @@ -205,8 +205,8 @@ set re \ "( Number of \"partial\" symbols read: $decimal" \ ")?( Number of psym tables \\(not yet expanded\\): $decimal" \ ")?( Total memory used for psymbol cache: $decimal" \ - ")?( Number of read CUs: $decimal" \ - " Number of unread CUs: $decimal" \ + ")?( Number of read units: $decimal" \ + " Number of unread units: $decimal" \ ")? Total memory used for objfile obstack: $decimal" \ " Total memory used for BFD obstack: $decimal" \ " Total memory used for string cache: $decimal" \ diff --git a/gdb/testsuite/gdb.dwarf2/dw-form-ref-addr-with-type-units.exp b/gdb/testsuite/gdb.dwarf2/dw-form-ref-addr-with-type-units.exp new file mode 100644 index 00000000000..ba5c3fb0f57 --- /dev/null +++ b/gdb/testsuite/gdb.dwarf2/dw-form-ref-addr-with-type-units.exp @@ -0,0 +1,108 @@ +# Copyright 2025 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# This is a reproducer for PR 29518: +# +# https://sourceware.org/bugzilla/show_bug.cgi?id=29518 +# +# The root cause for the problem was that function +# dwarf2_find_containing_comp_unit was searching the whole "all_units" vector, +# containing both compile units and type units, causing it to sometimes +# erroneously return a type unit. It should have been restricted to searching +# compile units. +# +# To get dwarf2_find_containing_comp_unit to be called and reproduce the +# original bug, we need a value with form DW_FORM_ref_addr pointing to a +# different compile unit. This is produced by `%$int_type` below. +# +# We need enough type units in the "all_units" vector to trick the binary +# search in dwarf2_find_containing_comp_unit into returning a type unit instead +# of a compile unit. +# +# Finally, since the type units are sorted after the compile units in the +# "all_units" vector, it helps to have the target compile unit have a section +# offset larger than the type unit. This makes the binary search go towards +# the end of the vector, where type units are located. + +load_lib dwarf.exp +require dwarf2_support +standard_testfile main.c .S + +set asm_file [standard_output_file $srcfile2] + +Dwarf::assemble $asm_file { + global srcfile + declare_labels int_type + + # The source CU. + cu {version 5} { + compile_unit { + } { + subprogram { + {MACRO_AT_func {main}} + {type %$int_type} + } + } + } + + # Create many TUs. + for {set i 1} {$i < 20} {incr i} { + tu {version 5} $i the_type_i { + type_unit {} { + declare_labels dummy_int_type + + the_type_i: structure_type { + {name s} + {byte_size 4 sdata} + } { + member { + {name i} + {type :$dummy_int_type} + } + } + + dummy_int_type: base_type { + {name int} + {encoding @DW_ATE_signed} + {byte_size 4 sdata} + } + } + } + } + + # The target CU.
+ cu {version 5} { + compile_unit { + } { + int_type: DW_TAG_base_type { + {DW_AT_byte_size 4 DW_FORM_sdata} + {DW_AT_encoding @DW_ATE_signed} + {DW_AT_name int} + } + } + } +} + +if { [prepare_for_testing "failed to prepare" ${testfile} \ + [list $srcfile $asm_file] {nodebug}] } { + return -1 +} + +# Without the corresponding fix, we get: +# +# (gdb) p main +# invalid dwarf2 offset 0x398 +# +gdb_test "p main" " = {int \\(void\\)} $hex
" diff --git a/gdb/testsuite/gdb.dwarf2/varval.exp b/gdb/testsuite/gdb.dwarf2/varval.exp index 5c4af784d5f..a8b29f43ff5 100644 --- a/gdb/testsuite/gdb.dwarf2/varval.exp +++ b/gdb/testsuite/gdb.dwarf2/varval.exp @@ -348,6 +348,6 @@ if ![runto_main] { } gdb_test "print badval" "value has been optimized out" gdb_test "print bad_die_val1" \ - "invalid dwarf2 offset 0xabcdef11" + {DWARF Error: could not find compile or type unit containing offset 0xabcdef11 \[in module .*/varval\]} gdb_test "print bad_die_val2" \ "Bad DW_OP_GNU_variable_value DIE\\." -- 2.39.5