From 6474c699a5257f6edfce1132b341f84c0f867f82 Mon Sep 17 00:00:00 2001 From: Simon Marchi Date: Wed, 9 Jul 2025 11:35:13 -0400 Subject: [PATCH] gdb/dwarf: sort dwarf2_per_bfd::all_units by (section, offset) MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch started as a fix for PR 29518 ("GDB doesn't handle DW_FORM_ref_addr DIE references correctly with .debug_types sections") [1], but the scope has expanded a bit to fix the problem more generally, after I spotted a few issues related to the order of all_units. The first version of this patch is here [2]. PR 29518 shows that dwarf2_find_containing_comp_unit can erroneously find a type unit. The obvious problem is that the dwarf2_find_containing_comp_unit function searches the whole all_units vector (containing both comp and type units), when really it should just search the compilation units. A simple solution would be to make it search the all_comp_units view (which is removed in a patch earlier in this series). I then realized that in DWARF 5, since type units are in .debug_info (versus .debug_types in DWARF 4), type units can be interleaved with comp units in the all_units vector. That would make the all_comp_units and all_type_units views erroneous, and dwarf2_find_containing_comp_unit could still return something wrong. In v1, I added a sort in finalize_all_units to make sure all_units is in the order that dwarf2_find_containing_comp_unit expects: - comp units from the main file - type units from the main file - comp units from the dwz file - type units from the dwz file (not actually supported, see PR 30838) Another problem I spotted is that the .gdb_index reader creates units in this order: - comp units from .gdb_index from main file - comp units from .gdb_index from dwz file - type units from .gdb_index from main file This isn't the same order as above, so it would need the same sort step. 
Finally, I'm not exactly sure if and when it happens, but it looks like lookup_signatured_type can be called at a later time (after the initial scan and creation of dwarf2_per_cu objects), when expanding a symtab. And that could lead to the creation of a new type unit (see function add_type_unit), which would place the new type unit at the end of the all_units vector, possibly screwing up the previous order. To handle all this in a nice and generic way, Tom Tromey proposed to change the all_units order, so that units are sorted by section, then section offset. This is what this patch implements. The sorting is done in finalize_all_units. This works well, because when looking up a unit by section offset, the caller knows which section the unit is in. Passing down a (section, section offset) tuple makes it clear and unambiguous what unit the caller is referring to. It should help eliminate some bugs where the callee used the section offset in the wrong section. Passing down the section along with the section offset replaces the "is_dwz" flag passed to dwarf2_find_containing_comp_unit and a bunch of other functions in a more general way. dwarf2_find_containing_comp_unit can now legitimately find and return type units even though it shouldn't be needed (type units are typically referred to by signature). But I don't think there is harm for this function to be more generic than needed. I therefore renamed it to dwarf2_find_containing_unit. The sort criterion for "section" can be anything, as long as we use the same for sorting and searching. In this patch, I use the pointer to dwarf2_section_info, because it's easy. The downside is that the actual order depends on what the memory allocator decided to return, so could change from run to run, or machine to machine. Later, I might change it so that sections are ordered based on their properties, making the order stable across the board. 
This logic is encapsulated in the all_units_less_than function, so it's easy to change. The .debug_names reader can no longer rely on the order of the all_units vector for its checks, since all_units won't be in the same order as found in the .debug_names lists. In fact, even before, it wasn't: this check assumed that .debug_info had all CUs before TUs, and that the index listed them in the exact same order. When I build a file with gcc and "-gdwarf-5 -fdebug-types-section", type units appear first in .debug_info. This caused GDB to reject a .debug_names index that it had produced: $ GDB="./gdb -nx -q --data-directory=data-directory" /home/smarchi/src/binutils-gdb/gdb/contrib/gdb-add-index.sh -dwarf-5 hello.so $ ./gdb -nx -q --data-directory=data-directory hello.so Reading symbols from hello.so... ⚠️ warning: Section .debug_names has incorrect entry in CU table, ignoring .debug_names. To make it work, add a new dwarf2_find_unit function that allows looking up a unit by start address (unlike dwarf2_find_containing_unit, which can find by any containing address), and make the .debug_names reader use it. It might make the load time of .debug_names a bit longer (the build and check step is now going to be O(n*log(n)) instead of O(n) where n is the number of units, or something like that), but I think it's important to be correct here. This patch adds a test (gdb.dwarf2/dw-form-ref-addr-with-type-units.exp), which tries to replicate the problem as shown by PR 29518. gdb.dwarf2/varval.exp needs a small change, because an error message changes (for the better, I think). gdb.dwarf2/debug-names-non-ascending-cu.exp now fails, because GDB no longer rejects a .debug_names index which lists CUs in a different order than .debug_info. Given the change I did to the .debug_names reader, explained above, I don't think this is a problem anymore (GDB can accept an index like that). I also don't think that DWARF 5 mandates that CUs are in ascending order. Delete this test. 
[1] https://sourceware.org/bugzilla/show_bug.cgi?id=29518 [2] https://inbox.sourceware.org/gdb-patches/20250218193443.118139-1-simon.marchi@efficios.com/ Change-Id: I45f982d824d3842ac1eb73f8cce721a0a24b5faa Approved-By: Tom Tromey --- gdb/dwarf2/cooked-indexer.c | 42 +- gdb/dwarf2/cooked-indexer.h | 4 +- gdb/dwarf2/cu.h | 5 + gdb/dwarf2/read-debug-names.c | 66 +--- gdb/dwarf2/read.c | 365 +++++++++++------- gdb/dwarf2/read.h | 29 +- gdb/dwarf2/section.h | 10 + .../debug-names-non-ascending-cu.exp | 81 ---- .../dw-form-ref-addr-with-type-units.exp | 109 ++++++ gdb/testsuite/gdb.dwarf2/varval.exp | 2 +- 10 files changed, 414 insertions(+), 299 deletions(-) delete mode 100644 gdb/testsuite/gdb.dwarf2/debug-names-non-ascending-cu.exp create mode 100644 gdb/testsuite/gdb.dwarf2/dw-form-ref-addr-with-type-units.exp diff --git a/gdb/dwarf2/cooked-indexer.c b/gdb/dwarf2/cooked-indexer.c index c093984bae0..d4557c45c29 100644 --- a/gdb/dwarf2/cooked-indexer.c +++ b/gdb/dwarf2/cooked-indexer.c @@ -83,19 +83,17 @@ tag_can_have_linkage_name (enum dwarf_tag tag) cutu_reader * cooked_indexer::ensure_cu_exists (cutu_reader *reader, - sect_offset sect_off, bool is_dwz, + const section_and_offset §_off, bool for_scanning) { /* Lookups for type unit references are always in the CU, and cross-CU references will crash. */ - if (reader->cu ()->per_cu->is_dwz == is_dwz - && reader->cu ()->header.offset_in_unit_p (sect_off)) + if (reader->section () == sect_off.section + && reader->cu ()->header.offset_in_unit_p (sect_off.offset)) return reader; dwarf2_per_objfile *per_objfile = reader->cu ()->per_objfile; - dwarf2_per_cu *per_cu - = dwarf2_find_containing_comp_unit (sect_off, is_dwz, - per_objfile->per_bfd); + dwarf2_per_cu *per_cu = dwarf2_find_containing_unit (sect_off, per_objfile); /* When scanning, we only want to visit a given CU a single time. Doing this check here avoids self-imports as well. 
*/ @@ -148,10 +146,8 @@ cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, bool *is_enum_class, bool for_specification) { - bool origin_is_dwz = false; bool is_declaration = false; - sect_offset origin_offset {}; - + std::optional origin; std::optional low_pc; std::optional high_pc; bool high_pc_relative = false; @@ -221,8 +217,8 @@ cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, case DW_AT_specification: case DW_AT_abstract_origin: case DW_AT_extension: - origin_offset = attr.get_ref_die_offset (); - origin_is_dwz = attr.form_is_alt (); + origin = { &get_section_for_ref (attr, reader->cu ()), + attr.get_ref_die_offset () }; break; case DW_AT_external: @@ -311,19 +307,19 @@ cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, || (*linkage_name == nullptr && tag_can_have_linkage_name (abbrev->tag)) || (*parent_entry == nullptr && m_language != language_c)) - && origin_offset != sect_offset (0)) + && origin.has_value ()) { cutu_reader *new_reader - = ensure_cu_exists (reader, origin_offset, origin_is_dwz, false); + = ensure_cu_exists (reader, *origin, false); if (new_reader == nullptr) error (_(DWARF_ERROR_PREFIX "cannot follow reference to DIE at %s" " [in module %s]"), - sect_offset_str (origin_offset), + sect_offset_str (origin->offset), bfd_get_filename (reader->abfd ())); const gdb_byte *new_info_ptr - = (new_reader->buffer () + to_underlying (origin_offset)); + = (new_reader->buffer () + to_underlying (origin->offset)); if (*parent_entry == nullptr) { @@ -347,7 +343,7 @@ cooked_indexer::scan_attributes (dwarf2_per_cu *scanning_per_cu, if (new_abbrev == nullptr) error (_(DWARF_ERROR_PREFIX "Unexpected null DIE at offset %s [in module %s]"), - sect_offset_str (origin_offset), + sect_offset_str (origin->offset), bfd_get_filename (new_reader->abfd ())); new_info_ptr += bytes_read; @@ -411,8 +407,7 @@ cooked_indexer::index_imported_unit (cutu_reader *reader, const gdb_byte *info_ptr, const abbrev_info *abbrev) { - 
sect_offset sect_off {}; - bool is_dwz = false; + std::optional target; for (int i = 0; i < abbrev->num_attrs; ++i) { @@ -421,19 +416,16 @@ cooked_indexer::index_imported_unit (cutu_reader *reader, info_ptr = reader->read_attribute (&attr, &abbrev->attrs[i], info_ptr); if (attr.name == DW_AT_import) - { - sect_off = attr.get_ref_die_offset (); - is_dwz = (attr.form_is_alt () - || reader->cu ()->per_cu->is_dwz); - } + target = { &get_section_for_ref (attr, reader->cu ()), + attr.get_ref_die_offset () }; } /* Did not find DW_AT_import. */ - if (sect_off == sect_offset (0)) + if (!target.has_value ()) return info_ptr; cutu_reader *new_reader - = ensure_cu_exists (reader, sect_off, is_dwz, true); + = ensure_cu_exists (reader, *target, true); if (new_reader != nullptr) { index_dies (new_reader, new_reader->info_ptr (), nullptr, false); diff --git a/gdb/dwarf2/cooked-indexer.h b/gdb/dwarf2/cooked-indexer.h index 904c55f955e..83cbf7f6169 100644 --- a/gdb/dwarf2/cooked-indexer.h +++ b/gdb/dwarf2/cooked-indexer.h @@ -30,6 +30,7 @@ struct cooked_index_worker_result; struct cutu_reader; struct dwarf2_per_cu; struct dwarf2_per_objfile; +struct section_and_offset; /* An instance of this is created to index a CU. */ @@ -55,8 +56,7 @@ private: the DIEs in the CU; when false, this use is assumed to be to look up just a single DIE. */ cutu_reader *ensure_cu_exists (cutu_reader *reader, - sect_offset sect_off, - bool is_dwz, + const section_and_offset §_off, bool for_scanning); /* Index DIEs in the READER starting at INFO_PTR. PARENT is diff --git a/gdb/dwarf2/cu.h b/gdb/dwarf2/cu.h index 69f396c774a..68010a060cc 100644 --- a/gdb/dwarf2/cu.h +++ b/gdb/dwarf2/cu.h @@ -54,6 +54,11 @@ struct dwarf2_cu DISABLE_COPY_AND_ASSIGN (dwarf2_cu); + /* The section the DIEs were effectively read from. This could be + .debug_info, .debug_types, or with split DWARF, their .dwo + variants. 
*/ + const dwarf2_section_info §ion () const; + /* TU version of handle_DW_AT_stmt_list for read_type_unit_scope. Create the set of symtabs used by this TU, or if this TU is sharing symtabs with another TU and the symtabs have already been created diff --git a/gdb/dwarf2/read-debug-names.c b/gdb/dwarf2/read-debug-names.c index e1a6a56646c..97677c04f64 100644 --- a/gdb/dwarf2/read-debug-names.c +++ b/gdb/dwarf2/read-debug-names.c @@ -462,36 +462,32 @@ build_and_check_tu_list_from_debug_names (dwarf2_per_objfile *per_objfile, { struct objfile *objfile = per_objfile->objfile; dwarf2_per_bfd *per_bfd = per_objfile->per_bfd; - int nr_cus = per_bfd->num_comp_units; - int nr_cus_tus = per_bfd->all_units.size (); section->read (objfile); - uint32_t j = nr_cus; for (uint32_t i = 0; i < map.tu_count; ++i) { + /* Read one entry from the TU list. */ sect_offset sect_off = (sect_offset) (extract_unsigned_integer (map.tu_table_reordered + i * map.offset_size, map.offset_size, map.dwarf5_byte_order)); - bool found = false; - for (; j < nr_cus_tus; j++) - if (per_bfd->get_unit (j)->sect_off == sect_off) - { - found = true; - break; - } - if (!found) + /* Find the matching dwarf2_per_cu. 
*/ + dwarf2_per_cu *per_cu = dwarf2_find_unit ({ section, sect_off }, + per_bfd); + + if (per_cu == nullptr || !per_cu->is_debug_types) { warning (_("Section .debug_names has incorrect entry in TU table," " ignoring .debug_names.")); return false; } - map.type_units.emplace_back (per_bfd->get_unit (j)); + map.type_units.emplace_back (per_cu); } + return true; } @@ -715,40 +711,10 @@ read_debug_names_from_section (dwarf2_per_objfile *per_objfile, static bool build_and_check_cu_list_from_debug_names (dwarf2_per_bfd *per_bfd, mapped_debug_names_reader &map, - dwarf2_section_info §ion, - bool is_dwz) + dwarf2_section_info §ion) { int nr_cus = per_bfd->num_comp_units; - if (!map.augmentation_is_gdb) - { - uint32_t j = 0; - for (uint32_t i = 0; i < map.cu_count; ++i) - { - sect_offset sect_off - = (sect_offset) (extract_unsigned_integer - (map.cu_table_reordered + i * map.offset_size, - map.offset_size, - map.dwarf5_byte_order)); - bool found = false; - for (; j < nr_cus; j++) - if (per_bfd->get_unit (j)->sect_off == sect_off) - { - found = true; - break; - } - if (!found) - { - warning (_("Section .debug_names has incorrect entry in CU table," - " ignoring .debug_names.")); - return false; - } - - map.comp_units.emplace_back (per_bfd->get_unit (j)); - } - return true; - } - if (map.cu_count != nr_cus) { warning (_("Section .debug_names has incorrect number of CUs in CU table," @@ -763,14 +729,18 @@ build_and_check_cu_list_from_debug_names (dwarf2_per_bfd *per_bfd, (map.cu_table_reordered + i * map.offset_size, map.offset_size, map.dwarf5_byte_order)); - if (sect_off != per_bfd->get_unit (i)->sect_off) + + /* Find the matching dwarf2_per_cu. 
*/ + dwarf2_per_cu *per_cu = dwarf2_find_unit ({ §ion, sect_off }, per_bfd); + + if (per_cu == nullptr || per_cu->is_debug_types) { warning (_("Section .debug_names has incorrect entry in CU table," " ignoring .debug_names.")); return false; } - map.comp_units.emplace_back (per_bfd->get_unit (i)); + map.comp_units.emplace_back (per_cu); } return true; @@ -788,16 +758,14 @@ build_and_check_cu_lists_from_debug_names (dwarf2_per_bfd *per_bfd, mapped_debug_names_reader &dwz_map) { if (!build_and_check_cu_list_from_debug_names (per_bfd, map, - per_bfd->infos[0], - false /* is_dwz */)) + per_bfd->infos[0])) return false; if (dwz_map.cu_count == 0) return true; dwz_file *dwz = per_bfd->get_dwz_file (); - return build_and_check_cu_list_from_debug_names (per_bfd, dwz_map, dwz->info, - true /* is_dwz */); + return build_and_check_cu_list_from_debug_names (per_bfd, dwz_map, dwz->info); } /* This does all the work for dwarf2_read_debug_names, but putting it diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c index 430246c8007..ec8d376a7f2 100644 --- a/gdb/dwarf2/read.c +++ b/gdb/dwarf2/read.c @@ -2371,6 +2371,24 @@ get_abbrev_section_for_cu (dwarf2_per_cu *this_cu) return abbrev; } +/* "less than" function used to both sort and bisect units in the + `dwarf2_per_bfd::all_units` vector. Return true if the LHS CU comes before + (is "less" than) the section and offset in RHS. + + For simplicity, sort sections by their pointer. This is not ideal, because + it can cause the behavior to change across runs, making some bugs harder to + investigate. An improvement would be for sections to be sorted by their + properties. */ + +static bool +all_units_less_than (const dwarf2_per_cu &lhs, const section_and_offset &rhs) +{ + if (lhs.section != rhs.section) + return lhs.section < rhs.section; + + return lhs.sect_off < rhs.offset; +} + /* Fetch the abbreviation table offset from a comp or type unit header. 
*/ static sect_offset @@ -2414,7 +2432,17 @@ add_type_unit (dwarf2_per_bfd *per_bfd, dwarf2_section_info *section, false /* is_dwz */, sig); signatured_type *sig_type = sig_type_holder.get (); - per_bfd->all_units.emplace_back (sig_type_holder.release ()); + /* Preserve the ordering of per_bfd->all_units. */ + auto insert_it + = std::lower_bound (per_bfd->all_units.begin (), per_bfd->all_units.end (), + sig_type, + [] (const dwarf2_per_cu_up &lhs, + const signatured_type *rhs) { + return all_units_less_than (*lhs, { rhs->section, + rhs->sect_off }); + }); + + per_bfd->all_units.emplace (insert_it, sig_type_holder.release ()); auto emplace_ret = per_bfd->signatured_types.emplace (sig_type); /* Assert that an insertion took place - that there wasn't a type unit with @@ -3646,11 +3674,18 @@ read_comp_units_from_section (dwarf2_per_objfile *per_objfile, } } -/* Initialize the views on all_units. */ +/* See read.h. */ void finalize_all_units (dwarf2_per_bfd *per_bfd) { + /* Ensure that the all_units vector is in the expected order for + dwarf2_find_containing_unit to be able to perform a binary search. */ + std::sort (per_bfd->all_units.begin (), per_bfd->all_units.end (), + [] (const dwarf2_per_cu_up &a, const dwarf2_per_cu_up &b) + { + return all_units_less_than (*a, { b->section, b->sect_off }); + }); } /* See read.h. */ @@ -4906,6 +4941,21 @@ process_full_type_unit (dwarf2_cu *cu) cu->reset_builder (); } +/* See read.h. */ + +const dwarf2_section_info & +get_section_for_ref (const attribute &attr, dwarf2_cu *cu) +{ + gdb_assert (attr.form_is_ref ()); + + if (attr.form_is_alt ()) + return cu->per_cu->per_bfd->get_dwz_file (true)->info; + + /* If the source is already in the supplementary (dwz) file, then CU->SECTION + already represents the section in the supplementary file. */ + return cu->section (); +} + /* Process an imported unit DIE. 
*/ static void @@ -4925,12 +4975,11 @@ process_imported_unit_die (struct die_info *die, struct dwarf2_cu *cu) attr = dwarf2_attr (die, DW_AT_import, cu); if (attr != NULL) { + const dwarf2_section_info §ion = get_section_for_ref (*attr, cu); sect_offset sect_off = attr->get_ref_die_offset (); - bool is_dwz = attr->form_is_alt () || cu->per_cu->is_dwz; dwarf2_per_objfile *per_objfile = cu->per_objfile; dwarf2_per_cu *per_cu - = dwarf2_find_containing_comp_unit (sect_off, is_dwz, - per_objfile->per_bfd); + = dwarf2_find_containing_unit ({ §ion, sect_off }, per_objfile); /* We're importing a C++ compilation unit with tag DW_TAG_compile_unit into another compilation unit, at root level. Regard this as a hint, @@ -6051,6 +6100,20 @@ read_file_scope (struct die_info *die, struct dwarf2_cu *cu) } } +/* See cu.h. + + This function is defined in this file (instead of cu.c) because it needs + to see the definition of struct dwo_unit. */ + +const dwarf2_section_info & +dwarf2_cu::section () const +{ + if (this->dwo_unit != nullptr) + return *this->dwo_unit->section; + else + return *this->per_cu->section; +} + void dwarf2_cu::setup_type_unit_groups (struct die_info *die) { @@ -17469,9 +17532,10 @@ lookup_die_type (struct die_info *die, const struct attribute *attr, if (attr->form_is_alt ()) { + const auto §ion = get_section_for_ref (*attr, cu); sect_offset sect_off = attr->get_ref_die_offset (); dwarf2_per_cu *per_cu - = dwarf2_find_containing_comp_unit (sect_off, 1, per_objfile->per_bfd); + = dwarf2_find_containing_unit ({ §ion, sect_off }, per_objfile); this_type = get_die_type_at_offset (sect_off, per_cu, per_objfile); } @@ -18172,14 +18236,13 @@ follow_die_ref_or_sig (struct die_info *src_die, const struct attribute *attr, return die; } -/* Follow reference OFFSET. - On entry *REF_CU is the CU of the source die referencing OFFSET. +/* Follow reference TARGET. + On entry *REF_CU is the CU of the source die referencing TARGET. On exit *REF_CU is the CU of the result. 
- Returns NULL if OFFSET is invalid. */ + Returns nullptr if TARGET is invalid. */ -static struct die_info * -follow_die_offset (sect_offset sect_off, int offset_in_dwz, - struct dwarf2_cu **ref_cu) +static die_info * +follow_die_offset (const section_and_offset &target, dwarf2_cu **ref_cu) { dwarf2_cu *source_cu = *ref_cu; dwarf2_cu *target_cu = source_cu; @@ -18191,23 +18254,23 @@ follow_die_offset (sect_offset sect_off, int offset_in_dwz, dwarf_read_debug_printf_v ("source CU offset: %s, target offset: %s, " "source CU contains target offset: %d", sect_offset_str (source_cu->per_cu->sect_off), - sect_offset_str (sect_off), - source_cu->header.offset_in_unit_p (sect_off)); + sect_offset_str (target.offset), + (target.section == &source_cu->section () + && source_cu->header.offset_in_unit_p (target.offset))); if (source_cu->per_cu->is_debug_types) { /* .debug_types CUs cannot reference anything outside their CU. If they need to, they have to reference a signatured type via DW_FORM_ref_sig8. 
*/ - if (!source_cu->header.offset_in_unit_p (sect_off)) + if (!source_cu->header.offset_in_unit_p (target.offset)) return NULL; } - else if (offset_in_dwz != source_cu->per_cu->is_dwz - || !source_cu->header.offset_in_unit_p (sect_off)) + else if (target.section != &source_cu->section () + || !source_cu->header.offset_in_unit_p (target.offset)) { dwarf2_per_cu *target_per_cu - = dwarf2_find_containing_comp_unit (sect_off, offset_in_dwz, - per_objfile->per_bfd); + = dwarf2_find_containing_unit (target, per_objfile); dwarf_read_debug_printf_v ("target CU offset: %s, " "target CU DIEs loaded: %d", @@ -18229,13 +18292,13 @@ follow_die_offset (sect_offset sect_off, int offset_in_dwz, error (_(DWARF_ERROR_PREFIX "cannot follow reference to DIE at %s" " [in module %s]"), - sect_offset_str (sect_off), + sect_offset_str (target.offset), objfile_name (per_objfile->objfile)); } *ref_cu = target_cu; - return target_cu->find_die (sect_off); + return target_cu->find_die (target.offset); } /* Follow reference attribute ATTR of SRC_DIE. 
@@ -18247,8 +18310,7 @@ follow_die_ref (struct die_info *src_die, const struct attribute *attr, struct dwarf2_cu **ref_cu) { sect_offset sect_off = attr->get_ref_die_offset (); - struct dwarf2_cu *cu = *ref_cu; - struct die_info *die; + struct dwarf2_cu *src_cu = *ref_cu; if (!attr->form_is_alt () && src_die->sect_off == sect_off) { @@ -18256,14 +18318,13 @@ follow_die_ref (struct die_info *src_die, const struct attribute *attr, return src_die; } - die = follow_die_offset (sect_off, - attr->form_is_alt () || cu->per_cu->is_dwz, - ref_cu); - if (!die) + const dwarf2_section_info §ion = get_section_for_ref (*attr, src_cu); + die_info *die = follow_die_offset ({ §ion, sect_off }, ref_cu); + if (die == nullptr) error (_(DWARF_ERROR_PREFIX "Cannot find DIE at %s referenced from DIE at %s [in module %s]"), sect_offset_str (sect_off), sect_offset_str (src_die->sect_off), - objfile_name (cu->per_objfile->objfile)); + objfile_name (src_cu->per_objfile->objfile)); return die; } @@ -18276,7 +18337,6 @@ dwarf2_fetch_die_loc_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, gdb::function_view get_frame_pc, bool resolve_abstract_p) { - struct die_info *die; struct attribute *attr; struct dwarf2_locexpr_baton retval; struct objfile *objfile = per_objfile->objfile; @@ -18294,8 +18354,8 @@ dwarf2_fetch_die_loc_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, sect_offset_str (sect_off), objfile_name (objfile)); } - die = follow_die_offset (sect_off, per_cu->is_dwz, &cu); - if (!die) + die_info *die = follow_die_offset ({ &cu->section (), sect_off }, &cu); + if (die == nullptr) error (_(DWARF_ERROR_PREFIX "Cannot find DIE at %s referenced [in module %s]"), sect_offset_str (sect_off), objfile_name (objfile)); @@ -18311,8 +18371,8 @@ dwarf2_fetch_die_loc_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, : per_objfile->per_bfd->abstract_to_concrete[die->sect_off]) { struct dwarf2_cu *cand_cu = cu; - struct die_info *cand - = follow_die_offset (cand_off, per_cu->is_dwz, 
&cand_cu); + die_info *cand + = follow_die_offset ({ &cu->section (), cand_off }, &cand_cu); if (!cand || !cand->parent || cand->parent->tag != DW_TAG_subprogram) @@ -18414,7 +18474,6 @@ dwarf2_fetch_constant_bytes (sect_offset sect_off, obstack *obstack, LONGEST *len) { - struct die_info *die; struct attribute *attr; const gdb_byte *result = NULL; struct type *type; @@ -18435,7 +18494,7 @@ dwarf2_fetch_constant_bytes (sect_offset sect_off, sect_offset_str (sect_off), objfile_name (objfile)); } - die = follow_die_offset (sect_off, per_cu->is_dwz, &cu); + die_info *die = follow_die_offset ({ &cu->section (), sect_off }, &cu); if (!die) error (_(DWARF_ERROR_PREFIX "Cannot find DIE at %s referenced [in module %s]"), @@ -18525,8 +18584,6 @@ dwarf2_fetch_die_type_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, dwarf2_per_objfile *per_objfile, const char **var_name) { - struct die_info *die; - dwarf2_cu *cu = per_objfile->get_cu (per_cu); if (cu == nullptr) cu = load_cu (per_cu, per_objfile, false); @@ -18534,7 +18591,7 @@ dwarf2_fetch_die_type_sect_off (sect_offset sect_off, dwarf2_per_cu *per_cu, if (cu == nullptr) return nullptr; - die = follow_die_offset (sect_off, per_cu->is_dwz, &cu); + die_info *die = follow_die_offset ({ &cu->section (), sect_off }, &cu); if (!die) return NULL; @@ -19284,69 +19341,115 @@ dwarf2_per_cu::ensure_lang (dwarf2_per_objfile *per_objfile) true, language_minimal, nullptr); } -/* A helper function for dwarf2_find_containing_comp_unit that returns - the index of the result, and that searches a vector. It will - return a result even if the offset in question does not actually - occur in any CU. This is separate so that it can be unit - tested. */ +/* Return the unit from ALL_UNITS that potentially contains TARGET. 
-static int -dwarf2_find_containing_comp_unit - (sect_offset sect_off, - unsigned int offset_in_dwz, - const std::vector &all_units) + Since the unit lengths may not be known yet, this function doesn't check that + TARGET.OFFSET actually falls within the range of the returned unit. The + caller is responsible for this. + + If no units possibly match TARGET, return nullptr. */ + +static dwarf2_per_cu * +dwarf2_find_containing_unit (const section_and_offset &target, + const std::vector &all_units) { - int low, high; + auto it = std::lower_bound (all_units.begin (), all_units.end (), target, + [] (const dwarf2_per_cu_up &per_cu, + const section_and_offset &key) + { + return all_units_less_than (*per_cu, key); + }); - low = 0; - high = all_units.size () - 1; - while (high > low) + if (it == all_units.begin ()) { - int mid = low + (high - low) / 2; - dwarf2_per_cu *mid_cu = all_units[mid].get (); - - if (mid_cu->is_dwz > offset_in_dwz - || (mid_cu->is_dwz == offset_in_dwz - && mid_cu->sect_off + mid_cu->length () > sect_off)) - high = mid; + /* TARGET falls before the first unit of the first section, or is an + exact match with the first. */ + if ((*it)->section == target.section && (*it)->sect_off == target.offset) + return it->get (); else - low = mid + 1; + return nullptr; + } + + if (it != all_units.end () + && (*it)->section == target.section + && (*it)->sect_off == target.offset) + { + /* TARGET is an exact match with the start of *IT, so *IT is what we're + looking for. */ + return it->get (); } - gdb_assert (low == high); - return low; + + /* Otherwise, the match is the one just before, as long as it matches the + section we're looking for. */ + --it; + + if ((*it)->section == target.section) + return it->get (); + + return nullptr; } /* See read.h. 
*/ dwarf2_per_cu * -dwarf2_find_containing_comp_unit (sect_offset sect_off, - unsigned int offset_in_dwz, - dwarf2_per_bfd *per_bfd) +dwarf2_find_containing_unit (const section_and_offset &target, + dwarf2_per_objfile *per_objfile) { - int low = dwarf2_find_containing_comp_unit - (sect_off, offset_in_dwz, per_bfd->all_units); - dwarf2_per_cu *this_cu = per_bfd->all_units[low].get (); - - if (this_cu->is_dwz != offset_in_dwz || this_cu->sect_off > sect_off) + dwarf2_per_bfd *per_bfd = per_objfile->per_bfd; + dwarf2_per_cu *per_cu + = dwarf2_find_containing_unit (target, per_bfd->all_units); + auto error_out = [&target, per_bfd] () { - if (low == 0 || this_cu->is_dwz != offset_in_dwz) - error (_(DWARF_ERROR_PREFIX - "could not find CU containing offset %s [in module %s]"), - sect_offset_str (sect_off), - per_bfd->filename ()); + error (_(DWARF_ERROR_PREFIX + "could not find unit containing offset %s [in module %s]"), + sect_offset_str (target.offset), per_bfd->filename ()); + }; - gdb_assert (per_bfd->all_units[low-1]->sect_off - <= sect_off); - return per_bfd->all_units[low - 1].get (); - } - else - { - if (low == per_bfd->all_units.size () - 1 - && sect_off >= this_cu->sect_off + this_cu->length ()) - error (_("invalid dwarf2 offset %s"), sect_offset_str (sect_off)); - gdb_assert (sect_off < this_cu->sect_off + this_cu->length ()); - return this_cu; - } + if (per_cu == nullptr) + error_out (); + + gdb_assert (per_cu->section == target.section); + + /* Some producers of dwarf2_per_cu objects (thinking of the .gdb_index reader) + do not set the length ahead of time. The length is needed to check if + the target is truly within PER_CU's range, so compute it now. Constructing + the cutu_reader object has the side-effect of setting PER_CU's length. + Even though it shouldn't happen too often, it could be replaced with + something more lightweight that has the same effect. 
*/ + if (!per_cu->length_is_set ()) + cutu_reader (*per_cu, *per_objfile, nullptr, nullptr, false, + language_minimal); + + /* Now we can check if the target section offset is within PER_CU's range. */ + if (target.offset < per_cu->sect_off + || target.offset >= per_cu->sect_off + per_cu->length ()) + error_out (); + + return per_cu; +} + +/* See read.h. */ + +dwarf2_per_cu * +dwarf2_find_unit (const section_and_offset &start, dwarf2_per_bfd *per_bfd) +{ + auto it = std::lower_bound (per_bfd->all_units.begin (), + per_bfd->all_units.end (), start, + [] (const dwarf2_per_cu_up &per_cu, + const section_and_offset &key) + { + return all_units_less_than (*per_cu, key); + }); + + if (it == per_bfd->all_units.end ()) + return nullptr; + + dwarf2_per_cu *per_cu = it->get (); + + if (per_cu->section != start.section || per_cu->sect_off != start.offset) + return nullptr; + + return per_cu; } #if GDB_SELF_TEST @@ -19357,59 +19460,55 @@ namespace find_containing_comp_unit { static void run_test () { - char dummy_per_bfd; - char dummy_section; - - const auto create_dummy_per_cu = [&] (sect_offset sect_off, - unsigned int length, - bool is_dwz) - { - auto per_bfd = reinterpret_cast (&dummy_per_bfd); - auto section = reinterpret_cast (&dummy_section); + auto dummy_per_bfd = reinterpret_cast (0x3000); + auto &main_section = *reinterpret_cast (0x4000); + auto &dwz_section = *reinterpret_cast (0x5000); + std::vector units; - return dwarf2_per_cu_up (new dwarf2_per_cu (per_bfd, section, sect_off, - length, is_dwz)); + /* Create one dummy unit, append it to UNITS, return a non-owning + reference. */ + auto create_dummy_per_unit = [&] (dwarf2_section_info §ion, + unsigned int sect_off, bool is_dwz) + -> dwarf2_per_cu & + { + /* Omit the length, because dwarf2_find_containing_unit does not consider + it. */ + return *units.emplace_back (new dwarf2_per_cu (dummy_per_bfd, §ion, + sect_offset (sect_off), + 0, is_dwz)); }; - /* Units in the main file. 
*/ - dwarf2_per_cu_up one = create_dummy_per_cu (sect_offset (0), 5, false); - dwarf2_per_cu *one_ptr = one.get (); - dwarf2_per_cu_up two - = create_dummy_per_cu (sect_offset (one->length ()), 7, false); - dwarf2_per_cu *two_ptr = two.get (); - - /* Units in the supplementary (dwz) file. */ - dwarf2_per_cu_up three = create_dummy_per_cu (sect_offset (0), 5, true); - dwarf2_per_cu *three_ptr = three.get (); - dwarf2_per_cu_up four - = create_dummy_per_cu (sect_offset (three->length ()), 7, true); - dwarf2_per_cu *four_ptr = four.get (); - - std::vector units; - units.push_back (std::move (one)); - units.push_back (std::move (two)); - units.push_back (std::move (three)); - units.push_back (std::move (four)); - - int result; + /* Create 2 units in the main file and 2 units in the supplementary (dwz) + file. */ + auto &main1 = create_dummy_per_unit (main_section, 10, false); + auto &main2 = create_dummy_per_unit (main_section, 20, false); + auto &dwz1 = create_dummy_per_unit (dwz_section, 10, false); + auto &dwz2 = create_dummy_per_unit (dwz_section, 20, false); + + /* Check that looking up a unit at all offsets in the range [START,END[ in + section SECTION finds EXPECTED. 
*/ + auto check_range = [&units] (dwarf2_section_info §ion, unsigned int start, + unsigned int end, dwarf2_per_cu *expected) + { + for (unsigned int sect_off = start; sect_off < end; ++sect_off) + { + section_and_offset target { §ion, sect_offset (sect_off) }; + dwarf2_per_cu *result = dwarf2_find_containing_unit (target, units); - result = dwarf2_find_containing_comp_unit (sect_offset (0), 0, units); - SELF_CHECK (units[result].get () == one_ptr); - result = dwarf2_find_containing_comp_unit (sect_offset (3), 0, units); - SELF_CHECK (units[result].get () == one_ptr); - result = dwarf2_find_containing_comp_unit (sect_offset (5), 0, units); - SELF_CHECK (units[result].get () == two_ptr); + SELF_CHECK (result == expected); + } + }; - result = dwarf2_find_containing_comp_unit (sect_offset (0), 1, units); - SELF_CHECK (units[result].get () == three_ptr); - result = dwarf2_find_containing_comp_unit (sect_offset (3), 1, units); - SELF_CHECK (units[result].get () == three_ptr); - result = dwarf2_find_containing_comp_unit (sect_offset (5), 1, units); - SELF_CHECK (units[result].get () == four_ptr); -} + check_range (main_section, 0, 10, nullptr); + check_range (main_section, 10, 20, &main1); + check_range (main_section, 20, 30, &main2); + check_range (dwz_section, 0, 10, nullptr); + check_range (dwz_section, 10, 20, &dwz1); + check_range (dwz_section, 20, 30, &dwz2); } -} +} /* namespace find_containing_comp_unit */ +} /* namespace selftests */ #endif /* GDB_SELF_TEST */ diff --git a/gdb/dwarf2/read.h b/gdb/dwarf2/read.h index c718b9ff714..4e3f8d7d7bc 100644 --- a/gdb/dwarf2/read.h +++ b/gdb/dwarf2/read.h @@ -295,6 +295,10 @@ public: return m_length; } + /* Return true if the length of this CU has been set. 
*/ + bool length_is_set () const + { return m_length != 0; } + void set_length (unsigned int length, bool strict_p = true) { if (m_length == 0) @@ -1213,7 +1217,7 @@ extern void dw_expand_symtabs_matching_file_matcher extern const char *read_indirect_string_at_offset (dwarf2_per_objfile *per_objfile, LONGEST str_offset); -/* Initialize the views on all_units. */ +/* Finalize the all_units vector. */ extern void finalize_all_units (dwarf2_per_bfd *per_bfd); @@ -1258,14 +1262,17 @@ extern pc_bounds_kind dwarf2_get_pc_bounds (die_info *die, dwarf2_cu *cu, addrmap_mutable *map, void *datum); -/* Locate the .debug_info compilation unit from CU's objfile which contains - the DIE at OFFSET. Raises an error on failure. */ +/* Locate the unit in PER_OBJFILE which contains the DIE at TARGET. Raises an + error on failure. */ + +extern dwarf2_per_cu *dwarf2_find_containing_unit + (const section_and_offset &target, dwarf2_per_objfile *per_objfile); + +/* Locate the unit starting at START in PER_BFD. Return nullptr if not + found. */ -extern dwarf2_per_cu *dwarf2_find_containing_comp_unit (sect_offset sect_off, - unsigned int - offset_in_dwz, - dwarf2_per_bfd - *per_bfd); +extern dwarf2_per_cu *dwarf2_find_unit (const section_and_offset &start, + dwarf2_per_bfd *per_bfd); /* Decode simple location descriptions. @@ -1308,4 +1315,10 @@ extern int dwarf2_ranges_read (unsigned offset, unrelocated_addr *low_return, extern file_and_directory &find_file_and_directory (die_info *die, dwarf2_cu *cu); + +/* Return the section that ATTR, an attribute with ref form, references. */ + +extern const dwarf2_section_info &get_section_for_ref + (const attribute &attr, dwarf2_cu *cu); + #endif /* GDB_DWARF2_READ_H */ diff --git a/gdb/dwarf2/section.h b/gdb/dwarf2/section.h index fd6e34d3faa..fbdb025dda8 100644 --- a/gdb/dwarf2/section.h +++ b/gdb/dwarf2/section.h @@ -43,6 +43,8 @@ the real section this "virtual" section is contained in, and BUFFER,SIZE describe the virtual section. 
*/ +#include "dwarf2/types.h" + struct dwarf2_section_info { /* Return the name of this section. */ @@ -114,4 +116,12 @@ struct dwarf2_section_info using dwarf2_section_info_up = std::unique_ptr; +/* A pair-like structure to represent an offset into a section. */ + +struct section_and_offset +{ + const dwarf2_section_info *section; + sect_offset offset; +}; + #endif /* GDB_DWARF2_SECTION_H */ diff --git a/gdb/testsuite/gdb.dwarf2/debug-names-non-ascending-cu.exp b/gdb/testsuite/gdb.dwarf2/debug-names-non-ascending-cu.exp deleted file mode 100644 index d86b5c44a3b..00000000000 --- a/gdb/testsuite/gdb.dwarf2/debug-names-non-ascending-cu.exp +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright 2022-2025 Free Software Foundation, Inc. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -load_lib dwarf.exp - -# This test can only be run on targets which support DWARF-2 and use gas. -require dwarf2_support - -standard_testfile _start.c debug-names.S - -set func_info_vars \ - [get_func_info _start [list debug additional_flags=-nostartfiles]] - -# Create the DWARF. 
-set asm_file [standard_output_file $srcfile2] -Dwarf::assemble { - filename $asm_file - add_dummy_cus 0 -} { - global func_info_vars - foreach var $func_info_vars { - global $var - } - - cu { label cu_label } { - compile_unit {{language @DW_LANG_C}} { - subprogram { - {DW_AT_name _start} - {DW_AT_low_pc $_start_start DW_FORM_addr} - {DW_AT_high_pc $_start_end DW_FORM_addr} - } - } - } - - cu { label cu_label_2 } { - compile_unit {{language @DW_LANG_C}} { - base_type { - {name int} - {byte_size 4 sdata} - {encoding @DW_ATE_signed} - } - } - } - - debug_names {} { - cu cu_label_2 - cu cu_label - name _start subprogram cu_label 0xEDDB6232 - name int base_type cu_label 0xB888030 - } -} - -if [prepare_for_testing "failed to prepare" $testfile "${asm_file} ${srcfile}" \ - [list additional_flags=-nostartfiles]] { - return -1 -} - -# Check for warning. -set re \ - [list \ - "warning:" \ - "Section .debug_names has incorrect entry in CU table," \ - "ignoring .debug_names."] -set re [join $re] -gdb_assert {[regexp $re $gdb_file_cmd_msg]} "warning" - -# Verify that .debug_names section is ignored. -set index [have_index $binfile] -gdb_assert { [string equal $index ""] } ".debug_names not used" diff --git a/gdb/testsuite/gdb.dwarf2/dw-form-ref-addr-with-type-units.exp b/gdb/testsuite/gdb.dwarf2/dw-form-ref-addr-with-type-units.exp new file mode 100644 index 00000000000..6253629237a --- /dev/null +++ b/gdb/testsuite/gdb.dwarf2/dw-form-ref-addr-with-type-units.exp @@ -0,0 +1,109 @@ +# Copyright 2025 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +# This is a reproducer for PR 29518: +# +# https://sourceware.org/bugzilla/show_bug.cgi?id=29518 +# +# The root cause for the problem was that function +# dwarf2_find_containing_comp_unit was searching the whole "all_units" vector, +# containing both compile units and type units, causing it to sometimes +# erroneously return a type unit. It should have been restricted to searching +# compile units. +# +# To get dwarf2_find_containing_comp_unit to be called and reproduce the +# original bug, we need a value with form DW_FORM_ref_addr pointing to a +# different compile unit. This is produced by `%$int_type` below. + +load_lib dwarf.exp +require dwarf2_support +standard_testfile main.c .S + +set asm_file [standard_output_file $srcfile2] + +Dwarf::assemble $asm_file { + global srcfile + declare_labels int_type + + # The source CU. + cu {version 4} { + compile_unit { + } { + subprogram { + {MACRO_AT_func {main}} + {type %$int_type} + } + } + } + + # Create a bunch of empty / dummy CUs, to make the offset of int_type a bit + # higher. + for {set i 1} {$i < 10} {incr i} { + cu {version 4} { + compile_unit {} {} + } + } + + # The target CU. + cu {version 4} { + compile_unit { + } { + int_type: DW_TAG_base_type { + {DW_AT_byte_size 4 DW_FORM_sdata} + {DW_AT_encoding @DW_ATE_signed} + {DW_AT_name int} + } + } + } + + # Create many TUs. + # + # We need enough type units in the "all_units" vector in order to steer the + # binary search in dwarf2_find_containing_comp_unit towards the type units + # region of the array. 
+ for {set i 1} {$i < 20} {incr i} { + tu {version 4} $i the_type_i { + type_unit {} { + declare_labels dummy_int_type + + the_type_i: structure_type { + {name s} + {byte_size 4 sdata} + } { + member { + {name i} + {type :$dummy_int_type} + } + } + + dummy_int_type: base_type { + {name int} + {encoding @DW_ATE_signed} + {byte_size 4 sdata} + } + } + } + } +} + +if { [prepare_for_testing "failed to prepare" ${testfile} \ + [list $srcfile $asm_file] {nodebug}] } { + return -1 +} + +# Without the corresponding fix, we get an internal error: +# +# gdb/dwarf2/read.c:3940: internal-error: load_full_comp_unit: Assertion `! this_cu->is_debug_types' failed. +gdb_test "p main" " = {int \\(void\\)} $hex
" diff --git a/gdb/testsuite/gdb.dwarf2/varval.exp b/gdb/testsuite/gdb.dwarf2/varval.exp index 0693f439283..6846ecb2326 100644 --- a/gdb/testsuite/gdb.dwarf2/varval.exp +++ b/gdb/testsuite/gdb.dwarf2/varval.exp @@ -348,6 +348,6 @@ if ![runto_main] { } gdb_test "print badval" "value has been optimized out" gdb_test "print bad_die_val1" \ - "invalid dwarf2 offset 0xabcdef11" + {DWARF Error: could not find unit containing offset 0xabcdef11 \[in module .*/varval\]} gdb_test "print bad_die_val2" \ "Bad DW_OP_GNU_variable_value DIE\\." -- 2.47.2