From 329a53a6d590e2e90f590c89473990040a86c8e0 Mon Sep 17 00:00:00 2001 From: Tom Tromey Date: Sat, 22 Nov 2025 11:03:57 -0700 Subject: [PATCH] Some cleanups to "pretend language" handling I noticed that the "pretend language" handling in the DWARF reader doesn't work as intended; the problem code in dwarf2_per_cu::set_lang is: if (unit_type () == DW_UT_partial) return; The issue here is that this subverts the very purpose of having a "pretend" language. Some background: when Jakub wrote dwz, we also added support for this style of DWARF compression to gdb. Now, dwz only shares DIEs in a "top level" way -- i.e., at the time (and as far as I know, continuing to today), it would not emit a DW_TAG_imported_unit inside a namespace. So, when implementing this we also implemented an optimization, namely that gdb would not re-read every imported unit a la '#include', but instead would make symtabs for each included unit (partial units didn't yet exist). However, an imported/partial unit might not have a language -- but a language is necessary for interpreting the DIEs. This is where the "pretend" language comes from. When reading a CU, any included partial units that do not have a language of their own will inherit that CU's language. This patch started by removing the DW_UT_partial check. This of course caused assertion failures in some modes, as set_lang also asserts that the language cannot change. But, it's possible for a CU to be prepared multiple times, and for different invocations to provide different languages. This is not a scenario we allowed for in the early days. Nowadays, though, it seems to me that it's basically fine in practice, with the reason being that sharing DIEs that differ semantically but not syntactically across different languages is hard to achieve. We do see this some cross-language sharing in a limited way -- "dwz -5" will emit inclusions from both C and C++ CUs for the gdb.fortran/mixed-lang-stack.exp test -- but note that this sharing is limited to things that are common between C and C++, like "float". Therefore this patch replaces the assertions in set_lang with some compare-exchanges. Finally I changed cutu_reader to use a std::optional for the pretend language. I think this makes it more clear what is happening. And, while doing this I found a spot in the cooked indexer where language_minimal was passed in, but where the importing CU's language should have been used. I regression tested this on x86-64 Fedora 40 using the default board, plus the cc-with-gdb-index, cc-with-debug-names, and cc-with-dwz-5 boards. Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=33661 --- gdb/dwarf2/cooked-indexer.c | 2 +- gdb/dwarf2/read.c | 75 +++++++++++++++++++++---------------- gdb/dwarf2/read.h | 8 ++-- 3 files changed, 48 insertions(+), 37 deletions(-) diff --git a/gdb/dwarf2/cooked-indexer.c b/gdb/dwarf2/cooked-indexer.c index dbf83c695a1..1d5b5bcca40 100644 --- a/gdb/dwarf2/cooked-indexer.c +++ b/gdb/dwarf2/cooked-indexer.c @@ -114,7 +114,7 @@ cooked_indexer::ensure_cu_exists (cutu_reader *reader, = m_index_storage->get_abbrev_table_cache (); auto new_reader = std::make_unique (*per_cu, *per_objfile, nullptr, - nullptr, false, language_minimal, + nullptr, false, m_language, &abbrev_table_cache); if (new_reader->is_dummy ()) diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c index d3d4cfdfc23..036ec7e6337 100644 --- a/gdb/dwarf2/read.c +++ b/gdb/dwarf2/read.c @@ -922,7 +922,7 @@ static struct type *set_die_type (struct die_info *, struct type *, static void load_full_comp_unit (dwarf2_per_cu *per_cu, dwarf2_per_objfile *per_objfile, bool skip_partial, - enum language pretend_language); + std::optional pretend_language); static void process_full_comp_unit (dwarf2_cu *cu); @@ -1581,7 +1581,7 @@ load_cu (dwarf2_per_cu *per_cu, dwarf2_per_objfile *per_objfile, sig_type != nullptr) load_full_type_unit (sig_type, per_objfile); else - load_full_comp_unit (per_cu, per_objfile, skip_partial, language_minimal); + load_full_comp_unit (per_cu, per_objfile, skip_partial, std::nullopt); dwarf2_cu *cu = per_objfile->get_cu (per_cu); if (cu == nullptr) @@ -1851,7 +1851,7 @@ dw2_get_file_names (dwarf2_per_cu *this_cu, dwarf2_per_objfile *per_objfile) return this_cu->file_names; cutu_reader reader (*this_cu, *per_objfile, nullptr, - per_objfile->get_cu (this_cu), true, language_minimal, + per_objfile->get_cu (this_cu), true, std::nullopt, nullptr); if (reader.is_dummy ()) { @@ -2910,7 +2910,7 @@ void cutu_reader::init_tu_and_read_dwo_dies (dwarf2_per_cu *this_cu, dwarf2_per_objfile *per_objfile, dwarf2_cu *existing_cu, - enum language pretend_language) + std::optional pretend_language) { signatured_type *sig_type = this_cu->as_signatured_type (); @@ -2960,7 +2960,7 @@ cutu_reader::cutu_reader (dwarf2_per_cu &this_cu, const struct abbrev_table *abbrev_table, dwarf2_cu *existing_cu, bool skip_partial, - enum language pretend_language, + std::optional pretend_language, const abbrev_table_cache *abbrev_cache) { struct objfile *objfile = per_objfile.objfile; @@ -3177,7 +3177,8 @@ cutu_reader::release_cu () cutu_reader::cutu_reader (dwarf2_per_cu &this_cu, dwarf2_per_objfile &per_objfile, - language pretend_language, dwarf2_cu &parent_cu, + std::optional pretend_language, + dwarf2_cu &parent_cu, dwo_file &dwo_file) { struct objfile *objfile = per_objfile.objfile; @@ -3436,7 +3437,7 @@ cooked_index_worker_debug_info::process_unit = storage->get_abbrev_table_cache (); auto new_reader = std::make_unique (*this_cu, *per_objfile, nullptr, nullptr, false, - language_minimal, + std::nullopt, &abbrev_table_cache); if (new_reader->is_dummy ()) @@ -3563,7 +3564,7 @@ cooked_index_worker_debug_info::process_type_units cutu_reader reader (*tu.sig_type, *per_objfile, abbrev_table.get (), nullptr, false, - language_minimal); + std::nullopt); if (!reader.is_dummy ()) storage->catch_error ([&] () { @@ -3597,7 +3598,7 @@ cooked_index_worker_debug_info::process_skeletonless_type_unit /* This does the job that build_type_psymtabs would have done. */ cutu_reader reader (**sig_type_it, *per_objfile, nullptr, nullptr, false, - language_minimal); + std::nullopt); if (!reader.is_dummy ()) process_type_unit (&reader, storage); } @@ -4175,7 +4176,8 @@ process_queue (dwarf2_per_objfile *per_objfile) static void load_full_comp_unit (dwarf2_per_cu *this_cu, dwarf2_per_objfile *per_objfile, - bool skip_partial, enum language pretend_language) + bool skip_partial, + std::optional pretend_language) { gdb_assert (!this_cu->is_debug_types ()); gdb_assert (per_objfile->get_cu (this_cu) == nullptr); @@ -6359,7 +6361,7 @@ cutu_reader::create_dwo_unit_hash_tables (dwo_file &dwo_file, /* The length of the CU is not necessary. */ dwarf2_per_cu per_cu (&per_bfd, §ion, sect_off, length, false /* is_dwz */); - cutu_reader reader (per_cu, per_objfile, language_minimal, + cutu_reader reader (per_cu, per_objfile, std::nullopt, skeleton_cu, dwo_file); std::optional opt_signature @@ -15003,7 +15005,7 @@ dwarf2_read_addr_index (dwarf2_per_cu *per_cu, dwarf2_per_objfile *per_objfile, else { cutu_reader reader (*per_cu, *per_objfile, nullptr, nullptr, false, - language_minimal); + std::nullopt); addr_base = reader.cu ()->addr_base; addr_size = reader.cu ()->header.addr_size; } @@ -17604,7 +17606,7 @@ read_signatured_type (signatured_type *sig_type, gdb_assert (per_objfile->get_cu (sig_type) == nullptr); cutu_reader reader (*sig_type, *per_objfile, nullptr, nullptr, false, - language_minimal); + std::nullopt); if (!reader.is_dummy ()) { @@ -18043,20 +18045,23 @@ dwarf2_symbol_mark_computed (const struct attribute *attr, struct symbol *sym, void dwarf2_per_cu::set_lang (enum language lang, dwarf_source_language dw_lang) { - if (unit_type () == DW_UT_partial) - return; - - /* Set if not set already. */ + packed expected1 = language_unknown; + packed expected2 = language_minimal; packed new_value = lang; - packed old_value = m_lang.exchange (new_value); - /* If already set, verify that it's the same value. */ - gdb_assert (old_value == language_unknown - || old_value == language_minimal - || old_value == lang); + /* In practice it is probably fine if different CUs using + DW_TAG_imported_unit pass different languages, because there are + language differences that make unbridled DIE sharing unlikely. + Once a non-minimal/unknown language has been set, we just keep + it. */ + if (!m_lang.compare_exchange_strong (expected1, new_value)) + m_lang.compare_exchange_strong (expected2, new_value); + + /* A similar consideration applies to the DWARF language, but here + there's only a single unknown value. */ + packed old_dw = (dwarf_source_language) 0; packed new_dw = dw_lang; - packed old_dw = m_dw_lang.exchange (new_dw); - gdb_assert (old_dw == 0 || old_dw == dw_lang); + m_dw_lang.compare_exchange_strong (old_dw, new_dw); } /* See read.h. */ @@ -18070,7 +18075,7 @@ dwarf2_per_cu::ensure_lang (dwarf2_per_objfile *per_objfile) /* Constructing this object will set the language as a side effect. */ cutu_reader reader (*this, *per_objfile, nullptr, per_objfile->get_cu (this), - true, language_minimal, nullptr); + true, std::nullopt, nullptr); } /* Return the unit from ALL_UNITS that potentially contains TARGET. @@ -18151,7 +18156,7 @@ dwarf2_find_containing_unit (const section_and_offset &target, something more lightweight that has the same effect. */ if (!per_cu->length_is_set ()) cutu_reader (*per_cu, *per_objfile, nullptr, nullptr, false, - language_minimal); + std::nullopt); /* Now we can check if the target section offset is within PER_CU's range. */ if (target.offset < per_cu->sect_off () @@ -18253,16 +18258,17 @@ run_test () void cutu_reader::prepare_one_comp_unit (struct dwarf2_cu *cu, - enum language pretend_language) + std::optional pretend_language) { struct attribute *attr; if (m_top_level_die == nullptr) { + language lang = pretend_language.value_or (language_minimal); cu->set_producer (nullptr); - cu->language_defn = language_def (pretend_language); + cu->language_defn = language_def (lang); cu->per_cu->set_unit_type (DW_UT_compile); - cu->per_cu->set_lang (pretend_language, (dwarf_source_language) 0); + cu->per_cu->set_lang (lang, (dwarf_source_language) 0); return; } @@ -18270,7 +18276,7 @@ cutu_reader::prepare_one_comp_unit (struct dwarf2_cu *cu, /* Set the language we're debugging. */ attr = dwarf2_attr (m_top_level_die, DW_AT_language, cu); - enum language lang; + std::optional lang; dwarf_source_language dw_lang = (dwarf_source_language) 0; if (cu->producer_is_xlc_opencl ()) { @@ -18299,10 +18305,12 @@ cutu_reader::prepare_one_comp_unit (struct dwarf2_cu *cu, else lang = language_minimal; } + else if (m_top_level_die->tag != DW_TAG_partial_unit) + lang = pretend_language.value_or (language_minimal); else lang = pretend_language; - cu->language_defn = language_def (lang); + cu->language_defn = language_def (lang.value_or (language_minimal)); /* Initialize the lto_artificial field. */ attr = dwarf2_attr (m_top_level_die, DW_AT_name, cu); @@ -18329,7 +18337,10 @@ cutu_reader::prepare_one_comp_unit (struct dwarf2_cu *cu, sect_offset_str (cu->per_cu->sect_off ())); } - cu->per_cu->set_lang (lang, dw_lang); + /* If the caller didn't really know what language to use, and there + is no local language, then don't bother setting the language. */ + if (lang.has_value ()) + cu->per_cu->set_lang (*lang, dw_lang); } /* See read.h. */ diff --git a/gdb/dwarf2/read.h b/gdb/dwarf2/read.h index 742dce9f2fc..172d718e6b7 100644 --- a/gdb/dwarf2/read.h +++ b/gdb/dwarf2/read.h @@ -1019,12 +1019,12 @@ public: const struct abbrev_table *abbrev_table, dwarf2_cu *existing_cu, bool skip_partial, - enum language pretend_language, + std::optional pretend_language, const abbrev_table_cache *abbrev_cache = nullptr); cutu_reader (dwarf2_per_cu &this_cu, dwarf2_per_objfile &per_objfile, - enum language pretend_language, + std::optional pretend_language, struct dwarf2_cu &parent_cu, struct dwo_file &dwo_file); @@ -1090,14 +1090,14 @@ private: void init_tu_and_read_dwo_dies (dwarf2_per_cu *this_cu, dwarf2_per_objfile *per_objfile, dwarf2_cu *existing_cu, - enum language pretend_language); + std::optional pretend_language); void read_cutu_die_from_dwo (dwarf2_cu *cu, dwo_unit *dwo_unit, die_info *stub_comp_unit_die, const char *stub_comp_dir); void prepare_one_comp_unit (struct dwarf2_cu *cu, - enum language pretend_language); + std::optional pretend_language); /* Helpers to build the in-memory DIE tree. */ -- 2.47.3