From: Tom Tromey Date: Thu, 6 Mar 2025 17:59:41 +0000 (-0700) Subject: Introduce gdb_bfd_canonicalize_symtab X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=89e59ca6dce9bc7ca88b5d6c1a073c39b8f9bcc9;p=thirdparty%2Fbinutils-gdb.git Introduce gdb_bfd_canonicalize_symtab bfd_canonicalize_symtab stores the symbols in the BFD, and returns pointers to these. The ELF reader does not reuse these stored symbols, so each call to bfd_canonicalize_symtab causes an allocation. This interacts poorly with code like arm_pikeos_osabi_sniffer, which searches the BFD symbol when called. PR gdb/32758 points out a particularly pathological case: using "maint info sections" on a program with a large number of sections (10000) will cause 10000 calls to arm_pikeos_osabi_sniffer, allocating 20G. I'm not sure BFD always worked this way. And, fixing BFD was an option. However it seemed maybe better for GDB to adapt, since adapting would mean that the fix would apply to all BFD back ends, and not just ELF. To that end, this patch adds a new gdb_bfd_canonicalize_symtab and changes all callers of bfd_canonicalize_symtab to use it instead. This new function caches the result in the per-BFD object. I looked into having this return a view of "const asymbol *". However both the compile module and machoread modify the returned symbols. And while I think this is wrong, I haven't tried to fix this here. Regression tested on x86-64 Fedora 40. Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=32758 --- diff --git a/gdb/arm-pikeos-tdep.c b/gdb/arm-pikeos-tdep.c index 4760755a32a..b2c93bd7cb8 100644 --- a/gdb/arm-pikeos-tdep.c +++ b/gdb/arm-pikeos-tdep.c @@ -36,8 +36,6 @@ arm_pikeos_init_abi (struct gdbarch_info info, struct gdbarch *gdbarch) static enum gdb_osabi arm_pikeos_osabi_sniffer (bfd *abfd) { - long number_of_symbols; - long i; int pikeos_stack_found = 0; int pikeos_stack_size_found = 0; @@ -50,20 +48,15 @@ arm_pikeos_osabi_sniffer (bfd *abfd) OS ABI sniffers are called before the minimal symtabs are created. So inspect the symbol table using BFD. */ - long storage_needed = bfd_get_symtab_upper_bound (abfd); - if (storage_needed <= 0) - return GDB_OSABI_UNKNOWN; - - gdb::unique_xmalloc_ptr symbol_table - ((asymbol **) xmalloc (storage_needed)); - number_of_symbols = bfd_canonicalize_symtab (abfd, symbol_table.get ()); + gdb::array_view symbol_table + = gdb_bfd_canonicalize_symtab (abfd, false); - if (number_of_symbols <= 0) + if (symbol_table.empty ()) return GDB_OSABI_UNKNOWN; - for (i = 0; i < number_of_symbols; i++) + for (const asymbol *sym : symbol_table) { - const char *name = bfd_asymbol_name (symbol_table.get ()[i]); + const char *name = bfd_asymbol_name (sym); if (strcmp (name, "_vm_stack") == 0 || strcmp (name, "__p4_stack") == 0) diff --git a/gdb/compile/compile-object-load.c b/gdb/compile/compile-object-load.c index ef77ee30521..05e5b43affd 100644 --- a/gdb/compile/compile-object-load.c +++ b/gdb/compile/compile-object-load.c @@ -605,9 +605,7 @@ compile_object_load (const compile_file_names &file_names, CORE_ADDR regs_addr, out_value_addr = 0; struct symbol *func_sym; struct type *func_type; - long storage_needed; - asymbol **symbol_table, **symp; - long number_of_symbols, missing_symbols; + long missing_symbols; struct type *regs_type, *out_value_type = NULL; char **matching; struct objfile *objfile; @@ -635,11 +633,6 @@ compile_object_load (const compile_file_names &file_names, setup_sections_data.setup_one_section (sect); setup_sections_data.setup_one_section (nullptr); - storage_needed = bfd_get_symtab_upper_bound (abfd.get ()); - if (storage_needed < 0) - error (_("Cannot read symbols of compiled module \"%s\": %s"), - filename.get (), bfd_errmsg (bfd_get_error ())); - /* SYMFILE_VERBOSE is not passed even if FROM_TTY, user is not interested in "Reading symbols from ..." message for automatically generated file. */ scoped_objfile_unlinker objfile_holder (symbol_file_add_from_bfd @@ -692,21 +685,12 @@ compile_object_load (const compile_file_names &file_names, "module \"%s\"."), GCC_FE_WRAPPER_FUNCTION, objfile_name (objfile)); - /* The memory may be later needed - by bfd_generic_get_relocated_section_contents - called from default_symfile_relocate. */ - symbol_table = (asymbol **) obstack_alloc (&objfile->objfile_obstack, - storage_needed); - number_of_symbols = bfd_canonicalize_symtab (abfd.get (), symbol_table); - if (number_of_symbols < 0) - error (_("Cannot parse symbols of compiled module \"%s\": %s"), - filename.get (), bfd_errmsg (bfd_get_error ())); + gdb::array_view symbol_table + = gdb_bfd_canonicalize_symtab (abfd.get ()); missing_symbols = 0; - for (symp = symbol_table; symp < symbol_table + number_of_symbols; symp++) + for (asymbol *sym : symbol_table) { - asymbol *sym = *symp; - if (sym->flags != 0) continue; sym->flags = BSF_GLOBAL; @@ -800,7 +784,7 @@ compile_object_load (const compile_file_names &file_names, if (missing_symbols) error (_("%ld symbols were missing, cannot continue."), missing_symbols); - bfd_map_over_sections (abfd.get (), copy_sections, symbol_table); + bfd_map_over_sections (abfd.get (), copy_sections, symbol_table.data ()); regs_type = get_regs_type (func_sym, objfile); if (regs_type == NULL) diff --git a/gdb/dicos-tdep.c b/gdb/dicos-tdep.c index 3627426eee7..96b841a7d87 100644 --- a/gdb/dicos-tdep.c +++ b/gdb/dicos-tdep.c @@ -53,9 +53,7 @@ dicos_init_abi (struct gdbarch *gdbarch) int dicos_load_module_p (bfd *abfd, int header_size) { - long storage_needed; int ret = 0; - asymbol **symbol_table = NULL; const char *symname = "Dicos_loadModuleInfo"; asection *section; @@ -75,42 +73,19 @@ dicos_load_module_p (bfd *abfd, int header_size) /* Dicos LMs always have a "Dicos_loadModuleInfo" symbol defined. Look for it. */ - storage_needed = bfd_get_symtab_upper_bound (abfd); - if (storage_needed < 0) - { - warning (_("Can't read elf symbols from %s: %s"), - bfd_get_filename (abfd), - bfd_errmsg (bfd_get_error ())); - return 0; - } + gdb::array_view symbol_table + = gdb_bfd_canonicalize_symtab (abfd, false); - if (storage_needed > 0) + for (asymbol *sym : symbol_table) { - long i, symcount; - - symbol_table = (asymbol **) xmalloc (storage_needed); - symcount = bfd_canonicalize_symtab (abfd, symbol_table); - - if (symcount < 0) - warning (_("Can't read elf symbols from %s: %s"), - bfd_get_filename (abfd), - bfd_errmsg (bfd_get_error ())); - else + if (sym->name != NULL + && symname[0] == sym->name[0] + && strcmp (symname + 1, sym->name + 1) == 0) { - for (i = 0; i < symcount; i++) - { - asymbol *sym = symbol_table[i]; - if (sym->name != NULL - && symname[0] == sym->name[0] - && strcmp (symname + 1, sym->name + 1) == 0) - { - ret = 1; - break; - } - } + ret = 1; + break; } } - xfree (symbol_table); return ret; } diff --git a/gdb/elfread.c b/gdb/elfread.c index 5be31183433..3756fa3bd33 100644 --- a/gdb/elfread.c +++ b/gdb/elfread.c @@ -1062,8 +1062,8 @@ elf_read_minimal_symbols (struct objfile *objfile, int symfile_flags, const struct elfinfo *ei) { bfd *synth_abfd, *abfd = objfile->obfd.get (); - long symcount = 0, dynsymcount = 0, synthcount, storage_needed; - asymbol **symbol_table = NULL, **dyn_symbol_table = NULL; + long dynsymcount = 0, synthcount; + asymbol **dyn_symbol_table = NULL; asymbol *synthsyms; symtab_create_debug_printf ("reading minimal symbols of objfile %s", @@ -1087,32 +1087,16 @@ elf_read_minimal_symbols (struct objfile *objfile, int symfile_flags, /* Process the normal ELF symbol table first. */ - storage_needed = bfd_get_symtab_upper_bound (objfile->obfd.get ()); - if (storage_needed < 0) - error (_("Can't read symbols from %s: %s"), - bfd_get_filename (objfile->obfd.get ()), - bfd_errmsg (bfd_get_error ())); + gdb::array_view symbol_table + = gdb_bfd_canonicalize_symtab (objfile->obfd.get ()); - if (storage_needed > 0) - { - /* Memory gets permanently referenced from ABFD after - bfd_canonicalize_symtab so it must not get freed before ABFD gets. */ - - symbol_table = (asymbol **) bfd_alloc (abfd, storage_needed); - symcount = bfd_canonicalize_symtab (objfile->obfd.get (), symbol_table); - - if (symcount < 0) - error (_("Can't read symbols from %s: %s"), - bfd_get_filename (objfile->obfd.get ()), - bfd_errmsg (bfd_get_error ())); - - elf_symtab_read (reader, objfile, ST_REGULAR, symcount, symbol_table, - false); - } + elf_symtab_read (reader, objfile, ST_REGULAR, symbol_table.size (), + symbol_table.data (), false); /* Add the dynamic symbols. */ - storage_needed = bfd_get_dynamic_symtab_upper_bound (objfile->obfd.get ()); + long storage_needed + = bfd_get_dynamic_symtab_upper_bound (objfile->obfd.get ()); if (storage_needed > 0) { @@ -1157,7 +1141,8 @@ elf_read_minimal_symbols (struct objfile *objfile, int symfile_flags, /* Add synthetic symbols - for instance, names for any PLT entries. */ - synthcount = bfd_get_synthetic_symtab (synth_abfd, symcount, symbol_table, + synthcount = bfd_get_synthetic_symtab (synth_abfd, symbol_table.size (), + symbol_table.data (), dynsymcount, dyn_symbol_table, &synthsyms); if (synthcount > 0) diff --git a/gdb/gdb_bfd.c b/gdb/gdb_bfd.c index 8380c5377a5..1a57b3c4f56 100644 --- a/gdb/gdb_bfd.c +++ b/gdb/gdb_bfd.c @@ -143,6 +143,13 @@ struct gdb_bfd_data /* Table of all the bfds this bfd has included. */ std::vector included_bfds; + /* This is used by gdb_bfd_canonicalize_symtab to hold the symbols + returned by canonicalization. */ + std::optional> symbol_table; + /* If an error occurred while canonicalizing the symtab, this holds + the error message. */ + std::string symbol_error; + /* The registry. */ registry registry_fields; @@ -1177,6 +1184,54 @@ gdb_bfd_errmsg (bfd_error_type error_tag, char **matching) return ret; } +/* See gdb_bfd.h. */ + +gdb::array_view +gdb_bfd_canonicalize_symtab (bfd *abfd, bool should_throw) +{ + struct gdb_bfd_data *gdata = (struct gdb_bfd_data *) bfd_usrdata (abfd); + + if (!gdata->symbol_table.has_value ()) + { + /* Ensure it exists. */ + gdb::def_vector &symbol_table + = gdata->symbol_table.emplace (); + + long storage_needed = bfd_get_symtab_upper_bound (abfd); + if (storage_needed < 0) + gdata->symbol_error = bfd_errmsg (bfd_get_error ()); + else if (storage_needed > 0) + { + symbol_table.resize (storage_needed / sizeof (asymbol *)); + long number_of_symbols + = bfd_canonicalize_symtab (abfd, symbol_table.data ()); + if (number_of_symbols < 0) + { + symbol_table.clear (); + gdata->symbol_error = bfd_errmsg (bfd_get_error ()); + } + } + } + + if (!gdata->symbol_error.empty ()) + { + if (should_throw) + error (_("Cannot parse symbols of \"%s\": %s"), + bfd_get_filename (abfd), gdata->symbol_error.c_str ()); + return {}; + } + + gdb::def_vector &symbol_table = *gdata->symbol_table; + if (symbol_table.empty ()) + return {}; + + /* bfd_canonicalize_symtab adds a trailing NULL, but don't include + this in the array view. */ + gdb_assert (symbol_table.back () == nullptr); + return gdb::make_array_view (symbol_table.data (), + symbol_table.size () - 1); +} + /* Implement the 'maint info bfd' command. */ static void diff --git a/gdb/gdb_bfd.h b/gdb/gdb_bfd.h index d35f2d661a2..7830bf3f841 100644 --- a/gdb/gdb_bfd.h +++ b/gdb/gdb_bfd.h @@ -274,4 +274,16 @@ extern std::string gdb_bfd_errmsg (bfd_error_type error_tag, char **matching); extern void gdb_bfd_init (); +/* A wrapper for bfd_canonicalize_symtab that caches the result. This + is important to avoid excess memory use on repeated calls. See + PR gdb/32758. bfd_canonicalize_symtab should not be called directly + by other code in gdb. + + When SHOULD_THROW is true (the default), this will throw an + exception if symbols could not be read. When SHOULD_THROW is + false, an empty view is returned instead. */ + +extern gdb::array_view gdb_bfd_canonicalize_symtab + (bfd *abfd, bool should_throw = true); + #endif /* GDB_GDB_BFD_H */ diff --git a/gdb/solib-darwin.c b/gdb/solib-darwin.c index 6c7d9065a65..cbd89b1b3b7 100644 --- a/gdb/solib-darwin.c +++ b/gdb/solib-darwin.c @@ -146,24 +146,13 @@ struct lm_info_darwin final : public lm_info static CORE_ADDR lookup_symbol_from_bfd (bfd *abfd, const char *symname) { - long storage_needed; - asymbol **symbol_table; - unsigned int number_of_symbols; - unsigned int i; CORE_ADDR symaddr = 0; - storage_needed = bfd_get_symtab_upper_bound (abfd); + gdb::array_view symbol_table + = gdb_bfd_canonicalize_symtab (abfd, false); - if (storage_needed <= 0) - return 0; - - symbol_table = (asymbol **) xmalloc (storage_needed); - number_of_symbols = bfd_canonicalize_symtab (abfd, symbol_table); - - for (i = 0; i < number_of_symbols; i++) + for (const asymbol *sym : symbol_table) { - asymbol *sym = symbol_table[i]; - if (strcmp (sym->name, symname) == 0 && (sym->section->flags & (SEC_CODE | SEC_DATA)) != 0) { @@ -172,7 +161,6 @@ lookup_symbol_from_bfd (bfd *abfd, const char *symname) break; } } - xfree (symbol_table); return symaddr; } diff --git a/gdb/solib.c b/gdb/solib.c index 7782c8d699e..b1fdea9ad14 100644 --- a/gdb/solib.c +++ b/gdb/solib.c @@ -1431,49 +1431,38 @@ CORE_ADDR gdb_bfd_lookup_symbol_from_symtab ( bfd *abfd, gdb::function_view match_sym) { - long storage_needed = bfd_get_symtab_upper_bound (abfd); CORE_ADDR symaddr = 0; + gdb::array_view symbol_table + = gdb_bfd_canonicalize_symtab (abfd, false); - if (storage_needed > 0) + for (asymbol *sym : symbol_table) { - unsigned int i; - - gdb::def_vector storage (storage_needed / sizeof (asymbol *)); - asymbol **symbol_table = storage.data (); - unsigned int number_of_symbols - = bfd_canonicalize_symtab (abfd, symbol_table); - - for (i = 0; i < number_of_symbols; i++) + if (match_sym (sym)) { - asymbol *sym = *symbol_table++; - - if (match_sym (sym)) + gdbarch *gdbarch = current_inferior ()->arch (); + symaddr = sym->value; + + /* Some ELF targets fiddle with addresses of symbols they + consider special. They use minimal symbols to do that + and this is needed for correct breakpoint placement, + but we do not have full data here to build a complete + minimal symbol, so just set the address and let the + targets cope with that. */ + if (bfd_get_flavour (abfd) == bfd_target_elf_flavour + && gdbarch_elf_make_msymbol_special_p (gdbarch)) { - gdbarch *gdbarch = current_inferior ()->arch (); - symaddr = sym->value; - - /* Some ELF targets fiddle with addresses of symbols they - consider special. They use minimal symbols to do that - and this is needed for correct breakpoint placement, - but we do not have full data here to build a complete - minimal symbol, so just set the address and let the - targets cope with that. */ - if (bfd_get_flavour (abfd) == bfd_target_elf_flavour - && gdbarch_elf_make_msymbol_special_p (gdbarch)) + struct minimal_symbol msym { - struct minimal_symbol msym - { - }; - - msym.set_value_address (symaddr); - gdbarch_elf_make_msymbol_special (gdbarch, sym, &msym); - symaddr = CORE_ADDR (msym.unrelocated_address ()); - } + }; - /* BFD symbols are section relative. */ - symaddr += sym->section->vma; - break; + msym.set_value_address (symaddr); + gdbarch_elf_make_msymbol_special (gdbarch, sym, &msym); + symaddr = CORE_ADDR (msym.unrelocated_address ()); } + + /* BFD symbols are section relative. */ + symaddr += sym->section->vma; + break; } }