From: Guinevere Larsen Date: Wed, 23 Apr 2025 19:47:34 +0000 (-0300) Subject: gdb: make gdb_bfd cache aware of linker namespaces X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fusers%2Fguinevere%2Fsymbol_in_linker_namespace;p=thirdparty%2Fbinutils-gdb.git gdb: make gdb_bfd cache aware of linker namespaces In an attempt to improve GDB's speed, GDB has implemented a cache for BFD objects to a file. This works great in most cases, however, when using linker namespaces, it is possible to load the exact same file many times, which the gdb_bfd_cache would reuse the object. The problem comes because in different namespaces, variables may have different addresses, but if the address can be determined at read time, the variable will be marked with address class LOC_STATIC, and that address will only be correct for one of the namespaces (whichever was expanded first). This patch attempts to fix that by adding a new parameter to the gdb_bfd_cache, the linker namespace where the objfile is being loaded. By default, this value will be -1, but in systems that support linker namespaces, it will be correctly set. To manage that, a small refactor to solib_ops::bfd_open was necessary. Having only the file's full path is not sufficient to determine the linker namespace, specifically because the same file can be loaded multiple times. So instead, solib_ops::bfd_open has been changed to take a reference to the solib being read, instead of just the file name. This leads to the next problem, as solib_bfd_open is used to read the interpreter, which has no solib value at that point and must be opened by the file name alone. This final problem is solved by introducing solib_bfd_open_from_solib, a new helper that will be the default behavior of solib_ops::bfd_open, and solib_bfd_open is left untouched, so that other readers can still use it to read the interpreter. WIP: the only remaining problem is that if an objfile has separate debug info, reading the separate debuginfo objfile has no information for linker namespace (yet). --- diff --git a/gdb/gdb_bfd.c b/gdb/gdb_bfd.c index 06d6f5cb2ff..7ccf543c19e 100644 --- a/gdb/gdb_bfd.c +++ b/gdb/gdb_bfd.c @@ -93,11 +93,12 @@ static gdb::unordered_set all_bfds; struct gdb_bfd_data { /* Note that if ST is nullptr, then we simply fill in zeroes. */ - gdb_bfd_data (bfd *abfd, struct stat *st) + gdb_bfd_data (bfd *abfd, struct stat *st, int l_ns) : mtime (st == nullptr ? 0 : st->st_mtime), size (st == nullptr ? 0 : st->st_size), inode (st == nullptr ? 0 : st->st_ino), device_id (st == nullptr ? 0 : st->st_dev), + linker_namespace (l_ns), relocation_computed (0), needs_relocations (0), crc_computed (0) @@ -123,6 +124,10 @@ struct gdb_bfd_data /* The device id of the file at the point the cache entry was made. */ dev_t device_id; + /* The Linker Namespace where the objfile that lead this file to be + read was loaded. */ + int linker_namespace; + /* This is true if we have determined whether this BFD has any sections requiring relocation. */ unsigned int relocation_computed : 1; @@ -217,6 +222,11 @@ struct gdb_bfd_cache_search ino_t inode; /* The device id of the file. */ dev_t device_id; + /* The Linker Namespace where the solib was loaded. + We do this because, even if the file is exactly the same, + we'll want to read the file again to recalculate symbol + addresses. */ + int linker_namespace; }; struct bfd_cache_hash @@ -248,6 +258,7 @@ struct bfd_cache_eq && gdata->size == s.size && gdata->inode == s.inode && gdata->device_id == s.device_id + && gdata->linker_namespace == s.linker_namespace && strcmp (bfd_get_filename (abfd), s.filename) == 0); } }; @@ -504,7 +515,7 @@ target_fileio_stream::stat (struct bfd *abfd, struct stat *sb) BFD. */ static void -gdb_bfd_init_data (struct bfd *abfd, struct stat *st) +gdb_bfd_init_data (struct bfd *abfd, struct stat *st, int linker_namespace) { struct gdb_bfd_data *gdata; @@ -513,7 +524,7 @@ gdb_bfd_init_data (struct bfd *abfd, struct stat *st) /* Ask BFD to decompress sections in bfd_get_full_section_contents. */ abfd->flags |= BFD_DECOMPRESS; - gdata = new gdb_bfd_data (abfd, st); + gdata = new gdb_bfd_data (abfd, st, linker_namespace); bfd_set_usrdata (abfd, gdata); /* This is the first we've seen it, so add it to the hash table. */ @@ -525,7 +536,7 @@ gdb_bfd_init_data (struct bfd *abfd, struct stat *st) gdb_bfd_ref_ptr gdb_bfd_open (const char *name, const char *target, int fd, - bool warn_if_slow) + bool warn_if_slow, int linker_namespace) { bfd *abfd; struct stat st; @@ -579,6 +590,12 @@ gdb_bfd_open (const char *name, const char *target, int fd, search.size = st.st_size; search.inode = st.st_ino; search.device_id = st.st_dev; + search.linker_namespace = linker_namespace; + + /* If a linker namespace was supplied, print it in the debug message. */ + std::string linker_ns_str; + if (linker_namespace != -1) + linker_ns_str = string_printf ("[[%d]]::", linker_namespace); if (bfd_sharing) { @@ -586,8 +603,9 @@ gdb_bfd_open (const char *name, const char *target, int fd, iter != gdb_bfd_cache.end ()) { abfd = *iter; - bfd_cache_debug_printf ("Reusing cached bfd %s for %s", + bfd_cache_debug_printf ("Reusing cached bfd %s for %s%s", host_address_to_string (abfd), + linker_ns_str.c_str (), bfd_get_filename (abfd)); close (fd); return gdb_bfd_ref_ptr::new_reference (abfd); @@ -600,8 +618,9 @@ gdb_bfd_open (const char *name, const char *target, int fd, bfd_set_cacheable (abfd, 1); - bfd_cache_debug_printf ("Creating new bfd %s for %s", + bfd_cache_debug_printf ("Creating new bfd %s for %s%s", host_address_to_string (abfd), + linker_ns_str.c_str (), bfd_get_filename (abfd)); /* It's important to pass the already-computed stat info here, @@ -610,7 +629,7 @@ gdb_bfd_open (const char *name, const char *target, int fd, and since we will enter it into the hash table using this mtime, if the file changed at the wrong moment, the race would lead to a hash table corruption. */ - gdb_bfd_init_data (abfd, &st); + gdb_bfd_init_data (abfd, &st, linker_namespace); if (bfd_sharing) { @@ -683,8 +702,13 @@ gdb_bfd_ref (struct bfd *abfd) gdata = (struct gdb_bfd_data *) bfd_usrdata (abfd); - bfd_cache_debug_printf ("Increase reference count on bfd %s (%s)", + std::string linker_ns_str; + if (gdata && gdata->linker_namespace != -1) + linker_ns_str = string_printf ("[[%d]]::", gdata->linker_namespace); + + bfd_cache_debug_printf ("Increase reference count on bfd %s (%s%s)", host_address_to_string (abfd), + linker_ns_str.c_str (), bfd_get_filename (abfd)); if (gdata != NULL) @@ -695,7 +719,7 @@ gdb_bfd_ref (struct bfd *abfd) /* Caching only happens via gdb_bfd_open, so passing nullptr here is fine. */ - gdb_bfd_init_data (abfd, nullptr); + gdb_bfd_init_data (abfd, nullptr, -1); } /* See gdb_bfd.h. */ @@ -716,17 +740,23 @@ gdb_bfd_unref (struct bfd *abfd) gdata = (struct gdb_bfd_data *) bfd_usrdata (abfd); gdb_assert (gdata->refc >= 1); + std::string linker_ns_str; + if (gdata && gdata->linker_namespace != -1) + linker_ns_str = string_printf ("[[%d]]::", gdata->linker_namespace); + gdata->refc -= 1; if (gdata->refc > 0) { - bfd_cache_debug_printf ("Decrease reference count on bfd %s (%s)", + bfd_cache_debug_printf ("Decrease reference count on bfd %s (%s%s)", host_address_to_string (abfd), + linker_ns_str.c_str(), bfd_get_filename (abfd)); return; } - bfd_cache_debug_printf ("Delete final reference count on bfd %s (%s)", + bfd_cache_debug_printf ("Delete final reference count on bfd %s (%s%s)", host_address_to_string (abfd), + linker_ns_str.c_str (), bfd_get_filename (abfd)); archive_bfd = gdata->archive_bfd; diff --git a/gdb/gdb_bfd.h b/gdb/gdb_bfd.h index 41ce5ce6d82..e2492510346 100644 --- a/gdb/gdb_bfd.h +++ b/gdb/gdb_bfd.h @@ -100,7 +100,8 @@ typedef gdb::ref_ptr gdb_bfd_ref_ptr; be slow. */ gdb_bfd_ref_ptr gdb_bfd_open (const char *name, const char *target, - int fd = -1, bool warn_if_slow = true); + int fd = -1, bool warn_if_slow = true, + int linker_namespace = -1); /* Mark the CHILD BFD as being a member of PARENT. Also, increment the reference count of CHILD. Calling this function ensures that diff --git a/gdb/solib-aix.c b/gdb/solib-aix.c index 0ad25f18cdc..6e1721c573c 100644 --- a/gdb/solib-aix.c +++ b/gdb/solib-aix.c @@ -509,7 +509,7 @@ solib_aix_in_dynsym_resolve_code (CORE_ADDR pc) /* Implement the "bfd_open" solib_ops method. */ static gdb_bfd_ref_ptr -solib_aix_bfd_open (const char *pathname) +solib_aix_bfd_open (const solib &so) { /* The pathname is actually a synthetic filename with the following form: "/path/to/sharedlib(member.o)" (double-quotes excluded). @@ -517,6 +517,9 @@ solib_aix_bfd_open (const char *pathname) FIXME: This is a little hacky. Perhaps we should provide access to the solib's lm_info here? */ + std::string path = so.file_path (); + /* WIP: this is a convenience thing, I should really convert all pathnames to pathname.c_str(). */ + const char *pathname = path.c_str (); const int path_len = strlen (pathname); const char *sep; int filename_len; diff --git a/gdb/solib-darwin.c b/gdb/solib-darwin.c index 368f4014ba5..5aa96964bbf 100644 --- a/gdb/solib-darwin.c +++ b/gdb/solib-darwin.c @@ -613,15 +613,16 @@ darwin_relocate_section_addresses (solib &so, target_section *sec) } static gdb_bfd_ref_ptr -darwin_bfd_open (const char *pathname) +darwin_bfd_open (const solib &so) { + std::string pathname = so.file_path (); int found_file; /* Search for shared library file. */ gdb::unique_xmalloc_ptr found_pathname - = solib_find (pathname, &found_file); + = solib_find (pathname.c_str (), &found_file); if (found_pathname == NULL) - perror_with_name (pathname); + perror_with_name (pathname.c_str ()); /* Open bfd for shared library. */ gdb_bfd_ref_ptr abfd (solib_bfd_fopen (found_pathname.get (), found_file)); @@ -637,7 +638,7 @@ darwin_bfd_open (const char *pathname) /* The current filename for fat-binary BFDs is a name generated by BFD, usually a string containing the name of the architecture. Reset its value to the actual filename. */ - bfd_set_filename (res.get (), pathname); + bfd_set_filename (res.get (), pathname.c_str ()); return res; } diff --git a/gdb/solib-dsbt.c b/gdb/solib-dsbt.c index 3832a7a6333..eed3c10c98c 100644 --- a/gdb/solib-dsbt.c +++ b/gdb/solib-dsbt.c @@ -912,7 +912,7 @@ const solib_ops dsbt_so_ops = dsbt_current_sos, open_symbol_file_object, dsbt_in_dynsym_resolve_code, - solib_bfd_open, + solib_bfd_open_from_solib, nullptr, nullptr, nullptr, diff --git a/gdb/solib-frv.c b/gdb/solib-frv.c index bf13d365d1b..38198003614 100644 --- a/gdb/solib-frv.c +++ b/gdb/solib-frv.c @@ -1083,7 +1083,7 @@ const solib_ops frv_so_ops = frv_current_sos, open_symbol_file_object, frv_in_dynsym_resolve_code, - solib_bfd_open, + solib_bfd_open_from_solib, nullptr, nullptr, nullptr, diff --git a/gdb/solib-svr4.c b/gdb/solib-svr4.c index 458c4ba205f..73492cbbd26 100644 --- a/gdb/solib-svr4.c +++ b/gdb/solib-svr4.c @@ -3737,8 +3737,16 @@ svr4_find_solib_addr (solib &so) static int svr4_find_solib_ns (const solib &so) { - CORE_ADDR debug_base = find_debug_base_for_solib (&so); svr4_info *info = get_svr4_info (current_program_space); + + /* If we're reading the dynamic linker, there won't be any namespaces + registered yet. Here, we hope that that is the only situation in + which this function is called with an empty namespace_id vector, + and in that case we'll just return 0. */ + if (info->namespace_id.empty ()) + return 0; + + CORE_ADDR debug_base = find_debug_base_for_solib (&so); for (int i = 0; i < info->namespace_id.size (); i++) { if (info->namespace_id[i] == debug_base) @@ -3806,6 +3814,52 @@ svr4_get_solibs_in_ns (int nsid) return ns_solibs; } +/* See solib_ops::bfd_open in solist.h. + This function is needed because svr4 implements support for + multiple namespaces, so we'll need to supply a linker_namespace + for the gdb_bfd_cache. Otherwise, this function should be + mostly the same as gdb/solib.c's solib_bfd_open. */ +static gdb_bfd_ref_ptr +svr4_solib_bfd_open (const solib &so) +{ + int found_file; + const struct bfd_arch_info *b; + std::string pathname (so.file_path ()); + + /* Search for shared library file. */ + gdb::unique_xmalloc_ptr found_pathname + = solib_find (pathname.c_str (), &found_file); + if (found_pathname == NULL) + { + /* Return failure if the file could not be found, so that we can + accumulate messages about missing libraries. */ + if (errno == ENOENT) + return NULL; + + perror_with_name (pathname.c_str ()); + } + + /* Open bfd for shared library. */ + gdb_bfd_ref_ptr abfd (gdb_bfd_open (found_pathname.get (), gnutarget, + found_file, true, + svr4_find_solib_ns (so))); + + /* Check bfd format. */ + if (!bfd_check_format (abfd.get (), bfd_object)) + error (_ ("`%s': not in executable format: %s"), + bfd_get_filename (abfd.get ()), bfd_errmsg (bfd_get_error ())); + + /* Check bfd arch. */ + b = gdbarch_bfd_arch_info (current_inferior ()->arch ()); + if (!b->compatible (b, bfd_get_arch_info (abfd.get ()))) + error (_ ("`%s': Shared library architecture %s is not compatible " + "with target architecture %s."), + bfd_get_filename (abfd.get ()), + bfd_get_arch_info (abfd.get ())->printable_name, b->printable_name); + + return abfd; +} + const struct solib_ops svr4_so_ops = { svr4_relocate_section_addresses, @@ -3815,7 +3869,7 @@ const struct solib_ops svr4_so_ops = svr4_current_sos, open_symbol_file_object, svr4_in_dynsym_resolve_code, - solib_bfd_open, + svr4_solib_bfd_open, svr4_same, svr4_keep_data_in_core, svr4_update_solib_event_breakpoints, diff --git a/gdb/solib-target.c b/gdb/solib-target.c index f304431b814..2833f54b56f 100644 --- a/gdb/solib-target.c +++ b/gdb/solib-target.c @@ -408,7 +408,7 @@ const solib_ops solib_target_so_ops = solib_target_current_sos, solib_target_open_symbol_file_object, solib_target_in_dynsym_resolve_code, - solib_bfd_open, + solib_bfd_open_from_solib, nullptr, nullptr, nullptr, diff --git a/gdb/solib.c b/gdb/solib.c index 4e2cc3d1aa5..15b0f1f5080 100644 --- a/gdb/solib.c +++ b/gdb/solib.c @@ -469,6 +469,17 @@ solib_bfd_open (const char *pathname) return abfd; } +/* Default implementation for solib_ops::bfd_open. Extract the pathname + from the solib object and call solib_bfd_open with it. */ + +gdb_bfd_ref_ptr +solib_bfd_open_from_solib (const solib &so) +{ + std::string path = so.file_path (); + + return solib_bfd_open (path.c_str ()); +} + /* Given a pointer to one of the shared objects in our list of mapped objects, use the recorded name to open a bfd descriptor for the object, build a section table, relocate all the section addresses @@ -486,8 +497,7 @@ solib_map_sections (solib &so) { const solib_ops *ops = gdbarch_so_ops (current_inferior ()->arch ()); - gdb::unique_xmalloc_ptr filename (tilde_expand (so.so_name.c_str ())); - gdb_bfd_ref_ptr abfd (ops->bfd_open (filename.get ())); + gdb_bfd_ref_ptr abfd (ops->bfd_open (so)); /* If we have a core target then the core target might have some helpful information (i.e. build-ids) about the shared libraries we are trying @@ -521,7 +531,9 @@ solib_map_sections (solib &so) However, if it was good enough during the mapped file processing, we assume it's good enough now. */ if (!mapped_file_info->filename ().empty ()) - abfd = ops->bfd_open (mapped_file_info->filename ().c_str ()); + /* TODO: figure out if there's good reason to keep ops->bfd_open, + and if there's a good way to do so. */ + abfd = solib_bfd_open (mapped_file_info->filename ().c_str ()); else abfd = nullptr; @@ -534,7 +546,7 @@ solib_map_sections (solib &so) { warning (_ ("Build-id of %ps does not match core file."), styled_string (file_name_style.style (), - filename.get ())); + so.file_path ().c_str ())); abfd = nullptr; } } @@ -1414,9 +1426,8 @@ reload_shared_libraries_1 (int from_tty) if (from_tty) add_flags |= SYMFILE_VERBOSE; - gdb::unique_xmalloc_ptr filename ( - tilde_expand (so.so_original_name.c_str ())); - gdb_bfd_ref_ptr abfd (solib_bfd_open (filename.get ())); + std::string path = so.file_path (); + gdb_bfd_ref_ptr abfd (solib_bfd_open (path.c_str ())); if (abfd != NULL) found_pathname = bfd_get_filename (abfd.get ()); diff --git a/gdb/solist.h b/gdb/solist.h index 6ab5a06ccc9..6667447cc8e 100644 --- a/gdb/solist.h +++ b/gdb/solist.h @@ -25,6 +25,7 @@ #include "symtab.h" #include "gdb_bfd.h" #include "gdbsupport/owning_intrusive_list.h" +#include "gdbsupport/gdb_tilde_expand.h" #include "target-section.h" /* Base class for target-specific link map information. */ @@ -95,6 +96,10 @@ struct solib : intrusive_list_node that supports outputting multiple segments once the related code supports them. */ CORE_ADDR addr_low = 0, addr_high = 0; + + /* Get the full path to the binary file described by this solib. */ + std::string file_path () const + { return gdb_tilde_expand (so_name.c_str ()); } }; struct solib_ops @@ -134,7 +139,7 @@ struct solib_ops int (*in_dynsym_resolve_code) (CORE_ADDR pc); /* Find and open shared library binary file. */ - gdb_bfd_ref_ptr (*bfd_open) (const char *pathname); + gdb_bfd_ref_ptr (*bfd_open) (const solib &so); /* Given two so_list objects, one from the GDB thread list and another from the list returned by current_sos, return 1 @@ -215,7 +220,9 @@ extern gdb::unique_xmalloc_ptr solib_find (const char *in_pathname, extern gdb_bfd_ref_ptr solib_bfd_fopen (const char *pathname, int fd); /* Find solib binary file and open it. */ -extern gdb_bfd_ref_ptr solib_bfd_open (const char *in_pathname); +extern gdb_bfd_ref_ptr solib_bfd_open (const char *pathname); + +extern gdb_bfd_ref_ptr solib_bfd_open_from_solib (const solib &so); /* A default implementation of the solib_ops::find_solib_addr callback. This just returns an empty std::optional indicating GDB is