Remove mapped_index_base

author Tom Tromey <tromey@adacore.com>

Wed, 15 Jan 2025 23:18:15 +0000 (16:18 -0700)

committer Tom Tromey <tromey@adacore.com>

Fri, 17 Jan 2025 15:52:04 +0000 (08:52 -0700)
author Tom Tromey <tromey@adacore.com>
Wed, 15 Jan 2025 23:18:15 +0000 (16:18 -0700)
committer Tom Tromey <tromey@adacore.com>
Fri, 17 Jan 2025 15:52:04 +0000 (08:52 -0700)
diff --git a/gdb/dwarf2/mapped-index.h b/gdb/dwarf2/mapped-index.h

index e7dd8e925077b925a0530e84a8ca6ccd4e9e1aa4..199ee57a88b3b699810f423fff5a9602b7841d78 100644 (file)
--- a/gdb/dwarf2/mapped-index.h
+++ b/gdb/dwarf2/mapped-index.h
@@ -24,31 +24,6 @@
  #include "language.h"
  #include "quick-symbol.h"
  
-/* An index into a (C++) symbol name component in a symbol name as
-   recorded in the mapped_index's symbol table.  For each C++ symbol
-   in the symbol table, we record one entry for the start of each
-   component in the symbol in a table of name components, and then
-   sort the table, in order to be able to binary search symbol names,
-   ignoring leading namespaces, both completion and regular look up.
-   For example, for symbol "A::B::C", we'll have an entry that points
-   to "A::B::C", another that points to "B::C", and another for "C".
-   Note that function symbols in GDB index have no parameter
-   information, just the function/method names.  You can convert a
-   name_component to a "const char *" using the
-   'mapped_index::symbol_name_at(offset_type)' method.  */
-
-struct name_component
-{
-  /* Offset in the symbol name where the component starts.  Stored as
-     a (32-bit) offset instead of a pointer to save memory and improve
-     locality on 64-bit architectures.  */
-  offset_type name_offset;
-
-  /* The symbol's index in the symbol and constant pool tables of a
-     mapped_index.  */
-  offset_type idx;
-};
-
  class cooked_index;
  
  /* Base class of all DWARF scanner types.  */
@@ -91,50 +66,4 @@ struct dwarf_scanner_base
    { return nullptr; }
  };
  
-/* Base class containing bits shared by both .gdb_index and
-   .debug_name indexes.  */
-
-struct mapped_index_base : public dwarf_scanner_base
-{
-  mapped_index_base () = default;
-  DISABLE_COPY_AND_ASSIGN (mapped_index_base);
-
-  /* The name_component table (a sorted vector).  See name_component's
-     description above.  */
-  std::vector<name_component> name_components;
-
-  /* How NAME_COMPONENTS is sorted.  */
-  enum case_sensitivity name_components_casing;
-
-  /* Return the number of names in the symbol table.  */
-  virtual size_t symbol_name_count () const = 0;
-
-  /* Get the name of the symbol at IDX in the symbol table.  */
-  virtual const char *symbol_name_at
-    (offset_type idx, dwarf2_per_objfile *per_objfile) const = 0;
-
-  /* Return whether the name at IDX in the symbol table should be
-     ignored.  */
-  virtual bool symbol_name_slot_invalid (offset_type idx) const
-  {
-    return false;
-  }
-
-  /* Build the symbol name component sorted vector, if we haven't
-     yet.  */
-  void build_name_components (dwarf2_per_objfile *per_objfile);
-
-  /* Returns the lower (inclusive) and upper (exclusive) bounds of the
-     possible matches for LN_NO_PARAMS in the name component
-     vector.  */
-  std::pair<std::vector<name_component>::const_iterator,
-           std::vector<name_component>::const_iterator>
-    find_name_components_bounds (const lookup_name_info &ln_no_params,
-                                enum language lang,
-                                dwarf2_per_objfile *per_objfile) const;
-
-  cooked_index *index_for_writing () override
-  { return nullptr; }
-};
-
  #endif /* GDB_DWARF2_MAPPED_INDEX_H */
diff --git a/gdb/dwarf2/read-gdb-index.c b/gdb/dwarf2/read-gdb-index.c

index 83bd2e08c5df403d28d662695f409e6428bb270c..165816f23e7f970b9fd5ee49fdc6e67b92a9951e 100644 (file)
--- a/gdb/dwarf2/read-gdb-index.c
+++ b/gdb/dwarf2/read-gdb-index.c
@@ -29,6 +29,9 @@
  #include "mapped-index.h"
  #include "read.h"
  #include "extract-store-integer.h"
+#include "cp-support.h"
+#include "symtab.h"
+#include "gdbsupport/selftest.h"
  
  /* When true, do not reject deprecated .gdb_index sections.  */
  static bool use_deprecated_index_sections = false;
@@ -73,11 +76,43 @@ private:
    gdb::array_view<const gdb_byte> m_bytes;
  };
  
+/* An index into a (C++) symbol name component in a symbol name as
+   recorded in the mapped_index's symbol table.  For each C++ symbol
+   in the symbol table, we record one entry for the start of each
+   component in the symbol in a table of name components, and then
+   sort the table, in order to be able to binary search symbol names,
+   ignoring leading namespaces, both completion and regular look up.
+   For example, for symbol "A::B::C", we'll have an entry that points
+   to "A::B::C", another that points to "B::C", and another for "C".
+   Note that function symbols in GDB index have no parameter
+   information, just the function/method names.  You can convert a
+   name_component to a "const char *" using the
+   'mapped_index::symbol_name_at(offset_type)' method.  */
+
+struct name_component
+{
+  /* Offset in the symbol name where the component starts.  Stored as
+     a (32-bit) offset instead of a pointer to save memory and improve
+     locality on 64-bit architectures.  */
+  offset_type name_offset;
+
+  /* The symbol's index in the symbol and constant pool tables of a
+     mapped_index.  */
+  offset_type idx;
+};
+
  /* A description of .gdb_index index.  The file format is described in
     a comment by the code that writes the index.  */
  
-struct mapped_gdb_index final : public mapped_index_base
+struct mapped_gdb_index : public dwarf_scanner_base
  {
+  /* The name_component table (a sorted vector).  See name_component's
+     description above.  */
+  std::vector<name_component> name_components;
+
+  /* How NAME_COMPONENTS is sorted.  */
+  enum case_sensitivity name_components_casing;
+
    /* Index data format version.  */
    int version = 0;
  
@@ -110,7 +145,7 @@ struct mapped_gdb_index final : public mapped_index_base
      return symbol_table[2 * idx + 1];
    }
  
-  bool symbol_name_slot_invalid (offset_type idx) const override
+  bool symbol_name_slot_invalid (offset_type idx) const
    {
      return (symbol_name_index (idx) == 0
             && symbol_vec_index (idx) == 0);
@@ -118,16 +153,29 @@ struct mapped_gdb_index final : public mapped_index_base
  
    /* Convenience method to get at the name of the symbol at IDX in the
       symbol table.  */
-  const char *symbol_name_at
-    (offset_type idx, dwarf2_per_objfile *per_objfile) const override
+  virtual const char *symbol_name_at
+    (offset_type idx, dwarf2_per_objfile *per_objfile) const
    {
      return (const char *) (this->constant_pool.data ()
                            + symbol_name_index (idx));
    }
  
-  size_t symbol_name_count () const override
+  virtual size_t symbol_name_count () const
    { return this->symbol_table.size () / 2; }
  
+  /* Build the symbol name component sorted vector, if we haven't
+     yet.  */
+  void build_name_components (dwarf2_per_objfile *per_objfile);
+
+  /* Returns the lower (inclusive) and upper (exclusive) bounds of the
+     possible matches for LN_NO_PARAMS in the name component
+     vector.  */
+  std::pair<std::vector<name_component>::const_iterator,
+           std::vector<name_component>::const_iterator>
+    find_name_components_bounds (const lookup_name_info &ln_no_params,
+                                enum language lang,
+                                dwarf2_per_objfile *per_objfile) const;
+
    quick_symbol_functions_up make_quick_functions () const override;
  
    bool version_check () const override
@@ -143,8 +191,798 @@ struct mapped_gdb_index final : public mapped_index_base
      void *obj = index_addrmap->find (static_cast<CORE_ADDR> (addr));
      return static_cast<dwarf2_per_cu_data *> (obj);
    }
+
+  cooked_index *index_for_writing () override
+  { return nullptr; }
  };
  
+
+/* Starting from a search name, return the string that finds the upper
+   bound of all strings that start with SEARCH_NAME in a sorted name
+   list.  Returns the empty string to indicate that the upper bound is
+   the end of the list.  */
+
+static std::string
+make_sort_after_prefix_name (const char *search_name)
+{
+  /* When looking to complete "func", we find the upper bound of all
+     symbols that start with "func" by looking for where we'd insert
+     the closest string that would follow "func" in lexicographical
+     order.  Usually, that's "func"-with-last-character-incremented,
+     i.e. "fund".  Mind non-ASCII characters, though.  Usually those
+     will be UTF-8 multi-byte sequences, but we can't be certain.
+     Especially mind the 0xff character, which is a valid character in
+     non-UTF-8 source character sets (e.g. Latin1 'ÿ'), and we can't
+     rule out compilers allowing it in identifiers.  Note that
+     conveniently, strcmp/strcasecmp are specified to compare
+     characters interpreted as unsigned char.  So what we do is treat
+     the whole string as a base 256 number composed of a sequence of
+     base 256 "digits" and add 1 to it.  I.e., adding 1 to 0xff wraps
+     to 0, and carries 1 to the following more-significant position.
+     If the very first character in SEARCH_NAME ends up incremented
+     and carries/overflows, then the upper bound is the end of the
+     list.  The string after the empty string is also the empty
+     string.
+
+     Some examples of this operation:
+
+       SEARCH_NAME  => "+1" RESULT
+
+       "abc"              => "abd"
+       "ab\xff"           => "ac"
+       "\xff" "a" "\xff"  => "\xff" "b"
+       "\xff"             => ""
+       "\xff\xff"         => ""
+       ""                 => ""
+
+     Then, with these symbols for example:
+
+      func
+      func1
+      fund
+
+     completing "func" looks for symbols between "func" and
+     "func"-with-last-character-incremented, i.e. "fund" (exclusive),
+     which finds "func" and "func1", but not "fund".
+
+     And with:
+
+      funcÿ     (Latin1 'ÿ' [0xff])
+      funcÿ1
+      fund
+
+     completing "funcÿ" looks for symbols between "funcÿ" and "fund"
+     (exclusive), which finds "funcÿ" and "funcÿ1", but not "fund".
+
+     And with:
+
+      ÿÿ        (Latin1 'ÿ' [0xff])
+      ÿÿ1
+
+     completing "ÿ" or "ÿÿ" looks for symbols between between "ÿÿ" and
+     the end of the list.
+  */
+  std::string after = search_name;
+  while (!after.empty () && (unsigned char) after.back () == 0xff)
+    after.pop_back ();
+  if (!after.empty ())
+    after.back () = (unsigned char) after.back () + 1;
+  return after;
+}
+
+/* See declaration.  */
+
+std::pair<std::vector<name_component>::const_iterator,
+         std::vector<name_component>::const_iterator>
+mapped_gdb_index::find_name_components_bounds
+  (const lookup_name_info &lookup_name_without_params, language lang,
+   dwarf2_per_objfile *per_objfile) const
+{
+  auto *name_cmp
+    = this->name_components_casing == case_sensitive_on ? strcmp : strcasecmp;
+
+  const char *lang_name
+    = lookup_name_without_params.language_lookup_name (lang);
+
+  /* Comparison function object for lower_bound that matches against a
+     given symbol name.  */
+  auto lookup_compare_lower = [&] (const name_component &elem,
+                                  const char *name)
+    {
+      const char *elem_qualified = this->symbol_name_at (elem.idx, per_objfile);
+      const char *elem_name = elem_qualified + elem.name_offset;
+      return name_cmp (elem_name, name) < 0;
+    };
+
+  /* Comparison function object for upper_bound that matches against a
+     given symbol name.  */
+  auto lookup_compare_upper = [&] (const char *name,
+                                  const name_component &elem)
+    {
+      const char *elem_qualified = this->symbol_name_at (elem.idx, per_objfile);
+      const char *elem_name = elem_qualified + elem.name_offset;
+      return name_cmp (name, elem_name) < 0;
+    };
+
+  auto begin = this->name_components.begin ();
+  auto end = this->name_components.end ();
+
+  /* Find the lower bound.  */
+  auto lower = [&] ()
+    {
+      if (lookup_name_without_params.completion_mode () && lang_name[0] == '\0')
+       return begin;
+      else
+       return std::lower_bound (begin, end, lang_name, lookup_compare_lower);
+    } ();
+
+  /* Find the upper bound.  */
+  auto upper = [&] ()
+    {
+      if (lookup_name_without_params.completion_mode ())
+       {
+         /* In completion mode, we want UPPER to point past all
+            symbols names that have the same prefix.  I.e., with
+            these symbols, and completing "func":
+
+             function        << lower bound
+             function1
+             other_function  << upper bound
+
+            We find the upper bound by looking for the insertion
+            point of "func"-with-last-character-incremented,
+            i.e. "fund".  */
+         std::string after = make_sort_after_prefix_name (lang_name);
+         if (after.empty ())
+           return end;
+         return std::lower_bound (lower, end, after.c_str (),
+                                  lookup_compare_lower);
+       }
+      else
+       return std::upper_bound (lower, end, lang_name, lookup_compare_upper);
+    } ();
+
+  return {lower, upper};
+}
+
+/* See declaration.  */
+
+void
+mapped_gdb_index::build_name_components (dwarf2_per_objfile *per_objfile)
+{
+  if (!this->name_components.empty ())
+    return;
+
+  this->name_components_casing = case_sensitivity;
+  auto *name_cmp
+    = this->name_components_casing == case_sensitive_on ? strcmp : strcasecmp;
+
+  /* The code below only knows how to break apart components of C++
+     symbol names (and other languages that use '::' as
+     namespace/module separator) and Ada symbol names.  */
+  auto count = this->symbol_name_count ();
+  for (offset_type idx = 0; idx < count; idx++)
+    {
+      if (this->symbol_name_slot_invalid (idx))
+       continue;
+
+      const char *name = this->symbol_name_at (idx, per_objfile);
+
+      /* Add each name component to the name component table.  */
+      unsigned int previous_len = 0;
+
+      if (strstr (name, "::") != nullptr)
+       {
+         for (unsigned int current_len = cp_find_first_component (name);
+              name[current_len] != '\0';
+              current_len += cp_find_first_component (name + current_len))
+           {
+             gdb_assert (name[current_len] == ':');
+             this->name_components.push_back ({previous_len, idx});
+             /* Skip the '::'.  */
+             current_len += 2;
+             previous_len = current_len;
+           }
+       }
+      else
+       {
+         /* Handle the Ada encoded (aka mangled) form here.  */
+         for (const char *iter = strstr (name, "__");
+              iter != nullptr;
+              iter = strstr (iter, "__"))
+           {
+             this->name_components.push_back ({previous_len, idx});
+             iter += 2;
+             previous_len = iter - name;
+           }
+       }
+
+      this->name_components.push_back ({previous_len, idx});
+    }
+
+  /* Sort name_components elements by name.  */
+  auto name_comp_compare = [&] (const name_component &left,
+                               const name_component &right)
+    {
+      const char *left_qualified
+       = this->symbol_name_at (left.idx, per_objfile);
+      const char *right_qualified
+       = this->symbol_name_at (right.idx, per_objfile);
+
+      const char *left_name = left_qualified + left.name_offset;
+      const char *right_name = right_qualified + right.name_offset;
+
+      return name_cmp (left_name, right_name) < 0;
+    };
+
+  std::sort (this->name_components.begin (),
+            this->name_components.end (),
+            name_comp_compare);
+}
+
+/* Helper for dw2_expand_symtabs_matching that works with a
+   mapped_index_base instead of the containing objfile.  This is split
+   to a separate function in order to be able to unit test the
+   name_components matching using a mock mapped_index_base.  For each
+   symbol name that matches, calls MATCH_CALLBACK, passing it the
+   symbol's index in the mapped_index_base symbol table.  */
+
+static bool
+dw2_expand_symtabs_matching_symbol
+  (mapped_gdb_index &index,
+   const lookup_name_info &lookup_name_in,
+   gdb::function_view<expand_symtabs_symbol_matcher_ftype> symbol_matcher,
+   gdb::function_view<bool (offset_type)> match_callback,
+   dwarf2_per_objfile *per_objfile,
+   gdb::function_view<expand_symtabs_lang_matcher_ftype> lang_matcher)
+{
+  lookup_name_info lookup_name_without_params
+    = lookup_name_in.make_ignore_params ();
+
+  /* Build the symbol name component sorted vector, if we haven't
+     yet.  */
+  index.build_name_components (per_objfile);
+
+  /* The same symbol may appear more than once in the range though.
+     E.g., if we're looking for symbols that complete "w", and we have
+     a symbol named "w1::w2", we'll find the two name components for
+     that same symbol in the range.  To be sure we only call the
+     callback once per symbol, we first collect the symbol name
+     indexes that matched in a temporary vector and ignore
+     duplicates.  */
+  std::vector<offset_type> matches;
+
+  struct name_and_matcher
+  {
+    symbol_name_matcher_ftype *matcher;
+    const char *name;
+
+    bool operator== (const name_and_matcher &other) const
+    {
+      return matcher == other.matcher && strcmp (name, other.name) == 0;
+    }
+  };
+
+  /* A vector holding all the different symbol name matchers, for all
+     languages.  */
+  std::vector<name_and_matcher> matchers;
+
+  for (int i = 0; i < nr_languages; i++)
+    {
+      enum language lang_e = (enum language) i;
+      if (lang_matcher != nullptr && !lang_matcher (lang_e))
+       continue;
+
+      const language_defn *lang = language_def (lang_e);
+      symbol_name_matcher_ftype *name_matcher
+       = lang->get_symbol_name_matcher (lookup_name_without_params);
+
+      name_and_matcher key {
+        name_matcher,
+        lookup_name_without_params.language_lookup_name (lang_e)
+      };
+
+      /* Don't insert the same comparison routine more than once.
+        Note that we do this linear walk.  This is not a problem in
+        practice because the number of supported languages is
+        low.  */
+      if (std::find (matchers.begin (), matchers.end (), key)
+         != matchers.end ())
+       continue;
+      matchers.push_back (std::move (key));
+
+      auto bounds
+       = index.find_name_components_bounds (lookup_name_without_params,
+                                            lang_e, per_objfile);
+
+      /* Now for each symbol name in range, check to see if we have a name
+        match, and if so, call the MATCH_CALLBACK callback.  */
+
+      for (; bounds.first != bounds.second; ++bounds.first)
+       {
+         const char *qualified
+           = index.symbol_name_at (bounds.first->idx, per_objfile);
+
+         if (!name_matcher (qualified, lookup_name_without_params, NULL)
+             || (symbol_matcher != NULL && !symbol_matcher (qualified)))
+           continue;
+
+         matches.push_back (bounds.first->idx);
+       }
+    }
+
+  std::sort (matches.begin (), matches.end ());
+
+  /* Finally call the callback, once per match.  */
+  ULONGEST prev = -1;
+  bool result = true;
+  for (offset_type idx : matches)
+    {
+      if (prev != idx)
+       {
+         if (!match_callback (idx))
+           {
+             result = false;
+             break;
+           }
+         prev = idx;
+       }
+    }
+
+  /* Above we use a type wider than idx's for 'prev', since 0 and
+     (offset_type)-1 are both possible values.  */
+  static_assert (sizeof (prev) > sizeof (offset_type), "");
+
+  return result;
+}
+
+#if GDB_SELF_TEST
+
+namespace selftests { namespace dw2_expand_symtabs_matching {
+
+/* A mock .gdb_index/.debug_names-like name index table, enough to
+   exercise dw2_expand_symtabs_matching_symbol, which works with the
+   mapped_index_base interface.  Builds an index from the symbol list
+   passed as parameter to the constructor.  */
+class mock_mapped_index : public mapped_gdb_index
+{
+public:
+  mock_mapped_index (gdb::array_view<const char *> symbols)
+    : m_symbol_table (symbols)
+  {}
+
+  DISABLE_COPY_AND_ASSIGN (mock_mapped_index);
+
+  /* Return the number of names in the symbol table.  */
+  size_t symbol_name_count () const override
+  {
+    return m_symbol_table.size ();
+  }
+
+  /* Get the name of the symbol at IDX in the symbol table.  */
+  const char *symbol_name_at
+    (offset_type idx, dwarf2_per_objfile *per_objfile) const override
+  {
+    return m_symbol_table[idx];
+  }
+
+  quick_symbol_functions_up make_quick_functions () const override
+  {
+    return nullptr;
+  }
+
+private:
+  gdb::array_view<const char *> m_symbol_table;
+};
+
+/* Convenience function that converts a NULL pointer to a "<null>"
+   string, to pass to print routines.  */
+
+static const char *
+string_or_null (const char *str)
+{
+  return str != NULL ? str : "<null>";
+}
+
+/* Check if a lookup_name_info built from
+   NAME/MATCH_TYPE/COMPLETION_MODE matches the symbols in the mock
+   index.  EXPECTED_LIST is the list of expected matches, in expected
+   matching order.  If no match expected, then an empty list is
+   specified.  Returns true on success.  On failure prints a warning
+   indicating the file:line that failed, and returns false.  */
+
+static bool
+check_match (const char *file, int line,
+            mock_mapped_index &mock_index,
+            const char *name, symbol_name_match_type match_type,
+            bool completion_mode,
+            std::initializer_list<const char *> expected_list,
+            dwarf2_per_objfile *per_objfile)
+{
+  lookup_name_info lookup_name (name, match_type, completion_mode);
+
+  bool matched = true;
+
+  auto mismatch = [&] (const char *expected_str,
+                      const char *got)
+  {
+    warning (_("%s:%d: match_type=%s, looking-for=\"%s\", "
+              "expected=\"%s\", got=\"%s\"\n"),
+            file, line,
+            (match_type == symbol_name_match_type::FULL
+             ? "FULL" : "WILD"),
+            name, string_or_null (expected_str), string_or_null (got));
+    matched = false;
+  };
+
+  auto expected_it = expected_list.begin ();
+  auto expected_end = expected_list.end ();
+
+  dw2_expand_symtabs_matching_symbol (mock_index, lookup_name,
+                                     nullptr,
+                                     [&] (offset_type idx)
+  {
+    const char *matched_name = mock_index.symbol_name_at (idx, per_objfile);
+    const char *expected_str
+      = expected_it == expected_end ? NULL : *expected_it++;
+
+    if (expected_str == NULL || strcmp (expected_str, matched_name) != 0)
+      mismatch (expected_str, matched_name);
+    return true;
+  }, per_objfile, nullptr);
+
+  const char *expected_str
+  = expected_it == expected_end ? NULL : *expected_it++;
+  if (expected_str != NULL)
+    mismatch (expected_str, NULL);
+
+  return matched;
+}
+
+/* The symbols added to the mock mapped_index for testing (in
+   canonical form).  */
+static const char *test_symbols[] = {
+  "function",
+  "std::bar",
+  "std::zfunction",
+  "std::zfunction2",
+  "w1::w2",
+  "ns::foo<char*>",
+  "ns::foo<int>",
+  "ns::foo<long>",
+  "ns2::tmpl<int>::foo2",
+  "(anonymous namespace)::A::B::C",
+
+  /* These are used to check that the increment-last-char in the
+     matching algorithm for completion doesn't match "t1_fund" when
+     completing "t1_func".  */
+  "t1_func",
+  "t1_func1",
+  "t1_fund",
+  "t1_fund1",
+
+  /* A UTF-8 name with multi-byte sequences to make sure that
+     cp-name-parser understands this as a single identifier ("função"
+     is "function" in PT).  */
+  (const char *)u8"u8função",
+
+  /* Test a symbol name that ends with a 0xff character, which is a
+     valid character in non-UTF-8 source character sets (e.g. Latin1
+     'ÿ'), and we can't rule out compilers allowing it in identifiers.
+     We test this because the completion algorithm finds the upper
+     bound of symbols by looking for the insertion point of
+     "func"-with-last-character-incremented, i.e. "fund", and adding 1
+     to 0xff should wraparound and carry to the previous character.
+     See comments in make_sort_after_prefix_name.  */
+  "yfunc\377",
+
+  /* Some more symbols with \377 (0xff).  See above.  */
+  "\377",
+  "\377\377123",
+
+  /* A name with all sorts of complications.  Starts with "z" to make
+     it easier for the completion tests below.  */
+#define Z_SYM_NAME \
+  "z::std::tuple<(anonymous namespace)::ui*, std::bar<(anonymous namespace)::ui> >" \
+    "::tuple<(anonymous namespace)::ui*, " \
+    "std::default_delete<(anonymous namespace)::ui>, void>"
+
+  Z_SYM_NAME
+};
+
+/* Returns true if the mapped_index_base::find_name_component_bounds
+   method finds EXPECTED_SYMS in INDEX when looking for SEARCH_NAME,
+   in completion mode.  */
+
+static bool
+check_find_bounds_finds (mapped_gdb_index &index,
+                        const char *search_name,
+                        gdb::array_view<const char *> expected_syms,
+                        dwarf2_per_objfile *per_objfile)
+{
+  lookup_name_info lookup_name (search_name,
+                               symbol_name_match_type::FULL, true);
+
+  auto bounds = index.find_name_components_bounds (lookup_name,
+                                                  language_cplus,
+                                                  per_objfile);
+
+  size_t distance = std::distance (bounds.first, bounds.second);
+  if (distance != expected_syms.size ())
+    return false;
+
+  for (size_t exp_elem = 0; exp_elem < distance; exp_elem++)
+    {
+      auto nc_elem = bounds.first + exp_elem;
+      const char *qualified = index.symbol_name_at (nc_elem->idx, per_objfile);
+      if (strcmp (qualified, expected_syms[exp_elem]) != 0)
+       return false;
+    }
+
+  return true;
+}
+
+/* Test the lower-level mapped_index::find_name_component_bounds
+   method.  */
+
+static void
+test_mapped_index_find_name_component_bounds ()
+{
+  mock_mapped_index mock_index (test_symbols);
+
+  mock_index.build_name_components (NULL /* per_objfile */);
+
+  /* Test the lower-level mapped_index::find_name_component_bounds
+     method in completion mode.  */
+  {
+    static const char *expected_syms[] = {
+      "t1_func",
+      "t1_func1",
+    };
+
+    SELF_CHECK (check_find_bounds_finds
+                 (mock_index, "t1_func", expected_syms,
+                  NULL /* per_objfile */));
+  }
+
+  /* Check that the increment-last-char in the name matching algorithm
+     for completion doesn't get confused with Ansi1 'ÿ' / 0xff.  See
+     make_sort_after_prefix_name.  */
+  {
+    static const char *expected_syms1[] = {
+      "\377",
+      "\377\377123",
+    };
+    SELF_CHECK (check_find_bounds_finds
+                 (mock_index, "\377", expected_syms1, NULL /* per_objfile */));
+
+    static const char *expected_syms2[] = {
+      "\377\377123",
+    };
+    SELF_CHECK (check_find_bounds_finds
+                 (mock_index, "\377\377", expected_syms2,
+                  NULL /* per_objfile */));
+  }
+}
+
+/* Test dw2_expand_symtabs_matching_symbol.  */
+
+static void
+test_dw2_expand_symtabs_matching_symbol ()
+{
+  mock_mapped_index mock_index (test_symbols);
+
+  /* We let all tests run until the end even if some fails, for debug
+     convenience.  */
+  bool any_mismatch = false;
+
+  /* Create the expected symbols list (an initializer_list).  Needed
+     because lists have commas, and we need to pass them to CHECK,
+     which is a macro.  */
+#define EXPECT(...) { __VA_ARGS__ }
+
+  /* Wrapper for check_match that passes down the current
+     __FILE__/__LINE__.  */
+#define CHECK_MATCH(NAME, MATCH_TYPE, COMPLETION_MODE, EXPECTED_LIST)  \
+  any_mismatch |= !check_match (__FILE__, __LINE__,                    \
+                               mock_index,                             \
+                               NAME, MATCH_TYPE, COMPLETION_MODE,      \
+                               EXPECTED_LIST, NULL)
+
+  /* Identity checks.  */
+  for (const char *sym : test_symbols)
+    {
+      /* Should be able to match all existing symbols.  */
+      CHECK_MATCH (sym, symbol_name_match_type::FULL, false,
+                  EXPECT (sym));
+
+      /* Should be able to match all existing symbols with
+        parameters.  */
+      std::string with_params = std::string (sym) + "(int)";
+      CHECK_MATCH (with_params.c_str (), symbol_name_match_type::FULL, false,
+                  EXPECT (sym));
+
+      /* Should be able to match all existing symbols with
+        parameters and qualifiers.  */
+      with_params = std::string (sym) + " ( int ) const";
+      CHECK_MATCH (with_params.c_str (), symbol_name_match_type::FULL, false,
+                  EXPECT (sym));
+
+      /* This should really find sym, but cp-name-parser.y doesn't
+        know about lvalue/rvalue qualifiers yet.  */
+      with_params = std::string (sym) + " ( int ) &&";
+      CHECK_MATCH (with_params.c_str (), symbol_name_match_type::FULL, false,
+                  {});
+    }
+
+  /* Check that the name matching algorithm for completion doesn't get
+     confused with Latin1 'ÿ' / 0xff.  See
+     make_sort_after_prefix_name.  */
+  {
+    static const char str[] = "\377";
+    CHECK_MATCH (str, symbol_name_match_type::FULL, true,
+                EXPECT ("\377", "\377\377123"));
+  }
+
+  /* Check that the increment-last-char in the matching algorithm for
+     completion doesn't match "t1_fund" when completing "t1_func".  */
+  {
+    static const char str[] = "t1_func";
+    CHECK_MATCH (str, symbol_name_match_type::FULL, true,
+                EXPECT ("t1_func", "t1_func1"));
+  }
+
+  /* Check that completion mode works at each prefix of the expected
+     symbol name.  */
+  {
+    static const char str[] = "function(int)";
+    size_t len = strlen (str);
+    std::string lookup;
+
+    for (size_t i = 1; i < len; i++)
+      {
+       lookup.assign (str, i);
+       CHECK_MATCH (lookup.c_str (), symbol_name_match_type::FULL, true,
+                    EXPECT ("function"));
+      }
+  }
+
+  /* While "w" is a prefix of both components, the match function
+     should still only be called once.  */
+  {
+    CHECK_MATCH ("w", symbol_name_match_type::FULL, true,
+                EXPECT ("w1::w2"));
+    CHECK_MATCH ("w", symbol_name_match_type::WILD, true,
+                EXPECT ("w1::w2"));
+  }
+
+  /* Same, with a "complicated" symbol.  */
+  {
+    static const char str[] = Z_SYM_NAME;
+    size_t len = strlen (str);
+    std::string lookup;
+
+    for (size_t i = 1; i < len; i++)
+      {
+       lookup.assign (str, i);
+       CHECK_MATCH (lookup.c_str (), symbol_name_match_type::FULL, true,
+                    EXPECT (Z_SYM_NAME));
+      }
+  }
+
+  /* In FULL mode, an incomplete symbol doesn't match.  */
+  {
+    CHECK_MATCH ("std::zfunction(int", symbol_name_match_type::FULL, false,
+                {});
+  }
+
+  /* A complete symbol with parameters matches any overload, since the
+     index has no overload info.  */
+  {
+    CHECK_MATCH ("std::zfunction(int)", symbol_name_match_type::FULL, true,
+                EXPECT ("std::zfunction", "std::zfunction2"));
+    CHECK_MATCH ("zfunction(int)", symbol_name_match_type::WILD, true,
+                EXPECT ("std::zfunction", "std::zfunction2"));
+    CHECK_MATCH ("zfunc", symbol_name_match_type::WILD, true,
+                EXPECT ("std::zfunction", "std::zfunction2"));
+  }
+
+  /* Check that whitespace is ignored appropriately.  A symbol with a
+     template argument list. */
+  {
+    static const char expected[] = "ns::foo<int>";
+    CHECK_MATCH ("ns :: foo < int > ", symbol_name_match_type::FULL, false,
+                EXPECT (expected));
+    CHECK_MATCH ("foo < int > ", symbol_name_match_type::WILD, false,
+                EXPECT (expected));
+  }
+
+  /* Check that whitespace is ignored appropriately.  A symbol with a
+     template argument list that includes a pointer.  */
+  {
+    static const char expected[] = "ns::foo<char*>";
+    /* Try both completion and non-completion modes.  */
+    static const bool completion_mode[2] = {false, true};
+    for (size_t i = 0; i < 2; i++)
+      {
+       CHECK_MATCH ("ns :: foo < char * >", symbol_name_match_type::FULL,
+                    completion_mode[i], EXPECT (expected));
+       CHECK_MATCH ("foo < char * >", symbol_name_match_type::WILD,
+                    completion_mode[i], EXPECT (expected));
+
+       CHECK_MATCH ("ns :: foo < char * > (int)", symbol_name_match_type::FULL,
+                    completion_mode[i], EXPECT (expected));
+       CHECK_MATCH ("foo < char * > (int)", symbol_name_match_type::WILD,
+                    completion_mode[i], EXPECT (expected));
+      }
+  }
+
+  {
+    /* Check method qualifiers are ignored.  */
+    static const char expected[] = "ns::foo<char*>";
+    CHECK_MATCH ("ns :: foo < char * >  ( int ) const",
+                symbol_name_match_type::FULL, true, EXPECT (expected));
+    CHECK_MATCH ("ns :: foo < char * >  ( int ) &&",
+                symbol_name_match_type::FULL, true, EXPECT (expected));
+    CHECK_MATCH ("foo < char * >  ( int ) const",
+                symbol_name_match_type::WILD, true, EXPECT (expected));
+    CHECK_MATCH ("foo < char * >  ( int ) &&",
+                symbol_name_match_type::WILD, true, EXPECT (expected));
+  }
+
+  /* Test lookup names that don't match anything.  */
+  {
+    CHECK_MATCH ("bar2", symbol_name_match_type::WILD, false,
+                {});
+
+    CHECK_MATCH ("doesntexist", symbol_name_match_type::FULL, false,
+                {});
+  }
+
+  /* Some wild matching tests, exercising "(anonymous namespace)",
+     which should not be confused with a parameter list.  */
+  {
+    static const char *syms[] = {
+      "A::B::C",
+      "B::C",
+      "C",
+      "A :: B :: C ( int )",
+      "B :: C ( int )",
+      "C ( int )",
+    };
+
+    for (const char *s : syms)
+      {
+       CHECK_MATCH (s, symbol_name_match_type::WILD, false,
+                    EXPECT ("(anonymous namespace)::A::B::C"));
+      }
+  }
+
+  {
+    static const char expected[] = "ns2::tmpl<int>::foo2";
+    CHECK_MATCH ("tmp", symbol_name_match_type::WILD, true,
+                EXPECT (expected));
+    CHECK_MATCH ("tmpl<", symbol_name_match_type::WILD, true,
+                EXPECT (expected));
+  }
+
+  SELF_CHECK (!any_mismatch);
+
+#undef EXPECT
+#undef CHECK_MATCH
+}
+
+static void
+run_test ()
+{
+  test_mapped_index_find_name_component_bounds ();
+  test_dw2_expand_symtabs_matching_symbol ();
+}
+
+}} // namespace selftests::dw2_expand_symtabs_matching
+
+#endif /* GDB_SELF_TEST */
+
  struct dwarf2_gdb_index : public dwarf2_base_index_functions
  {
    /* This dumps minimal information about the index.
@@ -743,4 +1581,9 @@ Warning: This option must be enabled before gdb reads the file."),
                            NULL,
                            NULL,
                            &setlist, &showlist);
+
+#if GDB_SELF_TEST
+  selftests::register_test ("dw2_expand_symtabs_matching",
+                           selftests::dw2_expand_symtabs_matching::run_test);
+#endif
  }
diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c

index 9484754447746b8a14b1bde2092407edb2aba298..4503977d62b8c277c4f379973fe0a67d54f3a23b 100644 (file)
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -2078,788 +2078,6 @@ dwarf2_base_index_functions::expand_all_symtabs (struct objfile *objfile)
      }
  }
  
-
-/* Starting from a search name, return the string that finds the upper
-   bound of all strings that start with SEARCH_NAME in a sorted name
-   list.  Returns the empty string to indicate that the upper bound is
-   the end of the list.  */
-
-static std::string
-make_sort_after_prefix_name (const char *search_name)
-{
-  /* When looking to complete "func", we find the upper bound of all
-     symbols that start with "func" by looking for where we'd insert
-     the closest string that would follow "func" in lexicographical
-     order.  Usually, that's "func"-with-last-character-incremented,
-     i.e. "fund".  Mind non-ASCII characters, though.  Usually those
-     will be UTF-8 multi-byte sequences, but we can't be certain.
-     Especially mind the 0xff character, which is a valid character in
-     non-UTF-8 source character sets (e.g. Latin1 'ÿ'), and we can't
-     rule out compilers allowing it in identifiers.  Note that
-     conveniently, strcmp/strcasecmp are specified to compare
-     characters interpreted as unsigned char.  So what we do is treat
-     the whole string as a base 256 number composed of a sequence of
-     base 256 "digits" and add 1 to it.  I.e., adding 1 to 0xff wraps
-     to 0, and carries 1 to the following more-significant position.
-     If the very first character in SEARCH_NAME ends up incremented
-     and carries/overflows, then the upper bound is the end of the
-     list.  The string after the empty string is also the empty
-     string.
-
-     Some examples of this operation:
-
-       SEARCH_NAME  => "+1" RESULT
-
-       "abc"              => "abd"
-       "ab\xff"           => "ac"
-       "\xff" "a" "\xff"  => "\xff" "b"
-       "\xff"             => ""
-       "\xff\xff"         => ""
-       ""                 => ""
-
-     Then, with these symbols for example:
-
-      func
-      func1
-      fund
-
-     completing "func" looks for symbols between "func" and
-     "func"-with-last-character-incremented, i.e. "fund" (exclusive),
-     which finds "func" and "func1", but not "fund".
-
-     And with:
-
-      funcÿ     (Latin1 'ÿ' [0xff])
-      funcÿ1
-      fund
-
-     completing "funcÿ" looks for symbols between "funcÿ" and "fund"
-     (exclusive), which finds "funcÿ" and "funcÿ1", but not "fund".
-
-     And with:
-
-      ÿÿ        (Latin1 'ÿ' [0xff])
-      ÿÿ1
-
-     completing "ÿ" or "ÿÿ" looks for symbols between between "ÿÿ" and
-     the end of the list.
-  */
-  std::string after = search_name;
-  while (!after.empty () && (unsigned char) after.back () == 0xff)
-    after.pop_back ();
-  if (!after.empty ())
-    after.back () = (unsigned char) after.back () + 1;
-  return after;
-}
-
-/* See declaration.  */
-
-std::pair<std::vector<name_component>::const_iterator,
-         std::vector<name_component>::const_iterator>
-mapped_index_base::find_name_components_bounds
-  (const lookup_name_info &lookup_name_without_params, language lang,
-   dwarf2_per_objfile *per_objfile) const
-{
-  auto *name_cmp
-    = this->name_components_casing == case_sensitive_on ? strcmp : strcasecmp;
-
-  const char *lang_name
-    = lookup_name_without_params.language_lookup_name (lang);
-
-  /* Comparison function object for lower_bound that matches against a
-     given symbol name.  */
-  auto lookup_compare_lower = [&] (const name_component &elem,
-                                  const char *name)
-    {
-      const char *elem_qualified = this->symbol_name_at (elem.idx, per_objfile);
-      const char *elem_name = elem_qualified + elem.name_offset;
-      return name_cmp (elem_name, name) < 0;
-    };
-
-  /* Comparison function object for upper_bound that matches against a
-     given symbol name.  */
-  auto lookup_compare_upper = [&] (const char *name,
-                                  const name_component &elem)
-    {
-      const char *elem_qualified = this->symbol_name_at (elem.idx, per_objfile);
-      const char *elem_name = elem_qualified + elem.name_offset;
-      return name_cmp (name, elem_name) < 0;
-    };
-
-  auto begin = this->name_components.begin ();
-  auto end = this->name_components.end ();
-
-  /* Find the lower bound.  */
-  auto lower = [&] ()
-    {
-      if (lookup_name_without_params.completion_mode () && lang_name[0] == '\0')
-       return begin;
-      else
-       return std::lower_bound (begin, end, lang_name, lookup_compare_lower);
-    } ();
-
-  /* Find the upper bound.  */
-  auto upper = [&] ()
-    {
-      if (lookup_name_without_params.completion_mode ())
-       {
-         /* In completion mode, we want UPPER to point past all
-            symbols names that have the same prefix.  I.e., with
-            these symbols, and completing "func":
-
-             function        << lower bound
-             function1
-             other_function  << upper bound
-
-            We find the upper bound by looking for the insertion
-            point of "func"-with-last-character-incremented,
-            i.e. "fund".  */
-         std::string after = make_sort_after_prefix_name (lang_name);
-         if (after.empty ())
-           return end;
-         return std::lower_bound (lower, end, after.c_str (),
-                                  lookup_compare_lower);
-       }
-      else
-       return std::upper_bound (lower, end, lang_name, lookup_compare_upper);
-    } ();
-
-  return {lower, upper};
-}
-
-/* See declaration.  */
-
-void
-mapped_index_base::build_name_components (dwarf2_per_objfile *per_objfile)
-{
-  if (!this->name_components.empty ())
-    return;
-
-  this->name_components_casing = case_sensitivity;
-  auto *name_cmp
-    = this->name_components_casing == case_sensitive_on ? strcmp : strcasecmp;
-
-  /* The code below only knows how to break apart components of C++
-     symbol names (and other languages that use '::' as
-     namespace/module separator) and Ada symbol names.  */
-  auto count = this->symbol_name_count ();
-  for (offset_type idx = 0; idx < count; idx++)
-    {
-      if (this->symbol_name_slot_invalid (idx))
-       continue;
-
-      const char *name = this->symbol_name_at (idx, per_objfile);
-
-      /* Add each name component to the name component table.  */
-      unsigned int previous_len = 0;
-
-      if (strstr (name, "::") != nullptr)
-       {
-         for (unsigned int current_len = cp_find_first_component (name);
-              name[current_len] != '\0';
-              current_len += cp_find_first_component (name + current_len))
-           {
-             gdb_assert (name[current_len] == ':');
-             this->name_components.push_back ({previous_len, idx});
-             /* Skip the '::'.  */
-             current_len += 2;
-             previous_len = current_len;
-           }
-       }
-      else
-       {
-         /* Handle the Ada encoded (aka mangled) form here.  */
-         for (const char *iter = strstr (name, "__");
-              iter != nullptr;
-              iter = strstr (iter, "__"))
-           {
-             this->name_components.push_back ({previous_len, idx});
-             iter += 2;
-             previous_len = iter - name;
-           }
-       }
-
-      this->name_components.push_back ({previous_len, idx});
-    }
-
-  /* Sort name_components elements by name.  */
-  auto name_comp_compare = [&] (const name_component &left,
-                               const name_component &right)
-    {
-      const char *left_qualified
-       = this->symbol_name_at (left.idx, per_objfile);
-      const char *right_qualified
-       = this->symbol_name_at (right.idx, per_objfile);
-
-      const char *left_name = left_qualified + left.name_offset;
-      const char *right_name = right_qualified + right.name_offset;
-
-      return name_cmp (left_name, right_name) < 0;
-    };
-
-  std::sort (this->name_components.begin (),
-            this->name_components.end (),
-            name_comp_compare);
-}
-
-/* See read.h.  */
-
-bool
-dw2_expand_symtabs_matching_symbol
-  (mapped_index_base &index,
-   const lookup_name_info &lookup_name_in,
-   gdb::function_view<expand_symtabs_symbol_matcher_ftype> symbol_matcher,
-   gdb::function_view<bool (offset_type)> match_callback,
-   dwarf2_per_objfile *per_objfile,
-   gdb::function_view<expand_symtabs_lang_matcher_ftype> lang_matcher)
-{
-  lookup_name_info lookup_name_without_params
-    = lookup_name_in.make_ignore_params ();
-
-  /* Build the symbol name component sorted vector, if we haven't
-     yet.  */
-  index.build_name_components (per_objfile);
-
-  /* The same symbol may appear more than once in the range though.
-     E.g., if we're looking for symbols that complete "w", and we have
-     a symbol named "w1::w2", we'll find the two name components for
-     that same symbol in the range.  To be sure we only call the
-     callback once per symbol, we first collect the symbol name
-     indexes that matched in a temporary vector and ignore
-     duplicates.  */
-  std::vector<offset_type> matches;
-
-  struct name_and_matcher
-  {
-    symbol_name_matcher_ftype *matcher;
-    const char *name;
-
-    bool operator== (const name_and_matcher &other) const
-    {
-      return matcher == other.matcher && strcmp (name, other.name) == 0;
-    }
-  };
-
-  /* A vector holding all the different symbol name matchers, for all
-     languages.  */
-  std::vector<name_and_matcher> matchers;
-
-  for (int i = 0; i < nr_languages; i++)
-    {
-      enum language lang_e = (enum language) i;
-      if (lang_matcher != nullptr && !lang_matcher (lang_e))
-       continue;
-
-      const language_defn *lang = language_def (lang_e);
-      symbol_name_matcher_ftype *name_matcher
-       = lang->get_symbol_name_matcher (lookup_name_without_params);
-
-      name_and_matcher key {
-        name_matcher,
-        lookup_name_without_params.language_lookup_name (lang_e)
-      };
-
-      /* Don't insert the same comparison routine more than once.
-        Note that we do this linear walk.  This is not a problem in
-        practice because the number of supported languages is
-        low.  */
-      if (std::find (matchers.begin (), matchers.end (), key)
-         != matchers.end ())
-       continue;
-      matchers.push_back (std::move (key));
-
-      auto bounds
-       = index.find_name_components_bounds (lookup_name_without_params,
-                                            lang_e, per_objfile);
-
-      /* Now for each symbol name in range, check to see if we have a name
-        match, and if so, call the MATCH_CALLBACK callback.  */
-
-      for (; bounds.first != bounds.second; ++bounds.first)
-       {
-         const char *qualified
-           = index.symbol_name_at (bounds.first->idx, per_objfile);
-
-         if (!name_matcher (qualified, lookup_name_without_params, NULL)
-             || (symbol_matcher != NULL && !symbol_matcher (qualified)))
-           continue;
-
-         matches.push_back (bounds.first->idx);
-       }
-    }
-
-  std::sort (matches.begin (), matches.end ());
-
-  /* Finally call the callback, once per match.  */
-  ULONGEST prev = -1;
-  bool result = true;
-  for (offset_type idx : matches)
-    {
-      if (prev != idx)
-       {
-         if (!match_callback (idx))
-           {
-             result = false;
-             break;
-           }
-         prev = idx;
-       }
-    }
-
-  /* Above we use a type wider than idx's for 'prev', since 0 and
-     (offset_type)-1 are both possible values.  */
-  static_assert (sizeof (prev) > sizeof (offset_type), "");
-
-  return result;
-}
-
-#if GDB_SELF_TEST
-
-namespace selftests { namespace dw2_expand_symtabs_matching {
-
-/* A mock .gdb_index/.debug_names-like name index table, enough to
-   exercise dw2_expand_symtabs_matching_symbol, which works with the
-   mapped_index_base interface.  Builds an index from the symbol list
-   passed as parameter to the constructor.  */
-class mock_mapped_index : public mapped_index_base
-{
-public:
-  mock_mapped_index (gdb::array_view<const char *> symbols)
-    : m_symbol_table (symbols)
-  {}
-
-  DISABLE_COPY_AND_ASSIGN (mock_mapped_index);
-
-  /* Return the number of names in the symbol table.  */
-  size_t symbol_name_count () const override
-  {
-    return m_symbol_table.size ();
-  }
-
-  /* Get the name of the symbol at IDX in the symbol table.  */
-  const char *symbol_name_at
-    (offset_type idx, dwarf2_per_objfile *per_objfile) const override
-  {
-    return m_symbol_table[idx];
-  }
-
-  quick_symbol_functions_up make_quick_functions () const override
-  {
-    return nullptr;
-  }
-
-private:
-  gdb::array_view<const char *> m_symbol_table;
-};
-
-/* Convenience function that converts a NULL pointer to a "<null>"
-   string, to pass to print routines.  */
-
-static const char *
-string_or_null (const char *str)
-{
-  return str != NULL ? str : "<null>";
-}
-
-/* Check if a lookup_name_info built from
-   NAME/MATCH_TYPE/COMPLETION_MODE matches the symbols in the mock
-   index.  EXPECTED_LIST is the list of expected matches, in expected
-   matching order.  If no match expected, then an empty list is
-   specified.  Returns true on success.  On failure prints a warning
-   indicating the file:line that failed, and returns false.  */
-
-static bool
-check_match (const char *file, int line,
-            mock_mapped_index &mock_index,
-            const char *name, symbol_name_match_type match_type,
-            bool completion_mode,
-            std::initializer_list<const char *> expected_list,
-            dwarf2_per_objfile *per_objfile)
-{
-  lookup_name_info lookup_name (name, match_type, completion_mode);
-
-  bool matched = true;
-
-  auto mismatch = [&] (const char *expected_str,
-                      const char *got)
-  {
-    warning (_("%s:%d: match_type=%s, looking-for=\"%s\", "
-              "expected=\"%s\", got=\"%s\"\n"),
-            file, line,
-            (match_type == symbol_name_match_type::FULL
-             ? "FULL" : "WILD"),
-            name, string_or_null (expected_str), string_or_null (got));
-    matched = false;
-  };
-
-  auto expected_it = expected_list.begin ();
-  auto expected_end = expected_list.end ();
-
-  dw2_expand_symtabs_matching_symbol (mock_index, lookup_name,
-                                     nullptr,
-                                     [&] (offset_type idx)
-  {
-    const char *matched_name = mock_index.symbol_name_at (idx, per_objfile);
-    const char *expected_str
-      = expected_it == expected_end ? NULL : *expected_it++;
-
-    if (expected_str == NULL || strcmp (expected_str, matched_name) != 0)
-      mismatch (expected_str, matched_name);
-    return true;
-  }, per_objfile, nullptr);
-
-  const char *expected_str
-  = expected_it == expected_end ? NULL : *expected_it++;
-  if (expected_str != NULL)
-    mismatch (expected_str, NULL);
-
-  return matched;
-}
-
-/* The symbols added to the mock mapped_index for testing (in
-   canonical form).  */
-static const char *test_symbols[] = {
-  "function",
-  "std::bar",
-  "std::zfunction",
-  "std::zfunction2",
-  "w1::w2",
-  "ns::foo<char*>",
-  "ns::foo<int>",
-  "ns::foo<long>",
-  "ns2::tmpl<int>::foo2",
-  "(anonymous namespace)::A::B::C",
-
-  /* These are used to check that the increment-last-char in the
-     matching algorithm for completion doesn't match "t1_fund" when
-     completing "t1_func".  */
-  "t1_func",
-  "t1_func1",
-  "t1_fund",
-  "t1_fund1",
-
-  /* A UTF-8 name with multi-byte sequences to make sure that
-     cp-name-parser understands this as a single identifier ("função"
-     is "function" in PT).  */
-  (const char *)u8"u8função",
-
-  /* Test a symbol name that ends with a 0xff character, which is a
-     valid character in non-UTF-8 source character sets (e.g. Latin1
-     'ÿ'), and we can't rule out compilers allowing it in identifiers.
-     We test this because the completion algorithm finds the upper
-     bound of symbols by looking for the insertion point of
-     "func"-with-last-character-incremented, i.e. "fund", and adding 1
-     to 0xff should wraparound and carry to the previous character.
-     See comments in make_sort_after_prefix_name.  */
-  "yfunc\377",
-
-  /* Some more symbols with \377 (0xff).  See above.  */
-  "\377",
-  "\377\377123",
-
-  /* A name with all sorts of complications.  Starts with "z" to make
-     it easier for the completion tests below.  */
-#define Z_SYM_NAME \
-  "z::std::tuple<(anonymous namespace)::ui*, std::bar<(anonymous namespace)::ui> >" \
-    "::tuple<(anonymous namespace)::ui*, " \
-    "std::default_delete<(anonymous namespace)::ui>, void>"
-
-  Z_SYM_NAME
-};
-
-/* Returns true if the mapped_index_base::find_name_component_bounds
-   method finds EXPECTED_SYMS in INDEX when looking for SEARCH_NAME,
-   in completion mode.  */
-
-static bool
-check_find_bounds_finds (mapped_index_base &index,
-                        const char *search_name,
-                        gdb::array_view<const char *> expected_syms,
-                        dwarf2_per_objfile *per_objfile)
-{
-  lookup_name_info lookup_name (search_name,
-                               symbol_name_match_type::FULL, true);
-
-  auto bounds = index.find_name_components_bounds (lookup_name,
-                                                  language_cplus,
-                                                  per_objfile);
-
-  size_t distance = std::distance (bounds.first, bounds.second);
-  if (distance != expected_syms.size ())
-    return false;
-
-  for (size_t exp_elem = 0; exp_elem < distance; exp_elem++)
-    {
-      auto nc_elem = bounds.first + exp_elem;
-      const char *qualified = index.symbol_name_at (nc_elem->idx, per_objfile);
-      if (strcmp (qualified, expected_syms[exp_elem]) != 0)
-       return false;
-    }
-
-  return true;
-}
-
-/* Test the lower-level mapped_index::find_name_component_bounds
-   method.  */
-
-static void
-test_mapped_index_find_name_component_bounds ()
-{
-  mock_mapped_index mock_index (test_symbols);
-
-  mock_index.build_name_components (NULL /* per_objfile */);
-
-  /* Test the lower-level mapped_index::find_name_component_bounds
-     method in completion mode.  */
-  {
-    static const char *expected_syms[] = {
-      "t1_func",
-      "t1_func1",
-    };
-
-    SELF_CHECK (check_find_bounds_finds
-                 (mock_index, "t1_func", expected_syms,
-                  NULL /* per_objfile */));
-  }
-
-  /* Check that the increment-last-char in the name matching algorithm
-     for completion doesn't get confused with Ansi1 'ÿ' / 0xff.  See
-     make_sort_after_prefix_name.  */
-  {
-    static const char *expected_syms1[] = {
-      "\377",
-      "\377\377123",
-    };
-    SELF_CHECK (check_find_bounds_finds
-                 (mock_index, "\377", expected_syms1, NULL /* per_objfile */));
-
-    static const char *expected_syms2[] = {
-      "\377\377123",
-    };
-    SELF_CHECK (check_find_bounds_finds
-                 (mock_index, "\377\377", expected_syms2,
-                  NULL /* per_objfile */));
-  }
-}
-
-/* Test dw2_expand_symtabs_matching_symbol.  */
-
-static void
-test_dw2_expand_symtabs_matching_symbol ()
-{
-  mock_mapped_index mock_index (test_symbols);
-
-  /* We let all tests run until the end even if some fails, for debug
-     convenience.  */
-  bool any_mismatch = false;
-
-  /* Create the expected symbols list (an initializer_list).  Needed
-     because lists have commas, and we need to pass them to CHECK,
-     which is a macro.  */
-#define EXPECT(...) { __VA_ARGS__ }
-
-  /* Wrapper for check_match that passes down the current
-     __FILE__/__LINE__.  */
-#define CHECK_MATCH(NAME, MATCH_TYPE, COMPLETION_MODE, EXPECTED_LIST)  \
-  any_mismatch |= !check_match (__FILE__, __LINE__,                    \
-                               mock_index,                             \
-                               NAME, MATCH_TYPE, COMPLETION_MODE,      \
-                               EXPECTED_LIST, NULL)
-
-  /* Identity checks.  */
-  for (const char *sym : test_symbols)
-    {
-      /* Should be able to match all existing symbols.  */
-      CHECK_MATCH (sym, symbol_name_match_type::FULL, false,
-                  EXPECT (sym));
-
-      /* Should be able to match all existing symbols with
-        parameters.  */
-      std::string with_params = std::string (sym) + "(int)";
-      CHECK_MATCH (with_params.c_str (), symbol_name_match_type::FULL, false,
-                  EXPECT (sym));
-
-      /* Should be able to match all existing symbols with
-        parameters and qualifiers.  */
-      with_params = std::string (sym) + " ( int ) const";
-      CHECK_MATCH (with_params.c_str (), symbol_name_match_type::FULL, false,
-                  EXPECT (sym));
-
-      /* This should really find sym, but cp-name-parser.y doesn't
-        know about lvalue/rvalue qualifiers yet.  */
-      with_params = std::string (sym) + " ( int ) &&";
-      CHECK_MATCH (with_params.c_str (), symbol_name_match_type::FULL, false,
-                  {});
-    }
-
-  /* Check that the name matching algorithm for completion doesn't get
-     confused with Latin1 'ÿ' / 0xff.  See
-     make_sort_after_prefix_name.  */
-  {
-    static const char str[] = "\377";
-    CHECK_MATCH (str, symbol_name_match_type::FULL, true,
-                EXPECT ("\377", "\377\377123"));
-  }
-
-  /* Check that the increment-last-char in the matching algorithm for
-     completion doesn't match "t1_fund" when completing "t1_func".  */
-  {
-    static const char str[] = "t1_func";
-    CHECK_MATCH (str, symbol_name_match_type::FULL, true,
-                EXPECT ("t1_func", "t1_func1"));
-  }
-
-  /* Check that completion mode works at each prefix of the expected
-     symbol name.  */
-  {
-    static const char str[] = "function(int)";
-    size_t len = strlen (str);
-    std::string lookup;
-
-    for (size_t i = 1; i < len; i++)
-      {
-       lookup.assign (str, i);
-       CHECK_MATCH (lookup.c_str (), symbol_name_match_type::FULL, true,
-                    EXPECT ("function"));
-      }
-  }
-
-  /* While "w" is a prefix of both components, the match function
-     should still only be called once.  */
-  {
-    CHECK_MATCH ("w", symbol_name_match_type::FULL, true,
-                EXPECT ("w1::w2"));
-    CHECK_MATCH ("w", symbol_name_match_type::WILD, true,
-                EXPECT ("w1::w2"));
-  }
-
-  /* Same, with a "complicated" symbol.  */
-  {
-    static const char str[] = Z_SYM_NAME;
-    size_t len = strlen (str);
-    std::string lookup;
-
-    for (size_t i = 1; i < len; i++)
-      {
-       lookup.assign (str, i);
-       CHECK_MATCH (lookup.c_str (), symbol_name_match_type::FULL, true,
-                    EXPECT (Z_SYM_NAME));
-      }
-  }
-
-  /* In FULL mode, an incomplete symbol doesn't match.  */
-  {
-    CHECK_MATCH ("std::zfunction(int", symbol_name_match_type::FULL, false,
-                {});
-  }
-
-  /* A complete symbol with parameters matches any overload, since the
-     index has no overload info.  */
-  {
-    CHECK_MATCH ("std::zfunction(int)", symbol_name_match_type::FULL, true,
-                EXPECT ("std::zfunction", "std::zfunction2"));
-    CHECK_MATCH ("zfunction(int)", symbol_name_match_type::WILD, true,
-                EXPECT ("std::zfunction", "std::zfunction2"));
-    CHECK_MATCH ("zfunc", symbol_name_match_type::WILD, true,
-                EXPECT ("std::zfunction", "std::zfunction2"));
-  }
-
-  /* Check that whitespace is ignored appropriately.  A symbol with a
-     template argument list. */
-  {
-    static const char expected[] = "ns::foo<int>";
-    CHECK_MATCH ("ns :: foo < int > ", symbol_name_match_type::FULL, false,
-                EXPECT (expected));
-    CHECK_MATCH ("foo < int > ", symbol_name_match_type::WILD, false,
-                EXPECT (expected));
-  }
-
-  /* Check that whitespace is ignored appropriately.  A symbol with a
-     template argument list that includes a pointer.  */
-  {
-    static const char expected[] = "ns::foo<char*>";
-    /* Try both completion and non-completion modes.  */
-    static const bool completion_mode[2] = {false, true};
-    for (size_t i = 0; i < 2; i++)
-      {
-       CHECK_MATCH ("ns :: foo < char * >", symbol_name_match_type::FULL,
-                    completion_mode[i], EXPECT (expected));
-       CHECK_MATCH ("foo < char * >", symbol_name_match_type::WILD,
-                    completion_mode[i], EXPECT (expected));
-
-       CHECK_MATCH ("ns :: foo < char * > (int)", symbol_name_match_type::FULL,
-                    completion_mode[i], EXPECT (expected));
-       CHECK_MATCH ("foo < char * > (int)", symbol_name_match_type::WILD,
-                    completion_mode[i], EXPECT (expected));
-      }
-  }
-
-  {
-    /* Check method qualifiers are ignored.  */
-    static const char expected[] = "ns::foo<char*>";
-    CHECK_MATCH ("ns :: foo < char * >  ( int ) const",
-                symbol_name_match_type::FULL, true, EXPECT (expected));
-    CHECK_MATCH ("ns :: foo < char * >  ( int ) &&",
-                symbol_name_match_type::FULL, true, EXPECT (expected));
-    CHECK_MATCH ("foo < char * >  ( int ) const",
-                symbol_name_match_type::WILD, true, EXPECT (expected));
-    CHECK_MATCH ("foo < char * >  ( int ) &&",
-                symbol_name_match_type::WILD, true, EXPECT (expected));
-  }
-
-  /* Test lookup names that don't match anything.  */
-  {
-    CHECK_MATCH ("bar2", symbol_name_match_type::WILD, false,
-                {});
-
-    CHECK_MATCH ("doesntexist", symbol_name_match_type::FULL, false,
-                {});
-  }
-
-  /* Some wild matching tests, exercising "(anonymous namespace)",
-     which should not be confused with a parameter list.  */
-  {
-    static const char *syms[] = {
-      "A::B::C",
-      "B::C",
-      "C",
-      "A :: B :: C ( int )",
-      "B :: C ( int )",
-      "C ( int )",
-    };
-
-    for (const char *s : syms)
-      {
-       CHECK_MATCH (s, symbol_name_match_type::WILD, false,
-                    EXPECT ("(anonymous namespace)::A::B::C"));
-      }
-  }
-
-  {
-    static const char expected[] = "ns2::tmpl<int>::foo2";
-    CHECK_MATCH ("tmp", symbol_name_match_type::WILD, true,
-                EXPECT (expected));
-    CHECK_MATCH ("tmpl<", symbol_name_match_type::WILD, true,
-                EXPECT (expected));
-  }
-
-  SELF_CHECK (!any_mismatch);
-
-#undef EXPECT
-#undef CHECK_MATCH
-}
-
-static void
-run_test ()
-{
-  test_mapped_index_find_name_component_bounds ();
-  test_dw2_expand_symtabs_matching_symbol ();
-}
-
-}} // namespace selftests::dw2_expand_symtabs_matching
-
-#endif /* GDB_SELF_TEST */
-
  /* See read.h.  */
  
  bool
@@ -22342,8 +21560,6 @@ the demangler."),
                                                 &ada_function_alias_funcs);
  
  #if GDB_SELF_TEST
-  selftests::register_test ("dw2_expand_symtabs_matching",
-                           selftests::dw2_expand_symtabs_matching::run_test);
    selftests::register_test ("dwarf2_find_containing_comp_unit",
                             selftests::find_containing_comp_unit::run_test);
  #endif
diff --git a/gdb/dwarf2/read.h b/gdb/dwarf2/read.h

index e8735ddef6faa5eeb8c8077c73d2b5a61ef450b3..12d5f066e87e1c657da569ae8ef99f333146beae 100644 (file)
--- a/gdb/dwarf2/read.h
+++ b/gdb/dwarf2/read.h
@@ -931,21 +931,6 @@ extern bool dw2_expand_symtabs_matching_one
     gdb::function_view<expand_symtabs_exp_notify_ftype> expansion_notify,
     gdb::function_view<expand_symtabs_lang_matcher_ftype> lang_matcher);
  
-/* Helper for dw2_expand_symtabs_matching that works with a
-   mapped_index_base instead of the containing objfile.  This is split
-   to a separate function in order to be able to unit test the
-   name_components matching using a mock mapped_index_base.  For each
-   symbol name that matches, calls MATCH_CALLBACK, passing it the
-   symbol's index in the mapped_index_base symbol table.  */
-
-extern bool dw2_expand_symtabs_matching_symbol
-  (mapped_index_base &index,
-   const lookup_name_info &lookup_name_in,
-   gdb::function_view<expand_symtabs_symbol_matcher_ftype> symbol_matcher,
-   gdb::function_view<bool (offset_type)> match_callback,
-   dwarf2_per_objfile *per_objfile,
-   gdb::function_view<expand_symtabs_lang_matcher_ftype> lang_matcher);
-
  /* If FILE_MATCHER is non-NULL, set all the
     dwarf2_per_cu_quick_data::MARK of the current DWARF2_PER_OBJFILE
     that match FILE_MATCHER.  */
author	Tom Tromey <tromey@adacore.com>
	Wed, 15 Jan 2025 23:18:15 +0000 (16:18 -0700)
committer	Tom Tromey <tromey@adacore.com>
	Fri, 17 Jan 2025 15:52:04 +0000 (08:52 -0700)
gdb/dwarf2/mapped-index.h		patch \| blob \| blame \| history
gdb/dwarf2/read-gdb-index.c		patch \| blob \| blame \| history
gdb/dwarf2/read.c		patch \| blob \| blame \| history
gdb/dwarf2/read.h		patch \| blob \| blame \| history