gdbsupport: use dynamic partitioning in gdb::parallel_for_each

author Simon Marchi <simon.marchi@polymtl.ca>

Fri, 19 Sep 2025 20:27:00 +0000 (16:27 -0400)

committer Simon Marchi <simon.marchi@efficios.com>

Tue, 30 Sep 2025 19:37:20 +0000 (19:37 +0000)
author Simon Marchi <simon.marchi@polymtl.ca>
Fri, 19 Sep 2025 20:27:00 +0000 (16:27 -0400)
committer Simon Marchi <simon.marchi@efficios.com>
Tue, 30 Sep 2025 19:37:20 +0000 (19:37 +0000)
diff --git a/gdb/minsyms.c b/gdb/minsyms.c

index c4a0fa873761497a11872a54257c841440f93bf5..cc50febba6c90b12b8b1dc2b021bdc22060d198a 100644 (file)
--- a/gdb/minsyms.c
+++ b/gdb/minsyms.c
@@ -1390,6 +1390,88 @@ build_minimal_symbol_hash_tables
      }
  }
  
+/* gdb::parallel_for_each worker to compute minimal symbol names and hashes.  */
+
+class minimal_symbol_install_worker
+{
+public:
+  minimal_symbol_install_worker
+    (minimal_symbol *msymbols,
+     gdb::array_view<computed_hash_values> hash_values,
+     objfile_per_bfd_storage *per_bfd
+#if CXX_STD_THREAD
+     , std::mutex &demangled_mutex
+#endif
+    )
+    : m_time_it ("minsym install worker"),
+      m_msymbols (msymbols),
+      m_hash_values (hash_values),
+      m_per_bfd (per_bfd)
+#if CXX_STD_THREAD
+      , m_demangled_mutex (demangled_mutex)
+#endif
+  {}
+
+  void operator() (minimal_symbol *start, minimal_symbol *end) noexcept
+  {
+    for (minimal_symbol *msym = start; msym < end; ++msym)
+      {
+       size_t idx = msym - m_msymbols;
+       m_hash_values[idx].name_length = strlen (msym->linkage_name ());
+
+       if (!msym->name_set)
+         {
+           /* This will be freed later, by compute_and_set_names.  */
+           gdb::unique_xmalloc_ptr<char> demangled_name
+             = symbol_find_demangled_name (msym, msym->linkage_name ());
+           msym->set_demangled_name (demangled_name.release (),
+                                     &m_per_bfd->storage_obstack);
+           msym->name_set = 1;
+         }
+
+       /* This mangled_name_hash computation has to be outside of
+          the name_set check, or compute_and_set_names below will
+          be called with an invalid hash value.  */
+       m_hash_values[idx].mangled_name_hash
+         = fast_hash (msym->linkage_name (), m_hash_values[idx].name_length);
+       m_hash_values[idx].minsym_hash = msymbol_hash (msym->linkage_name ());
+
+       /* We only use this hash code if the search name differs
+          from the linkage name.  See the code in
+          build_minimal_symbol_hash_tables.  */
+       if (msym->search_name () != msym->linkage_name ())
+         m_hash_values[idx].minsym_demangled_hash
+           = search_name_hash (msym->language (), msym->search_name ());
+      }
+
+    {
+      /* To limit how long we hold the lock, we only acquire it here
+        and not while we demangle the names above.  */
+#if CXX_STD_THREAD
+      std::lock_guard<std::mutex> guard (m_demangled_mutex);
+#endif
+      for (minimal_symbol *msym = start; msym < end; ++msym)
+       {
+         size_t idx = msym - m_msymbols;
+         std::string_view name (msym->linkage_name (),
+                                m_hash_values[idx].name_length);
+         hashval_t hashval = m_hash_values[idx].mangled_name_hash;
+
+         msym->compute_and_set_names (name, false, m_per_bfd, hashval);
+       }
+    }
+  }
+
+private:
+  scoped_time_it m_time_it;
+  minimal_symbol *m_msymbols;
+  gdb::array_view<computed_hash_values> m_hash_values;
+  objfile_per_bfd_storage *m_per_bfd;
+#if CXX_STD_THREAD
+  std::mutex &m_demangled_mutex;
+#endif
+};
+
  /* Add the minimal symbols in the existing bunches to the objfile's official
     minimal symbol table.  In most cases there is no minimal symbol table yet
     for this objfile, and the existing bunches are used to create one.  Once
@@ -1476,59 +1558,15 @@ minimal_symbol_reader::install ()
        std::vector<computed_hash_values> hash_values (mcount);
  
        msymbols = m_objfile->per_bfd->msymbols.get ();
-      /* Arbitrarily require at least 10 elements in a thread.  */
-      gdb::parallel_for_each<10> (&msymbols[0], &msymbols[mcount],
-        [&] (minimal_symbol *start, minimal_symbol *end)
-        {
-          scoped_time_it time_it ("minsyms install worker");
-
-          for (minimal_symbol *msym = start; msym < end; ++msym)
-            {
-              size_t idx = msym - msymbols;
-              hash_values[idx].name_length = strlen (msym->linkage_name ());
-              if (!msym->name_set)
-                {
-                  /* This will be freed later, by compute_and_set_names.  */
-                  gdb::unique_xmalloc_ptr<char> demangled_name
-                    = symbol_find_demangled_name (msym, msym->linkage_name ());
-                  msym->set_demangled_name
-                    (demangled_name.release (),
-                     &m_objfile->per_bfd->storage_obstack);
-                  msym->name_set = 1;
-                }
-              /* This mangled_name_hash computation has to be outside of
-                 the name_set check, or compute_and_set_names below will
-                 be called with an invalid hash value.  */
-              hash_values[idx].mangled_name_hash
-                = fast_hash (msym->linkage_name (),
-                             hash_values[idx].name_length);
-              hash_values[idx].minsym_hash
-                = msymbol_hash (msym->linkage_name ());
-              /* We only use this hash code if the search name differs
-                 from the linkage name.  See the code in
-                 build_minimal_symbol_hash_tables.  */
-              if (msym->search_name () != msym->linkage_name ())
-                hash_values[idx].minsym_demangled_hash
-                  = search_name_hash (msym->language (), msym->search_name ());
-            }
-          {
-            /* To limit how long we hold the lock, we only acquire it here
-               and not while we demangle the names above.  */
+
+      gdb::parallel_for_each<1000, minimal_symbol *, minimal_symbol_install_worker>
+       (&msymbols[0], &msymbols[mcount], msymbols,
+        gdb::array_view<computed_hash_values> (hash_values),
+        m_objfile->per_bfd
  #if CXX_STD_THREAD
-            std::lock_guard<std::mutex> guard (demangled_mutex);
+        , demangled_mutex
  #endif
-            for (minimal_symbol *msym = start; msym < end; ++msym)
-              {
-                size_t idx = msym - msymbols;
-                msym->compute_and_set_names
-                  (std::string_view (msym->linkage_name (),
-                                     hash_values[idx].name_length),
-                   false,
-                   m_objfile->per_bfd,
-                   hash_values[idx].mangled_name_hash);
-              }
-          }
-        });
+       );
  
        build_minimal_symbol_hash_tables (m_objfile, hash_values);
      }
diff --git a/gdb/unittests/parallel-for-selftests.c b/gdb/unittests/parallel-for-selftests.c

index 559446deef43c91eb69bcc6a52b0439d463184b9..9d86aa67b106c73c165cf14304574cc5ecbdef67 100644 (file)
--- a/gdb/unittests/parallel-for-selftests.c
+++ b/gdb/unittests/parallel-for-selftests.c
@@ -60,6 +60,9 @@ test_one (do_foreach_t do_foreach, int upper_bound)
    std::vector<int> output;
    std::mutex mtx;
  
+  /* The (unfortunate) reason why we don't use std::vector<int>::iterator as
+     the parallel-for-each iterator type is that std::atomic won't work with
+     that type when building with -D_GLIBCXX_DEBUG.  */
    do_foreach (input.data (), input.data () + input.size (),
               [&] (int *start, int *end)
                 {
@@ -97,12 +100,32 @@ test_one_function (int n_threads, do_foreach_t do_foreach)
  static void
  test_parallel_for_each ()
  {
+  struct test_worker
+  {
+    /* DUMMY is there to test passing multiple arguments to the worker
+       constructor.  */
+    test_worker (foreach_callback_t callback, int dummy)
+      : m_callback (callback)
+    {
+    }
+
+    void operator() (int *first, int *last)
+    {
+      return m_callback (first, last);
+    }
+
+  private:
+    foreach_callback_t m_callback;
+  };
+
    const std::vector<do_foreach_t> for_each_functions
      {
        [] (int *start, int *end, foreach_callback_t callback)
-      { gdb::parallel_for_each<1> (start, end, callback); },
+      { gdb::parallel_for_each<1, int *, test_worker> (start, end, callback,
+                                                      0); },
        [] (int *start, int *end, foreach_callback_t callback)
-      { gdb::sequential_for_each (start, end, callback);}
+      { gdb::sequential_for_each<int *, test_worker> (start, end, callback,
+                                                     0); },
      };
  
    int default_thread_count = gdb::thread_pool::g_thread_pool->thread_count ();
diff --git a/gdbsupport/parallel-for.h b/gdbsupport/parallel-for.h

index 50406763ded4eeadff80a3b362b7843d380fd798..8d085df42fb9a2938abf6c3c84751fd714566872 100644 (file)
--- a/gdbsupport/parallel-for.h
+++ b/gdbsupport/parallel-for.h
@@ -21,117 +21,108 @@
  #define GDBSUPPORT_PARALLEL_FOR_H
  
  #include <algorithm>
-#include <type_traits>
+#include <atomic>
+#include <tuple>
  #include "gdbsupport/thread-pool.h"
-#include "gdbsupport/function-view.h"
  
  namespace gdb
  {
  
-/* A very simple "parallel for".  This splits the range of iterators
-   into subranges, and then passes each subrange to the callback.  The
-   work may or may not be done in separate threads.
+/* A "parallel-for" implementation using a shared work queue.  Work items get
+   popped in batches of size up to BATCH_SIZE from the queue and handed out to
+   worker threads.
  
-   This approach was chosen over having the callback work on single
-   items because it makes it simple for the caller to do
-   once-per-subrange initialization and destruction.
+   Each worker thread instantiates an object of type Worker, forwarding ARGS to
+   its constructor.  The Worker object can be used to keep some per-worker
+   thread state.
  
-   The parameter N says how batching ought to be done -- there will be
-   at least N elements processed per thread.  Setting N to 0 is not
-   allowed.  */
+   Worker threads call Worker::operator() repeatedly until the queue is
+   empty.  */
  
-template<std::size_t n, class RandomIt, class RangeFunction>
+template<std::size_t batch_size, class RandomIt, class Worker,
+        class... WorkerArgs>
  void
-parallel_for_each (RandomIt first, RandomIt last, RangeFunction callback)
+parallel_for_each (const RandomIt first, const RandomIt last,
+                  WorkerArgs &&...worker_args)
  {
    /* If enabled, print debug info about how the work is distributed across
       the threads.  */
    const bool parallel_for_each_debug = false;
  
-  size_t n_worker_threads = thread_pool::g_thread_pool->thread_count ();
-  size_t n_threads = n_worker_threads;
-  size_t n_elements = last - first;
-  size_t elts_per_thread = 0;
-  size_t elts_left_over = 0;
+  gdb_assert (first <= last);
  
-  if (n_threads > 1)
+  if (parallel_for_each_debug)
      {
-      /* Require that there should be at least N elements in a
-        thread.  */
-      gdb_assert (n > 0);
-      if (n_elements / n_threads < n)
-       n_threads = std::max (n_elements / n, (size_t) 1);
-      elts_per_thread = n_elements / n_threads;
-      elts_left_over = n_elements % n_threads;
-      /* n_elements == n_threads * elts_per_thread + elts_left_over. */
+      debug_printf ("Parallel for: n elements: %zu\n",
+                   static_cast<std::size_t> (last - first));
+      debug_printf ("Parallel for: batch size: %zu\n", batch_size);
      }
  
-  size_t count = n_threads == 0 ? 0 : n_threads - 1;
+  const size_t n_worker_threads
+    = std::max<size_t> (thread_pool::g_thread_pool->thread_count (), 1);
    std::vector<gdb::future<void>> results;
  
-  if (parallel_for_each_debug)
-    {
-      debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements);
-      debug_printf (_("Parallel for: minimum elements per thread: %zu\n"), n);
-      debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread);
-    }
+  /* The next item to hand out.  */
+  std::atomic<RandomIt> next = first;
  
-  for (int i = 0; i < count; ++i)
-    {
-      RandomIt end;
-      end = first + elts_per_thread;
-      if (i < elts_left_over)
-       /* Distribute the leftovers over the worker threads, to avoid having
-          to handle all of them in a single thread.  */
-       end++;
-
-      /* This case means we don't have enough elements to really
-        distribute them.  Rather than ever submit a task that does
-        nothing, we short-circuit here.  */
-      if (first == end)
-       end = last;
-
-      if (end == last)
-       {
-         /* We're about to dispatch the last batch of elements, which
-            we normally process in the main thread.  So just truncate
-            the result list here.  This avoids submitting empty tasks
-            to the thread pool.  */
-         count = i;
-         break;
-       }
+  /* The worker thread task.
  
-      if (parallel_for_each_debug)
-       {
-         debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"),
-                       i, (size_t)(end - first));
-         debug_printf (_("\n"));
-       }
-      results.push_back (gdb::thread_pool::g_thread_pool->post_task ([=] ()
-        {
-         return callback (first, end);
-       }));
-      first = end;
-    }
+     We need to capture args as a tuple, because it's not possible to capture
+     the parameter pack directly in C++17.  Once we migrate to C++20, the
+     capture can be simplified to:
  
-  for (int i = count; i < n_worker_threads; ++i)
-    if (parallel_for_each_debug)
-      {
-       debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i);
-       debug_printf (_("\n"));
-      }
+       ... args = std::forward<Args>(args)
  
-  /* Process all the remaining elements in the main thread.  */
-  if (parallel_for_each_debug)
+     and `args` can be used as-is in the lambda.  */
+  auto args_tuple
+    = std::forward_as_tuple (std::forward<WorkerArgs> (worker_args)...);
+  auto task = [&next, first, last, n_worker_threads, &args_tuple] ()
      {
-      debug_printf (_("Parallel for: elements on main thread\t\t: %zu"),
-                   (size_t)(last - first));
-      debug_printf (_("\n"));
-    }
+      /* Instantiate the user-defined worker.  */
+      auto worker = std::make_from_tuple<Worker> (args_tuple);
  
-  if (first != last)
-    callback (first, last);
+      for (;;)
+       {
+         /* Grab a snapshot of NEXT.  */
+         auto local_next = next.load ();
+         gdb_assert (local_next <= last);
+
+         /* Number of remaining items.  */
+         auto n_remaining = last - local_next;
+         gdb_assert (n_remaining >= 0);
+
+         /* Are we done?  */
+         if (n_remaining == 0)
+           break;
+
+         const auto this_batch_size
+           = std::min<std::size_t> (batch_size, n_remaining);
+
+         /* The range to process in this iteration.  */
+         const auto this_batch_first = local_next;
+         const auto this_batch_last = local_next + this_batch_size;
+
+         /* Update NEXT.  If the current value of NEXT doesn't match
+            LOCAL_NEXT, it means another thread updated it concurrently,
+            restart.  */
+         if (!next.compare_exchange_weak (local_next, this_batch_last))
+           continue;
+
+         if (parallel_for_each_debug)
+           debug_printf ("Processing %zu items, range [%zu, %zu[\n",
+                         this_batch_size,
+                         static_cast<size_t> (this_batch_first - first),
+                         static_cast<size_t> (this_batch_last - first));
+
+         worker (this_batch_first, this_batch_last);
+       }
+    };
  
+  /* Start N_WORKER_THREADS tasks.  */
+  for (int i = 0; i < n_worker_threads; ++i)
+    results.push_back (gdb::thread_pool::g_thread_pool->post_task (task));
+
+  /* Wait for all of them to be finished.  */
    for (auto &fut : results)
      fut.get ();
  }
@@ -140,12 +131,14 @@ parallel_for_each (RandomIt first, RandomIt last, RangeFunction callback)
     when debugging multi-threading behavior, and you want to limit
     multi-threading in a fine-grained way.  */
  
-template<class RandomIt, class RangeFunction>
+template<class RandomIt, class Worker, class... WorkerArgs>
  void
-sequential_for_each (RandomIt first, RandomIt last, RangeFunction callback)
+sequential_for_each (RandomIt first, RandomIt last, WorkerArgs &&...worker_args)
  {
-  if (first != last)
-    callback (first, last);
+  if (first == last)
+    return;
+
+  Worker (std::forward<WorkerArgs> (worker_args)...) (first, last);
  }
  
  }
author	Simon Marchi <simon.marchi@polymtl.ca>
	Fri, 19 Sep 2025 20:27:00 +0000 (16:27 -0400)
committer	Simon Marchi <simon.marchi@efficios.com>
	Tue, 30 Sep 2025 19:37:20 +0000 (19:37 +0000)
gdb/minsyms.c		patch \| blob \| blame \| history
gdb/unittests/parallel-for-selftests.c		patch \| blob \| blame \| history
gdbsupport/parallel-for.h		patch \| blob \| blame \| history