[thirdparty/binutils-gdb.git] / gdbsupport / parallel-for.h

/* Parallel for loops

   Copyright (C) 2019-2023 Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef GDBSUPPORT_PARALLEL_FOR_H
#define GDBSUPPORT_PARALLEL_FOR_H

#include <algorithm>
#include <type_traits>
#include "gdbsupport/invoke-result.h"
#include "gdbsupport/thread-pool.h"
#include "gdbsupport/function-view.h"

namespace gdb
{

namespace detail
{

/* This is a helper class that is used to accumulate results for
   parallel_for.  There is a specialization for 'void', below.  */
template<typename T>
struct par_for_accumulator
{
public:

  explicit par_for_accumulator (size_t n_threads)
    : m_futures (n_threads)
  {
  }

  /* The result type that is accumulated.  */
  typedef std::vector<T> result_type;

  /* Post the Ith task to a background thread, and store a future for
     later.  */
  void post (size_t i, std::function<T ()> task)
  {
    m_futures[i]
      = gdb::thread_pool::g_thread_pool->post_task (std::move (task));
  }

  /* Invoke TASK in the current thread, then compute all the results
     from all background tasks and put them into a result vector,
     which is returned.  */
  result_type finish (gdb::function_view<T ()> task)
  {
    result_type result (m_futures.size () + 1);

    result.back () = task ();

    for (size_t i = 0; i < m_futures.size (); ++i)
      result[i] = m_futures[i].get ();

    return result;
  }

private:
  
  /* A vector of futures coming from the tasks run in the
     background.  */
  std::vector<gdb::future<T>> m_futures;
};

/* See the generic template.  */
template<>
struct par_for_accumulator<void>
{
public:

  explicit par_for_accumulator (size_t n_threads)
    : m_futures (n_threads)
  {
  }

  /* This specialization does not compute results.  */
  typedef void result_type;

  void post (size_t i, std::function<void ()> task)
  {
    m_futures[i]
      = gdb::thread_pool::g_thread_pool->post_task (std::move (task));
  }

  result_type finish (gdb::function_view<void ()> task)
  {
    task ();

    for (auto &future : m_futures)
      {
	/* Use 'get' and not 'wait', to propagate any exception.  */
	future.get ();
      }
  }

private:

  std::vector<gdb::future<void>> m_futures;
};

}

/* A very simple "parallel for".  This splits the range of iterators
   into subranges, and then passes each subrange to the callback.  The
   work may or may not be done in separate threads.

   This approach was chosen over having the callback work on single
   items because it makes it simple for the caller to do
   once-per-subrange initialization and destruction.

   The parameter N says how batching ought to be done -- there will be
   at least N elements processed per thread.  Setting N to 0 is not
   allowed.

   If the function returns a non-void type, then a vector of the
   results is returned.  The size of the resulting vector depends on
   the number of threads that were used.  */

template<class RandomIt, class RangeFunction>
typename gdb::detail::par_for_accumulator<
    typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type
  >::result_type
parallel_for_each (unsigned n, RandomIt first, RandomIt last,
		   RangeFunction callback,
		   gdb::function_view<size_t(RandomIt)> task_size = nullptr)
{
  using result_type
    = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type;

  /* If enabled, print debug info about how the work is distributed across
     the threads.  */
  const bool parallel_for_each_debug = false;

  size_t n_worker_threads = thread_pool::g_thread_pool->thread_count ();
  size_t n_threads = n_worker_threads;
  size_t n_elements = last - first;
  size_t elts_per_thread = 0;
  size_t elts_left_over = 0;
  size_t total_size = 0;
  size_t size_per_thread = 0;
  size_t max_element_size = n_elements == 0 ? 1 : SIZE_MAX / n_elements;

  if (n_threads > 1)
    {
      if (task_size != nullptr)
	{
	  gdb_assert (n == 1);
	  for (RandomIt i = first; i != last; ++i)
	    {
	      size_t element_size = task_size (i);
	      gdb_assert (element_size > 0);
	      if (element_size > max_element_size)
		/* We could start scaling here, but that doesn't seem to be
		   worth the effort.  */
		element_size = max_element_size;
	      size_t prev_total_size = total_size;
	      total_size += element_size;
	      /* Check for overflow.  */
	      gdb_assert (prev_total_size < total_size);
	    }
	  size_per_thread = total_size / n_threads;
	}
      else
	{
	  /* Require that there should be at least N elements in a
	     thread.  */
	  gdb_assert (n > 0);
	  if (n_elements / n_threads < n)
	    n_threads = std::max (n_elements / n, (size_t) 1);
	  elts_per_thread = n_elements / n_threads;
	  elts_left_over = n_elements % n_threads;
	  /* n_elements == n_threads * elts_per_thread + elts_left_over. */
	}
    }

  size_t count = n_threads == 0 ? 0 : n_threads - 1;
  gdb::detail::par_for_accumulator<result_type> results (count);

  if (parallel_for_each_debug)
    {
      debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements);
      if (task_size != nullptr)
	{
	  debug_printf (_("Parallel for: total_size: %zu\n"), total_size);
	  debug_printf (_("Parallel for: size_per_thread: %zu\n"), size_per_thread);
	}
      else
	{
	  debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n);
	  debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread);
	}
    }

  size_t remaining_size = total_size;
  for (int i = 0; i < count; ++i)
    {
      RandomIt end;
      size_t chunk_size = 0;
      if (task_size == nullptr)
	{
	  end = first + elts_per_thread;
	  if (i < elts_left_over)
	    /* Distribute the leftovers over the worker threads, to avoid having
	       to handle all of them in a single thread.  */
	    end++;
	}
      else
	{
	  RandomIt j;
	  for (j = first; j < last && chunk_size < size_per_thread; ++j)
	    {
	      size_t element_size = task_size (j);
	      if (element_size > max_element_size)
		element_size = max_element_size;
	      chunk_size += element_size;
	    }
	  end = j;
	  remaining_size -= chunk_size;
	}
      if (parallel_for_each_debug)
	{
	  debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"),
			i, (size_t)(end - first));
	  if (task_size != nullptr)
	    debug_printf (_("\t(size: %zu)"), chunk_size);
	  debug_printf (_("\n"));
	}
      results.post (i, [=] ()
        {
	  return callback (first, end);
	});
      first = end;
    }

  for (int i = count; i < n_worker_threads; ++i)
    if (parallel_for_each_debug)
      {
	debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i);
	if (task_size != nullptr)
	  debug_printf (_("\t(size: 0)"));
	debug_printf (_("\n"));
      }

  /* Process all the remaining elements in the main thread.  */
  if (parallel_for_each_debug)
    {
      debug_printf (_("Parallel for: elements on main thread\t\t: %zu"),
		    (size_t)(last - first));
      if (task_size != nullptr)
	debug_printf (_("\t(size: %zu)"), remaining_size);
      debug_printf (_("\n"));
    }
  return results.finish ([=] ()
    {
      return callback (first, last);
    });
}

/* A sequential drop-in replacement of parallel_for_each.  This can be useful
   when debugging multi-threading behaviour, and you want to limit
   multi-threading in a fine-grained way.  */

template<class RandomIt, class RangeFunction>
typename gdb::detail::par_for_accumulator<
    typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type
  >::result_type
sequential_for_each (unsigned n, RandomIt first, RandomIt last,
		     RangeFunction callback,
		     gdb::function_view<size_t(RandomIt)> task_size = nullptr)
{
  using result_type = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type;

  gdb::detail::par_for_accumulator<result_type> results (0);

  /* Process all the remaining elements in the main thread.  */
  return results.finish ([=] ()
    {
      return callback (first, last);
    });
}

}

#endif /* GDBSUPPORT_PARALLEL_FOR_H */
Commit	Line	Data
d55c9a68 TT	1	/* Parallel for loops
d55c9a68 TT	2
213516ef	3	Copyright (C) 2019-2023 Free Software Foundation, Inc.
d55c9a68 TT	4
	5	This file is part of GDB.
	6
	7	This program is free software; you can redistribute it and/or modify
	8	it under the terms of the GNU General Public License as published by
	9	the Free Software Foundation; either version 3 of the License, or
	10	(at your option) any later version.
	11
	12	This program is distributed in the hope that it will be useful,
	13	but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	GNU General Public License for more details.
	16
	17	You should have received a copy of the GNU General Public License
	18	along with this program. If not, see <http://www.gnu.org/licenses/>. */
	19
	20	#ifndef GDBSUPPORT_PARALLEL_FOR_H
	21	#define GDBSUPPORT_PARALLEL_FOR_H
	22
	23	#include <algorithm>
f4565e4c	24	#include <type_traits>
c95dd24e	25	#include "gdbsupport/invoke-result.h"
d55c9a68	26	#include "gdbsupport/thread-pool.h"
b859a3ef	27	#include "gdbsupport/function-view.h"
d55c9a68 TT	28
	29	namespace gdb
	30	{
	31
f4565e4c TT	32	namespace detail
	33	{
	34
	35	/* This is a helper class that is used to accumulate results for
	36	parallel_for. There is a specialization for 'void', below. */
	37	template<typename T>
	38	struct par_for_accumulator
	39	{
	40	public:
	41
	42	explicit par_for_accumulator (size_t n_threads)
	43	: m_futures (n_threads)
	44	{
	45	}
	46
	47	/* The result type that is accumulated. */
	48	typedef std::vector<T> result_type;
	49
	50	/* Post the Ith task to a background thread, and store a future for
	51	later. */
	52	void post (size_t i, std::function<T ()> task)
	53	{
	54	m_futures[i]
	55	= gdb::thread_pool::g_thread_pool->post_task (std::move (task));
	56	}
	57
	58	/* Invoke TASK in the current thread, then compute all the results
	59	from all background tasks and put them into a result vector,
	60	which is returned. */
	61	result_type finish (gdb::function_view<T ()> task)
	62	{
	63	result_type result (m_futures.size () + 1);
	64
	65	result.back () = task ();
	66
	67	for (size_t i = 0; i < m_futures.size (); ++i)
	68	result[i] = m_futures[i].get ();
	69
	70	return result;
	71	}
	72
	73	private:
	74
	75	/* A vector of futures coming from the tasks run in the
	76	background. */
20c4eb42	77	std::vector<gdb::future<T>> m_futures;
f4565e4c TT	78	};
	79
	80	/* See the generic template. */
	81	template<>
	82	struct par_for_accumulator<void>
	83	{
	84	public:
	85
	86	explicit par_for_accumulator (size_t n_threads)
	87	: m_futures (n_threads)
	88	{
	89	}
	90
	91	/* This specialization does not compute results. */
	92	typedef void result_type;
	93
	94	void post (size_t i, std::function<void ()> task)
	95	{
	96	m_futures[i]
	97	= gdb::thread_pool::g_thread_pool->post_task (std::move (task));
	98	}
	99
	100	result_type finish (gdb::function_view<void ()> task)
	101	{
	102	task ();
	103
	104	for (auto &future : m_futures)
	105	{
	106	/* Use 'get' and not 'wait', to propagate any exception. */
	107	future.get ();
	108	}
	109	}
	110
	111	private:
	112
20c4eb42	113	std::vector<gdb::future<void>> m_futures;
f4565e4c TT	114	};
	115
	116	}
	117
d55c9a68 TT	118	/* A very simple "parallel for". This splits the range of iterators
	119	into subranges, and then passes each subrange to the callback. The
	120	work may or may not be done in separate threads.
	121
	122	This approach was chosen over having the callback work on single
	123	items because it makes it simple for the caller to do
82d734f7 TT	124	once-per-subrange initialization and destruction.
	125
	126	The parameter N says how batching ought to be done -- there will be
	127	at least N elements processed per thread. Setting N to 0 is not
f4565e4c TT	128	allowed.
	129
	130	If the function returns a non-void type, then a vector of the
	131	results is returned. The size of the resulting vector depends on
	132	the number of threads that were used. */
d55c9a68 TT	133
d55c9a68 TT	134	template<class RandomIt, class RangeFunction>
f4565e4c	135	typename gdb::detail::par_for_accumulator<
c95dd24e	136	typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type
f4565e4c	137	>::result_type
82d734f7	138	parallel_for_each (unsigned n, RandomIt first, RandomIt last,
b859a3ef TV	139	RangeFunction callback,
b859a3ef TV	140	gdb::function_view<size_t(RandomIt)> task_size = nullptr)
d55c9a68	141	{
a09520cd	142	using result_type
c95dd24e	143	= typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type;
d55c9a68	144
53944a3b TV	145	/* If enabled, print debug info about how the work is distributed across
53944a3b TV	146	the threads. */
9b89bf16	147	const bool parallel_for_each_debug = false;
53944a3b TV	148
	149	size_t n_worker_threads = thread_pool::g_thread_pool->thread_count ();
	150	size_t n_threads = n_worker_threads;
d55c9a68	151	size_t n_elements = last - first;
f4565e4c	152	size_t elts_per_thread = 0;
4319180c	153	size_t elts_left_over = 0;
b859a3ef TV	154	size_t total_size = 0;
	155	size_t size_per_thread = 0;
	156	size_t max_element_size = n_elements == 0 ? 1 : SIZE_MAX / n_elements;
4319180c	157
d55c9a68 TT	158	if (n_threads > 1)
d55c9a68 TT	159	{
b859a3ef TV	160	if (task_size != nullptr)
	161	{
	162	gdb_assert (n == 1);
	163	for (RandomIt i = first; i != last; ++i)
	164	{
	165	size_t element_size = task_size (i);
	166	gdb_assert (element_size > 0);
	167	if (element_size > max_element_size)
	168	/* We could start scaling here, but that doesn't seem to be
	169	worth the effort. */
	170	element_size = max_element_size;
	171	size_t prev_total_size = total_size;
	172	total_size += element_size;
	173	/* Check for overflow. */
	174	gdb_assert (prev_total_size < total_size);
	175	}
	176	size_per_thread = total_size / n_threads;
	177	}
	178	else
	179	{
	180	/* Require that there should be at least N elements in a
	181	thread. */
	182	gdb_assert (n > 0);
	183	if (n_elements / n_threads < n)
	184	n_threads = std::max (n_elements / n, (size_t) 1);
	185	elts_per_thread = n_elements / n_threads;
	186	elts_left_over = n_elements % n_threads;
	187	/* n_elements == n_threads * elts_per_thread + elts_left_over. */
	188	}
d55c9a68	189	}
d55c9a68	190
f4565e4c TT	191	size_t count = n_threads == 0 ? 0 : n_threads - 1;
f4565e4c TT	192	gdb::detail::par_for_accumulator<result_type> results (count);
d55c9a68	193
53944a3b TV	194	if (parallel_for_each_debug)
	195	{
	196	debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements);
b859a3ef TV	197	if (task_size != nullptr)
	198	{
	199	debug_printf (_("Parallel for: total_size: %zu\n"), total_size);
	200	debug_printf (_("Parallel for: size_per_thread: %zu\n"), size_per_thread);
	201	}
	202	else
	203	{
	204	debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n);
	205	debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread);
	206	}
53944a3b TV	207	}
53944a3b TV	208
b859a3ef	209	size_t remaining_size = total_size;
f4565e4c TT	210	for (int i = 0; i < count; ++i)
f4565e4c TT	211	{
b859a3ef TV	212	RandomIt end;
	213	size_t chunk_size = 0;
	214	if (task_size == nullptr)
	215	{
	216	end = first + elts_per_thread;
	217	if (i < elts_left_over)
	218	/* Distribute the leftovers over the worker threads, to avoid having
	219	to handle all of them in a single thread. */
	220	end++;
	221	}
	222	else
	223	{
	224	RandomIt j;
	225	for (j = first; j < last && chunk_size < size_per_thread; ++j)
	226	{
	227	size_t element_size = task_size (j);
	228	if (element_size > max_element_size)
	229	element_size = max_element_size;
	230	chunk_size += element_size;
	231	}
	232	end = j;
	233	remaining_size -= chunk_size;
	234	}
53944a3b	235	if (parallel_for_each_debug)
b859a3ef TV	236	{
	237	debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"),
	238	i, (size_t)(end - first));
	239	if (task_size != nullptr)
	240	debug_printf (_("\t(size: %zu)"), chunk_size);
	241	debug_printf (_("\n"));
	242	}
f4565e4c TT	243	results.post (i, [=] ()
	244	{
	245	return callback (first, end);
	246	});
	247	first = end;
	248	}
	249
53944a3b TV	250	for (int i = count; i < n_worker_threads; ++i)
53944a3b TV	251	if (parallel_for_each_debug)
b859a3ef TV	252	{
	253	debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i);
	254	if (task_size != nullptr)
	255	debug_printf (_("\t(size: 0)"));
	256	debug_printf (_("\n"));
	257	}
53944a3b	258
f4565e4c	259	/* Process all the remaining elements in the main thread. */
53944a3b	260	if (parallel_for_each_debug)
b859a3ef TV	261	{
	262	debug_printf (_("Parallel for: elements on main thread\t\t: %zu"),
	263	(size_t)(last - first));
	264	if (task_size != nullptr)
	265	debug_printf (_("\t(size: %zu)"), remaining_size);
	266	debug_printf (_("\n"));
	267	}
f4565e4c TT	268	return results.finish ([=] ()
	269	{
	270	return callback (first, last);
	271	});
d55c9a68 TT	272	}
d55c9a68 TT	273
18a5766d TV	274	/* A sequential drop-in replacement of parallel_for_each. This can be useful
	275	when debugging multi-threading behaviour, and you want to limit
	276	multi-threading in a fine-grained way. */
	277
	278	template<class RandomIt, class RangeFunction>
	279	typename gdb::detail::par_for_accumulator<
c95dd24e	280	typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type
18a5766d TV	281	>::result_type
18a5766d TV	282	sequential_for_each (unsigned n, RandomIt first, RandomIt last,
b859a3ef TV	283	RangeFunction callback,
b859a3ef TV	284	gdb::function_view<size_t(RandomIt)> task_size = nullptr)
18a5766d	285	{
c95dd24e	286	using result_type = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type;
18a5766d TV	287
	288	gdb::detail::par_for_accumulator<result_type> results (0);
	289
	290	/* Process all the remaining elements in the main thread. */
	291	return results.finish ([=] ()
	292	{
	293	return callback (first, last);
	294	});
	295	}
	296
d55c9a68 TT	297	}
	298
	299	#endif /* GDBSUPPORT_PARALLEL_FOR_H */