]>
Commit | Line | Data |
---|---|---|
d55c9a68 TT |
1 | /* Parallel for loops |
2 | ||
213516ef | 3 | Copyright (C) 2019-2023 Free Software Foundation, Inc. |
d55c9a68 TT |
4 | |
5 | This file is part of GDB. | |
6 | ||
7 | This program is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | This program is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with this program. If not, see <http://www.gnu.org/licenses/>. */ | |
19 | ||
20 | #ifndef GDBSUPPORT_PARALLEL_FOR_H | |
21 | #define GDBSUPPORT_PARALLEL_FOR_H | |
22 | ||
23 | #include <algorithm> | |
f4565e4c | 24 | #include <type_traits> |
c95dd24e | 25 | #include "gdbsupport/invoke-result.h" |
d55c9a68 | 26 | #include "gdbsupport/thread-pool.h" |
b859a3ef | 27 | #include "gdbsupport/function-view.h" |
d55c9a68 TT |
28 | |
29 | namespace gdb | |
30 | { | |
31 | ||
f4565e4c TT |
32 | namespace detail |
33 | { | |
34 | ||
35 | /* This is a helper class that is used to accumulate results for | |
36 | parallel_for. There is a specialization for 'void', below. */ | |
37 | template<typename T> | |
38 | struct par_for_accumulator | |
39 | { | |
40 | public: | |
41 | ||
42 | explicit par_for_accumulator (size_t n_threads) | |
43 | : m_futures (n_threads) | |
44 | { | |
45 | } | |
46 | ||
47 | /* The result type that is accumulated. */ | |
48 | typedef std::vector<T> result_type; | |
49 | ||
50 | /* Post the Ith task to a background thread, and store a future for | |
51 | later. */ | |
52 | void post (size_t i, std::function<T ()> task) | |
53 | { | |
54 | m_futures[i] | |
55 | = gdb::thread_pool::g_thread_pool->post_task (std::move (task)); | |
56 | } | |
57 | ||
58 | /* Invoke TASK in the current thread, then compute all the results | |
59 | from all background tasks and put them into a result vector, | |
60 | which is returned. */ | |
61 | result_type finish (gdb::function_view<T ()> task) | |
62 | { | |
63 | result_type result (m_futures.size () + 1); | |
64 | ||
65 | result.back () = task (); | |
66 | ||
67 | for (size_t i = 0; i < m_futures.size (); ++i) | |
68 | result[i] = m_futures[i].get (); | |
69 | ||
70 | return result; | |
71 | } | |
72 | ||
73 | private: | |
74 | ||
75 | /* A vector of futures coming from the tasks run in the | |
76 | background. */ | |
20c4eb42 | 77 | std::vector<gdb::future<T>> m_futures; |
f4565e4c TT |
78 | }; |
79 | ||
80 | /* See the generic template. */ | |
81 | template<> | |
82 | struct par_for_accumulator<void> | |
83 | { | |
84 | public: | |
85 | ||
86 | explicit par_for_accumulator (size_t n_threads) | |
87 | : m_futures (n_threads) | |
88 | { | |
89 | } | |
90 | ||
91 | /* This specialization does not compute results. */ | |
92 | typedef void result_type; | |
93 | ||
94 | void post (size_t i, std::function<void ()> task) | |
95 | { | |
96 | m_futures[i] | |
97 | = gdb::thread_pool::g_thread_pool->post_task (std::move (task)); | |
98 | } | |
99 | ||
100 | result_type finish (gdb::function_view<void ()> task) | |
101 | { | |
102 | task (); | |
103 | ||
104 | for (auto &future : m_futures) | |
105 | { | |
106 | /* Use 'get' and not 'wait', to propagate any exception. */ | |
107 | future.get (); | |
108 | } | |
109 | } | |
110 | ||
111 | private: | |
112 | ||
20c4eb42 | 113 | std::vector<gdb::future<void>> m_futures; |
f4565e4c TT |
114 | }; |
115 | ||
116 | } | |
117 | ||
d55c9a68 TT |
118 | /* A very simple "parallel for". This splits the range of iterators |
119 | into subranges, and then passes each subrange to the callback. The | |
120 | work may or may not be done in separate threads. | |
121 | ||
122 | This approach was chosen over having the callback work on single | |
123 | items because it makes it simple for the caller to do | |
82d734f7 TT |
124 | once-per-subrange initialization and destruction. |
125 | ||
126 | The parameter N says how batching ought to be done -- there will be | |
127 | at least N elements processed per thread. Setting N to 0 is not | |
f4565e4c TT |
128 | allowed. |
129 | ||
130 | If the function returns a non-void type, then a vector of the | |
131 | results is returned. The size of the resulting vector depends on | |
132 | the number of threads that were used. */ | |
d55c9a68 TT |
133 | |
134 | template<class RandomIt, class RangeFunction> | |
f4565e4c | 135 | typename gdb::detail::par_for_accumulator< |
c95dd24e | 136 | typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type |
f4565e4c | 137 | >::result_type |
82d734f7 | 138 | parallel_for_each (unsigned n, RandomIt first, RandomIt last, |
b859a3ef TV |
139 | RangeFunction callback, |
140 | gdb::function_view<size_t(RandomIt)> task_size = nullptr) | |
d55c9a68 | 141 | { |
a09520cd | 142 | using result_type |
c95dd24e | 143 | = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type; |
d55c9a68 | 144 | |
53944a3b TV |
145 | /* If enabled, print debug info about how the work is distributed across |
146 | the threads. */ | |
9b89bf16 | 147 | const bool parallel_for_each_debug = false; |
53944a3b TV |
148 | |
149 | size_t n_worker_threads = thread_pool::g_thread_pool->thread_count (); | |
150 | size_t n_threads = n_worker_threads; | |
d55c9a68 | 151 | size_t n_elements = last - first; |
f4565e4c | 152 | size_t elts_per_thread = 0; |
4319180c | 153 | size_t elts_left_over = 0; |
b859a3ef TV |
154 | size_t total_size = 0; |
155 | size_t size_per_thread = 0; | |
156 | size_t max_element_size = n_elements == 0 ? 1 : SIZE_MAX / n_elements; | |
4319180c | 157 | |
d55c9a68 TT |
158 | if (n_threads > 1) |
159 | { | |
b859a3ef TV |
160 | if (task_size != nullptr) |
161 | { | |
162 | gdb_assert (n == 1); | |
163 | for (RandomIt i = first; i != last; ++i) | |
164 | { | |
165 | size_t element_size = task_size (i); | |
166 | gdb_assert (element_size > 0); | |
167 | if (element_size > max_element_size) | |
168 | /* We could start scaling here, but that doesn't seem to be | |
169 | worth the effort. */ | |
170 | element_size = max_element_size; | |
171 | size_t prev_total_size = total_size; | |
172 | total_size += element_size; | |
173 | /* Check for overflow. */ | |
174 | gdb_assert (prev_total_size < total_size); | |
175 | } | |
176 | size_per_thread = total_size / n_threads; | |
177 | } | |
178 | else | |
179 | { | |
180 | /* Require that there should be at least N elements in a | |
181 | thread. */ | |
182 | gdb_assert (n > 0); | |
183 | if (n_elements / n_threads < n) | |
184 | n_threads = std::max (n_elements / n, (size_t) 1); | |
185 | elts_per_thread = n_elements / n_threads; | |
186 | elts_left_over = n_elements % n_threads; | |
187 | /* n_elements == n_threads * elts_per_thread + elts_left_over. */ | |
188 | } | |
d55c9a68 | 189 | } |
d55c9a68 | 190 | |
f4565e4c TT |
191 | size_t count = n_threads == 0 ? 0 : n_threads - 1; |
192 | gdb::detail::par_for_accumulator<result_type> results (count); | |
d55c9a68 | 193 | |
53944a3b TV |
194 | if (parallel_for_each_debug) |
195 | { | |
196 | debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements); | |
b859a3ef TV |
197 | if (task_size != nullptr) |
198 | { | |
199 | debug_printf (_("Parallel for: total_size: %zu\n"), total_size); | |
200 | debug_printf (_("Parallel for: size_per_thread: %zu\n"), size_per_thread); | |
201 | } | |
202 | else | |
203 | { | |
204 | debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n); | |
205 | debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread); | |
206 | } | |
53944a3b TV |
207 | } |
208 | ||
b859a3ef | 209 | size_t remaining_size = total_size; |
f4565e4c TT |
210 | for (int i = 0; i < count; ++i) |
211 | { | |
b859a3ef TV |
212 | RandomIt end; |
213 | size_t chunk_size = 0; | |
214 | if (task_size == nullptr) | |
215 | { | |
216 | end = first + elts_per_thread; | |
217 | if (i < elts_left_over) | |
218 | /* Distribute the leftovers over the worker threads, to avoid having | |
219 | to handle all of them in a single thread. */ | |
220 | end++; | |
221 | } | |
222 | else | |
223 | { | |
224 | RandomIt j; | |
225 | for (j = first; j < last && chunk_size < size_per_thread; ++j) | |
226 | { | |
227 | size_t element_size = task_size (j); | |
228 | if (element_size > max_element_size) | |
229 | element_size = max_element_size; | |
230 | chunk_size += element_size; | |
231 | } | |
232 | end = j; | |
233 | remaining_size -= chunk_size; | |
234 | } | |
53944a3b | 235 | if (parallel_for_each_debug) |
b859a3ef TV |
236 | { |
237 | debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"), | |
238 | i, (size_t)(end - first)); | |
239 | if (task_size != nullptr) | |
240 | debug_printf (_("\t(size: %zu)"), chunk_size); | |
241 | debug_printf (_("\n")); | |
242 | } | |
f4565e4c TT |
243 | results.post (i, [=] () |
244 | { | |
245 | return callback (first, end); | |
246 | }); | |
247 | first = end; | |
248 | } | |
249 | ||
53944a3b TV |
250 | for (int i = count; i < n_worker_threads; ++i) |
251 | if (parallel_for_each_debug) | |
b859a3ef TV |
252 | { |
253 | debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i); | |
254 | if (task_size != nullptr) | |
255 | debug_printf (_("\t(size: 0)")); | |
256 | debug_printf (_("\n")); | |
257 | } | |
53944a3b | 258 | |
f4565e4c | 259 | /* Process all the remaining elements in the main thread. */ |
53944a3b | 260 | if (parallel_for_each_debug) |
b859a3ef TV |
261 | { |
262 | debug_printf (_("Parallel for: elements on main thread\t\t: %zu"), | |
263 | (size_t)(last - first)); | |
264 | if (task_size != nullptr) | |
265 | debug_printf (_("\t(size: %zu)"), remaining_size); | |
266 | debug_printf (_("\n")); | |
267 | } | |
f4565e4c TT |
268 | return results.finish ([=] () |
269 | { | |
270 | return callback (first, last); | |
271 | }); | |
d55c9a68 TT |
272 | } |
273 | ||
18a5766d TV |
274 | /* A sequential drop-in replacement of parallel_for_each. This can be useful |
275 | when debugging multi-threading behaviour, and you want to limit | |
276 | multi-threading in a fine-grained way. */ | |
277 | ||
278 | template<class RandomIt, class RangeFunction> | |
279 | typename gdb::detail::par_for_accumulator< | |
c95dd24e | 280 | typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type |
18a5766d TV |
281 | >::result_type |
282 | sequential_for_each (unsigned n, RandomIt first, RandomIt last, | |
b859a3ef TV |
283 | RangeFunction callback, |
284 | gdb::function_view<size_t(RandomIt)> task_size = nullptr) | |
18a5766d | 285 | { |
c95dd24e | 286 | using result_type = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type; |
18a5766d TV |
287 | |
288 | gdb::detail::par_for_accumulator<result_type> results (0); | |
289 | ||
290 | /* Process all the remaining elements in the main thread. */ | |
291 | return results.finish ([=] () | |
292 | { | |
293 | return callback (first, last); | |
294 | }); | |
295 | } | |
296 | ||
d55c9a68 TT |
297 | } |
298 | ||
299 | #endif /* GDBSUPPORT_PARALLEL_FOR_H */ |