]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
83ffe9cd | 3 | // Copyright (C) 2007-2023 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/partition.h | |
26 | * @brief Parallel implementation of std::partition(), | |
27 | * std::nth_element(), and std::partial_sort(). | |
28 | * This file is a GNU parallel extension to the Standard C++ Library. | |
29 | */ | |
30 | ||
31 | // Written by Johannes Singler and Felix Putze. | |
32 | ||
33 | #ifndef _GLIBCXX_PARALLEL_PARTITION_H | |
34 | #define _GLIBCXX_PARALLEL_PARTITION_H 1 | |
35 | ||
36 | #include <parallel/basic_iterator.h> | |
37 | #include <parallel/sort.h> | |
4f39bf5c | 38 | #include <parallel/random_number.h> |
c2ba9709 JS |
39 | #include <bits/stl_algo.h> |
40 | #include <parallel/parallel.h> | |
41 | ||
e683ee2a | 42 | /** @brief Decide whether to declare certain variables volatile. */ |
c2ba9709 JS |
43 | #define _GLIBCXX_VOLATILE volatile |
44 | ||
45 | namespace __gnu_parallel | |
46 | { | |
77d16198 PC |
47 | /** @brief Parallel implementation of std::partition. |
48 | * @param __begin Begin iterator of input sequence to split. | |
49 | * @param __end End iterator of input sequence to split. | |
50 | * @param __pred Partition predicate, possibly including some kind | |
51 | * of pivot. | |
52 | * @param __num_threads Maximum number of threads to use for this task. | |
53 | * @return Number of elements not fulfilling the predicate. */ | |
54 | template<typename _RAIter, typename _Predicate> | |
55 | typename std::iterator_traits<_RAIter>::difference_type | |
56 | __parallel_partition(_RAIter __begin, _RAIter __end, | |
57 | _Predicate __pred, _ThreadIndex __num_threads) | |
58 | { | |
59 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
60 | typedef typename _TraitsType::value_type _ValueType; | |
61 | typedef typename _TraitsType::difference_type _DifferenceType; | |
62 | ||
63 | _DifferenceType __n = __end - __begin; | |
64 | ||
65 | _GLIBCXX_CALL(__n) | |
66 | ||
67 | const _Settings& __s = _Settings::get(); | |
68 | ||
ed277997 JS |
69 | // shared |
70 | _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1, | |
71 | __dist = __n, | |
72 | __leftover_left, __leftover_right, | |
73 | __leftnew, __rightnew; | |
77d16198 | 74 | |
ed277997 | 75 | // just 0 or 1, but int to allow atomic operations |
8fc81078 | 76 | int* __reserved_left = 0, * __reserved_right = 0; |
77d16198 | 77 | |
d95ba652 | 78 | _DifferenceType __chunk_size = __s.partition_chunk_size; |
77d16198 | 79 | |
77d16198 | 80 | //at least two chunks per thread |
ed277997 | 81 | if (__dist >= 2 * __num_threads * __chunk_size) |
77d16198 PC |
82 | # pragma omp parallel num_threads(__num_threads) |
83 | { | |
84 | # pragma omp single | |
85 | { | |
86 | __num_threads = omp_get_num_threads(); | |
ed277997 JS |
87 | __reserved_left = new int[__num_threads]; |
88 | __reserved_right = new int[__num_threads]; | |
77d16198 PC |
89 | |
90 | if (__s.partition_chunk_share > 0.0) | |
91 | __chunk_size = std::max<_DifferenceType> | |
92 | (__s.partition_chunk_size, (double)__n | |
93 | * __s.partition_chunk_share / (double)__num_threads); | |
94 | else | |
95 | __chunk_size = __s.partition_chunk_size; | |
96 | } | |
97 | ||
ed277997 | 98 | while (__dist >= 2 * __num_threads * __chunk_size) |
77d16198 PC |
99 | { |
100 | # pragma omp single | |
101 | { | |
ed277997 | 102 | _DifferenceType __num_chunks = __dist / __chunk_size; |
77d16198 | 103 | |
8b0c13a8 | 104 | for (_ThreadIndex __r = 0; __r < __num_threads; ++__r) |
77d16198 | 105 | { |
ed277997 JS |
106 | __reserved_left [__r] = 0; // false |
107 | __reserved_right[__r] = 0; // false | |
77d16198 PC |
108 | } |
109 | __leftover_left = 0; | |
110 | __leftover_right = 0; | |
111 | } //implicit barrier | |
112 | ||
113 | // Private. | |
114 | _DifferenceType __thread_left, __thread_left_border, | |
115 | __thread_right, __thread_right_border; | |
77d16198 | 116 | |
ed277997 | 117 | __thread_left = __left + 1; |
77d16198 PC |
118 | // Just to satisfy the condition below. |
119 | __thread_left_border = __thread_left - 1; | |
ed277997 | 120 | |
77d16198 | 121 | __thread_right = __n - 1; |
ed277997 | 122 | // Just to satisfy the condition below. |
77d16198 PC |
123 | __thread_right_border = __thread_right + 1; |
124 | ||
125 | bool __iam_finished = false; | |
126 | while (!__iam_finished) | |
127 | { | |
128 | if (__thread_left > __thread_left_border) | |
129 | { | |
ed277997 JS |
130 | _DifferenceType __former_dist = |
131 | __fetch_and_add(&__dist, -__chunk_size); | |
132 | if (__former_dist < __chunk_size) | |
133 | { | |
134 | __fetch_and_add(&__dist, __chunk_size); | |
135 | __iam_finished = true; | |
136 | break; | |
137 | } | |
138 | else | |
139 | { | |
140 | __thread_left = | |
141 | __fetch_and_add(&__left, __chunk_size); | |
142 | __thread_left_border = | |
143 | __thread_left + (__chunk_size - 1); | |
144 | } | |
77d16198 PC |
145 | } |
146 | ||
147 | if (__thread_right < __thread_right_border) | |
148 | { | |
ed277997 JS |
149 | _DifferenceType __former_dist = |
150 | __fetch_and_add(&__dist, -__chunk_size); | |
151 | if (__former_dist < __chunk_size) | |
152 | { | |
153 | __fetch_and_add(&__dist, __chunk_size); | |
154 | __iam_finished = true; | |
155 | break; | |
156 | } | |
157 | else | |
158 | { | |
159 | __thread_right = | |
160 | __fetch_and_add(&__right, -__chunk_size); | |
161 | __thread_right_border = | |
162 | __thread_right - (__chunk_size - 1); | |
163 | } | |
77d16198 PC |
164 | } |
165 | ||
77d16198 PC |
166 | // Swap as usual. |
167 | while (__thread_left < __thread_right) | |
168 | { | |
169 | while (__pred(__begin[__thread_left]) | |
170 | && __thread_left <= __thread_left_border) | |
171 | ++__thread_left; | |
172 | while (!__pred(__begin[__thread_right]) | |
173 | && __thread_right >= __thread_right_border) | |
174 | --__thread_right; | |
175 | ||
176 | if (__thread_left > __thread_left_border | |
177 | || __thread_right < __thread_right_border) | |
178 | // Fetch new chunk(__s). | |
179 | break; | |
180 | ||
fc722a0e JS |
181 | std::iter_swap(__begin + __thread_left, |
182 | __begin + __thread_right); | |
77d16198 PC |
183 | ++__thread_left; |
184 | --__thread_right; | |
185 | } | |
186 | } | |
187 | ||
188 | // Now swap the leftover chunks to the right places. | |
189 | if (__thread_left <= __thread_left_border) | |
190 | # pragma omp atomic | |
191 | ++__leftover_left; | |
192 | if (__thread_right >= __thread_right_border) | |
193 | # pragma omp atomic | |
194 | ++__leftover_right; | |
195 | ||
196 | # pragma omp barrier | |
197 | ||
ed277997 JS |
198 | _DifferenceType |
199 | __leftold = __left, | |
200 | __leftnew = __left - __leftover_left * __chunk_size, | |
201 | __rightold = __right, | |
202 | __rightnew = __right + __leftover_right * __chunk_size; | |
77d16198 PC |
203 | |
204 | // <=> __thread_left_border + (__chunk_size - 1) >= __leftnew | |
205 | if (__thread_left <= __thread_left_border | |
206 | && __thread_left_border >= __leftnew) | |
207 | { | |
208 | // Chunk already in place, reserve spot. | |
209 | __reserved_left[(__left - (__thread_left_border + 1)) | |
ed277997 | 210 | / __chunk_size] = 1; |
77d16198 PC |
211 | } |
212 | ||
213 | // <=> __thread_right_border - (__chunk_size - 1) <= __rightnew | |
214 | if (__thread_right >= __thread_right_border | |
215 | && __thread_right_border <= __rightnew) | |
216 | { | |
217 | // Chunk already in place, reserve spot. | |
218 | __reserved_right[((__thread_right_border - 1) - __right) | |
ed277997 | 219 | / __chunk_size] = 1; |
77d16198 PC |
220 | } |
221 | ||
222 | # pragma omp barrier | |
223 | ||
224 | if (__thread_left <= __thread_left_border | |
225 | && __thread_left_border < __leftnew) | |
226 | { | |
227 | // Find spot and swap. | |
228 | _DifferenceType __swapstart = -1; | |
ed277997 JS |
229 | for (int __r = 0; __r < __leftover_left; ++__r) |
230 | if (__reserved_left[__r] == 0 | |
231 | && __compare_and_swap(&(__reserved_left[__r]), 0, 1)) | |
232 | { | |
233 | __swapstart = __leftold - (__r + 1) * __chunk_size; | |
234 | break; | |
235 | } | |
c2ba9709 | 236 | |
e383deac | 237 | #if _GLIBCXX_PARALLEL_ASSERTIONS |
77d16198 | 238 | _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); |
c2ba9709 JS |
239 | #endif |
240 | ||
77d16198 PC |
241 | std::swap_ranges(__begin + __thread_left_border |
242 | - (__chunk_size - 1), | |
243 | __begin + __thread_left_border + 1, | |
244 | __begin + __swapstart); | |
245 | } | |
246 | ||
247 | if (__thread_right >= __thread_right_border | |
248 | && __thread_right_border > __rightnew) | |
249 | { | |
250 | // Find spot and swap | |
251 | _DifferenceType __swapstart = -1; | |
ed277997 JS |
252 | for (int __r = 0; __r < __leftover_right; ++__r) |
253 | if (__reserved_right[__r] == 0 | |
254 | && __compare_and_swap(&(__reserved_right[__r]), 0, 1)) | |
255 | { | |
256 | __swapstart = __rightold + __r * __chunk_size + 1; | |
257 | break; | |
258 | } | |
c2ba9709 | 259 | |
e383deac | 260 | #if _GLIBCXX_PARALLEL_ASSERTIONS |
77d16198 | 261 | _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); |
c2ba9709 JS |
262 | #endif |
263 | ||
77d16198 PC |
264 | std::swap_ranges(__begin + __thread_right_border, |
265 | __begin + __thread_right_border | |
266 | + __chunk_size, __begin + __swapstart); | |
267 | } | |
e383deac | 268 | #if _GLIBCXX_PARALLEL_ASSERTIONS |
e683ee2a | 269 | # pragma omp barrier |
c2ba9709 | 270 | |
e683ee2a | 271 | # pragma omp single |
77d16198 | 272 | { |
8b0c13a8 | 273 | for (_DifferenceType __r = 0; __r < __leftover_left; ++__r) |
ed277997 | 274 | _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r] == 1); |
8b0c13a8 | 275 | for (_DifferenceType __r = 0; __r < __leftover_right; ++__r) |
ed277997 | 276 | _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r] == 1); |
77d16198 | 277 | } |
c2ba9709 JS |
278 | #endif |
279 | ||
77d16198 PC |
280 | __left = __leftnew; |
281 | __right = __rightnew; | |
ed277997 | 282 | __dist = __right - __left + 1; |
77d16198 PC |
283 | } |
284 | ||
285 | # pragma omp flush(__left, __right) | |
286 | } // end "recursion" //parallel | |
287 | ||
288 | _DifferenceType __final_left = __left, __final_right = __right; | |
289 | ||
290 | while (__final_left < __final_right) | |
291 | { | |
292 | // Go right until key is geq than pivot. | |
293 | while (__pred(__begin[__final_left]) | |
294 | && __final_left < __final_right) | |
295 | ++__final_left; | |
296 | ||
297 | // Go left until key is less than pivot. | |
298 | while (!__pred(__begin[__final_right]) | |
299 | && __final_left < __final_right) | |
300 | --__final_right; | |
301 | ||
302 | if (__final_left == __final_right) | |
303 | break; | |
fc722a0e | 304 | std::iter_swap(__begin + __final_left, __begin + __final_right); |
77d16198 PC |
305 | ++__final_left; |
306 | --__final_right; | |
307 | } | |
308 | ||
309 | // All elements on the left side are < piv, all elements on the | |
310 | // right are >= piv | |
311 | delete[] __reserved_left; | |
312 | delete[] __reserved_right; | |
313 | ||
77d16198 PC |
314 | // Element "between" __final_left and __final_right might not have |
315 | // been regarded yet | |
316 | if (__final_left < __n && !__pred(__begin[__final_left])) | |
317 | // Really swapped. | |
318 | return __final_left; | |
319 | else | |
320 | return __final_left + 1; | |
321 | } | |
322 | ||
323 | /** | |
324 | * @brief Parallel implementation of std::nth_element(). | |
325 | * @param __begin Begin iterator of input sequence. | |
326 | * @param __nth _Iterator of element that must be in position afterwards. | |
327 | * @param __end End iterator of input sequence. | |
328 | * @param __comp Comparator. | |
329 | */ | |
330 | template<typename _RAIter, typename _Compare> | |
331 | void | |
332 | __parallel_nth_element(_RAIter __begin, _RAIter __nth, | |
333 | _RAIter __end, _Compare __comp) | |
334 | { | |
335 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
336 | typedef typename _TraitsType::value_type _ValueType; | |
337 | typedef typename _TraitsType::difference_type _DifferenceType; | |
338 | ||
339 | _GLIBCXX_CALL(__end - __begin) | |
340 | ||
341 | _RAIter __split; | |
342 | _RandomNumber __rng; | |
343 | ||
d95ba652 JS |
344 | const _Settings& __s = _Settings::get(); |
345 | _DifferenceType __minimum_length = std::max<_DifferenceType>(2, | |
346 | std::max(__s.nth_element_minimal_n, __s.partition_minimal_n)); | |
77d16198 PC |
347 | |
348 | // Break if input range to small. | |
349 | while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length) | |
350 | { | |
351 | _DifferenceType __n = __end - __begin; | |
352 | ||
d95ba652 | 353 | _RAIter __pivot_pos = __begin + __rng(__n); |
77d16198 PC |
354 | |
355 | // Swap __pivot_pos value to end. | |
356 | if (__pivot_pos != (__end - 1)) | |
fc722a0e | 357 | std::iter_swap(__pivot_pos, __end - 1); |
77d16198 PC |
358 | __pivot_pos = __end - 1; |
359 | ||
360 | // _Compare must have first_value_type, second_value_type, | |
361 | // result_type | |
362 | // _Compare == | |
363 | // __gnu_parallel::_Lexicographic<S, int, | |
364 | // __gnu_parallel::_Less<S, S> > | |
365 | // __pivot_pos == std::pair<S, int>* | |
31380bc4 | 366 | __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool> |
77d16198 PC |
367 | __pred(__comp, *__pivot_pos); |
368 | ||
369 | // Divide, leave pivot unchanged in last place. | |
370 | _RAIter __split_pos1, __split_pos2; | |
371 | __split_pos1 = __begin + __parallel_partition(__begin, __end - 1, | |
372 | __pred, | |
373 | __get_max_threads()); | |
374 | ||
375 | // Left side: < __pivot_pos; __right side: >= __pivot_pos | |
376 | ||
377 | // Swap pivot back to middle. | |
378 | if (__split_pos1 != __pivot_pos) | |
fc722a0e | 379 | std::iter_swap(__split_pos1, __pivot_pos); |
77d16198 PC |
380 | __pivot_pos = __split_pos1; |
381 | ||
382 | // In case all elements are equal, __split_pos1 == 0 | |
383 | if ((__split_pos1 + 1 - __begin) < (__n >> 7) | |
384 | || (__end - __split_pos1) < (__n >> 7)) | |
385 | { | |
386 | // Very unequal split, one part smaller than one 128th | |
387 | // elements not strictly larger than the pivot. | |
388 | __gnu_parallel::__unary_negate<__gnu_parallel:: | |
389 | __binder1st<_Compare, _ValueType, | |
390 | _ValueType, bool>, _ValueType> | |
391 | __pred(__gnu_parallel::__binder1st<_Compare, _ValueType, | |
392 | _ValueType, bool>(__comp, *__pivot_pos)); | |
393 | ||
394 | // Find other end of pivot-equal range. | |
395 | __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, | |
396 | __end, __pred); | |
397 | } | |
398 | else | |
399 | // Only skip the pivot. | |
400 | __split_pos2 = __split_pos1 + 1; | |
401 | ||
402 | // Compare iterators. | |
403 | if (__split_pos2 <= __nth) | |
404 | __begin = __split_pos2; | |
405 | else if (__nth < __split_pos1) | |
406 | __end = __split_pos1; | |
407 | else | |
408 | break; | |
409 | } | |
410 | ||
411 | // Only at most _Settings::partition_minimal_n __elements __left. | |
d95ba652 | 412 | __gnu_sequential::nth_element(__begin, __nth, __end, __comp); |
77d16198 PC |
413 | } |
414 | ||
415 | /** @brief Parallel implementation of std::partial_sort(). | |
1acba85b | 416 | * @param __begin Begin iterator of input sequence. |
77d16198 | 417 | * @param __middle Sort until this position. |
1acba85b | 418 | * @param __end End iterator of input sequence. |
77d16198 PC |
419 | * @param __comp Comparator. */ |
420 | template<typename _RAIter, typename _Compare> | |
421 | void | |
422 | __parallel_partial_sort(_RAIter __begin, | |
423 | _RAIter __middle, | |
424 | _RAIter __end, _Compare __comp) | |
425 | { | |
426 | __parallel_nth_element(__begin, __middle, __end, __comp); | |
427 | std::sort(__begin, __middle, __comp); | |
428 | } | |
c2ba9709 | 429 | |
e683ee2a | 430 | } //namespace __gnu_parallel |
c2ba9709 JS |
431 | |
432 | #undef _GLIBCXX_VOLATILE | |
433 | ||
cbcd1e45 | 434 | #endif /* _GLIBCXX_PARALLEL_PARTITION_H */ |