]>
Commit | Line | Data |
---|---|---|
1 | // -*- C++ -*- | |
2 | ||
3 | // Copyright (C) 2007-2020 Free Software Foundation, Inc. | |
4 | // | |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
8 | // Foundation; either version 3, or (at your option) any later | |
9 | // version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
16 | // Under Section 7 of GPL version 3, you are granted additional | |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
24 | ||
25 | /** @file parallel/balanced_quicksort.h | |
26 | * @brief Implementation of a dynamically load-balanced parallel quicksort. | |
27 | * | |
28 | * It works in-place and needs only logarithmic extra memory. | |
29 | * The algorithm is similar to the one proposed in | |
30 | * | |
31 | * P. Tsigas and Y. Zhang. | |
32 | * A simple, fast parallel implementation of quicksort and | |
33 | * its performance evaluation on SUN enterprise 10000. | |
34 | * In 11th Euromicro Conference on Parallel, Distributed and | |
35 | * Network-Based Processing, page 372, 2003. | |
36 | * | |
37 | * This file is a GNU parallel extension to the Standard C++ Library. | |
38 | */ | |
39 | ||
40 | // Written by Johannes Singler. | |
41 | ||
42 | #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H | |
43 | #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1 | |
44 | ||
45 | #include <parallel/basic_iterator.h> | |
46 | #include <bits/stl_algo.h> | |
47 | #include <bits/stl_function.h> | |
48 | ||
49 | #include <parallel/settings.h> | |
50 | #include <parallel/partition.h> | |
51 | #include <parallel/random_number.h> | |
52 | #include <parallel/queue.h> | |
53 | ||
54 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
55 | #include <parallel/checkers.h> | |
56 | #ifdef _GLIBCXX_HAVE_UNISTD_H | |
57 | #include <unistd.h> | |
58 | #endif | |
59 | #endif | |
60 | ||
61 | namespace __gnu_parallel | |
62 | { | |
63 | /** @brief Information local to one thread in the parallel quicksort run. */ | |
64 | template<typename _RAIter> | |
65 | struct _QSBThreadLocal | |
66 | { | |
67 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
68 | typedef typename _TraitsType::difference_type _DifferenceType; | |
69 | ||
70 | /** @brief Continuous part of the sequence, described by an | |
71 | iterator pair. */ | |
72 | typedef std::pair<_RAIter, _RAIter> _Piece; | |
73 | ||
74 | /** @brief Initial piece to work on. */ | |
75 | _Piece _M_initial; | |
76 | ||
77 | /** @brief Work-stealing queue. */ | |
78 | _RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts; | |
79 | ||
80 | /** @brief Number of threads involved in this algorithm. */ | |
81 | _ThreadIndex _M_num_threads; | |
82 | ||
83 | /** @brief Pointer to a counter of elements left over to sort. */ | |
84 | volatile _DifferenceType* _M_elements_leftover; | |
85 | ||
86 | /** @brief The complete sequence to sort. */ | |
87 | _Piece _M_global; | |
88 | ||
89 | /** @brief Constructor. | |
90 | * @param __queue_size size of the work-stealing queue. */ | |
91 | _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { } | |
92 | }; | |
93 | ||
94 | /** @brief Balanced quicksort divide step. | |
95 | * @param __begin Begin iterator of subsequence. | |
96 | * @param __end End iterator of subsequence. | |
97 | * @param __comp Comparator. | |
98 | * @param __num_threads Number of threads that are allowed to work on | |
99 | * this part. | |
100 | * @pre @c (__end-__begin)>=1 */ | |
101 | template<typename _RAIter, typename _Compare> | |
102 | typename std::iterator_traits<_RAIter>::difference_type | |
103 | __qsb_divide(_RAIter __begin, _RAIter __end, | |
104 | _Compare __comp, _ThreadIndex __num_threads) | |
105 | { | |
106 | _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0); | |
107 | ||
108 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
109 | typedef typename _TraitsType::value_type _ValueType; | |
110 | typedef typename _TraitsType::difference_type _DifferenceType; | |
111 | ||
112 | _RAIter __pivot_pos = | |
113 | __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2, | |
114 | __end - 1, __comp); | |
115 | ||
116 | #if defined(_GLIBCXX_PARALLEL_ASSERTIONS) | |
117 | // Must be in between somewhere. | |
118 | _DifferenceType __n = __end - __begin; | |
119 | ||
120 | _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin) | |
121 | && !__comp(*(__begin + __n / 2), | |
122 | *__pivot_pos)) | |
123 | || (!__comp(*__pivot_pos, *__begin) | |
124 | && !__comp(*(__end - 1), *__pivot_pos)) | |
125 | || (!__comp(*__pivot_pos, *(__begin + __n / 2)) | |
126 | && !__comp(*__begin, *__pivot_pos)) | |
127 | || (!__comp(*__pivot_pos, *(__begin + __n / 2)) | |
128 | && !__comp(*(__end - 1), *__pivot_pos)) | |
129 | || (!__comp(*__pivot_pos, *(__end - 1)) | |
130 | && !__comp(*__begin, *__pivot_pos)) | |
131 | || (!__comp(*__pivot_pos, *(__end - 1)) | |
132 | && !__comp(*(__begin + __n / 2), | |
133 | *__pivot_pos))); | |
134 | #endif | |
135 | ||
136 | // Swap pivot value to end. | |
137 | if (__pivot_pos != (__end - 1)) | |
138 | std::iter_swap(__pivot_pos, __end - 1); | |
139 | __pivot_pos = __end - 1; | |
140 | ||
141 | __gnu_parallel::__binder2nd<_Compare, _ValueType, _ValueType, bool> | |
142 | __pred(__comp, *__pivot_pos); | |
143 | ||
144 | // Divide, returning __end - __begin - 1 in the worst case. | |
145 | _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1, | |
146 | __pred, | |
147 | __num_threads); | |
148 | ||
149 | // Swap back pivot to middle. | |
150 | std::iter_swap(__begin + __split_pos, __pivot_pos); | |
151 | __pivot_pos = __begin + __split_pos; | |
152 | ||
153 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
154 | _RAIter __r; | |
155 | for (__r = __begin; __r != __pivot_pos; ++__r) | |
156 | _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos)); | |
157 | for (; __r != __end; ++__r) | |
158 | _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos)); | |
159 | #endif | |
160 | ||
161 | return __split_pos; | |
162 | } | |
163 | ||
164 | /** @brief Quicksort conquer step. | |
165 | * @param __tls Array of thread-local storages. | |
166 | * @param __begin Begin iterator of subsequence. | |
167 | * @param __end End iterator of subsequence. | |
168 | * @param __comp Comparator. | |
169 | * @param __iam Number of the thread processing this function. | |
170 | * @param __num_threads | |
171 | * Number of threads that are allowed to work on this part. */ | |
172 | template<typename _RAIter, typename _Compare> | |
173 | void | |
174 | __qsb_conquer(_QSBThreadLocal<_RAIter>** __tls, | |
175 | _RAIter __begin, _RAIter __end, | |
176 | _Compare __comp, | |
177 | _ThreadIndex __iam, _ThreadIndex __num_threads, | |
178 | bool __parent_wait) | |
179 | { | |
180 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
181 | typedef typename _TraitsType::value_type _ValueType; | |
182 | typedef typename _TraitsType::difference_type _DifferenceType; | |
183 | ||
184 | _DifferenceType __n = __end - __begin; | |
185 | ||
186 | if (__num_threads <= 1 || __n <= 1) | |
187 | { | |
188 | __tls[__iam]->_M_initial.first = __begin; | |
189 | __tls[__iam]->_M_initial.second = __end; | |
190 | ||
191 | __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait); | |
192 | ||
193 | return; | |
194 | } | |
195 | ||
196 | // Divide step. | |
197 | _DifferenceType __split_pos = | |
198 | __qsb_divide(__begin, __end, __comp, __num_threads); | |
199 | ||
200 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
201 | _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && | |
202 | __split_pos < (__end - __begin)); | |
203 | #endif | |
204 | ||
205 | _ThreadIndex | |
206 | __num_threads_leftside = std::max<_ThreadIndex> | |
207 | (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos | |
208 | * __num_threads / __n)); | |
209 | ||
210 | # pragma omp atomic | |
211 | *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1; | |
212 | ||
213 | // Conquer step. | |
214 | # pragma omp parallel num_threads(2) | |
215 | { | |
216 | bool __wait; | |
217 | if(omp_get_num_threads() < 2) | |
218 | __wait = false; | |
219 | else | |
220 | __wait = __parent_wait; | |
221 | ||
222 | # pragma omp sections | |
223 | { | |
224 | # pragma omp section | |
225 | { | |
226 | __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp, | |
227 | __iam, __num_threads_leftside, __wait); | |
228 | __wait = __parent_wait; | |
229 | } | |
230 | // The pivot_pos is left in place, to ensure termination. | |
231 | # pragma omp section | |
232 | { | |
233 | __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp, | |
234 | __iam + __num_threads_leftside, | |
235 | __num_threads - __num_threads_leftside, __wait); | |
236 | __wait = __parent_wait; | |
237 | } | |
238 | } | |
239 | } | |
240 | } | |
241 | ||
242 | /** | |
243 | * @brief Quicksort step doing load-balanced local sort. | |
244 | * @param __tls Array of thread-local storages. | |
245 | * @param __comp Comparator. | |
246 | * @param __iam Number of the thread processing this function. | |
247 | */ | |
248 | template<typename _RAIter, typename _Compare> | |
249 | void | |
250 | __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls, | |
251 | _Compare& __comp, _ThreadIndex __iam, | |
252 | bool __wait) | |
253 | { | |
254 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
255 | typedef typename _TraitsType::value_type _ValueType; | |
256 | typedef typename _TraitsType::difference_type _DifferenceType; | |
257 | typedef std::pair<_RAIter, _RAIter> _Piece; | |
258 | ||
259 | _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam]; | |
260 | ||
261 | _DifferenceType | |
262 | __base_case_n = _Settings::get().sort_qsb_base_case_maximal_n; | |
263 | if (__base_case_n < 2) | |
264 | __base_case_n = 2; | |
265 | _ThreadIndex __num_threads = __tl._M_num_threads; | |
266 | ||
267 | // Every thread has its own random number generator. | |
268 | _RandomNumber __rng(__iam + 1); | |
269 | ||
270 | _Piece __current = __tl._M_initial; | |
271 | ||
272 | _DifferenceType __elements_done = 0; | |
273 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
274 | _DifferenceType __total_elements_done = 0; | |
275 | #endif | |
276 | ||
277 | for (;;) | |
278 | { | |
279 | // Invariant: __current must be a valid (maybe empty) range. | |
280 | _RAIter __begin = __current.first, __end = __current.second; | |
281 | _DifferenceType __n = __end - __begin; | |
282 | ||
283 | if (__n > __base_case_n) | |
284 | { | |
285 | // Divide. | |
286 | _RAIter __pivot_pos = __begin + __rng(__n); | |
287 | ||
288 | // Swap __pivot_pos value to end. | |
289 | if (__pivot_pos != (__end - 1)) | |
290 | std::iter_swap(__pivot_pos, __end - 1); | |
291 | __pivot_pos = __end - 1; | |
292 | ||
293 | __gnu_parallel::__binder2nd | |
294 | <_Compare, _ValueType, _ValueType, bool> | |
295 | __pred(__comp, *__pivot_pos); | |
296 | ||
297 | // Divide, leave pivot unchanged in last place. | |
298 | _RAIter __split_pos1, __split_pos2; | |
299 | __split_pos1 = __gnu_sequential::partition(__begin, __end - 1, | |
300 | __pred); | |
301 | ||
302 | // Left side: < __pivot_pos; __right side: >= __pivot_pos. | |
303 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
304 | _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 | |
305 | && __split_pos1 < __end); | |
306 | #endif | |
307 | // Swap pivot back to middle. | |
308 | if (__split_pos1 != __pivot_pos) | |
309 | std::iter_swap(__split_pos1, __pivot_pos); | |
310 | __pivot_pos = __split_pos1; | |
311 | ||
312 | // In case all elements are equal, __split_pos1 == 0. | |
313 | if ((__split_pos1 + 1 - __begin) < (__n >> 7) | |
314 | || (__end - __split_pos1) < (__n >> 7)) | |
315 | { | |
316 | // Very unequal split, one part smaller than one 128th | |
317 | // elements not strictly larger than the pivot. | |
318 | __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st | |
319 | <_Compare, _ValueType, _ValueType, bool>, _ValueType> | |
320 | __pred(__gnu_parallel::__binder1st | |
321 | <_Compare, _ValueType, _ValueType, bool> | |
322 | (__comp, *__pivot_pos)); | |
323 | ||
324 | // Find other end of pivot-equal range. | |
325 | __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, | |
326 | __end, __pred); | |
327 | } | |
328 | else | |
329 | // Only skip the pivot. | |
330 | __split_pos2 = __split_pos1 + 1; | |
331 | ||
332 | // Elements equal to pivot are done. | |
333 | __elements_done += (__split_pos2 - __split_pos1); | |
334 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
335 | __total_elements_done += (__split_pos2 - __split_pos1); | |
336 | #endif | |
337 | // Always push larger part onto stack. | |
338 | if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2))) | |
339 | { | |
340 | // Right side larger. | |
341 | if ((__split_pos2) != __end) | |
342 | __tl._M_leftover_parts.push_front | |
343 | (std::make_pair(__split_pos2, __end)); | |
344 | ||
345 | //__current.first = __begin; //already set anyway | |
346 | __current.second = __split_pos1; | |
347 | continue; | |
348 | } | |
349 | else | |
350 | { | |
351 | // Left side larger. | |
352 | if (__begin != __split_pos1) | |
353 | __tl._M_leftover_parts.push_front(std::make_pair | |
354 | (__begin, __split_pos1)); | |
355 | ||
356 | __current.first = __split_pos2; | |
357 | //__current.second = __end; //already set anyway | |
358 | continue; | |
359 | } | |
360 | } | |
361 | else | |
362 | { | |
363 | __gnu_sequential::sort(__begin, __end, __comp); | |
364 | __elements_done += __n; | |
365 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
366 | __total_elements_done += __n; | |
367 | #endif | |
368 | ||
369 | // Prefer own stack, small pieces. | |
370 | if (__tl._M_leftover_parts.pop_front(__current)) | |
371 | continue; | |
372 | ||
373 | # pragma omp atomic | |
374 | *__tl._M_elements_leftover -= __elements_done; | |
375 | ||
376 | __elements_done = 0; | |
377 | ||
378 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
379 | double __search_start = omp_get_wtime(); | |
380 | #endif | |
381 | ||
382 | // Look for new work. | |
383 | bool __successfully_stolen = false; | |
384 | while (__wait && *__tl._M_elements_leftover > 0 | |
385 | && !__successfully_stolen | |
386 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
387 | // Possible dead-lock. | |
388 | && (omp_get_wtime() < (__search_start + 1.0)) | |
389 | #endif | |
390 | ) | |
391 | { | |
392 | _ThreadIndex __victim; | |
393 | __victim = __rng(__num_threads); | |
394 | ||
395 | // Large pieces. | |
396 | __successfully_stolen = (__victim != __iam) | |
397 | && __tls[__victim]->_M_leftover_parts.pop_back(__current); | |
398 | if (!__successfully_stolen) | |
399 | __yield(); | |
400 | #if !defined(__ICC) && !defined(__ECC) | |
401 | # pragma omp flush | |
402 | #endif | |
403 | } | |
404 | ||
405 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
406 | if (omp_get_wtime() >= (__search_start + 1.0)) | |
407 | { | |
408 | sleep(1); | |
409 | _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() | |
410 | < (__search_start + 1.0)); | |
411 | } | |
412 | #endif | |
413 | if (!__successfully_stolen) | |
414 | { | |
415 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
416 | _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0); | |
417 | #endif | |
418 | return; | |
419 | } | |
420 | } | |
421 | } | |
422 | } | |
423 | ||
424 | /** @brief Top-level quicksort routine. | |
425 | * @param __begin Begin iterator of sequence. | |
426 | * @param __end End iterator of sequence. | |
427 | * @param __comp Comparator. | |
428 | * @param __num_threads Number of threads that are allowed to work on | |
429 | * this part. | |
430 | */ | |
431 | template<typename _RAIter, typename _Compare> | |
432 | void | |
433 | __parallel_sort_qsb(_RAIter __begin, _RAIter __end, | |
434 | _Compare __comp, _ThreadIndex __num_threads) | |
435 | { | |
436 | _GLIBCXX_CALL(__end - __begin) | |
437 | ||
438 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
439 | typedef typename _TraitsType::value_type _ValueType; | |
440 | typedef typename _TraitsType::difference_type _DifferenceType; | |
441 | typedef std::pair<_RAIter, _RAIter> _Piece; | |
442 | ||
443 | typedef _QSBThreadLocal<_RAIter> _TLSType; | |
444 | ||
445 | _DifferenceType __n = __end - __begin; | |
446 | ||
447 | if (__n <= 1) | |
448 | return; | |
449 | ||
450 | // At least one element per processor. | |
451 | if (__num_threads > __n) | |
452 | __num_threads = static_cast<_ThreadIndex>(__n); | |
453 | ||
454 | // Initialize thread local storage | |
455 | _TLSType** __tls = new _TLSType*[__num_threads]; | |
456 | _DifferenceType __queue_size = (__num_threads | |
457 | * (_ThreadIndex)(__rd_log2(__n) + 1)); | |
458 | for (_ThreadIndex __t = 0; __t < __num_threads; ++__t) | |
459 | __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size); | |
460 | ||
461 | // There can never be more than ceil(__rd_log2(__n)) ranges on the | |
462 | // stack, because | |
463 | // 1. Only one processor pushes onto the stack | |
464 | // 2. The largest range has at most length __n | |
465 | // 3. Each range is larger than half of the range remaining | |
466 | volatile _DifferenceType __elements_leftover = __n; | |
467 | for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) | |
468 | { | |
469 | __tls[__i]->_M_elements_leftover = &__elements_leftover; | |
470 | __tls[__i]->_M_num_threads = __num_threads; | |
471 | __tls[__i]->_M_global = std::make_pair(__begin, __end); | |
472 | ||
473 | // Just in case nothing is left to assign. | |
474 | __tls[__i]->_M_initial = std::make_pair(__end, __end); | |
475 | } | |
476 | ||
477 | // Main recursion call. | |
478 | __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0, | |
479 | __num_threads, true); | |
480 | ||
481 | #if _GLIBCXX_PARALLEL_ASSERTIONS | |
482 | // All stack must be empty. | |
483 | _Piece __dummy; | |
484 | for (_ThreadIndex __i = 1; __i < __num_threads; ++__i) | |
485 | _GLIBCXX_PARALLEL_ASSERT( | |
486 | !__tls[__i]->_M_leftover_parts.pop_back(__dummy)); | |
487 | #endif | |
488 | ||
489 | for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) | |
490 | delete __tls[__i]; | |
491 | delete[] __tls; | |
492 | } | |
493 | } // namespace __gnu_parallel | |
494 | ||
495 | #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */ |