]> git.ipfire.org Git - thirdparty/gcc.git/blame - libstdc++-v3/include/parallel/balanced_quicksort.h
builtins.c (std_canonical_va_list): Treat structure based va_list types.
[thirdparty/gcc.git] / libstdc++-v3 / include / parallel / balanced_quicksort.h
CommitLineData
c2ba9709
JS
1// -*- C++ -*-
2
531898c3 3// Copyright (C) 2007, 2008 Free Software Foundation, Inc.
c2ba9709
JS
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the terms
7// of the GNU General Public License as published by the Free Software
8// Foundation; either version 2, or (at your option) any later
9// version.
10
11// This library is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15
16// You should have received a copy of the GNU General Public License
17// along with this library; see the file COPYING. If not, write to
18// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19// MA 02111-1307, USA.
20
21// As a special exception, you may use this file as part of a free
22// software library without restriction. Specifically, if other files
23// instantiate templates or use macros or inline functions from this
24// file, or you compile this file and link it with other files to
25// produce an executable, this file does not by itself cause the
26// resulting executable to be covered by the GNU General Public
27// License. This exception does not however invalidate any other
28// reasons why the executable file might be covered by the GNU General
29// Public License.
30
31/** @file parallel/balanced_quicksort.h
32 * @brief Implementation of a dynamically load-balanced parallel quicksort.
33 *
34 * It works in-place and needs only logarithmic extra memory.
1904bef1
JS
35 * The algorithm is similar to the one proposed in
36 *
37 * P. Tsigas and Y. Zhang.
38 * A simple, fast parallel implementation of quicksort and
39 * its performance evaluation on SUN enterprise 10000.
40 * In 11th Euromicro Conference on Parallel, Distributed and
41 * Network-Based Processing, page 372, 2003.
42 *
c2ba9709
JS
43 * This file is a GNU parallel extension to the Standard C++ Library.
44 */
45
46// Written by Johannes Singler.
47
48#ifndef _GLIBCXX_PARALLEL_BAL_QUICKSORT_H
49#define _GLIBCXX_PARALLEL_BAL_QUICKSORT_H 1
50
51#include <parallel/basic_iterator.h>
52#include <bits/stl_algo.h>
53
54#include <parallel/settings.h>
55#include <parallel/partition.h>
56#include <parallel/random_number.h>
57#include <parallel/queue.h>
58#include <functional>
59
60#if _GLIBCXX_ASSERTIONS
61#include <parallel/checkers.h>
62#endif
63
64namespace __gnu_parallel
65{
e683ee2a
JS
66/** @brief Information local to one thread in the parallel quicksort run. */
67template<typename RandomAccessIterator>
c2ba9709
JS
68 struct QSBThreadLocal
69 {
70 typedef std::iterator_traits<RandomAccessIterator> traits_type;
71 typedef typename traits_type::difference_type difference_type;
72
73 /** @brief Continuous part of the sequence, described by an
e683ee2a 74 iterator pair. */
c2ba9709
JS
75 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
76
77 /** @brief Initial piece to work on. */
78 Piece initial;
79
80 /** @brief Work-stealing queue. */
81 RestrictedBoundedConcurrentQueue<Piece> leftover_parts;
82
83 /** @brief Number of threads involved in this algorithm. */
84 thread_index_t num_threads;
85
86 /** @brief Pointer to a counter of elements left over to sort. */
87 volatile difference_type* elements_leftover;
88
89 /** @brief The complete sequence to sort. */
90 Piece global;
91
92 /** @brief Constructor.
93 * @param queue_size Size of the work-stealing queue. */
94 QSBThreadLocal(int queue_size) : leftover_parts(queue_size) { }
95 };
96
e683ee2a
JS
97/** @brief Balanced quicksort divide step.
98 * @param begin Begin iterator of subsequence.
99 * @param end End iterator of subsequence.
100 * @param comp Comparator.
101 * @param num_threads Number of threads that are allowed to work on
102 * this part.
103 * @pre @c (end-begin)>=1 */
104template<typename RandomAccessIterator, typename Comparator>
531898c3 105 typename std::iterator_traits<RandomAccessIterator>::difference_type
c2ba9709 106 qsb_divide(RandomAccessIterator begin, RandomAccessIterator end,
e683ee2a 107 Comparator comp, thread_index_t num_threads)
c2ba9709
JS
108 {
109 _GLIBCXX_PARALLEL_ASSERT(num_threads > 0);
110
111 typedef std::iterator_traits<RandomAccessIterator> traits_type;
112 typedef typename traits_type::value_type value_type;
113 typedef typename traits_type::difference_type difference_type;
114
5817ff8e
PC
115 RandomAccessIterator pivot_pos =
116 median_of_three_iterators(begin, begin + (end - begin) / 2,
117 end - 1, comp);
c2ba9709
JS
118
119#if defined(_GLIBCXX_ASSERTIONS)
120 // Must be in between somewhere.
121 difference_type n = end - begin;
122
e683ee2a
JS
123 _GLIBCXX_PARALLEL_ASSERT(
124 (!comp(*pivot_pos, *begin) && !comp(*(begin + n / 2), *pivot_pos))
38a28aab 125 || (!comp(*pivot_pos, *begin) && !comp(*(end - 1), *pivot_pos))
e683ee2a 126 || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*begin, *pivot_pos))
38a28aab
JS
127 || (!comp(*pivot_pos, *(begin + n / 2)) && !comp(*(end - 1), *pivot_pos))
128 || (!comp(*pivot_pos, *(end - 1)) && !comp(*begin, *pivot_pos))
129 || (!comp(*pivot_pos, *(end - 1)) && !comp(*(begin + n / 2), *pivot_pos)));
c2ba9709
JS
130#endif
131
132 // Swap pivot value to end.
133 if (pivot_pos != (end - 1))
134 std::swap(*pivot_pos, *(end - 1));
135 pivot_pos = end - 1;
136
e683ee2a
JS
137 __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
138 pred(comp, *pivot_pos);
c2ba9709
JS
139
140 // Divide, returning end - begin - 1 in the worst case.
e683ee2a
JS
141 difference_type split_pos = parallel_partition(
142 begin, end - 1, pred, num_threads);
c2ba9709
JS
143
144 // Swap back pivot to middle.
145 std::swap(*(begin + split_pos), *pivot_pos);
146 pivot_pos = begin + split_pos;
147
148#if _GLIBCXX_ASSERTIONS
149 RandomAccessIterator r;
5817ff8e 150 for (r = begin; r != pivot_pos; ++r)
c2ba9709 151 _GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos));
5817ff8e 152 for (; r != end; ++r)
c2ba9709
JS
153 _GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos));
154#endif
155
156 return split_pos;
157 }
158
e683ee2a
JS
159/** @brief Quicksort conquer step.
160 * @param tls Array of thread-local storages.
161 * @param begin Begin iterator of subsequence.
162 * @param end End iterator of subsequence.
163 * @param comp Comparator.
164 * @param iam Number of the thread processing this function.
165 * @param num_threads
166 * Number of threads that are allowed to work on this part. */
167template<typename RandomAccessIterator, typename Comparator>
531898c3 168 void
c2ba9709 169 qsb_conquer(QSBThreadLocal<RandomAccessIterator>** tls,
e683ee2a
JS
170 RandomAccessIterator begin, RandomAccessIterator end,
171 Comparator comp,
172 thread_index_t iam, thread_index_t num_threads,
173 bool parent_wait)
c2ba9709
JS
174 {
175 typedef std::iterator_traits<RandomAccessIterator> traits_type;
176 typedef typename traits_type::value_type value_type;
177 typedef typename traits_type::difference_type difference_type;
178
179 difference_type n = end - begin;
180
e683ee2a 181 if (num_threads <= 1 || n <= 1)
c2ba9709 182 {
e683ee2a
JS
183 tls[iam]->initial.first = begin;
184 tls[iam]->initial.second = end;
c2ba9709 185
e683ee2a 186 qsb_local_sort_with_helping(tls, comp, iam, parent_wait);
c2ba9709 187
e683ee2a 188 return;
c2ba9709
JS
189 }
190
191 // Divide step.
192 difference_type split_pos = qsb_divide(begin, end, comp, num_threads);
193
194#if _GLIBCXX_ASSERTIONS
195 _GLIBCXX_PARALLEL_ASSERT(0 <= split_pos && split_pos < (end - begin));
196#endif
197
e683ee2a
JS
198 thread_index_t num_threads_leftside =
199 std::max<thread_index_t>(1, std::min<thread_index_t>(
200 num_threads - 1, split_pos * num_threads / n));
c2ba9709 201
e683ee2a 202# pragma omp atomic
c2ba9709
JS
203 *tls[iam]->elements_leftover -= (difference_type)1;
204
205 // Conquer step.
e683ee2a 206# pragma omp parallel num_threads(2)
c2ba9709 207 {
e683ee2a
JS
208 bool wait;
209 if(omp_get_num_threads() < 2)
210 wait = false;
211 else
212 wait = parent_wait;
213
214# pragma omp sections
215 {
216# pragma omp section
217 {
218 qsb_conquer(tls, begin, begin + split_pos, comp,
219 iam,
220 num_threads_leftside,
221 wait);
222 wait = parent_wait;
223 }
224 // The pivot_pos is left in place, to ensure termination.
225# pragma omp section
226 {
227 qsb_conquer(tls, begin + split_pos + 1, end, comp,
228 iam + num_threads_leftside,
229 num_threads - num_threads_leftside,
230 wait);
231 wait = parent_wait;
232 }
233 }
c2ba9709
JS
234 }
235 }
236
e683ee2a
JS
237/**
238 * @brief Quicksort step doing load-balanced local sort.
239 * @param tls Array of thread-local storages.
240 * @param comp Comparator.
241 * @param iam Number of the thread processing this function.
242 */
243template<typename RandomAccessIterator, typename Comparator>
531898c3 244 void
c2ba9709 245 qsb_local_sort_with_helping(QSBThreadLocal<RandomAccessIterator>** tls,
e683ee2a 246 Comparator& comp, int iam, bool wait)
c2ba9709
JS
247 {
248 typedef std::iterator_traits<RandomAccessIterator> traits_type;
249 typedef typename traits_type::value_type value_type;
250 typedef typename traits_type::difference_type difference_type;
251 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
252
253 QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam];
254
d7066497
JS
255 difference_type base_case_n =
256 _Settings::get().sort_qsb_base_case_maximal_n;
c2ba9709
JS
257 if (base_case_n < 2)
258 base_case_n = 2;
259 thread_index_t num_threads = tl.num_threads;
260
261 // Every thread has its own random number generator.
262 random_number rng(iam + 1);
263
264 Piece current = tl.initial;
265
266 difference_type elements_done = 0;
267#if _GLIBCXX_ASSERTIONS
268 difference_type total_elements_done = 0;
269#endif
270
271 for (;;)
272 {
e683ee2a
JS
273 // Invariant: current must be a valid (maybe empty) range.
274 RandomAccessIterator begin = current.first, end = current.second;
275 difference_type n = end - begin;
c2ba9709 276
e683ee2a
JS
277 if (n > base_case_n)
278 {
279 // Divide.
280 RandomAccessIterator pivot_pos = begin + rng(n);
c2ba9709 281
e683ee2a
JS
282 // Swap pivot_pos value to end.
283 if (pivot_pos != (end - 1))
284 std::swap(*pivot_pos, *(end - 1));
285 pivot_pos = end - 1;
c2ba9709 286
e683ee2a
JS
287 __gnu_parallel::binder2nd
288 <Comparator, value_type, value_type, bool>
289 pred(comp, *pivot_pos);
c2ba9709 290
e683ee2a
JS
291 // Divide, leave pivot unchanged in last place.
292 RandomAccessIterator split_pos1, split_pos2;
293 split_pos1 = __gnu_sequential::partition(begin, end - 1, pred);
c2ba9709 294
e683ee2a 295 // Left side: < pivot_pos; right side: >= pivot_pos.
c2ba9709 296#if _GLIBCXX_ASSERTIONS
e683ee2a 297 _GLIBCXX_PARALLEL_ASSERT(begin <= split_pos1 && split_pos1 < end);
c2ba9709 298#endif
e683ee2a
JS
299 // Swap pivot back to middle.
300 if (split_pos1 != pivot_pos)
301 std::swap(*split_pos1, *pivot_pos);
302 pivot_pos = split_pos1;
303
304 // In case all elements are equal, split_pos1 == 0.
305 if ((split_pos1 + 1 - begin) < (n >> 7)
306 || (end - split_pos1) < (n >> 7))
307 {
308 // Very unequal split, one part smaller than one 128th
309 // elements not strictly larger than the pivot.
310 __gnu_parallel::unary_negate<__gnu_parallel::binder1st
a4797b34
PC
311 <Comparator, value_type, value_type, bool>, value_type>
312 pred(__gnu_parallel::binder1st
313 <Comparator, value_type, value_type, bool>(comp,
314 *pivot_pos));
e683ee2a
JS
315
316 // Find other end of pivot-equal range.
5817ff8e
PC
317 split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
318 end, pred);
e683ee2a
JS
319 }
320 else
321 // Only skip the pivot.
322 split_pos2 = split_pos1 + 1;
323
324 // Elements equal to pivot are done.
325 elements_done += (split_pos2 - split_pos1);
c2ba9709 326#if _GLIBCXX_ASSERTIONS
e683ee2a 327 total_elements_done += (split_pos2 - split_pos1);
c2ba9709 328#endif
e683ee2a
JS
329 // Always push larger part onto stack.
330 if (((split_pos1 + 1) - begin) < (end - (split_pos2)))
331 {
332 // Right side larger.
333 if ((split_pos2) != end)
a4797b34
PC
334 tl.leftover_parts.push_front(std::make_pair(split_pos2,
335 end));
e683ee2a
JS
336
337 //current.first = begin; //already set anyway
338 current.second = split_pos1;
339 continue;
340 }
341 else
342 {
343 // Left side larger.
344 if (begin != split_pos1)
5817ff8e
PC
345 tl.leftover_parts.push_front(std::make_pair(begin,
346 split_pos1));
e683ee2a
JS
347
348 current.first = split_pos2;
349 //current.second = end; //already set anyway
350 continue;
351 }
352 }
353 else
354 {
355 __gnu_sequential::sort(begin, end, comp);
356 elements_done += n;
c2ba9709 357#if _GLIBCXX_ASSERTIONS
e683ee2a 358 total_elements_done += n;
c2ba9709
JS
359#endif
360
e683ee2a
JS
361 // Prefer own stack, small pieces.
362 if (tl.leftover_parts.pop_front(current))
363 continue;
c2ba9709 364
e683ee2a
JS
365# pragma omp atomic
366 *tl.elements_leftover -= elements_done;
367
368 elements_done = 0;
c2ba9709
JS
369
370#if _GLIBCXX_ASSERTIONS
e683ee2a 371 double search_start = omp_get_wtime();
c2ba9709
JS
372#endif
373
e683ee2a
JS
374 // Look for new work.
375 bool successfully_stolen = false;
376 while (wait && *tl.elements_leftover > 0 && !successfully_stolen
c2ba9709 377#if _GLIBCXX_ASSERTIONS
e683ee2a
JS
378 // Possible dead-lock.
379 && (omp_get_wtime() < (search_start + 1.0))
c2ba9709 380#endif
e683ee2a
JS
381 )
382 {
383 thread_index_t victim;
384 victim = rng(num_threads);
385
386 // Large pieces.
387 successfully_stolen = (victim != iam)
388 && tls[victim]->leftover_parts.pop_back(current);
389 if (!successfully_stolen)
390 yield();
c2ba9709 391#if !defined(__ICC) && !defined(__ECC)
e683ee2a 392# pragma omp flush
c2ba9709 393#endif
e683ee2a 394 }
c2ba9709
JS
395
396#if _GLIBCXX_ASSERTIONS
e683ee2a
JS
397 if (omp_get_wtime() >= (search_start + 1.0))
398 {
399 sleep(1);
5817ff8e
PC
400 _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
401 < (search_start + 1.0));
e683ee2a 402 }
c2ba9709 403#endif
e683ee2a
JS
404 if (!successfully_stolen)
405 {
c2ba9709 406#if _GLIBCXX_ASSERTIONS
e683ee2a 407 _GLIBCXX_PARALLEL_ASSERT(*tl.elements_leftover == 0);
c2ba9709 408#endif
e683ee2a
JS
409 return;
410 }
411 }
c2ba9709
JS
412 }
413 }
414
e683ee2a
JS
415/** @brief Top-level quicksort routine.
416 * @param begin Begin iterator of sequence.
417 * @param end End iterator of sequence.
418 * @param comp Comparator.
e683ee2a
JS
419 * @param num_threads Number of threads that are allowed to work on
420 * this part.
421 */
422template<typename RandomAccessIterator, typename Comparator>
531898c3 423 void
c2ba9709 424 parallel_sort_qsb(RandomAccessIterator begin, RandomAccessIterator end,
e683ee2a 425 Comparator comp,
e683ee2a 426 thread_index_t num_threads)
c2ba9709
JS
427 {
428 _GLIBCXX_CALL(end - begin)
429
430 typedef std::iterator_traits<RandomAccessIterator> traits_type;
431 typedef typename traits_type::value_type value_type;
432 typedef typename traits_type::difference_type difference_type;
433 typedef std::pair<RandomAccessIterator, RandomAccessIterator> Piece;
434
435 typedef QSBThreadLocal<RandomAccessIterator> tls_type;
436
d7066497
JS
437 difference_type n = end - begin;
438
c2ba9709
JS
439 if (n <= 1)
440 return;
441
442 // At least one element per processor.
443 if (num_threads > n)
444 num_threads = static_cast<thread_index_t>(n);
445
e683ee2a 446 // Initialize thread local storage
c2ba9709 447 tls_type** tls = new tls_type*[num_threads];
e683ee2a
JS
448 difference_type queue_size = num_threads * (thread_index_t)(log2(n) + 1);
449 for (thread_index_t t = 0; t < num_threads; ++t)
450 tls[t] = new QSBThreadLocal<RandomAccessIterator>(queue_size);
c2ba9709
JS
451
452 // There can never be more than ceil(log2(n)) ranges on the stack, because
453 // 1. Only one processor pushes onto the stack
454 // 2. The largest range has at most length n
455 // 3. Each range is larger than half of the range remaining
456 volatile difference_type elements_leftover = n;
5817ff8e 457 for (int i = 0; i < num_threads; ++i)
c2ba9709 458 {
e683ee2a
JS
459 tls[i]->elements_leftover = &elements_leftover;
460 tls[i]->num_threads = num_threads;
461 tls[i]->global = std::make_pair(begin, end);
c2ba9709 462
e683ee2a
JS
463 // Just in case nothing is left to assign.
464 tls[i]->initial = std::make_pair(end, end);
c2ba9709
JS
465 }
466
c2ba9709 467 // Main recursion call.
e683ee2a 468 qsb_conquer(tls, begin, begin + n, comp, 0, num_threads, true);
c2ba9709
JS
469
470#if _GLIBCXX_ASSERTIONS
471 // All stack must be empty.
472 Piece dummy;
5817ff8e 473 for (int i = 1; i < num_threads; ++i)
c2ba9709
JS
474 _GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy));
475#endif
476
5817ff8e 477 for (int i = 0; i < num_threads; ++i)
c2ba9709
JS
478 delete tls[i];
479 delete[] tls;
480 }
481} // namespace __gnu_parallel
482
483#endif