]> git.ipfire.org Git - thirdparty/gcc.git/blame - libstdc++-v3/include/parallel/multiway_mergesort.h
lto-plugin.c (temp_obj_dir_name): Remove.
[thirdparty/gcc.git] / libstdc++-v3 / include / parallel / multiway_mergesort.h
CommitLineData
c2ba9709
JS
1// -*- C++ -*-
2
748086b7 3// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
c2ba9709
JS
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the terms
7// of the GNU General Public License as published by the Free Software
748086b7 8// Foundation; either version 3, or (at your option) any later
c2ba9709
JS
9// version.
10
11// This library is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15
748086b7
JJ
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
c2ba9709 19
748086b7
JJ
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
c2ba9709
JS
24
25/** @file parallel/multiway_mergesort.h
26 * @brief Parallel multiway merge sort.
27 * This file is a GNU parallel extension to the Standard C++ Library.
28 */
29
30// Written by Johannes Singler.
31
cbcd1e45
JS
32#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
c2ba9709
JS
34
35#include <vector>
36
37#include <parallel/basic_iterator.h>
38#include <bits/stl_algo.h>
39#include <parallel/parallel.h>
40#include <parallel/multiway_merge.h>
c2ba9709
JS
41
42namespace __gnu_parallel
43{
44
/**
 *  @brief Half-open index range describing one subsequence.
 *  @tparam _DifferenceTp Type used for the two indices.
 */
template<typename _DifferenceTp>
  struct _Piece
  {
    /** @brief Index type, re-exported for users of this struct. */
    typedef _DifferenceTp _DifferenceType;

    /** @brief Index of the first element of the subsequence. */
    _DifferenceType _M_begin;

    /** @brief Index one past the last element of the subsequence. */
    _DifferenceType _M_end;
  };
57
e683ee2a
JS
58/** @brief Data accessed by all threads.
59 *
60 * PMWMS = parallel multiway mergesort */
1acba85b
JS
61template<typename _RAIter>
62 struct _PMWMSSortingData
c2ba9709 63 {
1acba85b
JS
64 typedef std::iterator_traits<_RAIter> _TraitsType;
65 typedef typename _TraitsType::value_type _ValueType;
66 typedef typename _TraitsType::difference_type _DifferenceType;
c2ba9709 67
e683ee2a 68 /** @brief Number of threads involved. */
54384f7f 69 _ThreadIndex _M_num_threads;
e683ee2a 70
1acba85b
JS
71 /** @brief Input __begin. */
72 _RAIter _M_source;
c2ba9709
JS
73
74 /** @brief Start indices, per thread. */
1acba85b 75 _DifferenceType* _M_starts;
c2ba9709 76
c2ba9709 77 /** @brief Storage in which to sort. */
1acba85b 78 _ValueType** _M_temporary;
c2ba9709 79
c2ba9709 80 /** @brief Samples. */
1acba85b 81 _ValueType* _M_samples;
c2ba9709
JS
82
83 /** @brief Offsets to add to the found positions. */
1acba85b 84 _DifferenceType* _M_offsets;
c2ba9709 85
1acba85b
JS
86 /** @brief Pieces of data to merge @__c [thread][__sequence] */
87 std::vector<_Piece<_DifferenceType> >* _M_pieces;
e683ee2a
JS
88};
89
90/**
1acba85b
JS
91 * @brief Select _M_samples from a sequence.
92 * @param __sd Pointer to algorithm data. _Result will be placed in
93 * @__c __sd->_M_samples.
94 * @param __num_samples Number of _M_samples to select.
e683ee2a 95 */
1acba85b 96template<typename _RAIter, typename _DifferenceTp>
5817ff8e 97 void
1acba85b
JS
98 __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
99 _DifferenceTp __num_samples)
c2ba9709 100 {
1acba85b
JS
101 typedef std::iterator_traits<_RAIter> _TraitsType;
102 typedef typename _TraitsType::value_type _ValueType;
103 typedef _DifferenceTp _DifferenceType;
c2ba9709 104
1acba85b 105 _ThreadIndex __iam = omp_get_thread_num();
c2ba9709 106
1acba85b 107 _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
c891154f 108
1acba85b
JS
109 equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
110 __num_samples + 1, __es);
c2ba9709 111
1acba85b
JS
112 for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
113 ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
15ac3c72 114 _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]);
e683ee2a 115
1acba85b 116 delete[] __es;
c2ba9709
JS
117 }
118
/**
 *  @brief Split consistently.
 *
 *  Primary template, deliberately empty: the call operator is supplied
 *  only by the partial specializations on @c __exact.
 */
template<bool __exact, typename _RAIter,
         typename _Compare, typename _SortingPlacesIterator>
  struct _SplitConsistently
  { };
125
126/** @brief Split by exact splitting. */
1acba85b
JS
127template<typename _RAIter, typename _Compare,
128 typename _SortingPlacesIterator>
129 struct _SplitConsistently
130 <true, _RAIter, _Compare, _SortingPlacesIterator>
f9985df5
JS
131 {
132 void operator()(
1acba85b
JS
133 const _ThreadIndex __iam,
134 _PMWMSSortingData<_RAIter>* __sd,
135 _Compare& __comp,
f9985df5 136 const typename
1acba85b
JS
137 std::iterator_traits<_RAIter>::difference_type
138 __num_samples)
f9985df5
JS
139 const
140 {
141# pragma omp barrier
142
1acba85b 143 std::vector<std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
54384f7f
JS
144 seqs(__sd->_M_num_threads);
145 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
1acba85b 146 seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
15ac3c72
JS
147 __sd->_M_temporary[__s]
148 + (__sd->_M_starts[__s + 1]
149 - __sd->_M_starts[__s]));
f9985df5 150
54384f7f 151 std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
f9985df5
JS
152
153 // if not last thread
54384f7f 154 if (__iam < __sd->_M_num_threads - 1)
f9985df5 155 multiseq_partition(seqs.begin(), seqs.end(),
15ac3c72
JS
156 __sd->_M_starts[__iam + 1], _M_offsets.begin(),
157 __comp);
f9985df5 158
54384f7f 159 for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
f9985df5
JS
160 {
161 // for each sequence
54384f7f 162 if (__iam < (__sd->_M_num_threads - 1))
15ac3c72
JS
163 __sd->_M_pieces[__iam][__seq]._M_end
164 = _M_offsets[__seq] - seqs[__seq].first;
f9985df5
JS
165 else
166 // very end of this sequence
54384f7f 167 __sd->_M_pieces[__iam][__seq]._M_end =
15ac3c72 168 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
f9985df5
JS
169 }
170
171# pragma omp barrier
172
54384f7f 173 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
f9985df5
JS
174 {
175 // For each sequence.
1acba85b 176 if (__iam > 0)
15ac3c72
JS
177 __sd->_M_pieces[__iam][__seq]._M_begin =
178 __sd->_M_pieces[__iam - 1][__seq]._M_end;
f9985df5
JS
179 else
180 // Absolute beginning.
54384f7f 181 __sd->_M_pieces[__iam][__seq]._M_begin = 0;
f9985df5
JS
182 }
183 }
184 };
185
186/** @brief Split by sampling. */
1acba85b
JS
187template<typename _RAIter, typename _Compare,
188 typename _SortingPlacesIterator>
189 struct _SplitConsistently<false, _RAIter, _Compare,
190 _SortingPlacesIterator>
f9985df5
JS
191 {
192 void operator()(
1acba85b
JS
193 const _ThreadIndex __iam,
194 _PMWMSSortingData<_RAIter>* __sd,
195 _Compare& __comp,
f9985df5 196 const typename
1acba85b
JS
197 std::iterator_traits<_RAIter>::difference_type
198 __num_samples)
f9985df5
JS
199 const
200 {
1acba85b
JS
201 typedef std::iterator_traits<_RAIter> _TraitsType;
202 typedef typename _TraitsType::value_type _ValueType;
203 typedef typename _TraitsType::difference_type _DifferenceType;
f9985df5 204
1acba85b 205 __determine_samples(__sd, __num_samples);
f9985df5
JS
206
207# pragma omp barrier
208
209# pragma omp single
1acba85b 210 __gnu_sequential::sort(__sd->_M_samples,
15ac3c72
JS
211 __sd->_M_samples
212 + (__num_samples * __sd->_M_num_threads),
1acba85b 213 __comp);
f9985df5
JS
214
215# pragma omp barrier
216
54384f7f 217 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
f9985df5
JS
218 {
219 // For each sequence.
1acba85b 220 if (__num_samples * __iam > 0)
54384f7f 221 __sd->_M_pieces[__iam][__s]._M_begin =
1acba85b
JS
222 std::lower_bound(__sd->_M_temporary[__s],
223 __sd->_M_temporary[__s]
224 + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
225 __sd->_M_samples[__num_samples * __iam],
226 __comp)
227 - __sd->_M_temporary[__s];
f9985df5
JS
228 else
229 // Absolute beginning.
54384f7f 230 __sd->_M_pieces[__iam][__s]._M_begin = 0;
1acba85b 231
15ac3c72
JS
232 if ((__num_samples * (__iam + 1)) <
233 (__num_samples * __sd->_M_num_threads))
54384f7f 234 __sd->_M_pieces[__iam][__s]._M_end =
1acba85b
JS
235 std::lower_bound(__sd->_M_temporary[__s],
236 __sd->_M_temporary[__s]
15ac3c72 237 + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
1acba85b
JS
238 __sd->_M_samples[__num_samples * (__iam + 1)],
239 __comp)
240 - __sd->_M_temporary[__s];
f9985df5 241 else
721641c4 242 // Absolute end.
15ac3c72
JS
243 __sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1]
244 - __sd->_M_starts[__s];
f9985df5
JS
245 }
246 }
247 };
248
/**
 *  @brief Sequential sort functor, stable or not depending on
 *  @c __stable.
 *
 *  Primary template, deliberately empty: the call operator is supplied
 *  only by the partial specializations on @c __stable.
 */
template<bool __stable, typename _RAIter, typename _Compare>
  struct __possibly_stable_sort
  { };
253
1acba85b
JS
254template<typename _RAIter, typename _Compare>
255 struct __possibly_stable_sort<true, _RAIter, _Compare>
f9985df5 256 {
1acba85b
JS
257 void operator()(const _RAIter& __begin,
258 const _RAIter& __end, _Compare& __comp) const
f9985df5 259 {
1acba85b 260 __gnu_sequential::stable_sort(__begin, __end, __comp);
f9985df5
JS
261 }
262 };
263
1acba85b
JS
264template<typename _RAIter, typename _Compare>
265 struct __possibly_stable_sort<false, _RAIter, _Compare>
f9985df5 266 {
1acba85b
JS
267 void operator()(const _RAIter __begin,
268 const _RAIter __end, _Compare& __comp) const
f9985df5 269 {
1acba85b 270 __gnu_sequential::sort(__begin, __end, __comp);
f9985df5
JS
271 }
272 };
273
/**
 *  @brief Sequential multiway merge functor, stable or not depending on
 *  @c __stable.
 *
 *  Primary template, deliberately empty: the call operator is supplied
 *  only by the partial specializations on @c __stable.
 */
template<bool __stable, typename Seq_RAIter,
         typename _RAIter, typename _Compare,
         typename DiffType>
  struct __possibly_stable_multiway_merge
  { };
280
1acba85b
JS
281template<typename Seq_RAIter, typename _RAIter,
282 typename _Compare, typename DiffType>
283 struct __possibly_stable_multiway_merge
284 <true, Seq_RAIter, _RAIter, _Compare,
f9985df5
JS
285 DiffType>
286 {
1acba85b
JS
287 void operator()(const Seq_RAIter& __seqs_begin,
288 const Seq_RAIter& __seqs_end,
289 const _RAIter& __target,
290 _Compare& __comp,
291 DiffType __length_am) const
f9985df5 292 {
15ac3c72
JS
293 stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
294 __comp, sequential_tag());
f9985df5
JS
295 }
296 };
297
1acba85b
JS
298template<typename Seq_RAIter, typename _RAIter,
299 typename _Compare, typename DiffType>
300 struct __possibly_stable_multiway_merge
301 <false, Seq_RAIter, _RAIter, _Compare,
f9985df5
JS
302 DiffType>
303 {
1acba85b
JS
304 void operator()(const Seq_RAIter& __seqs_begin,
305 const Seq_RAIter& __seqs_end,
306 const _RAIter& __target,
307 _Compare& __comp,
308 DiffType __length_am) const
f9985df5 309 {
1acba85b 310 multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
36fc5958 311 sequential_tag());
f9985df5
JS
312 }
313 };
314
e683ee2a 315/** @brief PMWMS code executed by each thread.
1acba85b
JS
316 * @param __sd Pointer to algorithm data.
317 * @param __comp Comparator.
e683ee2a 318 */
1acba85b
JS
319template<bool __stable, bool __exact, typename _RAIter,
320 typename _Compare>
5817ff8e 321 void
1acba85b
JS
322 parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
323 _Compare& __comp)
c2ba9709 324 {
1acba85b
JS
325 typedef std::iterator_traits<_RAIter> _TraitsType;
326 typedef typename _TraitsType::value_type _ValueType;
327 typedef typename _TraitsType::difference_type _DifferenceType;
c2ba9709 328
1acba85b 329 _ThreadIndex __iam = omp_get_thread_num();
c2ba9709
JS
330
331 // Length of this thread's chunk, before merging.
15ac3c72
JS
332 _DifferenceType __length_local
333 = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
c2ba9709 334
f9985df5 335 // Sort in temporary storage, leave space for sentinel.
c2ba9709 336
1acba85b 337 typedef _ValueType* _SortingPlacesIterator;
c2ba9709 338
1acba85b
JS
339 __sd->_M_temporary[__iam] =
340 static_cast<_ValueType*>(
341 ::operator new(sizeof(_ValueType) * (__length_local + 1)));
c2ba9709
JS
342
343 // Copy there.
15ac3c72
JS
344 std::uninitialized_copy(
345 __sd->_M_source + __sd->_M_starts[__iam],
346 __sd->_M_source + __sd->_M_starts[__iam] + __length_local,
347 __sd->_M_temporary[__iam]);
e683ee2a 348
1acba85b 349 __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
15ac3c72
JS
350 (__sd->_M_temporary[__iam],
351 __sd->_M_temporary[__iam] + __length_local,
352 __comp);
e683ee2a 353
1acba85b
JS
354 // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
355 // __sd->_M_temporary[__iam] + __length_local.
e683ee2a 356
f9985df5 357 // No barrier here: Synchronization is done by the splitting routine.
e683ee2a 358
1acba85b 359 _DifferenceType __num_samples =
54384f7f 360 _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
1acba85b
JS
361 _SplitConsistently
362 <__exact, _RAIter, _Compare, _SortingPlacesIterator>()
363 (__iam, __sd, __comp, __num_samples);
c2ba9709 364
1acba85b
JS
365 // Offset from __target __begin, __length after merging.
366 _DifferenceType __offset = 0, __length_am = 0;
54384f7f 367 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
c2ba9709 368 {
15ac3c72
JS
369 __length_am += __sd->_M_pieces[__iam][__s]._M_end
370 - __sd->_M_pieces[__iam][__s]._M_begin;
54384f7f 371 __offset += __sd->_M_pieces[__iam][__s]._M_begin;
c2ba9709
JS
372 }
373
f9985df5 374 typedef std::vector<
1acba85b 375 std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
4459d22e
JS
376 _SeqVector;
377 _SeqVector seqs(__sd->_M_num_threads);
c2ba9709 378
54384f7f 379 for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
c2ba9709 380 {
1acba85b 381 seqs[__s] =
15ac3c72
JS
382 std::make_pair(
383 __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
384 __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
c2ba9709
JS
385 }
386
1acba85b
JS
387 __possibly_stable_multiway_merge<
388 __stable,
4459d22e 389 typename _SeqVector::iterator,
1acba85b
JS
390 _RAIter,
391 _Compare, _DifferenceType>()
f9985df5 392 (seqs.begin(), seqs.end(),
1acba85b
JS
393 __sd->_M_source + __offset, __comp,
394 __length_am);
c2ba9709 395
e683ee2a 396# pragma omp barrier
c2ba9709 397
1acba85b 398 ::operator delete(__sd->_M_temporary[__iam]);
c2ba9709
JS
399 }
400
e683ee2a 401/** @brief PMWMS main call.
1acba85b
JS
402 * @param __begin Begin iterator of sequence.
403 * @param __end End iterator of sequence.
404 * @param __comp Comparator.
405 * @param __n Length of sequence.
406 * @param __num_threads Number of threads to use.
e683ee2a 407 */
1acba85b
JS
408template<bool __stable, bool __exact, typename _RAIter,
409 typename _Compare>
5817ff8e 410 void
1acba85b
JS
411 parallel_sort_mwms(_RAIter __begin, _RAIter __end,
412 _Compare __comp,
413 _ThreadIndex __num_threads)
c2ba9709 414 {
1acba85b 415 _GLIBCXX_CALL(__end - __begin)
e683ee2a 416
1acba85b
JS
417 typedef std::iterator_traits<_RAIter> _TraitsType;
418 typedef typename _TraitsType::value_type _ValueType;
419 typedef typename _TraitsType::difference_type _DifferenceType;
c2ba9709 420
1acba85b 421 _DifferenceType __n = __end - __begin;
f9985df5 422
1acba85b 423 if (__n <= 1)
c2ba9709
JS
424 return;
425
e683ee2a 426 // at least one element per thread
1acba85b
JS
427 if (__num_threads > __n)
428 __num_threads = static_cast<_ThreadIndex>(__n);
c2ba9709 429
e683ee2a 430 // shared variables
1acba85b
JS
431 _PMWMSSortingData<_RAIter> __sd;
432 _DifferenceType* _M_starts;
c2ba9709 433
1acba85b 434# pragma omp parallel num_threads(__num_threads)
e683ee2a 435 {
15ac3c72 436 __num_threads = omp_get_num_threads(); //no more threads than requested
e683ee2a
JS
437
438# pragma omp single
439 {
54384f7f 440 __sd._M_num_threads = __num_threads;
1acba85b 441 __sd._M_source = __begin;
c2ba9709 442
1acba85b 443 __sd._M_temporary = new _ValueType*[__num_threads];
f9985df5 444
1acba85b 445 if (!__exact)
e683ee2a 446 {
4459d22e 447 _DifferenceType __size =
15ac3c72 448 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
1acba85b
JS
449 * __num_threads;
450 __sd._M_samples = static_cast<_ValueType*>(
4459d22e 451 ::operator new(__size * sizeof(_ValueType)));
e683ee2a
JS
452 }
453 else
1acba85b
JS
454 __sd._M_samples = NULL;
455
456 __sd._M_offsets = new _DifferenceType[__num_threads - 1];
15ac3c72
JS
457 __sd._M_pieces
458 = new std::vector<_Piece<_DifferenceType> >[__num_threads];
1acba85b
JS
459 for (int __s = 0; __s < __num_threads; ++__s)
460 __sd._M_pieces[__s].resize(__num_threads);
15ac3c72
JS
461 _M_starts = __sd._M_starts
462 = new _DifferenceType[__num_threads + 1];
1acba85b
JS
463
464 _DifferenceType __chunk_length = __n / __num_threads;
465 _DifferenceType __split = __n % __num_threads;
466 _DifferenceType __pos = 0;
467 for (int __i = 0; __i < __num_threads; ++__i)
e683ee2a 468 {
1acba85b 469 _M_starts[__i] = __pos;
15ac3c72
JS
470 __pos += (__i < __split)
471 ? (__chunk_length + 1) : __chunk_length;
e683ee2a 472 }
1acba85b 473 _M_starts[__num_threads] = __pos;
f9985df5 474 } //single
e683ee2a
JS
475
476 // Now sort in parallel.
1acba85b 477 parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
e683ee2a 478 } //parallel
c2ba9709 479
1acba85b
JS
480 delete[] _M_starts;
481 delete[] __sd._M_temporary;
c2ba9709 482
1acba85b
JS
483 if (!__exact)
484 ::operator delete(__sd._M_samples);
c2ba9709 485
1acba85b
JS
486 delete[] __sd._M_offsets;
487 delete[] __sd._M_pieces;
c2ba9709 488 }
e683ee2a 489} //namespace __gnu_parallel
c2ba9709 490
cbcd1e45 491#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */