]> git.ipfire.org Git - thirdparty/gcc.git/blame - libstdc++-v3/include/parallel/multiway_mergesort.h
Update copyright years.
[thirdparty/gcc.git] / libstdc++-v3 / include / parallel / multiway_mergesort.h
CommitLineData
c2ba9709
JS
1// -*- C++ -*-
2
a945c346 3// Copyright (C) 2007-2024 Free Software Foundation, Inc.
c2ba9709
JS
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the terms
7// of the GNU General Public License as published by the Free Software
748086b7 8// Foundation; either version 3, or (at your option) any later
c2ba9709
JS
9// version.
10
11// This library is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15
748086b7
JJ
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
c2ba9709 19
748086b7
JJ
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
c2ba9709
JS
24
25/** @file parallel/multiway_mergesort.h
26 * @brief Parallel multiway merge sort.
27 * This file is a GNU parallel extension to the Standard C++ Library.
28 */
29
30// Written by Johannes Singler.
31
cbcd1e45
JS
32#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
c2ba9709
JS
34
35#include <vector>
36
37#include <parallel/basic_iterator.h>
38#include <bits/stl_algo.h>
39#include <parallel/parallel.h>
40#include <parallel/multiway_merge.h>
c2ba9709
JS
41
42namespace __gnu_parallel
43{
338311e5
PC
44 /** @brief Subsequence description. */
45 template<typename _DifferenceTp>
46 struct _Piece
47 {
48 typedef _DifferenceTp _DifferenceType;
c2ba9709 49
338311e5
PC
50 /** @brief Begin of subsequence. */
51 _DifferenceType _M_begin;
c2ba9709 52
338311e5
PC
53 /** @brief End of subsequence. */
54 _DifferenceType _M_end;
55 };
c2ba9709 56
338311e5
PC
57 /** @brief Data accessed by all threads.
58 *
59 * PMWMS = parallel multiway mergesort */
60 template<typename _RAIter>
61 struct _PMWMSSortingData
62 {
63 typedef std::iterator_traits<_RAIter> _TraitsType;
64 typedef typename _TraitsType::value_type _ValueType;
65 typedef typename _TraitsType::difference_type _DifferenceType;
c2ba9709 66
338311e5
PC
67 /** @brief Number of threads involved. */
68 _ThreadIndex _M_num_threads;
f9985df5 69
338311e5
PC
70 /** @brief Input __begin. */
71 _RAIter _M_source;
f9985df5 72
338311e5
PC
73 /** @brief Start indices, per thread. */
74 _DifferenceType* _M_starts;
f9985df5 75
338311e5
PC
76 /** @brief Storage in which to sort. */
77 _ValueType** _M_temporary;
78
79 /** @brief Samples. */
80 _ValueType* _M_samples;
81
82 /** @brief Offsets to add to the found positions. */
83 _DifferenceType* _M_offsets;
84
8e32aa11 85 /** @brief Pieces of data to merge @c [thread][__sequence] */
338311e5 86 std::vector<_Piece<_DifferenceType> >* _M_pieces;
f9985df5
JS
87 };
88
338311e5
PC
89 /**
90 * @brief Select _M_samples from a sequence.
91 * @param __sd Pointer to algorithm data. _Result will be placed in
8e32aa11 92 * @c __sd->_M_samples.
338311e5
PC
93 * @param __num_samples Number of _M_samples to select.
94 */
95 template<typename _RAIter, typename _DifferenceTp>
96 void
97 __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
98 _DifferenceTp __num_samples)
f9985df5 99 {
1acba85b
JS
100 typedef std::iterator_traits<_RAIter> _TraitsType;
101 typedef typename _TraitsType::value_type _ValueType;
338311e5 102 typedef _DifferenceTp _DifferenceType;
f9985df5 103
338311e5 104 _ThreadIndex __iam = omp_get_thread_num();
f9985df5 105
338311e5 106 _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
f9985df5 107
6b77089f
PC
108 __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
109 __num_samples + 1, __es);
f9985df5 110
338311e5
PC
111 for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
112 ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
113 _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
114 + __es[__i + 1]]);
115
116 delete[] __es;
117 }
f9985df5 118
338311e5
PC
119 /** @brief Split consistently. */
120 template<bool __exact, typename _RAIter,
121 typename _Compare, typename _SortingPlacesIterator>
122 struct _SplitConsistently
123 { };
124
125 /** @brief Split by exact splitting. */
126 template<typename _RAIter, typename _Compare,
127 typename _SortingPlacesIterator>
77d16198 128 struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
338311e5
PC
129 {
130 void
131 operator()(const _ThreadIndex __iam,
132 _PMWMSSortingData<_RAIter>* __sd,
133 _Compare& __comp,
134 const typename
135 std::iterator_traits<_RAIter>::difference_type
136 __num_samples) const
137 {
138# pragma omp barrier
139
140 std::vector<std::pair<_SortingPlacesIterator,
141 _SortingPlacesIterator> >
77d16198 142 __seqs(__sd->_M_num_threads);
338311e5 143 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
77d16198
PC
144 __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
145 __sd->_M_temporary[__s]
146 + (__sd->_M_starts[__s + 1]
147 - __sd->_M_starts[__s]));
338311e5 148
77d16198 149 std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
338311e5
PC
150
151 // if not last thread
152 if (__iam < __sd->_M_num_threads - 1)
77d16198
PC
153 multiseq_partition(__seqs.begin(), __seqs.end(),
154 __sd->_M_starts[__iam + 1], __offsets.begin(),
338311e5
PC
155 __comp);
156
8b0c13a8 157 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
338311e5
PC
158 {
159 // for each sequence
160 if (__iam < (__sd->_M_num_threads - 1))
161 __sd->_M_pieces[__iam][__seq]._M_end
77d16198 162 = __offsets[__seq] - __seqs[__seq].first;
338311e5
PC
163 else
164 // very end of this sequence
165 __sd->_M_pieces[__iam][__seq]._M_end =
166 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
167 }
168
169# pragma omp barrier
170
171 for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
172 {
173 // For each sequence.
174 if (__iam > 0)
175 __sd->_M_pieces[__iam][__seq]._M_begin =
176 __sd->_M_pieces[__iam - 1][__seq]._M_end;
177 else
178 // Absolute beginning.
179 __sd->_M_pieces[__iam][__seq]._M_begin = 0;
180 }
181 }
182 };
183
184 /** @brief Split by sampling. */
185 template<typename _RAIter, typename _Compare,
186 typename _SortingPlacesIterator>
77d16198 187 struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
338311e5
PC
188 {
189 void
190 operator()(const _ThreadIndex __iam,
191 _PMWMSSortingData<_RAIter>* __sd,
192 _Compare& __comp,
193 const typename
194 std::iterator_traits<_RAIter>::difference_type
195 __num_samples) const
196 {
197 typedef std::iterator_traits<_RAIter> _TraitsType;
198 typedef typename _TraitsType::value_type _ValueType;
199 typedef typename _TraitsType::difference_type _DifferenceType;
200
201 __determine_samples(__sd, __num_samples);
202
203# pragma omp barrier
204
205# pragma omp single
206 __gnu_sequential::sort(__sd->_M_samples,
207 __sd->_M_samples
208 + (__num_samples * __sd->_M_num_threads),
209 __comp);
210
211# pragma omp barrier
212
213 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
214 {
215 // For each sequence.
216 if (__num_samples * __iam > 0)
217 __sd->_M_pieces[__iam][__s]._M_begin =
1acba85b 218 std::lower_bound(__sd->_M_temporary[__s],
338311e5
PC
219 __sd->_M_temporary[__s]
220 + (__sd->_M_starts[__s + 1]
221 - __sd->_M_starts[__s]),
222 __sd->_M_samples[__num_samples * __iam],
223 __comp)
1acba85b 224 - __sd->_M_temporary[__s];
338311e5
PC
225 else
226 // Absolute beginning.
227 __sd->_M_pieces[__iam][__s]._M_begin = 0;
1acba85b 228
338311e5
PC
229 if ((__num_samples * (__iam + 1)) <
230 (__num_samples * __sd->_M_num_threads))
231 __sd->_M_pieces[__iam][__s]._M_end =
1acba85b 232 std::lower_bound(__sd->_M_temporary[__s],
338311e5
PC
233 __sd->_M_temporary[__s]
234 + (__sd->_M_starts[__s + 1]
235 - __sd->_M_starts[__s]),
236 __sd->_M_samples[__num_samples * (__iam + 1)],
237 __comp)
1acba85b 238 - __sd->_M_temporary[__s];
338311e5
PC
239 else
240 // Absolute end.
241 __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
242 - __sd->_M_starts[__s]);
243 }
244 }
f9985df5
JS
245 };
246
338311e5
PC
247 template<bool __stable, typename _RAIter, typename _Compare>
248 struct __possibly_stable_sort
249 { };
f9985df5 250
338311e5
PC
251 template<typename _RAIter, typename _Compare>
252 struct __possibly_stable_sort<true, _RAIter, _Compare>
f9985df5 253 {
338311e5
PC
254 void operator()(const _RAIter& __begin,
255 const _RAIter& __end, _Compare& __comp) const
256 { __gnu_sequential::stable_sort(__begin, __end, __comp); }
257 };
f9985df5 258
338311e5
PC
259 template<typename _RAIter, typename _Compare>
260 struct __possibly_stable_sort<false, _RAIter, _Compare>
f9985df5 261 {
338311e5
PC
262 void operator()(const _RAIter __begin,
263 const _RAIter __end, _Compare& __comp) const
264 { __gnu_sequential::sort(__begin, __end, __comp); }
265 };
266
57766351 267 template<bool __stable, typename _Seq_RAIter,
338311e5 268 typename _RAIter, typename _Compare,
57766351 269 typename _DiffType>
338311e5
PC
270 struct __possibly_stable_multiway_merge
271 { };
272
57766351 273 template<typename _Seq_RAIter, typename _RAIter,
338311e5 274 typename _Compare, typename _DiffType>
57766351 275 struct __possibly_stable_multiway_merge<true, _Seq_RAIter,
338311e5 276 _RAIter, _Compare, _DiffType>
f9985df5 277 {
57766351
JW
278 void operator()(const _Seq_RAIter& __seqs_begin,
279 const _Seq_RAIter& __seqs_end,
338311e5
PC
280 const _RAIter& __target,
281 _Compare& __comp,
282 _DiffType __length_am) const
77d16198
PC
283 { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
284 __length_am, __comp, sequential_tag()); }
338311e5 285 };
f9985df5 286
57766351 287 template<typename _Seq_RAIter, typename _RAIter,
338311e5 288 typename _Compare, typename _DiffType>
57766351 289 struct __possibly_stable_multiway_merge<false, _Seq_RAIter,
338311e5
PC
290 _RAIter, _Compare, _DiffType>
291 {
57766351
JW
292 void operator()(const _Seq_RAIter& __seqs_begin,
293 const _Seq_RAIter& __seqs_end,
1acba85b
JS
294 const _RAIter& __target,
295 _Compare& __comp,
338311e5 296 _DiffType __length_am) const
77d16198
PC
297 { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
298 __comp, sequential_tag()); }
338311e5
PC
299 };
300
301 /** @brief PMWMS code executed by each thread.
302 * @param __sd Pointer to algorithm data.
303 * @param __comp Comparator.
304 */
305 template<bool __stable, bool __exact, typename _RAIter,
306 typename _Compare>
307 void
308 parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
309 _Compare& __comp)
310 {
311 typedef std::iterator_traits<_RAIter> _TraitsType;
312 typedef typename _TraitsType::value_type _ValueType;
313 typedef typename _TraitsType::difference_type _DifferenceType;
f9985df5 314
338311e5
PC
315 _ThreadIndex __iam = omp_get_thread_num();
316
317 // Length of this thread's chunk, before merging.
77d16198
PC
318 _DifferenceType __length_local =
319 __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
338311e5
PC
320
321 // Sort in temporary storage, leave space for sentinel.
322
323 typedef _ValueType* _SortingPlacesIterator;
324
325 __sd->_M_temporary[__iam] =
326 static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
327 * (__length_local + 1)));
328
329 // Copy there.
330 std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
331 __sd->_M_source + __sd->_M_starts[__iam]
332 + __length_local,
333 __sd->_M_temporary[__iam]);
334
335 __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
15ac3c72 336 (__sd->_M_temporary[__iam],
338311e5 337 __sd->_M_temporary[__iam] + __length_local,
15ac3c72 338 __comp);
e683ee2a 339
338311e5
PC
340 // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
341 // __sd->_M_temporary[__iam] + __length_local.
e683ee2a 342
338311e5 343 // No barrier here: Synchronization is done by the splitting routine.
e683ee2a 344
338311e5 345 _DifferenceType __num_samples =
54384f7f 346 _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
77d16198 347 _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
1acba85b 348 (__iam, __sd, __comp, __num_samples);
c2ba9709 349
338311e5
PC
350 // Offset from __target __begin, __length after merging.
351 _DifferenceType __offset = 0, __length_am = 0;
352 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
353 {
354 __length_am += (__sd->_M_pieces[__iam][__s]._M_end
355 - __sd->_M_pieces[__iam][__s]._M_begin);
356 __offset += __sd->_M_pieces[__iam][__s]._M_begin;
357 }
c2ba9709 358
338311e5 359 typedef std::vector<
77d16198 360 std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
4459d22e 361 _SeqVector;
77d16198 362 _SeqVector __seqs(__sd->_M_num_threads);
c2ba9709 363
8b0c13a8 364 for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
338311e5 365 {
77d16198
PC
366 __seqs[__s] =
367 std::make_pair(__sd->_M_temporary[__s]
368 + __sd->_M_pieces[__iam][__s]._M_begin,
369 __sd->_M_temporary[__s]
370 + __sd->_M_pieces[__iam][__s]._M_end);
338311e5 371 }
c2ba9709 372
338311e5 373 __possibly_stable_multiway_merge<
77d16198
PC
374 __stable, typename _SeqVector::iterator,
375 _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
376 __sd->_M_source + __offset, __comp,
377 __length_am);
c2ba9709 378
338311e5 379# pragma omp barrier
c2ba9709 380
0ecca7a6
PC
381 for (_DifferenceType __i = 0; __i < __length_local; ++__i)
382 __sd->_M_temporary[__iam][__i].~_ValueType();
338311e5
PC
383 ::operator delete(__sd->_M_temporary[__iam]);
384 }
c2ba9709 385
338311e5
PC
386 /** @brief PMWMS main call.
387 * @param __begin Begin iterator of sequence.
388 * @param __end End iterator of sequence.
389 * @param __comp Comparator.
338311e5
PC
390 * @param __num_threads Number of threads to use.
391 */
392 template<bool __stable, bool __exact, typename _RAIter,
1acba85b 393 typename _Compare>
338311e5
PC
394 void
395 parallel_sort_mwms(_RAIter __begin, _RAIter __end,
396 _Compare __comp,
397 _ThreadIndex __num_threads)
398 {
399 _GLIBCXX_CALL(__end - __begin)
e683ee2a 400
338311e5
PC
401 typedef std::iterator_traits<_RAIter> _TraitsType;
402 typedef typename _TraitsType::value_type _ValueType;
403 typedef typename _TraitsType::difference_type _DifferenceType;
c2ba9709 404
338311e5 405 _DifferenceType __n = __end - __begin;
f9985df5 406
338311e5
PC
407 if (__n <= 1)
408 return;
c2ba9709 409
338311e5
PC
410 // at least one element per thread
411 if (__num_threads > __n)
412 __num_threads = static_cast<_ThreadIndex>(__n);
c2ba9709 413
338311e5
PC
414 // shared variables
415 _PMWMSSortingData<_RAIter> __sd;
77d16198 416 _DifferenceType* __starts;
0ecca7a6 417 _DifferenceType __size;
c2ba9709 418
338311e5 419# pragma omp parallel num_threads(__num_threads)
e683ee2a 420 {
15ac3c72 421 __num_threads = omp_get_num_threads(); //no more threads than requested
e683ee2a
JS
422
423# pragma omp single
338311e5
PC
424 {
425 __sd._M_num_threads = __num_threads;
426 __sd._M_source = __begin;
427
428 __sd._M_temporary = new _ValueType*[__num_threads];
429
430 if (!__exact)
431 {
0ecca7a6 432 __size =
338311e5
PC
433 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
434 * __num_threads;
435 __sd._M_samples = static_cast<_ValueType*>
436 (::operator new(__size * sizeof(_ValueType)));
437 }
438 else
8fc81078 439 __sd._M_samples = 0;
338311e5
PC
440
441 __sd._M_offsets = new _DifferenceType[__num_threads - 1];
442 __sd._M_pieces
443 = new std::vector<_Piece<_DifferenceType> >[__num_threads];
8b0c13a8 444 for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
338311e5 445 __sd._M_pieces[__s].resize(__num_threads);
77d16198 446 __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
338311e5
PC
447
448 _DifferenceType __chunk_length = __n / __num_threads;
449 _DifferenceType __split = __n % __num_threads;
450 _DifferenceType __pos = 0;
8b0c13a8 451 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
338311e5 452 {
77d16198
PC
453 __starts[__i] = __pos;
454 __pos += ((__i < __split)
455 ? (__chunk_length + 1) : __chunk_length);
338311e5 456 }
77d16198 457 __starts[__num_threads] = __pos;
338311e5 458 } //single
e683ee2a
JS
459
460 // Now sort in parallel.
1acba85b 461 parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
e683ee2a 462 } //parallel
c2ba9709 463
77d16198 464 delete[] __starts;
338311e5 465 delete[] __sd._M_temporary;
c2ba9709 466
338311e5 467 if (!__exact)
0ecca7a6
PC
468 {
469 for (_DifferenceType __i = 0; __i < __size; ++__i)
470 __sd._M_samples[__i].~_ValueType();
471 ::operator delete(__sd._M_samples);
472 }
c2ba9709 473
338311e5
PC
474 delete[] __sd._M_offsets;
475 delete[] __sd._M_pieces;
476 }
477
e683ee2a 478} //namespace __gnu_parallel
c2ba9709 479
cbcd1e45 480#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */