]> git.ipfire.org Git - thirdparty/gcc.git/blob - libstdc++-v3/include/parallel/multiway_merge.h
multiway_merge.h: Reformat to 80 columns; adjust some inline specifiers; other minor...
[thirdparty/gcc.git] / libstdc++-v3 / include / parallel / multiway_merge.h
1 // -*- C++ -*-
2
3 // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 2, or (at your option) any later
9 // version.
10
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15
16 // You should have received a copy of the GNU General Public License
17 // along with this library; see the file COPYING. If not, write to
18 // the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19 // MA 02111-1307, USA.
20
21 // As a special exception, you may use this file as part of a free
22 // software library without restriction. Specifically, if other files
23 // instantiate templates or use macros or inline functions from this
24 // file, or you compile this file and link it with other files to
25 // produce an executable, this file does not by itself cause the
26 // resulting executable to be covered by the GNU General Public
27 // License. This exception does not however invalidate any other
28 // reasons why the executable file might be covered by the GNU General
29 // Public License.
30
31 /** @file parallel/multiway_merge.h
32 * @brief Implementation of sequential and parallel multiway merge.
33 *
34 * Explanations on the high-speed merging routines in the appendix of
35 *
36 * P. Sanders.
37 * Fast priority queues for cached memory.
38 * ACM Journal of Experimental Algorithmics, 5, 2000.
39 *
40 * This file is a GNU parallel extension to the Standard C++ Library.
41 */
42
43 // Written by Johannes Singler.
44
45 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
46 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
47
48 #include <vector>
49
50 #include <bits/stl_algo.h>
51 #include <parallel/features.h>
52 #include <parallel/parallel.h>
53 #include <parallel/merge.h>
54 #include <parallel/losertree.h>
55 #if _GLIBCXX_ASSERTIONS
56 #include <parallel/checkers.h>
57 #endif
58
59 /** @brief Length of a sequence described by a pair of iterators. */
60 #define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first)
61
62 // XXX need iterator typedefs
63 namespace __gnu_parallel
64 {
65 template<typename RandomAccessIterator, typename Comparator>
66 class guarded_iterator;
67
68 template<typename RandomAccessIterator, typename Comparator>
69 inline bool
70 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
71 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
72
73 template<typename RandomAccessIterator, typename Comparator>
74 inline bool
75 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
76 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
77
78 /** @brief Iterator wrapper supporting an implicit supremum at the end
79 of the sequence, dominating all comparisons.
80 * Deriving from RandomAccessIterator is not possible since
81 * RandomAccessIterator need not be a class.
82 */
83 template<typename RandomAccessIterator, typename Comparator>
84 class guarded_iterator
85 {
86 private:
87 /** @brief Current iterator position. */
88 RandomAccessIterator current;
89
90 /** @brief End iterator of the sequence. */
91 RandomAccessIterator end;
92
93 /** @brief Comparator. */
94 Comparator& comp;
95
96 public:
97 /** @brief Constructor. Sets iterator to beginning of sequence.
98 * @param begin Begin iterator of sequence.
99 * @param end End iterator of sequence.
100 * @param comp Comparator provided for associated overloaded
101 * compare operators. */
102 guarded_iterator(RandomAccessIterator begin,
103 RandomAccessIterator end, Comparator& comp)
104 : current(begin), end(end), comp(comp)
105 { }
106
107 /** @brief Pre-increment operator.
108 * @return This. */
109 guarded_iterator<RandomAccessIterator, Comparator>&
110 operator++()
111 {
112 ++current;
113 return *this;
114 }
115
116 /** @brief Dereference operator.
117 * @return Referenced element. */
118 typename std::iterator_traits<RandomAccessIterator>::value_type
119 operator*()
120 { return *current; }
121
122 /** @brief Convert to wrapped iterator.
123 * @return Wrapped iterator. */
124 operator RandomAccessIterator()
125 { return current; }
126
127 friend bool
128 operator< <RandomAccessIterator, Comparator>(
129 guarded_iterator<RandomAccessIterator, Comparator>& bi1,
130 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
131
132 friend bool
133 operator<= <RandomAccessIterator, Comparator>(
134 guarded_iterator<RandomAccessIterator, Comparator>& bi1,
135 guarded_iterator<RandomAccessIterator, Comparator>& bi2);
136 };
137
138 /** @brief Compare two elements referenced by guarded iterators.
139 * @param bi1 First iterator.
140 * @param bi2 Second iterator.
141 * @return @c True if less. */
142 template<typename RandomAccessIterator, typename Comparator>
143 inline bool
144 operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
145 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
146 {
147 if (bi1.current == bi1.end) //bi1 is sup
148 return bi2.current == bi2.end; //bi2 is not sup
149 if (bi2.current == bi2.end) //bi2 is sup
150 return true;
151 return (bi1.comp)(*bi1, *bi2); //normal compare
152 }
153
154 /** @brief Compare two elements referenced by guarded iterators.
155 * @param bi1 First iterator.
156 * @param bi2 Second iterator.
157 * @return @c True if less equal. */
158 template<typename RandomAccessIterator, typename Comparator>
159 inline bool
160 operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
161 guarded_iterator<RandomAccessIterator, Comparator>& bi2)
162 {
163 if (bi2.current == bi2.end) //bi1 is sup
164 return bi1.current != bi1.end; //bi2 is not sup
165 if (bi1.current == bi1.end) //bi2 is sup
166 return false;
167 return !(bi1.comp)(*bi2, *bi1); //normal compare
168 }
169
170 template<typename RandomAccessIterator, typename Comparator>
171 class unguarded_iterator;
172
173 template<typename RandomAccessIterator, typename Comparator>
174 inline bool
175 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
176 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
177
178 template<typename RandomAccessIterator, typename Comparator>
179 inline bool
180 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
181 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
182
183 template<typename RandomAccessIterator, typename Comparator>
184 class unguarded_iterator
185 {
186 private:
187 /** @brief Current iterator position. */
188 RandomAccessIterator& current;
189 /** @brief Comparator. */
190 mutable Comparator& comp;
191
192 public:
193 /** @brief Constructor. Sets iterator to beginning of sequence.
194 * @param begin Begin iterator of sequence.
195 * @param end Unused, only for compatibility.
196 * @param comp Unused, only for compatibility. */
197 unguarded_iterator(RandomAccessIterator begin,
198 RandomAccessIterator end, Comparator& comp)
199 : current(begin), comp(comp)
200 { }
201
202 /** @brief Pre-increment operator.
203 * @return This. */
204 unguarded_iterator<RandomAccessIterator, Comparator>&
205 operator++()
206 {
207 ++current;
208 return *this;
209 }
210
211 /** @brief Dereference operator.
212 * @return Referenced element. */
213 typename std::iterator_traits<RandomAccessIterator>::value_type
214 operator*()
215 { return *current; }
216
217 /** @brief Convert to wrapped iterator.
218 * @return Wrapped iterator. */
219 operator RandomAccessIterator()
220 { return current; }
221
222 friend bool
223 operator< <RandomAccessIterator, Comparator>(
224 unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
225 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
226
227 friend bool
228 operator<= <RandomAccessIterator, Comparator>(
229 unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
230 unguarded_iterator<RandomAccessIterator, Comparator>& bi2);
231 };
232
233 /** @brief Compare two elements referenced by unguarded iterators.
234 * @param bi1 First iterator.
235 * @param bi2 Second iterator.
236 * @return @c True if less. */
237 template<typename RandomAccessIterator, typename Comparator>
238 inline bool
239 operator<(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
240 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
241 {
242 // Normal compare.
243 return (bi1.comp)(*bi1, *bi2);
244 }
245
246 /** @brief Compare two elements referenced by unguarded iterators.
247 * @param bi1 First iterator.
248 * @param bi2 Second iterator.
249 * @return @c True if less equal. */
250 template<typename RandomAccessIterator, typename Comparator>
251 inline bool
252 operator<=(unguarded_iterator<RandomAccessIterator, Comparator>& bi1,
253 unguarded_iterator<RandomAccessIterator, Comparator>& bi2)
254 {
255 // Normal compare.
256 return !(bi1.comp)(*bi2, *bi1);
257 }
258
259 /** Prepare a set of sequences to be merged without a (end) guard
260 * @param seqs_begin
261 * @param seqs_end
262 * @param comp
263 * @param min_sequence
264 * @param stable
265 * @pre (seqs_end - seqs_begin > 0) */
266 template<typename RandomAccessIteratorIterator, typename Comparator>
267 typename std::iterator_traits<
268 typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
269 ::first_type>::difference_type
270 prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
271 RandomAccessIteratorIterator seqs_end, Comparator comp,
272 int& min_sequence, bool stable)
273 {
274 _GLIBCXX_CALL(seqs_end - seqs_begin)
275
276 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
277 ::value_type::first_type
278 RandomAccessIterator1;
279 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
280 value_type;
281 typedef typename std::iterator_traits<RandomAccessIterator1>
282 ::difference_type
283 difference_type;
284
285 if ((*seqs_begin).first == (*seqs_begin).second)
286 {
287 // Empty sequence found, it's the first one.
288 min_sequence = 0;
289 return -1;
290 }
291
292 // Last element in sequence.
293 value_type min = *((*seqs_begin).second - 1);
294 min_sequence = 0;
295 for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s)
296 {
297 if ((*s).first == (*s).second)
298 {
299 // Empty sequence found.
300 min_sequence = static_cast<int>(s - seqs_begin);
301 return -1;
302 }
303
304 // Last element in sequence.
305 const value_type& v = *((*s).second - 1);
306 if (comp(v, min)) //strictly smaller
307 {
308 min = v;
309 min_sequence = static_cast<int>(s - seqs_begin);
310 }
311 }
312
313 difference_type overhang_size = 0;
314
315 int s = 0;
316 for (s = 0; s <= min_sequence; ++s)
317 {
318 RandomAccessIterator1 split;
319 if (stable)
320 split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
321 min, comp);
322 else
323 split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
324 min, comp);
325
326 overhang_size += seqs_begin[s].second - split;
327 }
328
329 for (; s < (seqs_end - seqs_begin); ++s)
330 {
331 RandomAccessIterator1 split = std::lower_bound(
332 seqs_begin[s].first, seqs_begin[s].second, min, comp);
333 overhang_size += seqs_begin[s].second - split;
334 }
335
336 // So many elements will be left over afterwards.
337 return overhang_size;
338 }
339
340 /** Prepare a set of sequences to be merged with a (end) guard (sentinel)
341 * @param seqs_begin
342 * @param seqs_end
343 * @param comp */
344 template<typename RandomAccessIteratorIterator, typename Comparator>
345 typename std::iterator_traits<typename std::iterator_traits<
346 RandomAccessIteratorIterator>::value_type::first_type>::difference_type
347 prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
348 RandomAccessIteratorIterator seqs_end,
349 Comparator comp)
350 {
351 _GLIBCXX_CALL(seqs_end - seqs_begin)
352
353 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
354 ::value_type::first_type
355 RandomAccessIterator1;
356 typedef typename std::iterator_traits<RandomAccessIterator1>
357 ::value_type
358 value_type;
359 typedef typename std::iterator_traits<RandomAccessIterator1>
360 ::difference_type
361 difference_type;
362
363 // Last element in sequence.
364 value_type* max = NULL;
365 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
366 {
367 if ((*s).first == (*s).second)
368 continue;
369
370 // Last element in sequence.
371 value_type& v = *((*s).second - 1);
372
373 // Strictly greater.
374 if (!max || comp(*max, v))
375 max = &v;
376 }
377
378 difference_type overhang_size = 0;
379 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
380 {
381 RandomAccessIterator1 split =
382 std::lower_bound((*s).first, (*s).second, *max, comp);
383 overhang_size += (*s).second - split;
384
385 // Set sentinel.
386 *((*s).second) = *max;
387 }
388
389 // So many elements will be left over afterwards.
390 return overhang_size;
391 }
392
393 /** @brief Highly efficient 3-way merging procedure.
394 * @param seqs_begin Begin iterator of iterator pair input sequence.
395 * @param seqs_end End iterator of iterator pair input sequence.
396 * @param target Begin iterator out output sequence.
397 * @param comp Comparator.
398 * @param length Maximum length to merge.
399 * @param stable Unused, stable anyway.
400 * @return End iterator of output sequence. */
401 template<template<typename RAI, typename C> class iterator,
402 typename RandomAccessIteratorIterator,
403 typename RandomAccessIterator3,
404 typename _DifferenceTp,
405 typename Comparator>
406 RandomAccessIterator3
407 multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin,
408 RandomAccessIteratorIterator seqs_end,
409 RandomAccessIterator3 target,
410 Comparator comp, _DifferenceTp length,
411 bool stable)
412 {
413 _GLIBCXX_CALL(length);
414
415 typedef _DifferenceTp difference_type;
416
417 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
418 ::value_type::first_type
419 RandomAccessIterator1;
420 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
421 value_type;
422
423 if (length == 0)
424 return target;
425
426 iterator<RandomAccessIterator1, Comparator>
427 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
428 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
429 seq2(seqs_begin[2].first, seqs_begin[2].second, comp);
430
431 if (seq0 <= seq1)
432 {
433 if (seq1 <= seq2)
434 goto s012;
435 else
436 if (seq2 < seq0)
437 goto s201;
438 else
439 goto s021;
440 }
441 else
442 {
443 if (seq1 <= seq2)
444 {
445 if (seq0 <= seq2)
446 goto s102;
447 else
448 goto s120;
449 }
450 else
451 goto s210;
452 }
453
454 #define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\
455 s ## a ## b ## c : \
456 *target = *seq ## a; \
457 ++target; \
458 --length; \
459 ++seq ## a; \
460 if (length == 0) goto finish; \
461 if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
462 if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
463 goto s ## b ## c ## a;
464
465 _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
466 _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
467 _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 0, 1, < , < );
468 _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 0, 2, < , <=);
469 _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 2, 1, <=, <=);
470 _GLIBCXX_PARALLEL_MERGE_3_CASE(2, 1, 0, < , < );
471
472 #undef _GLIBCXX_PARALLEL_MERGE_3_CASE
473
474 finish:
475 ;
476
477 seqs_begin[0].first = seq0;
478 seqs_begin[1].first = seq1;
479 seqs_begin[2].first = seq2;
480
481 return target;
482 }
483
484 template<typename RandomAccessIteratorIterator,
485 typename RandomAccessIterator3,
486 typename _DifferenceTp,
487 typename Comparator>
488 RandomAccessIterator3
489 multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
490 RandomAccessIteratorIterator seqs_end,
491 RandomAccessIterator3 target,
492 Comparator comp,
493 _DifferenceTp length, bool stable)
494 {
495 _GLIBCXX_CALL(length);
496
497 typedef _DifferenceTp difference_type;
498 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
499 ::value_type::first_type
500 RandomAccessIterator1;
501 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
502 value_type;
503
504 int min_seq;
505 RandomAccessIterator3 target_end;
506
507 // Stable anyway.
508 difference_type overhang =
509 prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
510
511 difference_type total_length = 0;
512 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
513 total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
514
515 if (overhang != -1)
516 {
517 difference_type unguarded_length =
518 std::min(length, total_length - overhang);
519 target_end = multiway_merge_3_variant<unguarded_iterator>
520 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
521 overhang = length - unguarded_length;
522 }
523 else
524 {
525 // Empty sequence found.
526 overhang = length;
527 target_end = target;
528 }
529
530 #if _GLIBCXX_ASSERTIONS
531 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
532 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
533 #endif
534
535 switch (min_seq)
536 {
537 case 0:
538 // Iterators will be advanced accordingly.
539 target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
540 seqs_begin[2].first, seqs_begin[2].second,
541 target_end, overhang, comp);
542 break;
543 case 1:
544 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
545 seqs_begin[2].first, seqs_begin[2].second,
546 target_end, overhang, comp);
547 break;
548 case 2:
549 target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
550 seqs_begin[1].first, seqs_begin[1].second,
551 target_end, overhang, comp);
552 break;
553 default:
554 _GLIBCXX_PARALLEL_ASSERT(false);
555 }
556
557 #if _GLIBCXX_ASSERTIONS
558 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
559 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
560 #endif
561
562 return target_end;
563 }
564
565 /** @brief Highly efficient 4-way merging procedure.
566 * @param seqs_begin Begin iterator of iterator pair input sequence.
567 * @param seqs_end End iterator of iterator pair input sequence.
568 * @param target Begin iterator out output sequence.
569 * @param comp Comparator.
570 * @param length Maximum length to merge.
571 * @param stable Unused, stable anyway.
572 * @return End iterator of output sequence. */
573 template<template<typename RAI, typename C> class iterator,
574 typename RandomAccessIteratorIterator,
575 typename RandomAccessIterator3,
576 typename _DifferenceTp,
577 typename Comparator>
578 RandomAccessIterator3
579 multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
580 RandomAccessIteratorIterator seqs_end,
581 RandomAccessIterator3 target,
582 Comparator comp, _DifferenceTp length, bool stable)
583 {
584 _GLIBCXX_CALL(length);
585 typedef _DifferenceTp difference_type;
586
587 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
588 ::value_type::first_type
589 RandomAccessIterator1;
590 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
591 value_type;
592
593 iterator<RandomAccessIterator1, Comparator>
594 seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
595 seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
596 seq2(seqs_begin[2].first, seqs_begin[2].second, comp),
597 seq3(seqs_begin[3].first, seqs_begin[3].second, comp);
598
599 #define _GLIBCXX_PARALLEL_DECISION(a,b,c,d) { \
600 if (seq ## d < seq ## a) goto s ## d ## a ## b ## c; \
601 if (seq ## d < seq ## b) goto s ## a ## d ## b ## c; \
602 if (seq ## d < seq ## c) goto s ## a ## b ## d ## c; \
603 goto s ## a ## b ## c ## d; }
604
605 if (seq0 <= seq1)
606 {
607 if (seq1 <= seq2)
608 _GLIBCXX_PARALLEL_DECISION(0,1,2,3)
609 else
610 if (seq2 < seq0)
611 _GLIBCXX_PARALLEL_DECISION(2,0,1,3)
612 else
613 _GLIBCXX_PARALLEL_DECISION(0,2,1,3)
614 }
615 else
616 {
617 if (seq1 <= seq2)
618 {
619 if (seq0 <= seq2)
620 _GLIBCXX_PARALLEL_DECISION(1,0,2,3)
621 else
622 _GLIBCXX_PARALLEL_DECISION(1,2,0,3)
623 }
624 else
625 _GLIBCXX_PARALLEL_DECISION(2,1,0,3)
626 }
627
628 #define _GLIBCXX_PARALLEL_MERGE_4_CASE(a,b,c,d,c0,c1,c2) \
629 s ## a ## b ## c ## d: \
630 if (length == 0) goto finish; \
631 *target = *seq ## a; \
632 ++target; \
633 --length; \
634 ++seq ## a; \
635 if (seq ## a c0 seq ## b) goto s ## a ## b ## c ## d; \
636 if (seq ## a c1 seq ## c) goto s ## b ## a ## c ## d; \
637 if (seq ## a c2 seq ## d) goto s ## b ## c ## a ## d; \
638 goto s ## b ## c ## d ## a;
639
640 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
641 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
642 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 1, 3, <=, <=, <=);
643 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 2, 3, 1, <=, <=, <=);
644 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 1, 2, <=, <=, <=);
645 _GLIBCXX_PARALLEL_MERGE_4_CASE(0, 3, 2, 1, <=, <=, <=);
646 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 2, 3, < , <=, <=);
647 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 0, 3, 2, < , <=, <=);
648 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 0, 3, <=, < , <=);
649 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 2, 3, 0, <=, <=, < );
650 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 0, 2, <=, < , <=);
651 _GLIBCXX_PARALLEL_MERGE_4_CASE(1, 3, 2, 0, <=, <=, < );
652 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 1, 3, < , < , <=);
653 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 0, 3, 1, < , <=, < );
654 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 0, 3, < , < , <=);
655 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 1, 3, 0, < , <=, < );
656 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 0, 1, <=, < , < );
657 _GLIBCXX_PARALLEL_MERGE_4_CASE(2, 3, 1, 0, <=, < , < );
658 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 1, 2, < , < , < );
659 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 0, 2, 1, < , < , < );
660 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 0, 2, < , < , < );
661 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 1, 2, 0, < , < , < );
662 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 0, 1, < , < , < );
663 _GLIBCXX_PARALLEL_MERGE_4_CASE(3, 2, 1, 0, < , < , < );
664
665 #undef _GLIBCXX_PARALLEL_MERGE_4_CASE
666 #undef _GLIBCXX_PARALLEL_DECISION
667
668 finish:
669 ;
670
671 seqs_begin[0].first = seq0;
672 seqs_begin[1].first = seq1;
673 seqs_begin[2].first = seq2;
674 seqs_begin[3].first = seq3;
675
676 return target;
677 }
678
679 template<typename RandomAccessIteratorIterator,
680 typename RandomAccessIterator3,
681 typename _DifferenceTp,
682 typename Comparator>
683 RandomAccessIterator3
684 multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
685 RandomAccessIteratorIterator seqs_end,
686 RandomAccessIterator3 target,
687 Comparator comp,
688 _DifferenceTp length, bool stable)
689 {
690 _GLIBCXX_CALL(length);
691 typedef _DifferenceTp difference_type;
692
693 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
694 ::value_type::first_type
695 RandomAccessIterator1;
696 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
697 value_type;
698
699 int min_seq;
700 RandomAccessIterator3 target_end;
701
702 // Stable anyway.
703 difference_type overhang =
704 prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
705
706 difference_type total_length = 0;
707 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
708 total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
709
710 if (overhang != -1)
711 {
712 difference_type unguarded_length =
713 std::min(length, total_length - overhang);
714 target_end = multiway_merge_4_variant<unguarded_iterator>
715 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
716 overhang = length - unguarded_length;
717 }
718 else
719 {
720 // Empty sequence found.
721 overhang = length;
722 target_end = target;
723 }
724
725 #if _GLIBCXX_ASSERTIONS
726 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
727 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
728 #endif
729
730 std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
731 one_missing(seqs_begin, seqs_end);
732 one_missing.erase(one_missing.begin() + min_seq); //remove
733
734 target_end = multiway_merge_3_variant<guarded_iterator>(
735 one_missing.begin(), one_missing.end(),
736 target_end, comp, overhang, stable);
737
738 // Insert back again.
739 one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
740 // Write back modified iterators.
741 copy(one_missing.begin(), one_missing.end(), seqs_begin);
742
743 #if _GLIBCXX_ASSERTIONS
744 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
745 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
746 #endif
747
748 return target_end;
749 }
750
751 /** @brief Basic multi-way merging procedure.
752 *
753 * The head elements are kept in a sorted array, new heads are
754 * inserted linearly.
755 * @param seqs_begin Begin iterator of iterator pair input sequence.
756 * @param seqs_end End iterator of iterator pair input sequence.
757 * @param target Begin iterator out output sequence.
758 * @param comp Comparator.
759 * @param length Maximum length to merge.
760 * @param stable Stable merging incurs a performance penalty.
761 * @return End iterator of output sequence.
762 */
763 template<typename RandomAccessIteratorIterator,
764 typename RandomAccessIterator3,
765 typename _DifferenceTp,
766 typename Comparator>
767 RandomAccessIterator3
768 multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
769 RandomAccessIteratorIterator seqs_end,
770 RandomAccessIterator3 target,
771 Comparator comp, _DifferenceTp length, bool stable)
772 {
773 _GLIBCXX_CALL(length)
774
775 typedef _DifferenceTp difference_type;
776 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
777 ::value_type::first_type
778 RandomAccessIterator1;
779 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
780 value_type;
781
782 int k = static_cast<int>(seqs_end - seqs_begin);
783 int nrs; // Number of remaining sequences.
784
785 // Avoid default constructor.
786 value_type* fe = static_cast<value_type*>(
787 ::operator new(sizeof(value_type) * k)); // Front elements.
788 int* source = new int[k];
789 difference_type total_length = 0;
790
791 // Write entries into queue.
792 nrs = 0;
793 for (int pi = 0; pi < k; ++pi)
794 {
795 if (seqs_begin[pi].first != seqs_begin[pi].second)
796 {
797 ::new(&(fe[nrs])) value_type(*(seqs_begin[pi].first));
798 source[nrs] = pi;
799 ++nrs;
800 total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]);
801 }
802 }
803
804 if (stable)
805 {
806 // Bubble sort fe and source by fe.
807 for (int k = 0; k < nrs - 1; ++k)
808 for (int pi = nrs - 1; pi > k; --pi)
809 if (comp(fe[pi], fe[pi - 1]) ||
810 (!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1]))
811 {
812 std::swap(fe[pi - 1], fe[pi]);
813 std::swap(source[pi - 1], source[pi]);
814 }
815 }
816 else
817 {
818 for (int k = 0; k < nrs - 1; ++k)
819 for (int pi = nrs - 1; pi > k; --pi)
820 if (comp(fe[pi], fe[pi-1]))
821 {
822 std::swap(fe[pi-1], fe[pi]);
823 std::swap(source[pi-1], source[pi]);
824 }
825 }
826
827 // Iterate.
828 if (stable)
829 {
830 int j;
831 while (nrs > 0 && length > 0)
832 {
833 if (source[0] < source[1])
834 {
835 // fe[0] <= fe[1]
836 while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0)
837 {
838 *target = fe[0];
839 ++target;
840 ++(seqs_begin[source[0]].first);
841 --length;
842 if (seqs_begin[source[0]].first
843 == seqs_begin[source[0]].second)
844 {
845 // Move everything to the left.
846 for (int s = 0; s < nrs - 1; ++s)
847 {
848 fe[s] = fe[s + 1];
849 source[s] = source[s + 1];
850 }
851 fe[nrs - 1].~value_type(); //Destruct explicitly.
852 --nrs;
853 break;
854 }
855 else
856 fe[0] = *(seqs_begin[source[0]].first);
857 }
858 }
859 else
860 {
861 // fe[0] < fe[1]
862 while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0)
863 {
864 *target = fe[0];
865 ++target;
866 ++(seqs_begin[source[0]].first);
867 --length;
868 if (seqs_begin[source[0]].first
869 == seqs_begin[source[0]].second)
870 {
871 for (int s = 0; s < nrs - 1; ++s)
872 {
873 fe[s] = fe[s + 1];
874 source[s] = source[s + 1];
875 }
876 fe[nrs - 1].~value_type(); //Destruct explicitly.
877 --nrs;
878 break;
879 }
880 else
881 fe[0] = *(seqs_begin[source[0]].first);
882 }
883 }
884
885 // Sink down.
886 j = 1;
887 while ((j < nrs) && (comp(fe[j], fe[j - 1])
888 || (!comp(fe[j - 1], fe[j])
889 && (source[j] < source[j - 1]))))
890 {
891 std::swap(fe[j - 1], fe[j]);
892 std::swap(source[j - 1], source[j]);
893 ++j;
894 }
895 }
896 }
897 else
898 {
899 int j;
900 while (nrs > 0 && length > 0)
901 {
902 // fe[0] <= fe[1]
903 while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0)
904 {
905 *target = fe[0];
906 ++target;
907 ++seqs_begin[source[0]].first;
908 --length;
909 if (seqs_begin[source[0]].first
910 == seqs_begin[source[0]].second)
911 {
912 for (int s = 0; s < (nrs - 1); ++s)
913 {
914 fe[s] = fe[s + 1];
915 source[s] = source[s + 1];
916 }
917 fe[nrs - 1].~value_type(); //Destruct explicitly.
918 --nrs;
919 break;
920 }
921 else
922 fe[0] = *(seqs_begin[source[0]].first);
923 }
924
925 // Sink down.
926 j = 1;
927 while ((j < nrs) && comp(fe[j], fe[j - 1]))
928 {
929 std::swap(fe[j - 1], fe[j]);
930 std::swap(source[j - 1], source[j]);
931 ++j;
932 }
933 }
934 }
935
936 ::operator delete(fe); //Destructors already called.
937 delete[] source;
938
939 return target;
940 }
941
942 /** @brief Multi-way merging procedure for a high branching factor,
943 * guarded case.
944 *
945 * The head elements are kept in a loser tree.
946 * @param seqs_begin Begin iterator of iterator pair input sequence.
947 * @param seqs_end End iterator of iterator pair input sequence.
948 * @param target Begin iterator out output sequence.
949 * @param comp Comparator.
950 * @param length Maximum length to merge.
951 * @param stable Stable merging incurs a performance penalty.
952 * @return End iterator of output sequence.
953 */
954 template<typename LT,
955 typename RandomAccessIteratorIterator,
956 typename RandomAccessIterator3,
957 typename _DifferenceTp,
958 typename Comparator>
959 RandomAccessIterator3
960 multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
961 RandomAccessIteratorIterator seqs_end,
962 RandomAccessIterator3 target,
963 Comparator comp,
964 _DifferenceTp length, bool stable)
965 {
966 _GLIBCXX_CALL(length)
967
968 typedef _DifferenceTp difference_type;
969 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
970 ::value_type::first_type
971 RandomAccessIterator1;
972 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
973 value_type;
974
975 int k = static_cast<int>(seqs_end - seqs_begin);
976
977 LT lt(k, comp);
978
979 difference_type total_length = 0;
980
981 // Default value for potentially non-default-constructible types.
982 value_type* arbitrary_element = NULL;
983
984 for (int t = 0; t < k; ++t)
985 {
986 if(arbitrary_element == NULL
987 && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
988 arbitrary_element = &(*seqs_begin[t].first);
989 total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
990 }
991
992 if(total_length == 0)
993 return target;
994
995 for (int t = 0; t < k; ++t)
996 {
997 if (stable)
998 {
999 if (seqs_begin[t].first == seqs_begin[t].second)
1000 lt.insert_start_stable(*arbitrary_element, t, true);
1001 else
1002 lt.insert_start_stable(*seqs_begin[t].first, t, false);
1003 }
1004 else
1005 {
1006 if (seqs_begin[t].first == seqs_begin[t].second)
1007 lt.insert_start(*arbitrary_element, t, true);
1008 else
1009 lt.insert_start(*seqs_begin[t].first, t, false);
1010 }
1011 }
1012
1013 if (stable)
1014 lt.init_stable();
1015 else
1016 lt.init();
1017
1018 total_length = std::min(total_length, length);
1019
1020 int source;
1021
1022 if (stable)
1023 {
1024 for (difference_type i = 0; i < total_length; ++i)
1025 {
1026 // Take out.
1027 source = lt.get_min_source();
1028
1029 *(target++) = *(seqs_begin[source].first++);
1030
1031 // Feed.
1032 if (seqs_begin[source].first == seqs_begin[source].second)
1033 lt.delete_min_insert_stable(*arbitrary_element, true);
1034 else
1035 // Replace from same source.
1036 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1037
1038 }
1039 }
1040 else
1041 {
1042 for (difference_type i = 0; i < total_length; ++i)
1043 {
1044 //take out
1045 source = lt.get_min_source();
1046
1047 *(target++) = *(seqs_begin[source].first++);
1048
1049 // Feed.
1050 if (seqs_begin[source].first == seqs_begin[source].second)
1051 lt.delete_min_insert(*arbitrary_element, true);
1052 else
1053 // Replace from same source.
1054 lt.delete_min_insert(*seqs_begin[source].first, false);
1055 }
1056 }
1057
1058 return target;
1059 }
1060
1061 /** @brief Multi-way merging procedure for a high branching factor,
1062 * unguarded case.
1063 *
1064 * The head elements are kept in a loser tree.
1065 * @param seqs_begin Begin iterator of iterator pair input sequence.
1066 * @param seqs_end End iterator of iterator pair input sequence.
1067 * @param target Begin iterator out output sequence.
1068 * @param comp Comparator.
1069 * @param length Maximum length to merge.
1070 * @param stable Stable merging incurs a performance penalty.
1071 * @return End iterator of output sequence.
1072 * @pre No input will run out of elements during the merge.
1073 */
1074 template<typename LT,
1075 typename RandomAccessIteratorIterator,
1076 typename RandomAccessIterator3,
1077 typename _DifferenceTp, typename Comparator>
1078 RandomAccessIterator3
1079 multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
1080 RandomAccessIteratorIterator seqs_end,
1081 RandomAccessIterator3 target,
1082 Comparator comp,
1083 _DifferenceTp length, bool stable)
1084 {
1085 _GLIBCXX_CALL(length)
1086 typedef _DifferenceTp difference_type;
1087
1088 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1089 ::value_type::first_type
1090 RandomAccessIterator1;
1091 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1092 value_type;
1093
1094 int k = seqs_end - seqs_begin;
1095
1096 LT lt(k, comp);
1097
1098 difference_type total_length = 0;
1099
1100 for (int t = 0; t < k; ++t)
1101 {
1102 #if _GLIBCXX_ASSERTIONS
1103 _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
1104 #endif
1105 if (stable)
1106 lt.insert_start_stable(*seqs_begin[t].first, t, false);
1107 else
1108 lt.insert_start(*seqs_begin[t].first, t, false);
1109
1110 total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
1111 }
1112
1113 if (stable)
1114 lt.init_stable();
1115 else
1116 lt.init();
1117
1118 // Do not go past end.
1119 length = std::min(total_length, length);
1120
1121 int source;
1122
1123 #if _GLIBCXX_ASSERTIONS
1124 difference_type i = 0;
1125 #endif
1126
1127 if (stable)
1128 {
1129 RandomAccessIterator3 target_end = target + length;
1130 while (target < target_end)
1131 {
1132 // Take out.
1133 source = lt.get_min_source();
1134
1135 #if _GLIBCXX_ASSERTIONS
1136 _GLIBCXX_PARALLEL_ASSERT(i == 0
1137 || !comp(*(seqs_begin[source].first), *(target - 1)));
1138 #endif
1139
1140 *(target++) = *(seqs_begin[source].first++);
1141
1142 #if _GLIBCXX_ASSERTIONS
1143 _GLIBCXX_PARALLEL_ASSERT(
1144 (seqs_begin[source].first != seqs_begin[source].second)
1145 || (i == length - 1));
1146 ++i;
1147 #endif
1148 // Feed.
1149 // Replace from same source.
1150 lt.delete_min_insert_stable(*seqs_begin[source].first, false);
1151
1152 }
1153 }
1154 else
1155 {
1156 RandomAccessIterator3 target_end = target + length;
1157 while (target < target_end)
1158 {
1159 // Take out.
1160 source = lt.get_min_source();
1161
1162 #if _GLIBCXX_ASSERTIONS
1163 if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
1164 printf(" %i %i %i\n", length, i, source);
1165 _GLIBCXX_PARALLEL_ASSERT(i == 0
1166 || !comp(*(seqs_begin[source].first), *(target - 1)));
1167 #endif
1168
1169 *(target++) = *(seqs_begin[source].first++);
1170
1171 #if _GLIBCXX_ASSERTIONS
1172 if (!((seqs_begin[source].first != seqs_begin[source].second)
1173 || (i >= length - 1)))
1174 printf(" %i %i %i\n", length, i, source);
1175 _GLIBCXX_PARALLEL_ASSERT(
1176 (seqs_begin[source].first != seqs_begin[source].second)
1177 || (i >= length - 1));
1178 ++i;
1179 #endif
1180 // Feed.
1181 // Replace from same source.
1182 lt.delete_min_insert(*seqs_begin[source].first, false);
1183 }
1184 }
1185
1186 return target;
1187 }
1188
1189 template<typename RandomAccessIteratorIterator,
1190 typename RandomAccessIterator3,
1191 typename _DifferenceTp,
1192 typename Comparator>
1193 RandomAccessIterator3
1194 multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin,
1195 RandomAccessIteratorIterator seqs_end,
1196 RandomAccessIterator3 target,
1197 Comparator comp,
1198 _DifferenceTp length, bool stable)
1199 {
1200 _GLIBCXX_CALL(length)
1201
1202 typedef _DifferenceTp difference_type;
1203
1204 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1205 ::value_type::first_type
1206 RandomAccessIterator1;
1207 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1208 value_type;
1209
1210 int min_seq;
1211 RandomAccessIterator3 target_end;
1212 difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
1213 comp, min_seq, stable);
1214
1215 difference_type total_length = 0;
1216 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1217 total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
1218
1219 if (overhang != -1)
1220 {
1221 difference_type unguarded_length =
1222 std::min(length, total_length - overhang);
1223 target_end = multiway_merge_loser_tree_unguarded
1224 <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
1225 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1226 overhang = length - unguarded_length;
1227 }
1228 else
1229 {
1230 // Empty sequence found.
1231 overhang = length;
1232 target_end = target;
1233 }
1234
1235 #if _GLIBCXX_ASSERTIONS
1236 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1237 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1238 #endif
1239
1240 target_end = multiway_merge_loser_tree
1241 <typename loser_tree_traits<value_type, Comparator>::LT>
1242 (seqs_begin, seqs_end, target_end, comp, overhang, stable);
1243
1244 #if _GLIBCXX_ASSERTIONS
1245 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1246 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1247 #endif
1248
1249 return target_end;
1250 }
1251
1252 template<typename RandomAccessIteratorIterator,
1253 typename RandomAccessIterator3,
1254 typename _DifferenceTp,
1255 typename Comparator>
1256 RandomAccessIterator3
1257 multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
1258 RandomAccessIteratorIterator seqs_end,
1259 RandomAccessIterator3 target,
1260 Comparator comp,
1261 _DifferenceTp length, bool stable)
1262 {
1263 _GLIBCXX_CALL(length)
1264
1265 typedef _DifferenceTp difference_type;
1266 typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
1267 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1268 ::value_type::first_type
1269 RandomAccessIterator1;
1270 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1271 value_type;
1272
1273 RandomAccessIterator3 target_end;
1274 difference_type overhang =
1275 prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
1276
1277 difference_type total_length = 0;
1278 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1279 {
1280 total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
1281
1282 // Sentinel spot.
1283 ++((*s).second);
1284 }
1285
1286 difference_type unguarded_length =
1287 std::min(length, total_length - overhang);
1288 target_end = multiway_merge_loser_tree_unguarded
1289 <typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
1290 (seqs_begin, seqs_end, target, comp, unguarded_length, stable);
1291 overhang = length - unguarded_length;
1292
1293 #if _GLIBCXX_ASSERTIONS
1294 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
1295 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1296 #endif
1297
1298 // Copy rest stable.
1299 for (RandomAccessIteratorIterator s = seqs_begin;
1300 s != seqs_end && overhang > 0; ++s)
1301 {
1302 // Restore.
1303 --((*s).second);
1304 difference_type local_length =
1305 std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s));
1306 target_end = std::copy((*s).first, (*s).first + local_length,
1307 target_end);
1308 (*s).first += local_length;
1309 overhang -= local_length;
1310 }
1311
1312 #if _GLIBCXX_ASSERTIONS
1313 _GLIBCXX_PARALLEL_ASSERT(overhang == 0);
1314 _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
1315 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
1316 #endif
1317
1318 return target_end;
1319 }
1320
1321 /** @brief Sequential multi-way merging switch.
1322 *
1323 * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and
1324 * runtime settings.
1325 * @param seqs_begin Begin iterator of iterator pair input sequence.
1326 * @param seqs_end End iterator of iterator pair input sequence.
1327 * @param target Begin iterator out output sequence.
1328 * @param comp Comparator.
1329 * @param length Maximum length to merge.
1330 * @param stable Stable merging incurs a performance penalty.
1331 * @param sentinel The sequences have a sentinel element.
1332 * @return End iterator of output sequence. */
1333 template<typename RandomAccessIteratorIterator,
1334 typename RandomAccessIterator3,
1335 typename _DifferenceTp,
1336 typename Comparator>
1337 RandomAccessIterator3
1338 multiway_merge(RandomAccessIteratorIterator seqs_begin,
1339 RandomAccessIteratorIterator seqs_end,
1340 RandomAccessIterator3 target,
1341 Comparator comp, _DifferenceTp length,
1342 bool stable, bool sentinel,
1343 sequential_tag)
1344 {
1345 _GLIBCXX_CALL(length)
1346
1347 typedef _DifferenceTp difference_type;
1348 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1349 ::value_type::first_type
1350 RandomAccessIterator1;
1351 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1352 value_type;
1353
1354 #if _GLIBCXX_ASSERTIONS
1355 for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
1356 _GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
1357 #endif
1358
1359 RandomAccessIterator3 return_target = target;
1360 int k = static_cast<int>(seqs_end - seqs_begin);
1361
1362 Settings::MultiwayMergeAlgorithm mwma =
1363 Settings::multiway_merge_algorithm;
1364
1365 if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL)
1366 mwma = Settings::LOSER_TREE_COMBINED;
1367
1368 switch (k)
1369 {
1370 case 0:
1371 break;
1372 case 1:
1373 return_target = std::copy(seqs_begin[0].first,
1374 seqs_begin[0].first + length,
1375 target);
1376 seqs_begin[0].first += length;
1377 break;
1378 case 2:
1379 return_target = merge_advance(seqs_begin[0].first,
1380 seqs_begin[0].second,
1381 seqs_begin[1].first,
1382 seqs_begin[1].second,
1383 target, length, comp);
1384 break;
1385 case 3:
1386 switch (mwma)
1387 {
1388 case Settings::LOSER_TREE_COMBINED:
1389 return_target = multiway_merge_3_combined(seqs_begin,
1390 seqs_end,
1391 target,
1392 comp, length,
1393 stable);
1394 break;
1395 case Settings::LOSER_TREE_SENTINEL:
1396 return_target =
1397 multiway_merge_3_variant<unguarded_iterator>(seqs_begin,
1398 seqs_end,
1399 target,
1400 comp, length,
1401 stable);
1402 break;
1403 default:
1404 return_target =
1405 multiway_merge_3_variant<guarded_iterator>(seqs_begin,
1406 seqs_end,
1407 target,
1408 comp, length,
1409 stable);
1410 break;
1411 }
1412 break;
1413 case 4:
1414 switch (mwma)
1415 {
1416 case Settings::LOSER_TREE_COMBINED:
1417 return_target = multiway_merge_4_combined(seqs_begin,
1418 seqs_end,
1419 target,
1420 comp, length, stable);
1421 break;
1422 case Settings::LOSER_TREE_SENTINEL:
1423 return_target =
1424 multiway_merge_4_variant<unguarded_iterator>(seqs_begin,
1425 seqs_end,
1426 target,
1427 comp, length,
1428 stable);
1429 break;
1430 default:
1431 return_target = multiway_merge_4_variant<guarded_iterator>(
1432 seqs_begin,
1433 seqs_end,
1434 target,
1435 comp, length, stable);
1436 break;
1437 }
1438 break;
1439 default:
1440 {
1441 switch (mwma)
1442 {
1443 case Settings::BUBBLE:
1444 return_target = multiway_merge_bubble(seqs_begin,
1445 seqs_end,
1446 target,
1447 comp, length, stable);
1448 break;
1449 #if _GLIBCXX_LOSER_TREE_EXPLICIT
1450 case Settings::LOSER_TREE_EXPLICIT:
1451 return_target = multiway_merge_loser_tree<
1452 LoserTreeExplicit<value_type, Comparator> >(seqs_begin,
1453 seqs_end,
1454 target,
1455 comp, length,
1456 stable);
1457 break;
1458 #endif
1459 #if _GLIBCXX_LOSER_TREE
1460 case Settings::LOSER_TREE:
1461 return_target = multiway_merge_loser_tree<
1462 LoserTree<value_type, Comparator> >(seqs_begin,
1463 seqs_end,
1464 target,
1465 comp, length,
1466 stable);
1467 break;
1468 #endif
1469 #if _GLIBCXX_LOSER_TREE_COMBINED
1470 case Settings::LOSER_TREE_COMBINED:
1471 return_target = multiway_merge_loser_tree_combined(seqs_begin,
1472 seqs_end,
1473 target,
1474 comp, length,
1475 stable);
1476 break;
1477 #endif
1478 #if _GLIBCXX_LOSER_TREE_SENTINEL
1479 case Settings::LOSER_TREE_SENTINEL:
1480 return_target = multiway_merge_loser_tree_sentinel(seqs_begin,
1481 seqs_end,
1482 target,
1483 comp, length,
1484 stable);
1485 break;
1486 #endif
1487 default:
1488 // multiway_merge algorithm not implemented.
1489 _GLIBCXX_PARALLEL_ASSERT(0);
1490 break;
1491 }
1492 }
1493 }
1494 #if _GLIBCXX_ASSERTIONS
1495 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1496 #endif
1497
1498 return return_target;
1499 }
1500
1501 /** @brief Parallel multi-way merge routine.
1502 *
1503 * The _GLIBCXX_PARALLEL_DECISION if based on the branching factor
1504 * and runtime settings.
1505 * @param seqs_begin Begin iterator of iterator pair input sequence.
1506 * @param seqs_end End iterator of iterator pair input sequence.
1507 * @param target Begin iterator out output sequence.
1508 * @param comp Comparator.
1509 * @param length Maximum length to merge.
1510 * @param stable Stable merging incurs a performance penalty.
1511 * @param sentinel Ignored.
1512 * @return End iterator of output sequence.
1513 */
1514 template<typename RandomAccessIteratorIterator,
1515 typename RandomAccessIterator3,
1516 typename _DifferenceTp,
1517 typename Comparator>
1518 RandomAccessIterator3
1519 parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
1520 RandomAccessIteratorIterator seqs_end,
1521 RandomAccessIterator3 target,
1522 Comparator comp,
1523 _DifferenceTp length, bool stable, bool sentinel)
1524 {
1525 _GLIBCXX_CALL(length)
1526
1527 typedef _DifferenceTp difference_type;
1528 typedef typename std::iterator_traits<RandomAccessIteratorIterator>
1529 ::value_type::first_type
1530 RandomAccessIterator1;
1531 typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
1532 value_type;
1533
1534 // k sequences.
1535 int k = static_cast<int>(seqs_end - seqs_begin);
1536
1537 difference_type total_length = 0;
1538 for (RandomAccessIteratorIterator raii = seqs_begin;
1539 raii != seqs_end; ++raii)
1540 total_length += _GLIBCXX_PARALLEL_LENGTH(*raii);
1541
1542 _GLIBCXX_CALL(total_length)
1543
1544 if (total_length == 0 || k == 0)
1545 return target;
1546
1547 bool tight = (total_length == length);
1548
1549 std::vector<std::pair<difference_type, difference_type> >* pieces;
1550
1551 thread_index_t num_threads = static_cast<thread_index_t>(
1552 std::min<difference_type>(get_max_threads(), total_length));
1553
1554 # pragma omp parallel num_threads (num_threads)
1555 {
1556 # pragma omp single
1557 {
1558 num_threads = omp_get_num_threads();
1559 // Thread t will have to merge pieces[iam][0..k - 1]
1560 pieces = new std::vector<
1561 std::pair<difference_type, difference_type> >[num_threads];
1562 for (int s = 0; s < num_threads; ++s)
1563 pieces[s].resize(k);
1564
1565 difference_type num_samples =
1566 Settings::merge_oversampling * num_threads;
1567
1568 if (Settings::multiway_merge_splitting == Settings::SAMPLING)
1569 {
1570 value_type* samples = static_cast<value_type*>(
1571 ::operator new(sizeof(value_type) * k * num_samples));
1572 // Sample.
1573 for (int s = 0; s < k; ++s)
1574 for (difference_type i = 0; i < num_samples; ++i)
1575 {
1576 difference_type sample_index =
1577 static_cast<difference_type>(
1578 _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s])
1579 * (double(i + 1) / (num_samples + 1))
1580 * (double(length) / total_length));
1581 ::new(&(samples[s * num_samples + i]))
1582 value_type(seqs_begin[s].first[sample_index]);
1583 }
1584
1585 if (stable)
1586 __gnu_sequential::stable_sort(samples, samples
1587 + (num_samples * k), comp);
1588 else
1589 __gnu_sequential::sort(samples, samples
1590 + (num_samples * k), comp);
1591
1592 for (int slab = 0; slab < num_threads; ++slab)
1593 // For each slab / processor.
1594 for (int seq = 0; seq < k; ++seq)
1595 {
1596 // For each sequence.
1597 if (slab > 0)
1598 pieces[slab][seq].first =
1599 std::upper_bound(seqs_begin[seq].first,
1600 seqs_begin[seq].second,
1601 samples[num_samples * k
1602 * slab / num_threads],
1603 comp)
1604 - seqs_begin[seq].first;
1605 else
1606 {
1607 // Absolute beginning.
1608 pieces[slab][seq].first = 0;
1609 }
1610 if ((slab + 1) < num_threads)
1611 pieces[slab][seq].second =
1612 std::upper_bound(seqs_begin[seq].first,
1613 seqs_begin[seq].second,
1614 samples[num_samples * k
1615 * (slab + 1)
1616 / num_threads], comp)
1617 - seqs_begin[seq].first;
1618 else
1619 pieces[slab][seq].second
1620 = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
1621 }
1622 ::operator delete(samples);
1623 }
1624 else
1625 {
1626 // (Settings::multiway_merge_splitting == Settings::EXACT).
1627 std::vector<RandomAccessIterator1>* offsets =
1628 new std::vector<RandomAccessIterator1>[num_threads];
1629 std::vector<
1630 std::pair<RandomAccessIterator1, RandomAccessIterator1>
1631 > se(k);
1632
1633 copy(seqs_begin, seqs_end, se.begin());
1634
1635 difference_type* borders =
1636 new difference_type[num_threads + 1];
1637 equally_split(length, num_threads, borders);
1638
1639 for (int s = 0; s < (num_threads - 1); ++s)
1640 {
1641 offsets[s].resize(k);
1642 multiseq_partition(
1643 se.begin(), se.end(), borders[s + 1],
1644 offsets[s].begin(), comp);
1645
1646 // Last one also needed and available.
1647 if (!tight)
1648 {
1649 offsets[num_threads - 1].resize(k);
1650 multiseq_partition(se.begin(), se.end(),
1651 difference_type(length),
1652 offsets[num_threads - 1].begin(),
1653 comp);
1654 }
1655 }
1656
1657
1658 for (int slab = 0; slab < num_threads; ++slab)
1659 {
1660 // For each slab / processor.
1661 for (int seq = 0; seq < k; ++seq)
1662 {
1663 // For each sequence.
1664 if (slab == 0)
1665 {
1666 // Absolute beginning.
1667 pieces[slab][seq].first = 0;
1668 }
1669 else
1670 pieces[slab][seq].first =
1671 pieces[slab - 1][seq].second;
1672 if (!tight || slab < (num_threads - 1))
1673 pieces[slab][seq].second =
1674 offsets[slab][seq] - seqs_begin[seq].first;
1675 else
1676 {
1677 // slab == num_threads - 1
1678 pieces[slab][seq].second =
1679 _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
1680 }
1681 }
1682 }
1683 delete[] offsets;
1684 }
1685 } //single
1686
1687 thread_index_t iam = omp_get_thread_num();
1688
1689 difference_type target_position = 0;
1690
1691 for (int c = 0; c < k; ++c)
1692 target_position += pieces[iam][c].first;
1693
1694 if (k > 2)
1695 {
1696 std::pair<RandomAccessIterator1, RandomAccessIterator1>* chunks
1697 = new
1698 std::pair<RandomAccessIterator1, RandomAccessIterator1>[k];
1699
1700 difference_type local_length = 0;
1701 for (int s = 0; s < k; ++s)
1702 {
1703 chunks[s] = std::make_pair(
1704 seqs_begin[s].first + pieces[iam][s].first,
1705 seqs_begin[s].first + pieces[iam][s].second);
1706 local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]);
1707 }
1708
1709 multiway_merge(
1710 chunks, chunks + k, target + target_position, comp,
1711 std::min(local_length, length - target_position),
1712 stable, false, sequential_tag());
1713
1714 delete[] chunks;
1715 }
1716 else if (k == 2)
1717 {
1718 RandomAccessIterator1
1719 begin0 = seqs_begin[0].first + pieces[iam][0].first,
1720 begin1 = seqs_begin[1].first + pieces[iam][1].first;
1721 merge_advance(begin0,
1722 seqs_begin[0].first + pieces[iam][0].second,
1723 begin1,
1724 seqs_begin[1].first + pieces[iam][1].second,
1725 target + target_position,
1726 (pieces[iam][0].second - pieces[iam][0].first) +
1727 (pieces[iam][1].second - pieces[iam][1].first),
1728 comp);
1729 }
1730 } //parallel
1731
1732 #if _GLIBCXX_ASSERTIONS
1733 _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
1734 #endif
1735
1736 // Update ends of sequences.
1737 for (int s = 0; s < k; ++s)
1738 seqs_begin[s].first += pieces[num_threads - 1][s].second;
1739
1740 delete[] pieces;
1741
1742 return target + length;
1743 }
1744
1745 /**
1746 * @brief Multi-way merging front-end.
1747 * @param seqs_begin Begin iterator of iterator pair input sequence.
1748 * @param seqs_end End iterator of iterator pair input sequence.
1749 * @param target Begin iterator out output sequence.
1750 * @param comp Comparator.
1751 * @param length Maximum length to merge.
1752 * @param stable Stable merging incurs a performance penalty.
1753 * @return End iterator of output sequence.
1754 */
1755 template<typename RandomAccessIteratorPairIterator,
1756 typename RandomAccessIterator3,
1757 typename _DifferenceTp,
1758 typename Comparator>
1759 RandomAccessIterator3
1760 multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
1761 RandomAccessIteratorPairIterator seqs_end,
1762 RandomAccessIterator3 target, Comparator comp,
1763 _DifferenceTp length, bool stable)
1764 {
1765 typedef _DifferenceTp difference_type;
1766 _GLIBCXX_CALL(seqs_end - seqs_begin)
1767
1768 if (seqs_begin == seqs_end)
1769 return target;
1770
1771 RandomAccessIterator3 target_end;
1772 if (_GLIBCXX_PARALLEL_CONDITION(
1773 ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
1774 && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1775 target_end = parallel_multiway_merge(seqs_begin, seqs_end,
1776 target, comp,
1777 static_cast<difference_type>(length),
1778 stable, false);
1779 else
1780 target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length,
1781 stable, false, sequential_tag());
1782
1783 return target_end;
1784 }
1785
1786 /** @brief Multi-way merging front-end.
1787 * @param seqs_begin Begin iterator of iterator pair input sequence.
1788 * @param seqs_end End iterator of iterator pair input sequence.
1789 * @param target Begin iterator out output sequence.
1790 * @param comp Comparator.
1791 * @param length Maximum length to merge.
1792 * @param stable Stable merging incurs a performance penalty.
1793 * @return End iterator of output sequence.
1794 * @pre For each @c i, @c seqs_begin[i].second must be the end
1795 * marker of the sequence, but also reference the one more sentinel
1796 * element. */
1797 template<typename RandomAccessIteratorPairIterator,
1798 typename RandomAccessIterator3,
1799 typename _DifferenceTp,
1800 typename Comparator>
1801 RandomAccessIterator3
1802 multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
1803 RandomAccessIteratorPairIterator seqs_end,
1804 RandomAccessIterator3 target,
1805 Comparator comp,
1806 _DifferenceTp length,
1807 bool stable)
1808 {
1809 typedef _DifferenceTp difference_type;
1810
1811 if (seqs_begin == seqs_end)
1812 return target;
1813
1814 _GLIBCXX_CALL(seqs_end - seqs_begin)
1815
1816 if (_GLIBCXX_PARALLEL_CONDITION(
1817 ((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k)
1818 && ((sequence_index_t)length >= Settings::multiway_merge_minimal_n)))
1819 return parallel_multiway_merge(
1820 seqs_begin, seqs_end,
1821 target, comp, static_cast<difference_type>(length), stable, true);
1822 else
1823 return multiway_merge(seqs_begin, seqs_end,
1824 target, comp, length, stable,
1825 true, sequential_tag());
1826 }
1827 }
1828
1829 #endif