3 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /** @file parallel/multiway_mergesort.h
26 * @brief Parallel multiway merge sort.
27 * This file is a GNU parallel extension to the Standard C++ Library.
30 // Written by Johannes Singler.
32 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
37 #include <parallel/basic_iterator.h>
38 #include <bits/stl_algo.h>
39 #include <parallel/parallel.h>
40 #include <parallel/multiway_merge.h>
42 namespace __gnu_parallel
45 /** @brief Subsequence description. */
46 template<typename _DifferenceTp
>
49 typedef _DifferenceTp _DifferenceType
;
51 /** @brief Begin of subsequence. */
52 _DifferenceType __begin
;
54 /** @brief End of subsequence. */
55 _DifferenceType __end
;
58 /** @brief Data accessed by all threads.
60 * PMWMS = parallel multiway mergesort */
61 template<typename _RAIter
>
62 struct _PMWMSSortingData
64 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
65 typedef typename
_TraitsType::value_type _ValueType
;
66 typedef typename
_TraitsType::difference_type _DifferenceType
;
68 /** @brief Number of threads involved. */
69 _ThreadIndex __num_threads
;
71 /** @brief Input __begin. */
74 /** @brief Start indices, per thread. */
75 _DifferenceType
* _M_starts
;
77 /** @brief Storage in which to sort. */
78 _ValueType
** _M_temporary
;
80 /** @brief Samples. */
81 _ValueType
* _M_samples
;
83 /** @brief Offsets to add to the found positions. */
84 _DifferenceType
* _M_offsets
;
86 /** @brief Pieces of data to merge @__c [thread][__sequence] */
87 std::vector
<_Piece
<_DifferenceType
> >* _M_pieces
;
91 * @brief Select _M_samples from a sequence.
92 * @param __sd Pointer to algorithm data. _Result will be placed in
93 * @__c __sd->_M_samples.
94 * @param __num_samples Number of _M_samples to select.
96 template<typename _RAIter
, typename _DifferenceTp
>
98 __determine_samples(_PMWMSSortingData
<_RAIter
>* __sd
,
99 _DifferenceTp __num_samples
)
101 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
102 typedef typename
_TraitsType::value_type _ValueType
;
103 typedef _DifferenceTp _DifferenceType
;
105 _ThreadIndex __iam
= omp_get_thread_num();
107 _DifferenceType
* __es
= new _DifferenceType
[__num_samples
+ 2];
109 equally_split(__sd
->_M_starts
[__iam
+ 1] - __sd
->_M_starts
[__iam
],
110 __num_samples
+ 1, __es
);
112 for (_DifferenceType __i
= 0; __i
< __num_samples
; ++__i
)
113 ::new(&(__sd
->_M_samples
[__iam
* __num_samples
+ __i
]))
114 _ValueType(__sd
->_M_source
[__sd
->_M_starts
[__iam
] + __es
[__i
+ 1]]);
119 /** @brief Split consistently. */
120 template<bool __exact
, typename _RAIter
,
121 typename _Compare
, typename _SortingPlacesIterator
>
122 struct _SplitConsistently
126 /** @brief Split by exact splitting. */
127 template<typename _RAIter
, typename _Compare
,
128 typename _SortingPlacesIterator
>
129 struct _SplitConsistently
130 <true, _RAIter
, _Compare
, _SortingPlacesIterator
>
133 const _ThreadIndex __iam
,
134 _PMWMSSortingData
<_RAIter
>* __sd
,
137 std::iterator_traits
<_RAIter
>::difference_type
143 std::vector
<std::pair
<_SortingPlacesIterator
, _SortingPlacesIterator
> >
144 seqs(__sd
->__num_threads
);
145 for (_ThreadIndex __s
= 0; __s
< __sd
->__num_threads
; __s
++)
146 seqs
[__s
] = std::make_pair(__sd
->_M_temporary
[__s
],
147 __sd
->_M_temporary
[__s
]
148 + (__sd
->_M_starts
[__s
+ 1] - __sd
->_M_starts
[__s
]));
150 std::vector
<_SortingPlacesIterator
> _M_offsets(__sd
->__num_threads
);
152 // if not last thread
153 if (__iam
< __sd
->__num_threads
- 1)
154 multiseq_partition(seqs
.begin(), seqs
.end(),
155 __sd
->_M_starts
[__iam
+ 1], _M_offsets
.begin(), __comp
);
157 for (int __seq
= 0; __seq
< __sd
->__num_threads
; __seq
++)
160 if (__iam
< (__sd
->__num_threads
- 1))
161 __sd
->_M_pieces
[__iam
][__seq
].__end
= _M_offsets
[__seq
] - seqs
[__seq
].first
;
163 // very end of this sequence
164 __sd
->_M_pieces
[__iam
][__seq
].__end
=
165 __sd
->_M_starts
[__seq
+ 1] - __sd
->_M_starts
[__seq
];
170 for (_ThreadIndex __seq
= 0; __seq
< __sd
->__num_threads
; __seq
++)
172 // For each sequence.
174 __sd
->_M_pieces
[__iam
][__seq
].__begin
= __sd
->_M_pieces
[__iam
- 1][__seq
].__end
;
176 // Absolute beginning.
177 __sd
->_M_pieces
[__iam
][__seq
].__begin
= 0;
182 /** @brief Split by sampling. */
183 template<typename _RAIter
, typename _Compare
,
184 typename _SortingPlacesIterator
>
185 struct _SplitConsistently
<false, _RAIter
, _Compare
,
186 _SortingPlacesIterator
>
189 const _ThreadIndex __iam
,
190 _PMWMSSortingData
<_RAIter
>* __sd
,
193 std::iterator_traits
<_RAIter
>::difference_type
197 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
198 typedef typename
_TraitsType::value_type _ValueType
;
199 typedef typename
_TraitsType::difference_type _DifferenceType
;
201 __determine_samples(__sd
, __num_samples
);
206 __gnu_sequential::sort(__sd
->_M_samples
,
207 __sd
->_M_samples
+ (__num_samples
* __sd
->__num_threads
),
212 for (_ThreadIndex __s
= 0; __s
< __sd
->__num_threads
; ++__s
)
214 // For each sequence.
215 if (__num_samples
* __iam
> 0)
216 __sd
->_M_pieces
[__iam
][__s
].__begin
=
217 std::lower_bound(__sd
->_M_temporary
[__s
],
218 __sd
->_M_temporary
[__s
]
219 + (__sd
->_M_starts
[__s
+ 1] - __sd
->_M_starts
[__s
]),
220 __sd
->_M_samples
[__num_samples
* __iam
],
222 - __sd
->_M_temporary
[__s
];
224 // Absolute beginning.
225 __sd
->_M_pieces
[__iam
][__s
].__begin
= 0;
227 if ((__num_samples
* (__iam
+ 1)) < (__num_samples
* __sd
->__num_threads
))
228 __sd
->_M_pieces
[__iam
][__s
].__end
=
229 std::lower_bound(__sd
->_M_temporary
[__s
],
230 __sd
->_M_temporary
[__s
]
231 + (__sd
->_M_starts
[__s
+ 1] - __sd
->_M_starts
[__s
]),
232 __sd
->_M_samples
[__num_samples
* (__iam
+ 1)],
234 - __sd
->_M_temporary
[__s
];
237 __sd
->_M_pieces
[__iam
][__s
].__end
= __sd
->_M_starts
[__s
+ 1] - __sd
->_M_starts
[__s
];
242 template<bool __stable
, typename _RAIter
, typename _Compare
>
243 struct __possibly_stable_sort
247 template<typename _RAIter
, typename _Compare
>
248 struct __possibly_stable_sort
<true, _RAIter
, _Compare
>
250 void operator()(const _RAIter
& __begin
,
251 const _RAIter
& __end
, _Compare
& __comp
) const
253 __gnu_sequential::stable_sort(__begin
, __end
, __comp
);
257 template<typename _RAIter
, typename _Compare
>
258 struct __possibly_stable_sort
<false, _RAIter
, _Compare
>
260 void operator()(const _RAIter __begin
,
261 const _RAIter __end
, _Compare
& __comp
) const
263 __gnu_sequential::sort(__begin
, __end
, __comp
);
267 template<bool __stable
, typename Seq_RAIter
,
268 typename _RAIter
, typename _Compare
,
270 struct __possibly_stable_multiway_merge
274 template<typename Seq_RAIter
, typename _RAIter
,
275 typename _Compare
, typename DiffType
>
276 struct __possibly_stable_multiway_merge
277 <true, Seq_RAIter
, _RAIter
, _Compare
,
280 void operator()(const Seq_RAIter
& __seqs_begin
,
281 const Seq_RAIter
& __seqs_end
,
282 const _RAIter
& __target
,
284 DiffType __length_am
) const
286 stable_multiway_merge(__seqs_begin
, __seqs_end
, __target
, __length_am
, __comp
,
291 template<typename Seq_RAIter
, typename _RAIter
,
292 typename _Compare
, typename DiffType
>
293 struct __possibly_stable_multiway_merge
294 <false, Seq_RAIter
, _RAIter
, _Compare
,
297 void operator()(const Seq_RAIter
& __seqs_begin
,
298 const Seq_RAIter
& __seqs_end
,
299 const _RAIter
& __target
,
301 DiffType __length_am
) const
303 multiway_merge(__seqs_begin
, __seqs_end
, __target
, __length_am
, __comp
,
308 /** @brief PMWMS code executed by each thread.
309 * @param __sd Pointer to algorithm data.
310 * @param __comp Comparator.
312 template<bool __stable
, bool __exact
, typename _RAIter
,
315 parallel_sort_mwms_pu(_PMWMSSortingData
<_RAIter
>* __sd
,
318 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
319 typedef typename
_TraitsType::value_type _ValueType
;
320 typedef typename
_TraitsType::difference_type _DifferenceType
;
322 _ThreadIndex __iam
= omp_get_thread_num();
324 // Length of this thread's chunk, before merging.
325 _DifferenceType __length_local
= __sd
->_M_starts
[__iam
+ 1] - __sd
->_M_starts
[__iam
];
327 // Sort in temporary storage, leave space for sentinel.
329 typedef _ValueType
* _SortingPlacesIterator
;
331 __sd
->_M_temporary
[__iam
] =
332 static_cast<_ValueType
*>(
333 ::operator new(sizeof(_ValueType
) * (__length_local
+ 1)));
336 std::uninitialized_copy(__sd
->_M_source
+ __sd
->_M_starts
[__iam
],
337 __sd
->_M_source
+ __sd
->_M_starts
[__iam
] + __length_local
,
338 __sd
->_M_temporary
[__iam
]);
340 __possibly_stable_sort
<__stable
, _SortingPlacesIterator
, _Compare
>()
341 (__sd
->_M_temporary
[__iam
], __sd
->_M_temporary
[__iam
] + __length_local
, __comp
);
343 // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
344 // __sd->_M_temporary[__iam] + __length_local.
346 // No barrier here: Synchronization is done by the splitting routine.
348 _DifferenceType __num_samples
=
349 _Settings::get().sort_mwms_oversampling
* __sd
->__num_threads
- 1;
351 <__exact
, _RAIter
, _Compare
, _SortingPlacesIterator
>()
352 (__iam
, __sd
, __comp
, __num_samples
);
354 // Offset from __target __begin, __length after merging.
355 _DifferenceType __offset
= 0, __length_am
= 0;
356 for (_ThreadIndex __s
= 0; __s
< __sd
->__num_threads
; __s
++)
358 __length_am
+= __sd
->_M_pieces
[__iam
][__s
].__end
- __sd
->_M_pieces
[__iam
][__s
].__begin
;
359 __offset
+= __sd
->_M_pieces
[__iam
][__s
].__begin
;
363 std::pair
<_SortingPlacesIterator
, _SortingPlacesIterator
> >
365 seq_vector_type
seqs(__sd
->__num_threads
);
367 for (int __s
= 0; __s
< __sd
->__num_threads
; ++__s
)
370 std::make_pair(__sd
->_M_temporary
[__s
] + __sd
->_M_pieces
[__iam
][__s
].__begin
,
371 __sd
->_M_temporary
[__s
] + __sd
->_M_pieces
[__iam
][__s
].__end
);
374 __possibly_stable_multiway_merge
<
376 typename
seq_vector_type::iterator
,
378 _Compare
, _DifferenceType
>()
379 (seqs
.begin(), seqs
.end(),
380 __sd
->_M_source
+ __offset
, __comp
,
385 ::operator delete(__sd
->_M_temporary
[__iam
]);
388 /** @brief PMWMS main call.
389 * @param __begin Begin iterator of sequence.
390 * @param __end End iterator of sequence.
391 * @param __comp Comparator.
392 * @param __n Length of sequence.
393 * @param __num_threads Number of threads to use.
395 template<bool __stable
, bool __exact
, typename _RAIter
,
398 parallel_sort_mwms(_RAIter __begin
, _RAIter __end
,
400 _ThreadIndex __num_threads
)
402 _GLIBCXX_CALL(__end
- __begin
)
404 typedef std::iterator_traits
<_RAIter
> _TraitsType
;
405 typedef typename
_TraitsType::value_type _ValueType
;
406 typedef typename
_TraitsType::difference_type _DifferenceType
;
408 _DifferenceType __n
= __end
- __begin
;
413 // at least one element per thread
414 if (__num_threads
> __n
)
415 __num_threads
= static_cast<_ThreadIndex
>(__n
);
418 _PMWMSSortingData
<_RAIter
> __sd
;
419 _DifferenceType
* _M_starts
;
421 # pragma omp parallel num_threads(__num_threads)
423 __num_threads
= omp_get_num_threads(); //no more threads than requested
427 __sd
.__num_threads
= __num_threads
;
428 __sd
._M_source
= __begin
;
430 __sd
._M_temporary
= new _ValueType
*[__num_threads
];
434 _DifferenceType size
=
435 (_Settings::get().sort_mwms_oversampling
* __num_threads
- 1)
437 __sd
._M_samples
= static_cast<_ValueType
*>(
438 ::operator new(size
* sizeof(_ValueType
)));
441 __sd
._M_samples
= NULL
;
443 __sd
._M_offsets
= new _DifferenceType
[__num_threads
- 1];
444 __sd
._M_pieces
= new std::vector
<_Piece
<_DifferenceType
> >[__num_threads
];
445 for (int __s
= 0; __s
< __num_threads
; ++__s
)
446 __sd
._M_pieces
[__s
].resize(__num_threads
);
447 _M_starts
= __sd
._M_starts
= new _DifferenceType
[__num_threads
+ 1];
449 _DifferenceType __chunk_length
= __n
/ __num_threads
;
450 _DifferenceType __split
= __n
% __num_threads
;
451 _DifferenceType __pos
= 0;
452 for (int __i
= 0; __i
< __num_threads
; ++__i
)
454 _M_starts
[__i
] = __pos
;
455 __pos
+= (__i
< __split
) ? (__chunk_length
+ 1) : __chunk_length
;
457 _M_starts
[__num_threads
] = __pos
;
460 // Now sort in parallel.
461 parallel_sort_mwms_pu
<__stable
, __exact
>(&__sd
, __comp
);
465 delete[] __sd
._M_temporary
;
468 ::operator delete(__sd
._M_samples
);
470 delete[] __sd
._M_offsets
;
471 delete[] __sd
._M_pieces
;
473 } //namespace __gnu_parallel
475 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */