]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
748086b7 | 3 | // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
c2ba9709 | 19 | |
748086b7 JJ |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/random_shuffle.h | |
26 | * @brief Parallel implementation of std::random_shuffle(). | |
27 | * This file is a GNU parallel extension to the Standard C++ Library. | |
28 | */ | |
29 | ||
30 | // Written by Johannes Singler. | |
31 | ||
32 | #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H | |
33 | #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1 | |
34 | ||
35 | #include <limits> | |
6f95a65a | 36 | #include <bits/stl_numeric.h> |
c2ba9709 | 37 | #include <parallel/parallel.h> |
c2ba9709 | 38 | #include <parallel/random_number.h> |
c2ba9709 JS |
39 | |
40 | namespace __gnu_parallel | |
41 | { | |
  /** @brief Type to hold the index of a bin.
   *
   *  Since many variables of this type are allocated (one per element
   *  being shuffled, in the oracle arrays), it should be chosen as
   *  small as possible.
   */
  typedef unsigned short _BinIndex;
48 | ||
49 | /** @brief Data known to every thread participating in | |
50 | __gnu_parallel::__parallel_random_shuffle(). */ | |
51 | template<typename _RAIter> | |
52 | struct _DRandomShufflingGlobalData | |
53 | { | |
54 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
55 | typedef typename _TraitsType::value_type _ValueType; | |
56 | typedef typename _TraitsType::difference_type _DifferenceType; | |
c2ba9709 | 57 | |
77d16198 PC |
58 | /** @brief Begin iterator of the __source. */ |
59 | _RAIter& _M_source; | |
c2ba9709 | 60 | |
77d16198 PC |
61 | /** @brief Temporary arrays for each thread. */ |
62 | _ValueType** _M_temporaries; | |
c2ba9709 | 63 | |
77d16198 PC |
64 | /** @brief Two-dimensional array to hold the thread-bin distribution. |
65 | * | |
66 | * Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */ | |
67 | _DifferenceType** _M_dist; | |
c2ba9709 | 68 | |
77d16198 PC |
69 | /** @brief Start indexes of the threads' __chunks. */ |
70 | _DifferenceType* _M_starts; | |
c2ba9709 | 71 | |
77d16198 PC |
72 | /** @brief Number of the thread that will further process the |
73 | corresponding bin. */ | |
74 | _ThreadIndex* _M_bin_proc; | |
c2ba9709 | 75 | |
77d16198 PC |
76 | /** @brief Number of bins to distribute to. */ |
77 | int _M_num_bins; | |
c2ba9709 | 78 | |
77d16198 PC |
79 | /** @brief Number of bits needed to address the bins. */ |
80 | int _M_num_bits; | |
c2ba9709 | 81 | |
77d16198 PC |
82 | /** @brief Constructor. */ |
83 | _DRandomShufflingGlobalData(_RAIter& __source) | |
84 | : _M_source(__source) { } | |
85 | }; | |
86 | ||
  /** @brief Local data for a thread participating in
      __gnu_parallel::__parallel_random_shuffle().
   *
   *  One record exists per thread.  The _RandomNumberGenerator template
   *  parameter is not referenced by any member declared here.
   */
  template<typename _RAIter, typename _RandomNumberGenerator>
    struct _DRSSorterPU
    {
      /** @brief Number of threads participating in total. */
      int _M_num_threads;

      /** @brief Begin index (inclusive) of the bins taken care of by
          this thread. */
      _BinIndex _M_bins_begin;

      /** @brief End index (exclusive) of the bins taken care of by
          this thread. */
      _BinIndex __bins_end;

      /** @brief Random seed for this thread. */
      uint32_t _M_seed;

      /** @brief Pointer to the data shared by all threads. */
      _DRandomShufflingGlobalData<_RAIter>* _M_sd;
    };
108 | ||
8e32aa11 | 109 | /** @brief Generate a random number in @c [0,2^__logp). |
77d16198 PC |
110 | * @param __logp Logarithm (basis 2) of the upper range __bound. |
111 | * @param __rng Random number generator to use. | |
112 | */ | |
113 | template<typename _RandomNumberGenerator> | |
114 | inline int | |
115 | __random_number_pow2(int __logp, _RandomNumberGenerator& __rng) | |
116 | { return __rng.__genrand_bits(__logp); } | |
117 | ||
118 | /** @brief Random shuffle code executed by each thread. | |
119 | * @param __pus Array of thread-local data records. */ | |
120 | template<typename _RAIter, typename _RandomNumberGenerator> | |
121 | void | |
122 | __parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter, | |
123 | _RandomNumberGenerator>* __pus) | |
124 | { | |
125 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
126 | typedef typename _TraitsType::value_type _ValueType; | |
127 | typedef typename _TraitsType::difference_type _DifferenceType; | |
c2ba9709 | 128 | |
77d16198 PC |
129 | _ThreadIndex __iam = omp_get_thread_num(); |
130 | _DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam]; | |
131 | _DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd; | |
c2ba9709 | 132 | |
77d16198 PC |
133 | // Indexing: _M_dist[bin][processor] |
134 | _DifferenceType __length = (__sd->_M_starts[__iam + 1] | |
135 | - __sd->_M_starts[__iam]); | |
136 | _BinIndex* __oracles = new _BinIndex[__length]; | |
137 | _DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1]; | |
138 | _BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins]; | |
139 | _ValueType** __temporaries = new _ValueType*[__d->_M_num_threads]; | |
c2ba9709 | 140 | |
77d16198 PC |
141 | // Compute oracles and count appearances. |
142 | for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) | |
143 | __dist[__b] = 0; | |
144 | int __num_bits = __sd->_M_num_bits; | |
c2ba9709 | 145 | |
77d16198 | 146 | _RandomNumber __rng(__d->_M_seed); |
c2ba9709 | 147 | |
77d16198 PC |
148 | // First main loop. |
149 | for (_DifferenceType __i = 0; __i < __length; ++__i) | |
150 | { | |
151 | _BinIndex __oracle = __random_number_pow2(__num_bits, __rng); | |
152 | __oracles[__i] = __oracle; | |
c2ba9709 | 153 | |
77d16198 PC |
154 | // To allow prefix (partial) sum. |
155 | ++(__dist[__oracle + 1]); | |
156 | } | |
c2ba9709 | 157 | |
77d16198 PC |
158 | for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) |
159 | __sd->_M_dist[__b][__iam + 1] = __dist[__b]; | |
c2ba9709 | 160 | |
77d16198 | 161 | # pragma omp barrier |
c2ba9709 | 162 | |
77d16198 | 163 | # pragma omp single |
c2ba9709 | 164 | { |
77d16198 PC |
165 | // Sum up bins, __sd->_M_dist[__s + 1][__d->_M_num_threads] now |
166 | // contains the total number of items in bin __s | |
167 | for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s) | |
168 | __gnu_sequential::partial_sum(__sd->_M_dist[__s + 1], | |
169 | __sd->_M_dist[__s + 1] | |
170 | + __d->_M_num_threads + 1, | |
171 | __sd->_M_dist[__s + 1]); | |
c2ba9709 JS |
172 | } |
173 | ||
77d16198 PC |
174 | # pragma omp barrier |
175 | ||
176 | _SequenceIndex __offset = 0, __global_offset = 0; | |
177 | for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s) | |
178 | __global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads]; | |
179 | ||
180 | # pragma omp barrier | |
181 | ||
182 | for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s) | |
183 | { | |
184 | for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t) | |
185 | __sd->_M_dist[__s + 1][__t] += __offset; | |
186 | __offset = __sd->_M_dist[__s + 1][__d->_M_num_threads]; | |
187 | } | |
188 | ||
189 | __sd->_M_temporaries[__iam] = static_cast<_ValueType*> | |
190 | (::operator new(sizeof(_ValueType) * __offset)); | |
191 | ||
192 | # pragma omp barrier | |
193 | ||
194 | // Draw local copies to avoid false sharing. | |
195 | for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b) | |
196 | __dist[__b] = __sd->_M_dist[__b][__iam]; | |
197 | for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b) | |
198 | __bin_proc[__b] = __sd->_M_bin_proc[__b]; | |
199 | for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t) | |
200 | __temporaries[__t] = __sd->_M_temporaries[__t]; | |
201 | ||
202 | _RAIter __source = __sd->_M_source; | |
203 | _DifferenceType __start = __sd->_M_starts[__iam]; | |
204 | ||
205 | // Distribute according to oracles, second main loop. | |
206 | for (_DifferenceType __i = 0; __i < __length; ++__i) | |
207 | { | |
208 | _BinIndex __target_bin = __oracles[__i]; | |
209 | _ThreadIndex __target_p = __bin_proc[__target_bin]; | |
210 | ||
211 | // Last column [__d->_M_num_threads] stays unchanged. | |
212 | ::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++])) | |
213 | _ValueType(*(__source + __i + __start)); | |
214 | } | |
215 | ||
216 | delete[] __oracles; | |
217 | delete[] __dist; | |
218 | delete[] __bin_proc; | |
219 | delete[] __temporaries; | |
220 | ||
221 | # pragma omp barrier | |
222 | ||
223 | // Shuffle bins internally. | |
224 | for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b) | |
225 | { | |
226 | _ValueType* __begin = | |
227 | (__sd->_M_temporaries[__iam] | |
228 | + (__b == __d->_M_bins_begin | |
229 | ? 0 : __sd->_M_dist[__b][__d->_M_num_threads])), | |
230 | * __end = (__sd->_M_temporaries[__iam] | |
231 | + __sd->_M_dist[__b + 1][__d->_M_num_threads]); | |
232 | ||
233 | __sequential_random_shuffle(__begin, __end, __rng); | |
234 | std::copy(__begin, __end, __sd->_M_source + __global_offset | |
235 | + (__b == __d->_M_bins_begin | |
236 | ? 0 : __sd->_M_dist[__b][__d->_M_num_threads])); | |
237 | } | |
238 | ||
239 | ::operator delete(__sd->_M_temporaries[__iam]); | |
240 | } | |
241 | ||
242 | /** @brief Round up to the next greater power of 2. | |
243 | * @param __x _Integer to round up */ | |
244 | template<typename _Tp> | |
245 | _Tp | |
246 | __round_up_to_pow2(_Tp __x) | |
247 | { | |
248 | if (__x <= 1) | |
249 | return 1; | |
250 | else | |
251 | return (_Tp)1 << (__rd_log2(__x - 1) + 1); | |
252 | } | |
253 | ||
254 | /** @brief Main parallel random shuffle step. | |
255 | * @param __begin Begin iterator of sequence. | |
256 | * @param __end End iterator of sequence. | |
257 | * @param __n Length of sequence. | |
258 | * @param __num_threads Number of threads to use. | |
259 | * @param __rng Random number generator to use. | |
260 | */ | |
261 | template<typename _RAIter, typename _RandomNumberGenerator> | |
262 | void | |
263 | __parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end, | |
264 | typename std::iterator_traits | |
265 | <_RAIter>::difference_type __n, | |
266 | _ThreadIndex __num_threads, | |
267 | _RandomNumberGenerator& __rng) | |
268 | { | |
269 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
270 | typedef typename _TraitsType::value_type _ValueType; | |
271 | typedef typename _TraitsType::difference_type _DifferenceType; | |
272 | ||
273 | _GLIBCXX_CALL(__n) | |
274 | ||
275 | const _Settings& __s = _Settings::get(); | |
276 | ||
277 | if (__num_threads > __n) | |
278 | __num_threads = static_cast<_ThreadIndex>(__n); | |
279 | ||
280 | _BinIndex __num_bins, __num_bins_cache; | |
c2ba9709 JS |
281 | |
282 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 | |
77d16198 | 283 | // Try the L1 cache first. |
c2ba9709 | 284 | |
77d16198 PC |
285 | // Must fit into L1. |
286 | __num_bins_cache = | |
287 | std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb | |
288 | / sizeof(_ValueType))); | |
289 | __num_bins_cache = __round_up_to_pow2(__num_bins_cache); | |
c2ba9709 | 290 | |
77d16198 PC |
291 | // No more buckets than TLB entries, power of 2 |
292 | // Power of 2 and at least one element per bin, at most the TLB size. | |
293 | __num_bins = std::min<_DifferenceType>(__n, __num_bins_cache); | |
c2ba9709 JS |
294 | |
295 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB | |
77d16198 PC |
296 | // 2 TLB entries needed per bin. |
297 | __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins); | |
c2ba9709 | 298 | #endif |
77d16198 | 299 | __num_bins = __round_up_to_pow2(__num_bins); |
c2ba9709 | 300 | |
77d16198 PC |
301 | if (__num_bins < __num_bins_cache) |
302 | { | |
c2ba9709 | 303 | #endif |
77d16198 PC |
304 | // Now try the L2 cache |
305 | // Must fit into L2 | |
306 | __num_bins_cache = static_cast<_BinIndex> | |
307 | (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size | |
308 | / sizeof(_ValueType)))); | |
309 | __num_bins_cache = __round_up_to_pow2(__num_bins_cache); | |
310 | ||
311 | // No more buckets than TLB entries, power of 2. | |
312 | __num_bins = static_cast<_BinIndex> | |
313 | (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); | |
314 | // Power of 2 and at least one element per bin, at most the TLB size. | |
c2ba9709 | 315 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
77d16198 PC |
316 | // 2 TLB entries needed per bin. |
317 | __num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2), | |
318 | __num_bins); | |
c2ba9709 | 319 | #endif |
77d16198 | 320 | __num_bins = __round_up_to_pow2(__num_bins); |
c2ba9709 | 321 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
77d16198 | 322 | } |
c2ba9709 JS |
323 | #endif |
324 | ||
77d16198 | 325 | __num_threads = std::min<_BinIndex>(__num_threads, __num_bins); |
c2ba9709 | 326 | |
77d16198 PC |
327 | if (__num_threads <= 1) |
328 | return __sequential_random_shuffle(__begin, __end, __rng); | |
c2ba9709 | 329 | |
77d16198 PC |
330 | _DRandomShufflingGlobalData<_RAIter> __sd(__begin); |
331 | _DRSSorterPU<_RAIter, _RandomNumber >* __pus; | |
332 | _DifferenceType* __starts; | |
c2ba9709 | 333 | |
77d16198 | 334 | # pragma omp parallel num_threads(__num_threads) |
c2ba9709 | 335 | { |
77d16198 | 336 | _ThreadIndex __num_threads = omp_get_num_threads(); |
e683ee2a | 337 | # pragma omp single |
77d16198 PC |
338 | { |
339 | __pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads]; | |
340 | ||
341 | __sd._M_temporaries = new _ValueType*[__num_threads]; | |
342 | __sd._M_dist = new _DifferenceType*[__num_bins + 1]; | |
343 | __sd._M_bin_proc = new _ThreadIndex[__num_bins]; | |
344 | for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b) | |
345 | __sd._M_dist[__b] = new _DifferenceType[__num_threads + 1]; | |
346 | for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b) | |
347 | { | |
348 | __sd._M_dist[0][0] = 0; | |
349 | __sd._M_dist[__b][0] = 0; | |
350 | } | |
351 | __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; | |
352 | int __bin_cursor = 0; | |
353 | __sd._M_num_bins = __num_bins; | |
354 | __sd._M_num_bits = __rd_log2(__num_bins); | |
355 | ||
356 | _DifferenceType __chunk_length = __n / __num_threads, | |
357 | __split = __n % __num_threads, | |
358 | __start = 0; | |
359 | _DifferenceType __bin_chunk_length = __num_bins / __num_threads, | |
360 | __bin_split = __num_bins % __num_threads; | |
361 | for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) | |
362 | { | |
363 | __starts[__i] = __start; | |
364 | __start += (__i < __split | |
365 | ? (__chunk_length + 1) : __chunk_length); | |
366 | int __j = __pus[__i]._M_bins_begin = __bin_cursor; | |
367 | ||
368 | // Range of bins for this processor. | |
369 | __bin_cursor += (__i < __bin_split | |
370 | ? (__bin_chunk_length + 1) | |
371 | : __bin_chunk_length); | |
372 | __pus[__i].__bins_end = __bin_cursor; | |
373 | for (; __j < __bin_cursor; ++__j) | |
374 | __sd._M_bin_proc[__j] = __i; | |
375 | __pus[__i]._M_num_threads = __num_threads; | |
376 | __pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max()); | |
377 | __pus[__i]._M_sd = &__sd; | |
378 | } | |
379 | __starts[__num_threads] = __start; | |
380 | } //single | |
381 | // Now shuffle in parallel. | |
382 | __parallel_random_shuffle_drs_pu(__pus); | |
3611e176 | 383 | } // parallel |
c2ba9709 | 384 | |
77d16198 PC |
385 | delete[] __starts; |
386 | delete[] __sd._M_bin_proc; | |
387 | for (int __s = 0; __s < (__num_bins + 1); ++__s) | |
388 | delete[] __sd._M_dist[__s]; | |
389 | delete[] __sd._M_dist; | |
390 | delete[] __sd._M_temporaries; | |
c2ba9709 | 391 | |
77d16198 PC |
392 | delete[] __pus; |
393 | } | |
c2ba9709 | 394 | |
77d16198 PC |
395 | /** @brief Sequential cache-efficient random shuffle. |
396 | * @param __begin Begin iterator of sequence. | |
397 | * @param __end End iterator of sequence. | |
398 | * @param __rng Random number generator to use. | |
399 | */ | |
400 | template<typename _RAIter, typename _RandomNumberGenerator> | |
401 | void | |
402 | __sequential_random_shuffle(_RAIter __begin, _RAIter __end, | |
403 | _RandomNumberGenerator& __rng) | |
404 | { | |
405 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
406 | typedef typename _TraitsType::value_type _ValueType; | |
407 | typedef typename _TraitsType::difference_type _DifferenceType; | |
c2ba9709 | 408 | |
77d16198 PC |
409 | _DifferenceType __n = __end - __begin; |
410 | const _Settings& __s = _Settings::get(); | |
c2ba9709 | 411 | |
77d16198 | 412 | _BinIndex __num_bins, __num_bins_cache; |
c2ba9709 JS |
413 | |
414 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 | |
77d16198 PC |
415 | // Try the L1 cache first, must fit into L1. |
416 | __num_bins_cache = std::max<_DifferenceType> | |
417 | (1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType))); | |
418 | __num_bins_cache = __round_up_to_pow2(__num_bins_cache); | |
419 | ||
420 | // No more buckets than TLB entries, power of 2 | |
421 | // Power of 2 and at least one element per bin, at most the TLB size | |
422 | __num_bins = std::min(__n, (_DifferenceType)__num_bins_cache); | |
c2ba9709 | 423 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
77d16198 PC |
424 | // 2 TLB entries needed per bin |
425 | __num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins); | |
c2ba9709 | 426 | #endif |
77d16198 | 427 | __num_bins = __round_up_to_pow2(__num_bins); |
c2ba9709 | 428 | |
77d16198 PC |
429 | if (__num_bins < __num_bins_cache) |
430 | { | |
c2ba9709 | 431 | #endif |
77d16198 PC |
432 | // Now try the L2 cache, must fit into L2. |
433 | __num_bins_cache = static_cast<_BinIndex> | |
434 | (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size | |
435 | / sizeof(_ValueType)))); | |
436 | __num_bins_cache = __round_up_to_pow2(__num_bins_cache); | |
c2ba9709 | 437 | |
77d16198 PC |
438 | // No more buckets than TLB entries, power of 2 |
439 | // Power of 2 and at least one element per bin, at most the TLB size. | |
440 | __num_bins = static_cast<_BinIndex> | |
441 | (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); | |
c2ba9709 JS |
442 | |
443 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB | |
77d16198 PC |
444 | // 2 TLB entries needed per bin |
445 | __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins); | |
c2ba9709 | 446 | #endif |
77d16198 | 447 | __num_bins = __round_up_to_pow2(__num_bins); |
c2ba9709 | 448 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
77d16198 | 449 | } |
c2ba9709 JS |
450 | #endif |
451 | ||
77d16198 | 452 | int __num_bits = __rd_log2(__num_bins); |
c2ba9709 | 453 | |
77d16198 PC |
454 | if (__num_bins > 1) |
455 | { | |
456 | _ValueType* __target = | |
457 | static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n)); | |
458 | _BinIndex* __oracles = new _BinIndex[__n]; | |
459 | _DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1], | |
460 | * __dist1 = new _DifferenceType[__num_bins + 1]; | |
461 | ||
462 | for (int __b = 0; __b < __num_bins + 1; ++__b) | |
463 | __dist0[__b] = 0; | |
464 | ||
465 | _RandomNumber __bitrng(__rng(0xFFFFFFFF)); | |
c2ba9709 | 466 | |
77d16198 PC |
467 | for (_DifferenceType __i = 0; __i < __n; ++__i) |
468 | { | |
469 | _BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng); | |
470 | __oracles[__i] = __oracle; | |
471 | ||
472 | // To allow prefix (partial) sum. | |
473 | ++(__dist0[__oracle + 1]); | |
474 | } | |
475 | ||
476 | // Sum up bins. | |
477 | __gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1, | |
478 | __dist0); | |
479 | ||
480 | for (int __b = 0; __b < __num_bins + 1; ++__b) | |
481 | __dist1[__b] = __dist0[__b]; | |
482 | ||
483 | // Distribute according to oracles. | |
484 | for (_DifferenceType __i = 0; __i < __n; ++__i) | |
485 | ::new(&(__target[(__dist0[__oracles[__i]])++])) | |
486 | _ValueType(*(__begin + __i)); | |
487 | ||
488 | for (int __b = 0; __b < __num_bins; ++__b) | |
489 | __sequential_random_shuffle(__target + __dist1[__b], | |
490 | __target + __dist1[__b + 1], __rng); | |
491 | ||
492 | // Copy elements back. | |
493 | std::copy(__target, __target + __n, __begin); | |
494 | ||
495 | delete[] __dist0; | |
496 | delete[] __dist1; | |
497 | delete[] __oracles; | |
498 | ::operator delete(__target); | |
499 | } | |
500 | else | |
501 | __gnu_sequential::random_shuffle(__begin, __end, __rng); | |
502 | } | |
503 | ||
504 | /** @brief Parallel random public call. | |
505 | * @param __begin Begin iterator of sequence. | |
506 | * @param __end End iterator of sequence. | |
507 | * @param __rng Random number generator to use. | |
508 | */ | |
509 | template<typename _RAIter, typename _RandomNumberGenerator> | |
510 | inline void | |
511 | __parallel_random_shuffle(_RAIter __begin, _RAIter __end, | |
512 | _RandomNumberGenerator __rng = _RandomNumber()) | |
513 | { | |
514 | typedef std::iterator_traits<_RAIter> _TraitsType; | |
515 | typedef typename _TraitsType::difference_type _DifferenceType; | |
516 | _DifferenceType __n = __end - __begin; | |
517 | __parallel_random_shuffle_drs(__begin, __end, __n, | |
518 | __get_max_threads(), __rng); | |
519 | } | |
c2ba9709 JS |
520 | } |
521 | ||
cbcd1e45 | 522 | #endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */ |