]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
748086b7 | 3 | // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
c2ba9709 | 19 | |
748086b7 JJ |
20 | // You should have received a copy of the GNU General Public License and |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/random_shuffle.h | |
26 | * @brief Parallel implementation of std::random_shuffle(). | |
27 | * This file is a GNU parallel extension to the Standard C++ Library. | |
28 | */ | |
29 | ||
30 | // Written by Johannes Singler. | |
31 | ||
32 | #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H | |
33 | #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1 | |
34 | ||
35 | #include <limits> | |
6f95a65a | 36 | #include <bits/stl_numeric.h> |
c2ba9709 | 37 | #include <parallel/parallel.h> |
c2ba9709 | 38 | #include <parallel/random_number.h> |
c2ba9709 JS |
39 | |
40 | namespace __gnu_parallel | |
41 | { | |
e683ee2a JS |
42 | /** @brief Type to hold the index of a bin. |
43 | * | |
44 | * Since many variables of this type are allocated, it should be | |
45 | * chosen as small as possible. | |
46 | */ | |
47 | typedef unsigned short bin_index; | |
48 | ||
49 | /** @brief Data known to every thread participating in | |
50 | __gnu_parallel::parallel_random_shuffle(). */ | |
51 | template<typename RandomAccessIterator> | |
c2ba9709 JS |
52 | struct DRandomShufflingGlobalData |
53 | { | |
54 | typedef std::iterator_traits<RandomAccessIterator> traits_type; | |
55 | typedef typename traits_type::value_type value_type; | |
56 | typedef typename traits_type::difference_type difference_type; | |
57 | ||
58 | /** @brief Begin iterator of the source. */ | |
59 | RandomAccessIterator& source; | |
60 | ||
61 | /** @brief Temporary arrays for each thread. */ | |
62 | value_type** temporaries; | |
63 | ||
64 | /** @brief Two-dimensional array to hold the thread-bin distribution. | |
65 | * | |
66 | * Dimensions (num_threads + 1) x (num_bins + 1). */ | |
67 | difference_type** dist; | |
68 | ||
69 | /** @brief Start indexes of the threads' chunks. */ | |
70 | difference_type* starts; | |
71 | ||
72 | /** @brief Number of the thread that will further process the | |
73 | corresponding bin. */ | |
74 | thread_index_t* bin_proc; | |
75 | ||
76 | /** @brief Number of bins to distribute to. */ | |
77 | int num_bins; | |
78 | ||
79 | /** @brief Number of bits needed to address the bins. */ | |
80 | int num_bits; | |
81 | ||
82 | /** @brief Constructor. */ | |
83 | DRandomShufflingGlobalData(RandomAccessIterator& _source) | |
84 | : source(_source) { } | |
85 | }; | |
86 | ||
e683ee2a JS |
87 | /** @brief Local data for a thread participating in |
88 | __gnu_parallel::parallel_random_shuffle(). | |
89 | */ | |
90 | template<typename RandomAccessIterator, typename RandomNumberGenerator> | |
c2ba9709 JS |
91 | struct DRSSorterPU |
92 | { | |
93 | /** @brief Number of threads participating in total. */ | |
94 | int num_threads; | |
95 | ||
c2ba9709 JS |
96 | /** @brief Begin index for bins taken care of by this thread. */ |
97 | bin_index bins_begin; | |
98 | ||
99 | /** @brief End index for bins taken care of by this thread. */ | |
100 | bin_index bins_end; | |
101 | ||
102 | /** @brief Random seed for this thread. */ | |
6df548d2 | 103 | uint32 seed; |
c2ba9709 JS |
104 | |
105 | /** @brief Pointer to global data. */ | |
106 | DRandomShufflingGlobalData<RandomAccessIterator>* sd; | |
107 | }; | |
108 | ||
e683ee2a JS |
109 | /** @brief Generate a random number in @c [0,2^logp). |
110 | * @param logp Logarithm (basis 2) of the upper range bound. | |
111 | * @param rng Random number generator to use. | |
112 | */ | |
113 | template<typename RandomNumberGenerator> | |
6f95a65a BK |
114 | inline int |
115 | random_number_pow2(int logp, RandomNumberGenerator& rng) | |
116 | { return rng.genrand_bits(logp); } | |
c2ba9709 | 117 | |
e683ee2a JS |
118 | /** @brief Random shuffle code executed by each thread. |
119 | * @param pus Array of thread-local data records. */ | |
120 | template<typename RandomAccessIterator, typename RandomNumberGenerator> | |
5817ff8e | 121 | void |
e683ee2a JS |
122 | parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator, |
123 | RandomNumberGenerator>* pus) | |
c2ba9709 JS |
124 | { |
125 | typedef std::iterator_traits<RandomAccessIterator> traits_type; | |
126 | typedef typename traits_type::value_type value_type; | |
127 | typedef typename traits_type::difference_type difference_type; | |
128 | ||
e683ee2a JS |
129 | thread_index_t iam = omp_get_thread_num(); |
130 | DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam]; | |
c2ba9709 | 131 | DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd; |
c2ba9709 JS |
132 | |
133 | // Indexing: dist[bin][processor] | |
134 | difference_type length = sd->starts[iam + 1] - sd->starts[iam]; | |
135 | bin_index* oracles = new bin_index[length]; | |
136 | difference_type* dist = new difference_type[sd->num_bins + 1]; | |
137 | bin_index* bin_proc = new bin_index[sd->num_bins]; | |
138 | value_type** temporaries = new value_type*[d->num_threads]; | |
139 | ||
140 | // Compute oracles and count appearances. | |
1661473b | 141 | for (bin_index b = 0; b < sd->num_bins + 1; ++b) |
c2ba9709 JS |
142 | dist[b] = 0; |
143 | int num_bits = sd->num_bits; | |
144 | ||
145 | random_number rng(d->seed); | |
146 | ||
147 | // First main loop. | |
1661473b | 148 | for (difference_type i = 0; i < length; ++i) |
c2ba9709 | 149 | { |
e683ee2a JS |
150 | bin_index oracle = random_number_pow2(num_bits, rng); |
151 | oracles[i] = oracle; | |
c2ba9709 | 152 | |
e683ee2a | 153 | // To allow prefix (partial) sum. |
1661473b | 154 | ++(dist[oracle + 1]); |
c2ba9709 JS |
155 | } |
156 | ||
1661473b | 157 | for (bin_index b = 0; b < sd->num_bins + 1; ++b) |
c2ba9709 JS |
158 | sd->dist[b][iam + 1] = dist[b]; |
159 | ||
e683ee2a | 160 | # pragma omp barrier |
c2ba9709 | 161 | |
e683ee2a | 162 | # pragma omp single |
c2ba9709 JS |
163 | { |
164 | // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the | |
165 | // total number of items in bin s | |
1661473b | 166 | for (bin_index s = 0; s < sd->num_bins; ++s) |
e683ee2a JS |
167 | __gnu_sequential::partial_sum(sd->dist[s + 1], |
168 | sd->dist[s + 1] + d->num_threads + 1, | |
169 | sd->dist[s + 1]); | |
c2ba9709 JS |
170 | } |
171 | ||
e683ee2a | 172 | # pragma omp barrier |
c2ba9709 | 173 | |
c2ba9709 | 174 | sequence_index_t offset = 0, global_offset = 0; |
1661473b | 175 | for (bin_index s = 0; s < d->bins_begin; ++s) |
c2ba9709 JS |
176 | global_offset += sd->dist[s + 1][d->num_threads]; |
177 | ||
e683ee2a | 178 | # pragma omp barrier |
c2ba9709 | 179 | |
1661473b | 180 | for (bin_index s = d->bins_begin; s < d->bins_end; ++s) |
c2ba9709 | 181 | { |
1661473b | 182 | for (int t = 0; t < d->num_threads + 1; ++t) |
c2ba9709 JS |
183 | sd->dist[s + 1][t] += offset; |
184 | offset = sd->dist[s + 1][d->num_threads]; | |
185 | } | |
186 | ||
e683ee2a JS |
187 | sd->temporaries[iam] = static_cast<value_type*>( |
188 | ::operator new(sizeof(value_type) * offset)); | |
c2ba9709 | 189 | |
e683ee2a | 190 | # pragma omp barrier |
c2ba9709 | 191 | |
c2ba9709 | 192 | // Draw local copies to avoid false sharing. |
1661473b | 193 | for (bin_index b = 0; b < sd->num_bins + 1; ++b) |
c2ba9709 | 194 | dist[b] = sd->dist[b][iam]; |
1661473b | 195 | for (bin_index b = 0; b < sd->num_bins; ++b) |
c2ba9709 | 196 | bin_proc[b] = sd->bin_proc[b]; |
1661473b | 197 | for (thread_index_t t = 0; t < d->num_threads; ++t) |
c2ba9709 JS |
198 | temporaries[t] = sd->temporaries[t]; |
199 | ||
200 | RandomAccessIterator source = sd->source; | |
201 | difference_type start = sd->starts[iam]; | |
202 | ||
203 | // Distribute according to oracles, second main loop. | |
1661473b | 204 | for (difference_type i = 0; i < length; ++i) |
c2ba9709 | 205 | { |
e683ee2a JS |
206 | bin_index target_bin = oracles[i]; |
207 | thread_index_t target_p = bin_proc[target_bin]; | |
c2ba9709 | 208 | |
e683ee2a | 209 | // Last column [d->num_threads] stays unchanged. |
5817ff8e PC |
210 | ::new(&(temporaries[target_p][dist[target_bin + 1]++])) |
211 | value_type(*(source + i + start)); | |
c2ba9709 JS |
212 | } |
213 | ||
214 | delete[] oracles; | |
215 | delete[] dist; | |
216 | delete[] bin_proc; | |
217 | delete[] temporaries; | |
218 | ||
e683ee2a | 219 | # pragma omp barrier |
c2ba9709 | 220 | |
c2ba9709 | 221 | // Shuffle bins internally. |
1661473b | 222 | for (bin_index b = d->bins_begin; b < d->bins_end; ++b) |
c2ba9709 | 223 | { |
e683ee2a JS |
224 | value_type* begin = |
225 | sd->temporaries[iam] + | |
226 | ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]), | |
227 | * end = | |
228 | sd->temporaries[iam] + sd->dist[b + 1][d->num_threads]; | |
229 | sequential_random_shuffle(begin, end, rng); | |
230 | std::copy(begin, end, sd->source + global_offset + | |
231 | ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads])); | |
c2ba9709 JS |
232 | } |
233 | ||
fac9044d | 234 | ::operator delete(sd->temporaries[iam]); |
c2ba9709 JS |
235 | } |
236 | ||
e683ee2a JS |
237 | /** @brief Round up to the next greater power of 2. |
238 | * @param x Integer to round up */ | |
239 | template<typename T> | |
6f95a65a BK |
240 | T |
241 | round_up_to_pow2(T x) | |
c2ba9709 JS |
242 | { |
243 | if (x <= 1) | |
244 | return 1; | |
245 | else | |
c38b84d8 | 246 | return (T)1 << (__log2(x - 1) + 1); |
c2ba9709 JS |
247 | } |
248 | ||
e683ee2a JS |
249 | /** @brief Main parallel random shuffle step. |
250 | * @param begin Begin iterator of sequence. | |
251 | * @param end End iterator of sequence. | |
252 | * @param n Length of sequence. | |
253 | * @param num_threads Number of threads to use. | |
254 | * @param rng Random number generator to use. | |
255 | */ | |
256 | template<typename RandomAccessIterator, typename RandomNumberGenerator> | |
5817ff8e PC |
257 | void |
258 | parallel_random_shuffle_drs(RandomAccessIterator begin, | |
259 | RandomAccessIterator end, | |
260 | typename std::iterator_traits | |
261 | <RandomAccessIterator>::difference_type n, | |
262 | thread_index_t num_threads, | |
263 | RandomNumberGenerator& rng) | |
c2ba9709 JS |
264 | { |
265 | typedef std::iterator_traits<RandomAccessIterator> traits_type; | |
266 | typedef typename traits_type::value_type value_type; | |
267 | typedef typename traits_type::difference_type difference_type; | |
268 | ||
269 | _GLIBCXX_CALL(n) | |
270 | ||
ee1b5fc5 BK |
271 | const _Settings& __s = _Settings::get(); |
272 | ||
c2ba9709 JS |
273 | if (num_threads > n) |
274 | num_threads = static_cast<thread_index_t>(n); | |
275 | ||
276 | bin_index num_bins, num_bins_cache; | |
277 | ||
278 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 | |
279 | // Try the L1 cache first. | |
280 | ||
281 | // Must fit into L1. | |
e683ee2a | 282 | num_bins_cache = std::max<difference_type>( |
ee1b5fc5 | 283 | 1, n / (__s.L1_cache_size_lb / sizeof(value_type))); |
c2ba9709 JS |
284 | num_bins_cache = round_up_to_pow2(num_bins_cache); |
285 | ||
286 | // No more buckets than TLB entries, power of 2 | |
287 | // Power of 2 and at least one element per bin, at most the TLB size. | |
e683ee2a | 288 | num_bins = std::min<difference_type>(n, num_bins_cache); |
c2ba9709 JS |
289 | |
290 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB | |
291 | // 2 TLB entries needed per bin. | |
ee1b5fc5 | 292 | num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins); |
c2ba9709 JS |
293 | #endif |
294 | num_bins = round_up_to_pow2(num_bins); | |
295 | ||
296 | if (num_bins < num_bins_cache) | |
297 | { | |
298 | #endif | |
e683ee2a JS |
299 | // Now try the L2 cache |
300 | // Must fit into L2 | |
301 | num_bins_cache = static_cast<bin_index>(std::max<difference_type>( | |
ee1b5fc5 | 302 | 1, n / (__s.L2_cache_size / sizeof(value_type)))); |
e683ee2a JS |
303 | num_bins_cache = round_up_to_pow2(num_bins_cache); |
304 | ||
305 | // No more buckets than TLB entries, power of 2. | |
306 | num_bins = static_cast<bin_index>( | |
307 | std::min(n, static_cast<difference_type>(num_bins_cache))); | |
308 | // Power of 2 and at least one element per bin, at most the TLB size. | |
c2ba9709 | 309 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
e683ee2a JS |
310 | // 2 TLB entries needed per bin. |
311 | num_bins = std::min( | |
ee1b5fc5 | 312 | static_cast<difference_type>(__s.TLB_size / 2), num_bins); |
c2ba9709 | 313 | #endif |
e683ee2a | 314 | num_bins = round_up_to_pow2(num_bins); |
c2ba9709 JS |
315 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
316 | } | |
317 | #endif | |
318 | ||
e683ee2a | 319 | num_threads = std::min<bin_index>(num_threads, num_bins); |
c2ba9709 JS |
320 | |
321 | if (num_threads <= 1) | |
322 | return sequential_random_shuffle(begin, end, rng); | |
323 | ||
324 | DRandomShufflingGlobalData<RandomAccessIterator> sd(begin); | |
e683ee2a JS |
325 | DRSSorterPU<RandomAccessIterator, random_number >* pus; |
326 | difference_type* starts; | |
c2ba9709 | 327 | |
e683ee2a | 328 | # pragma omp parallel num_threads(num_threads) |
c2ba9709 | 329 | { |
3611e176 | 330 | thread_index_t num_threads = omp_get_num_threads(); |
e683ee2a JS |
331 | # pragma omp single |
332 | { | |
333 | pus = new DRSSorterPU<RandomAccessIterator, random_number> | |
334 | [num_threads]; | |
335 | ||
336 | sd.temporaries = new value_type*[num_threads]; | |
337 | sd.dist = new difference_type*[num_bins + 1]; | |
338 | sd.bin_proc = new thread_index_t[num_bins]; | |
1661473b | 339 | for (bin_index b = 0; b < num_bins + 1; ++b) |
e683ee2a | 340 | sd.dist[b] = new difference_type[num_threads + 1]; |
1661473b | 341 | for (bin_index b = 0; b < (num_bins + 1); ++b) |
e683ee2a JS |
342 | { |
343 | sd.dist[0][0] = 0; | |
344 | sd.dist[b][0] = 0; | |
345 | } | |
346 | starts = sd.starts = new difference_type[num_threads + 1]; | |
347 | int bin_cursor = 0; | |
348 | sd.num_bins = num_bins; | |
c38b84d8 | 349 | sd.num_bits = __log2(num_bins); |
e683ee2a JS |
350 | |
351 | difference_type chunk_length = n / num_threads, | |
352 | split = n % num_threads, start = 0; | |
353 | difference_type bin_chunk_length = num_bins / num_threads, | |
354 | bin_split = num_bins % num_threads; | |
1661473b | 355 | for (thread_index_t i = 0; i < num_threads; ++i) |
e683ee2a JS |
356 | { |
357 | starts[i] = start; | |
358 | start += (i < split) ? (chunk_length + 1) : chunk_length; | |
359 | int j = pus[i].bins_begin = bin_cursor; | |
360 | ||
361 | // Range of bins for this processor. | |
362 | bin_cursor += (i < bin_split) ? | |
363 | (bin_chunk_length + 1) : bin_chunk_length; | |
364 | pus[i].bins_end = bin_cursor; | |
1661473b | 365 | for (; j < bin_cursor; ++j) |
e683ee2a JS |
366 | sd.bin_proc[j] = i; |
367 | pus[i].num_threads = num_threads; | |
6df548d2 | 368 | pus[i].seed = rng(std::numeric_limits<uint32>::max()); |
e683ee2a JS |
369 | pus[i].sd = &sd; |
370 | } | |
371 | starts[num_threads] = start; | |
372 | } //single | |
3611e176 JS |
373 | // Now shuffle in parallel. |
374 | parallel_random_shuffle_drs_pu(pus); | |
375 | } // parallel | |
c2ba9709 JS |
376 | |
377 | delete[] starts; | |
378 | delete[] sd.bin_proc; | |
1661473b | 379 | for (int s = 0; s < (num_bins + 1); ++s) |
c2ba9709 JS |
380 | delete[] sd.dist[s]; |
381 | delete[] sd.dist; | |
382 | delete[] sd.temporaries; | |
383 | ||
384 | delete[] pus; | |
385 | } | |
386 | ||
e683ee2a JS |
387 | /** @brief Sequential cache-efficient random shuffle. |
388 | * @param begin Begin iterator of sequence. | |
389 | * @param end End iterator of sequence. | |
390 | * @param rng Random number generator to use. | |
391 | */ | |
392 | template<typename RandomAccessIterator, typename RandomNumberGenerator> | |
5817ff8e | 393 | void |
6f95a65a | 394 | sequential_random_shuffle(RandomAccessIterator begin, |
e683ee2a JS |
395 | RandomAccessIterator end, |
396 | RandomNumberGenerator& rng) | |
c2ba9709 JS |
397 | { |
398 | typedef std::iterator_traits<RandomAccessIterator> traits_type; | |
399 | typedef typename traits_type::value_type value_type; | |
400 | typedef typename traits_type::difference_type difference_type; | |
401 | ||
402 | difference_type n = end - begin; | |
ee1b5fc5 | 403 | const _Settings& __s = _Settings::get(); |
c2ba9709 JS |
404 | |
405 | bin_index num_bins, num_bins_cache; | |
406 | ||
407 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 | |
408 | // Try the L1 cache first, must fit into L1. | |
e683ee2a JS |
409 | num_bins_cache = |
410 | std::max<difference_type> | |
ee1b5fc5 | 411 | (1, n / (__s.L1_cache_size_lb / sizeof(value_type))); |
c2ba9709 JS |
412 | num_bins_cache = round_up_to_pow2(num_bins_cache); |
413 | ||
414 | // No more buckets than TLB entries, power of 2 | |
415 | // Power of 2 and at least one element per bin, at most the TLB size | |
416 | num_bins = std::min(n, (difference_type)num_bins_cache); | |
417 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB | |
418 | // 2 TLB entries needed per bin | |
ee1b5fc5 | 419 | num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins); |
c2ba9709 JS |
420 | #endif |
421 | num_bins = round_up_to_pow2(num_bins); | |
422 | ||
423 | if (num_bins < num_bins_cache) | |
424 | { | |
425 | #endif | |
e683ee2a JS |
426 | // Now try the L2 cache, must fit into L2. |
427 | num_bins_cache = | |
428 | static_cast<bin_index>(std::max<difference_type>( | |
ee1b5fc5 | 429 | 1, n / (__s.L2_cache_size / sizeof(value_type)))); |
e683ee2a | 430 | num_bins_cache = round_up_to_pow2(num_bins_cache); |
c2ba9709 | 431 | |
e683ee2a JS |
432 | // No more buckets than TLB entries, power of 2 |
433 | // Power of 2 and at least one element per bin, at most the TLB size. | |
434 | num_bins = static_cast<bin_index> | |
435 | (std::min(n, static_cast<difference_type>(num_bins_cache))); | |
c2ba9709 JS |
436 | |
437 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB | |
e683ee2a JS |
438 | // 2 TLB entries needed per bin |
439 | num_bins = | |
ee1b5fc5 | 440 | std::min<difference_type>(__s.TLB_size / 2, num_bins); |
c2ba9709 | 441 | #endif |
e683ee2a | 442 | num_bins = round_up_to_pow2(num_bins); |
c2ba9709 JS |
443 | #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
444 | } | |
445 | #endif | |
446 | ||
c38b84d8 | 447 | int num_bits = __log2(num_bins); |
c2ba9709 JS |
448 | |
449 | if (num_bins > 1) | |
450 | { | |
e683ee2a JS |
451 | value_type* target = static_cast<value_type*>( |
452 | ::operator new(sizeof(value_type) * n)); | |
453 | bin_index* oracles = new bin_index[n]; | |
454 | difference_type* dist0 = new difference_type[num_bins + 1], | |
455 | * dist1 = new difference_type[num_bins + 1]; | |
456 | ||
1661473b | 457 | for (int b = 0; b < num_bins + 1; ++b) |
e683ee2a JS |
458 | dist0[b] = 0; |
459 | ||
460 | random_number bitrng(rng(0xFFFFFFFF)); | |
461 | ||
1661473b | 462 | for (difference_type i = 0; i < n; ++i) |
e683ee2a JS |
463 | { |
464 | bin_index oracle = random_number_pow2(num_bits, bitrng); | |
465 | oracles[i] = oracle; | |
466 | ||
467 | // To allow prefix (partial) sum. | |
1661473b | 468 | ++(dist0[oracle + 1]); |
e683ee2a JS |
469 | } |
470 | ||
471 | // Sum up bins. | |
472 | __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0); | |
473 | ||
1661473b | 474 | for (int b = 0; b < num_bins + 1; ++b) |
e683ee2a JS |
475 | dist1[b] = dist0[b]; |
476 | ||
477 | // Distribute according to oracles. | |
1661473b | 478 | for (difference_type i = 0; i < n; ++i) |
58a6ef4b | 479 | ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i)); |
e683ee2a | 480 | |
1661473b | 481 | for (int b = 0; b < num_bins; ++b) |
e683ee2a JS |
482 | { |
483 | sequential_random_shuffle(target + dist1[b], | |
484 | target + dist1[b + 1], | |
485 | rng); | |
486 | } | |
487 | ||
361eefe7 JS |
488 | // Copy elements back. |
489 | std::copy(target, target + n, begin); | |
490 | ||
e683ee2a JS |
491 | delete[] dist0; |
492 | delete[] dist1; | |
493 | delete[] oracles; | |
fac9044d | 494 | ::operator delete(target); |
c2ba9709 JS |
495 | } |
496 | else | |
497 | __gnu_sequential::random_shuffle(begin, end, rng); | |
498 | } | |
499 | ||
e683ee2a JS |
500 | /** @brief Parallel random public call. |
501 | * @param begin Begin iterator of sequence. | |
502 | * @param end End iterator of sequence. | |
503 | * @param rng Random number generator to use. | |
504 | */ | |
505 | template<typename RandomAccessIterator, typename RandomNumberGenerator> | |
c2ba9709 | 506 | inline void |
e683ee2a JS |
507 | parallel_random_shuffle(RandomAccessIterator begin, |
508 | RandomAccessIterator end, | |
509 | RandomNumberGenerator rng = random_number()) | |
c2ba9709 JS |
510 | { |
511 | typedef std::iterator_traits<RandomAccessIterator> traits_type; | |
512 | typedef typename traits_type::difference_type difference_type; | |
513 | difference_type n = end - begin; | |
514 | parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ; | |
515 | } | |
516 | ||
517 | } | |
518 | ||
cbcd1e45 | 519 | #endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */ |