]>
Commit | Line | Data |
---|---|---|
94dabea7 | 1 | // -*- C++ -*- |
c2ba9709 | 2 | |
531898c3 | 3 | // Copyright (C) 2007, 2008 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
8 | // Foundation; either version 2, or (at your option) any later | |
9 | // version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
28dac70a | 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
c2ba9709 JS |
14 | // General Public License for more details. |
15 | ||
16 | // You should have received a copy of the GNU General Public License | |
17 | // along with this library; see the file COPYING. If not, write to | |
18 | // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, | |
19 | // MA 02111-1307, USA. | |
20 | ||
21 | // As a special exception, you may use this file as part of a free | |
22 | // software library without restriction. Specifically, if other files | |
23 | // instantiate templates or use macros or inline functions from this | |
24 | // file, or you compile this file and link it with other files to | |
25 | // produce an executable, this file does not by itself cause the | |
26 | // resulting executable to be covered by the GNU General Public | |
27 | // License. This exception does not however invalidate any other | |
28 | // reasons why the executable file might be covered by the GNU General | |
29 | // Public License. | |
30 | ||
31 | /** @file parallel/find.h | |
32 | * @brief Parallel implementation base for std::find(), std::equal() | |
33 | * and related functions. | |
34 | * This file is a GNU parallel extension to the Standard C++ Library. | |
35 | */ | |
36 | ||
37 | // Written by Felix Putze and Johannes Singler. | |
38 | ||
39 | #ifndef _GLIBCXX_PARALLEL_FIND_H | |
40 | #define _GLIBCXX_PARALLEL_FIND_H 1 | |
41 | ||
42 | #include <bits/stl_algobase.h> | |
43 | ||
44 | #include <parallel/features.h> | |
45 | #include <parallel/parallel.h> | |
46 | #include <parallel/compatibility.h> | |
47 | #include <parallel/equally_split.h> | |
48 | ||
49 | namespace __gnu_parallel | |
50 | { | |
e683ee2a JS |
51 | /** |
52 | * @brief Parallel std::find, switch for different algorithms. | |
53 | * @param begin1 Begin iterator of first sequence. | |
54 | * @param end1 End iterator of first sequence. | |
55 | * @param begin2 Begin iterator of second sequence. Must have same | |
56 | * length as first sequence. | |
57 | * @param pred Find predicate. | |
58 | * @param selector Functionality (e. g. std::find_if (), std::equal(),...) | |
59 | * @return Place of finding in both sequences. | |
60 | */ | |
531898c3 PC |
61 | template<typename RandomAccessIterator1, |
62 | typename RandomAccessIterator2, | |
63 | typename Pred, | |
64 | typename Selector> | |
5817ff8e | 65 | inline std::pair<RandomAccessIterator1, RandomAccessIterator2> |
c2ba9709 | 66 | find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, |
e683ee2a | 67 | RandomAccessIterator2 begin2, Pred pred, Selector selector) |
c2ba9709 JS |
68 | { |
69 | switch (Settings::find_distribution) | |
70 | { | |
71 | case Settings::GROWING_BLOCKS: | |
e683ee2a | 72 | return find_template(begin1, end1, begin2, pred, selector, |
5817ff8e | 73 | growing_blocks_tag()); |
c2ba9709 | 74 | case Settings::CONSTANT_SIZE_BLOCKS: |
e683ee2a | 75 | return find_template(begin1, end1, begin2, pred, selector, |
5817ff8e | 76 | constant_size_blocks_tag()); |
c2ba9709 | 77 | case Settings::EQUAL_SPLIT: |
e683ee2a | 78 | return find_template(begin1, end1, begin2, pred, selector, |
5817ff8e | 79 | equal_split_tag()); |
c2ba9709 | 80 | default: |
e683ee2a JS |
81 | _GLIBCXX_PARALLEL_ASSERT(false); |
82 | return std::make_pair(begin1, begin2); | |
c2ba9709 JS |
83 | } |
84 | } | |
85 | ||
86 | #if _GLIBCXX_FIND_EQUAL_SPLIT | |
87 | ||
e683ee2a JS |
88 | /** |
89 | * @brief Parallel std::find, equal splitting variant. | |
90 | * @param begin1 Begin iterator of first sequence. | |
91 | * @param end1 End iterator of first sequence. | |
92 | * @param begin2 Begin iterator of second sequence. Second sequence | |
93 | * must have same length as first sequence. | |
94 | * @param pred Find predicate. | |
95 | * @param selector Functionality (e. g. std::find_if (), std::equal(),...) | |
96 | * @return Place of finding in both sequences. | |
97 | */ | |
531898c3 PC |
98 | template<typename RandomAccessIterator1, |
99 | typename RandomAccessIterator2, | |
100 | typename Pred, | |
101 | typename Selector> | |
c2ba9709 | 102 | std::pair<RandomAccessIterator1, RandomAccessIterator2> |
e683ee2a JS |
103 | find_template(RandomAccessIterator1 begin1, |
104 | RandomAccessIterator1 end1, | |
105 | RandomAccessIterator2 begin2, | |
106 | Pred pred, | |
107 | Selector selector, | |
108 | equal_split_tag) | |
c2ba9709 JS |
109 | { |
110 | _GLIBCXX_CALL(end1 - begin1) | |
111 | ||
112 | typedef std::iterator_traits<RandomAccessIterator1> traits_type; | |
113 | typedef typename traits_type::difference_type difference_type; | |
114 | typedef typename traits_type::value_type value_type; | |
115 | ||
116 | difference_type length = end1 - begin1; | |
c2ba9709 | 117 | difference_type result = length; |
e683ee2a | 118 | difference_type* borders; |
c2ba9709 | 119 | |
59b567fb JS |
120 | omp_lock_t result_lock; |
121 | omp_init_lock(&result_lock); | |
c2ba9709 | 122 | |
e683ee2a JS |
123 | thread_index_t num_threads = get_max_threads(); |
124 | # pragma omp parallel num_threads(num_threads) | |
125 | { | |
126 | # pragma omp single | |
127 | { | |
128 | num_threads = omp_get_num_threads(); | |
129 | borders = new difference_type[num_threads + 1]; | |
130 | equally_split(length, num_threads, borders); | |
131 | } //single | |
132 | ||
133 | thread_index_t iam = omp_get_thread_num(); | |
134 | difference_type start = borders[iam], stop = borders[iam + 1]; | |
135 | ||
136 | RandomAccessIterator1 i1 = begin1 + start; | |
137 | RandomAccessIterator2 i2 = begin2 + start; | |
138 | for (difference_type pos = start; pos < stop; ++pos) | |
139 | { | |
140 | #pragma omp flush(result) | |
141 | // Result has been set to something lower. | |
142 | if (result < pos) | |
59b567fb | 143 | break; |
e683ee2a JS |
144 | |
145 | if (selector(i1, i2, pred)) | |
146 | { | |
147 | omp_set_lock(&result_lock); | |
148 | if (pos < result) | |
149 | result = pos; | |
150 | omp_unset_lock(&result_lock); | |
151 | break; | |
152 | } | |
153 | ++i1; | |
154 | ++i2; | |
155 | } | |
156 | } //parallel | |
59b567fb JS |
157 | |
158 | omp_destroy_lock(&result_lock); | |
e683ee2a JS |
159 | delete[] borders; |
160 | ||
5817ff8e PC |
161 | return |
162 | std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, | |
163 | begin2 + result); | |
c2ba9709 JS |
164 | } |
165 | ||
166 | #endif | |
167 | ||
168 | #if _GLIBCXX_FIND_GROWING_BLOCKS | |
169 | ||
e683ee2a JS |
170 | /** |
171 | * @brief Parallel std::find, growing block size variant. | |
172 | * @param begin1 Begin iterator of first sequence. | |
173 | * @param end1 End iterator of first sequence. | |
174 | * @param begin2 Begin iterator of second sequence. Second sequence | |
175 | * must have same length as first sequence. | |
176 | * @param pred Find predicate. | |
177 | * @param selector Functionality (e. g. std::find_if (), std::equal(),...) | |
178 | * @return Place of finding in both sequences. | |
179 | * @see __gnu_parallel::Settings::find_sequential_search_size | |
180 | * @see __gnu_parallel::Settings::find_initial_block_size | |
181 | * @see __gnu_parallel::Settings::find_maximum_block_size | |
182 | * @see __gnu_parallel::Settings::find_increasing_factor | |
183 | * | |
184 | * There are two main differences between the growing blocks and | |
185 | * the constant-size blocks variants. | |
186 | * 1. For GB, the block size grows; for CSB, the block size is fixed. | |
187 | ||
188 | * 2. For GB, the blocks are allocated dynamically; | |
189 | * for CSB, the blocks are allocated in a predetermined manner, | |
190 | * namely spacial round-robin. | |
191 | */ | |
531898c3 PC |
192 | template<typename RandomAccessIterator1, |
193 | typename RandomAccessIterator2, | |
194 | typename Pred, | |
195 | typename Selector> | |
c2ba9709 JS |
196 | std::pair<RandomAccessIterator1, RandomAccessIterator2> |
197 | find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, | |
e683ee2a JS |
198 | RandomAccessIterator2 begin2, Pred pred, Selector selector, |
199 | growing_blocks_tag) | |
c2ba9709 JS |
200 | { |
201 | _GLIBCXX_CALL(end1 - begin1) | |
202 | ||
203 | typedef std::iterator_traits<RandomAccessIterator1> traits_type; | |
204 | typedef typename traits_type::difference_type difference_type; | |
205 | typedef typename traits_type::value_type value_type; | |
206 | ||
207 | difference_type length = end1 - begin1; | |
208 | ||
5817ff8e PC |
209 | difference_type sequential_search_size = |
210 | std::min<difference_type>(length, Settings::find_sequential_search_size); | |
c2ba9709 JS |
211 | |
212 | // Try it sequentially first. | |
213 | std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = | |
e683ee2a JS |
214 | selector.sequential_algorithm( |
215 | begin1, begin1 + sequential_search_size, begin2, pred); | |
c2ba9709 JS |
216 | |
217 | if (find_seq_result.first != (begin1 + sequential_search_size)) | |
218 | return find_seq_result; | |
219 | ||
220 | // Index of beginning of next free block (after sequential find). | |
e683ee2a | 221 | difference_type next_block_start = sequential_search_size; |
c2ba9709 | 222 | difference_type result = length; |
c2ba9709 | 223 | |
59b567fb JS |
224 | omp_lock_t result_lock; |
225 | omp_init_lock(&result_lock); | |
226 | ||
e683ee2a JS |
227 | thread_index_t num_threads = get_max_threads(); |
228 | # pragma omp parallel shared(result) num_threads(num_threads) | |
229 | { | |
230 | # pragma omp single | |
231 | num_threads = omp_get_num_threads(); | |
232 | ||
233 | // Not within first k elements -> start parallel. | |
234 | thread_index_t iam = omp_get_thread_num(); | |
235 | ||
236 | difference_type block_size = Settings::find_initial_block_size; | |
237 | difference_type start = | |
238 | fetch_and_add<difference_type>(&next_block_start, block_size); | |
239 | ||
240 | // Get new block, update pointer to next block. | |
241 | difference_type stop = | |
242 | std::min<difference_type>(length, start + block_size); | |
243 | ||
244 | std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result; | |
245 | ||
246 | while (start < length) | |
247 | { | |
248 | # pragma omp flush(result) | |
249 | // Get new value of result. | |
250 | if (result < start) | |
251 | { | |
252 | // No chance to find first element. | |
253 | break; | |
254 | } | |
255 | ||
256 | local_result = selector.sequential_algorithm( | |
257 | begin1 + start, begin1 + stop, begin2 + start, pred); | |
258 | if (local_result.first != (begin1 + stop)) | |
259 | { | |
260 | omp_set_lock(&result_lock); | |
261 | if ((local_result.first - begin1) < result) | |
262 | { | |
263 | result = local_result.first - begin1; | |
264 | ||
265 | // Result cannot be in future blocks, stop algorithm. | |
266 | fetch_and_add<difference_type>(&next_block_start, length); | |
267 | } | |
268 | omp_unset_lock(&result_lock); | |
269 | } | |
270 | ||
5817ff8e PC |
271 | block_size = |
272 | std::min<difference_type>(block_size | |
273 | * Settings::find_increasing_factor, | |
274 | Settings::find_maximum_block_size); | |
e683ee2a JS |
275 | |
276 | // Get new block, update pointer to next block. | |
277 | start = | |
5817ff8e PC |
278 | fetch_and_add<difference_type>(&next_block_start, block_size); |
279 | stop = ((length < (start + block_size)) | |
280 | ? length : (start + block_size)); | |
e683ee2a JS |
281 | } |
282 | } //parallel | |
c2ba9709 | 283 | |
59b567fb JS |
284 | omp_destroy_lock(&result_lock); |
285 | ||
c2ba9709 | 286 | // Return iterator on found element. |
5817ff8e PC |
287 | return |
288 | std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, | |
289 | begin2 + result); | |
c2ba9709 JS |
290 | } |
291 | ||
292 | #endif | |
293 | ||
294 | #if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS | |
295 | ||
e683ee2a JS |
296 | /** |
297 | * @brief Parallel std::find, constant block size variant. | |
298 | * @param begin1 Begin iterator of first sequence. | |
299 | * @param end1 End iterator of first sequence. | |
300 | * @param begin2 Begin iterator of second sequence. Second sequence | |
301 | * must have same length as first sequence. | |
302 | * @param pred Find predicate. | |
303 | * @param selector Functionality (e. g. std::find_if (), std::equal(),...) | |
304 | * @return Place of finding in both sequences. | |
305 | * @see __gnu_parallel::Settings::find_sequential_search_size | |
306 | * @see __gnu_parallel::Settings::find_block_size | |
307 | * There are two main differences between the growing blocks and the | |
308 | * constant-size blocks variants. | |
309 | * 1. For GB, the block size grows; for CSB, the block size is fixed. | |
310 | * 2. For GB, the blocks are allocated dynamically; for CSB, the | |
311 | * blocks are allocated in a predetermined manner, namely spacial | |
312 | * round-robin. | |
313 | */ | |
531898c3 PC |
314 | template<typename RandomAccessIterator1, |
315 | typename RandomAccessIterator2, | |
316 | typename Pred, | |
317 | typename Selector> | |
c2ba9709 JS |
318 | std::pair<RandomAccessIterator1, RandomAccessIterator2> |
319 | find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, | |
e683ee2a JS |
320 | RandomAccessIterator2 begin2, Pred pred, Selector selector, |
321 | constant_size_blocks_tag) | |
c2ba9709 JS |
322 | { |
323 | _GLIBCXX_CALL(end1 - begin1) | |
324 | typedef std::iterator_traits<RandomAccessIterator1> traits_type; | |
325 | typedef typename traits_type::difference_type difference_type; | |
326 | typedef typename traits_type::value_type value_type; | |
327 | ||
328 | difference_type length = end1 - begin1; | |
329 | ||
e683ee2a JS |
330 | difference_type sequential_search_size = std::min<difference_type>( |
331 | length, Settings::find_sequential_search_size); | |
c2ba9709 JS |
332 | |
333 | // Try it sequentially first. | |
334 | std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = | |
e683ee2a JS |
335 | selector.sequential_algorithm(begin1, begin1 + sequential_search_size, |
336 | begin2, pred); | |
c2ba9709 JS |
337 | |
338 | if (find_seq_result.first != (begin1 + sequential_search_size)) | |
339 | return find_seq_result; | |
340 | ||
341 | difference_type result = length; | |
59b567fb JS |
342 | omp_lock_t result_lock; |
343 | omp_init_lock(&result_lock); | |
344 | ||
c2ba9709 | 345 | // Not within first sequential_search_size elements -> start parallel. |
e683ee2a JS |
346 | |
347 | thread_index_t num_threads = get_max_threads(); | |
348 | # pragma omp parallel shared(result) num_threads(num_threads) | |
349 | { | |
350 | # pragma omp single | |
351 | num_threads = omp_get_num_threads(); | |
352 | ||
353 | thread_index_t iam = omp_get_thread_num(); | |
354 | difference_type block_size = Settings::find_initial_block_size; | |
355 | ||
356 | // First element of thread's current iteration. | |
357 | difference_type iteration_start = sequential_search_size; | |
358 | ||
359 | // Where to work (initialization). | |
360 | difference_type start = iteration_start + iam * block_size; | |
361 | difference_type stop = | |
362 | std::min<difference_type>(length, start + block_size); | |
363 | ||
364 | std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result; | |
365 | ||
366 | while (start < length) | |
367 | { | |
368 | // Get new value of result. | |
369 | # pragma omp flush(result) | |
370 | // No chance to find first element. | |
371 | if (result < start) | |
372 | break; | |
373 | local_result = selector.sequential_algorithm( | |
374 | begin1 + start, begin1 + stop, | |
375 | begin2 + start, pred); | |
376 | if (local_result.first != (begin1 + stop)) | |
377 | { | |
378 | omp_set_lock(&result_lock); | |
379 | if ((local_result.first - begin1) < result) | |
380 | result = local_result.first - begin1; | |
381 | omp_unset_lock(&result_lock); | |
382 | // Will not find better value in its interval. | |
383 | break; | |
384 | } | |
385 | ||
386 | iteration_start += num_threads * block_size; | |
387 | ||
388 | // Where to work. | |
389 | start = iteration_start + iam * block_size; | |
390 | stop = std::min<difference_type>(length, start + block_size); | |
391 | } | |
392 | } //parallel | |
c2ba9709 | 393 | |
59b567fb JS |
394 | omp_destroy_lock(&result_lock); |
395 | ||
c2ba9709 | 396 | // Return iterator on found element. |
5817ff8e PC |
397 | return |
398 | std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, | |
399 | begin2 + result); | |
c2ba9709 JS |
400 | } |
401 | #endif | |
402 | } // end namespace | |
403 | ||
404 | #endif |