]>
Commit | Line | Data |
---|---|---|
94dabea7 | 1 | // -*- C++ -*- |
c2ba9709 | 2 | |
748086b7 | 3 | // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
28dac70a | 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
c2ba9709 JS |
14 | // General Public License for more details. |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/find.h | |
26 | * @brief Parallel implementation base for std::find(), std::equal() | |
27 | * and related functions. | |
28 | * This file is a GNU parallel extension to the Standard C++ Library. | |
29 | */ | |
30 | ||
31 | // Written by Felix Putze and Johannes Singler. | |
32 | ||
33 | #ifndef _GLIBCXX_PARALLEL_FIND_H | |
34 | #define _GLIBCXX_PARALLEL_FIND_H 1 | |
35 | ||
36 | #include <bits/stl_algobase.h> | |
37 | ||
38 | #include <parallel/features.h> | |
39 | #include <parallel/parallel.h> | |
40 | #include <parallel/compatibility.h> | |
41 | #include <parallel/equally_split.h> | |
42 | ||
43 | namespace __gnu_parallel | |
44 | { | |
e683ee2a JS |
45 | /** |
46 | * @brief Parallel std::find, switch for different algorithms. | |
47 | * @param begin1 Begin iterator of first sequence. | |
48 | * @param end1 End iterator of first sequence. | |
49 | * @param begin2 Begin iterator of second sequence. Must have same | |
50 | * length as first sequence. | |
51 | * @param pred Find predicate. | |
52 | * @param selector Functionality (e. g. std::find_if (), std::equal(),...) | |
53 | * @return Place of finding in both sequences. | |
54 | */ | |
531898c3 PC |
55 | template<typename RandomAccessIterator1, |
56 | typename RandomAccessIterator2, | |
57 | typename Pred, | |
58 | typename Selector> | |
5817ff8e | 59 | inline std::pair<RandomAccessIterator1, RandomAccessIterator2> |
c2ba9709 | 60 | find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, |
e683ee2a | 61 | RandomAccessIterator2 begin2, Pred pred, Selector selector) |
c2ba9709 | 62 | { |
ee1b5fc5 | 63 | switch (_Settings::get().find_algorithm) |
c2ba9709 | 64 | { |
ee1b5fc5 | 65 | case GROWING_BLOCKS: |
e683ee2a | 66 | return find_template(begin1, end1, begin2, pred, selector, |
5817ff8e | 67 | growing_blocks_tag()); |
ee1b5fc5 | 68 | case CONSTANT_SIZE_BLOCKS: |
e683ee2a | 69 | return find_template(begin1, end1, begin2, pred, selector, |
5817ff8e | 70 | constant_size_blocks_tag()); |
ee1b5fc5 | 71 | case EQUAL_SPLIT: |
e683ee2a | 72 | return find_template(begin1, end1, begin2, pred, selector, |
5817ff8e | 73 | equal_split_tag()); |
c2ba9709 | 74 | default: |
e683ee2a JS |
75 | _GLIBCXX_PARALLEL_ASSERT(false); |
76 | return std::make_pair(begin1, begin2); | |
c2ba9709 JS |
77 | } |
78 | } | |
79 | ||
80 | #if _GLIBCXX_FIND_EQUAL_SPLIT | |
81 | ||
e683ee2a JS |
82 | /** |
83 | * @brief Parallel std::find, equal splitting variant. | |
84 | * @param begin1 Begin iterator of first sequence. | |
85 | * @param end1 End iterator of first sequence. | |
86 | * @param begin2 Begin iterator of second sequence. Second sequence | |
87 | * must have same length as first sequence. | |
88 | * @param pred Find predicate. | |
89 | * @param selector Functionality (e. g. std::find_if (), std::equal(),...) | |
90 | * @return Place of finding in both sequences. | |
91 | */ | |
531898c3 PC |
92 | template<typename RandomAccessIterator1, |
93 | typename RandomAccessIterator2, | |
94 | typename Pred, | |
95 | typename Selector> | |
c2ba9709 | 96 | std::pair<RandomAccessIterator1, RandomAccessIterator2> |
e683ee2a JS |
97 | find_template(RandomAccessIterator1 begin1, |
98 | RandomAccessIterator1 end1, | |
99 | RandomAccessIterator2 begin2, | |
100 | Pred pred, | |
101 | Selector selector, | |
102 | equal_split_tag) | |
c2ba9709 JS |
103 | { |
104 | _GLIBCXX_CALL(end1 - begin1) | |
105 | ||
106 | typedef std::iterator_traits<RandomAccessIterator1> traits_type; | |
107 | typedef typename traits_type::difference_type difference_type; | |
108 | typedef typename traits_type::value_type value_type; | |
109 | ||
110 | difference_type length = end1 - begin1; | |
c2ba9709 | 111 | difference_type result = length; |
e683ee2a | 112 | difference_type* borders; |
c2ba9709 | 113 | |
59b567fb JS |
114 | omp_lock_t result_lock; |
115 | omp_init_lock(&result_lock); | |
c2ba9709 | 116 | |
e683ee2a JS |
117 | thread_index_t num_threads = get_max_threads(); |
118 | # pragma omp parallel num_threads(num_threads) | |
119 | { | |
120 | # pragma omp single | |
121 | { | |
122 | num_threads = omp_get_num_threads(); | |
123 | borders = new difference_type[num_threads + 1]; | |
124 | equally_split(length, num_threads, borders); | |
125 | } //single | |
126 | ||
127 | thread_index_t iam = omp_get_thread_num(); | |
128 | difference_type start = borders[iam], stop = borders[iam + 1]; | |
129 | ||
130 | RandomAccessIterator1 i1 = begin1 + start; | |
131 | RandomAccessIterator2 i2 = begin2 + start; | |
132 | for (difference_type pos = start; pos < stop; ++pos) | |
133 | { | |
134 | #pragma omp flush(result) | |
135 | // Result has been set to something lower. | |
136 | if (result < pos) | |
59b567fb | 137 | break; |
e683ee2a JS |
138 | |
139 | if (selector(i1, i2, pred)) | |
140 | { | |
141 | omp_set_lock(&result_lock); | |
142 | if (pos < result) | |
143 | result = pos; | |
144 | omp_unset_lock(&result_lock); | |
145 | break; | |
146 | } | |
147 | ++i1; | |
148 | ++i2; | |
149 | } | |
150 | } //parallel | |
59b567fb JS |
151 | |
152 | omp_destroy_lock(&result_lock); | |
e683ee2a JS |
153 | delete[] borders; |
154 | ||
5817ff8e PC |
155 | return |
156 | std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, | |
157 | begin2 + result); | |
c2ba9709 JS |
158 | } |
159 | ||
160 | #endif | |
161 | ||
162 | #if _GLIBCXX_FIND_GROWING_BLOCKS | |
163 | ||
e683ee2a JS |
164 | /** |
165 | * @brief Parallel std::find, growing block size variant. | |
166 | * @param begin1 Begin iterator of first sequence. | |
167 | * @param end1 End iterator of first sequence. | |
168 | * @param begin2 Begin iterator of second sequence. Second sequence | |
169 | * must have same length as first sequence. | |
170 | * @param pred Find predicate. | |
171 | * @param selector Functionality (e. g. std::find_if (), std::equal(),...) | |
172 | * @return Place of finding in both sequences. | |
ee1b5fc5 BK |
173 | * @see __gnu_parallel::_Settings::find_sequential_search_size |
174 | * @see __gnu_parallel::_Settings::find_initial_block_size | |
175 | * @see __gnu_parallel::_Settings::find_maximum_block_size | |
176 | * @see __gnu_parallel::_Settings::find_increasing_factor | |
e683ee2a JS |
177 | * |
178 | * There are two main differences between the growing blocks and | |
179 | * the constant-size blocks variants. | |
180 | * 1. For GB, the block size grows; for CSB, the block size is fixed. | |
181 | ||
182 | * 2. For GB, the blocks are allocated dynamically; | |
183 | * for CSB, the blocks are allocated in a predetermined manner, | |
184 | * namely spacial round-robin. | |
185 | */ | |
531898c3 PC |
186 | template<typename RandomAccessIterator1, |
187 | typename RandomAccessIterator2, | |
188 | typename Pred, | |
189 | typename Selector> | |
c2ba9709 JS |
190 | std::pair<RandomAccessIterator1, RandomAccessIterator2> |
191 | find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, | |
e683ee2a JS |
192 | RandomAccessIterator2 begin2, Pred pred, Selector selector, |
193 | growing_blocks_tag) | |
c2ba9709 JS |
194 | { |
195 | _GLIBCXX_CALL(end1 - begin1) | |
196 | ||
197 | typedef std::iterator_traits<RandomAccessIterator1> traits_type; | |
198 | typedef typename traits_type::difference_type difference_type; | |
199 | typedef typename traits_type::value_type value_type; | |
200 | ||
ee1b5fc5 BK |
201 | const _Settings& __s = _Settings::get(); |
202 | ||
c2ba9709 JS |
203 | difference_type length = end1 - begin1; |
204 | ||
5817ff8e | 205 | difference_type sequential_search_size = |
ee1b5fc5 | 206 | std::min<difference_type>(length, __s.find_sequential_search_size); |
c2ba9709 JS |
207 | |
208 | // Try it sequentially first. | |
209 | std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = | |
e683ee2a JS |
210 | selector.sequential_algorithm( |
211 | begin1, begin1 + sequential_search_size, begin2, pred); | |
c2ba9709 JS |
212 | |
213 | if (find_seq_result.first != (begin1 + sequential_search_size)) | |
214 | return find_seq_result; | |
215 | ||
216 | // Index of beginning of next free block (after sequential find). | |
e683ee2a | 217 | difference_type next_block_start = sequential_search_size; |
c2ba9709 | 218 | difference_type result = length; |
c2ba9709 | 219 | |
59b567fb JS |
220 | omp_lock_t result_lock; |
221 | omp_init_lock(&result_lock); | |
222 | ||
e683ee2a JS |
223 | thread_index_t num_threads = get_max_threads(); |
224 | # pragma omp parallel shared(result) num_threads(num_threads) | |
225 | { | |
226 | # pragma omp single | |
227 | num_threads = omp_get_num_threads(); | |
228 | ||
229 | // Not within first k elements -> start parallel. | |
230 | thread_index_t iam = omp_get_thread_num(); | |
231 | ||
ee1b5fc5 | 232 | difference_type block_size = __s.find_initial_block_size; |
e683ee2a JS |
233 | difference_type start = |
234 | fetch_and_add<difference_type>(&next_block_start, block_size); | |
235 | ||
236 | // Get new block, update pointer to next block. | |
237 | difference_type stop = | |
238 | std::min<difference_type>(length, start + block_size); | |
239 | ||
240 | std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result; | |
241 | ||
242 | while (start < length) | |
243 | { | |
244 | # pragma omp flush(result) | |
245 | // Get new value of result. | |
246 | if (result < start) | |
247 | { | |
248 | // No chance to find first element. | |
249 | break; | |
250 | } | |
251 | ||
252 | local_result = selector.sequential_algorithm( | |
253 | begin1 + start, begin1 + stop, begin2 + start, pred); | |
254 | if (local_result.first != (begin1 + stop)) | |
255 | { | |
256 | omp_set_lock(&result_lock); | |
257 | if ((local_result.first - begin1) < result) | |
258 | { | |
259 | result = local_result.first - begin1; | |
260 | ||
261 | // Result cannot be in future blocks, stop algorithm. | |
262 | fetch_and_add<difference_type>(&next_block_start, length); | |
263 | } | |
264 | omp_unset_lock(&result_lock); | |
265 | } | |
266 | ||
5817ff8e | 267 | block_size = |
ee1b5fc5 BK |
268 | std::min<difference_type>(block_size * __s.find_increasing_factor, |
269 | __s.find_maximum_block_size); | |
e683ee2a JS |
270 | |
271 | // Get new block, update pointer to next block. | |
272 | start = | |
5817ff8e PC |
273 | fetch_and_add<difference_type>(&next_block_start, block_size); |
274 | stop = ((length < (start + block_size)) | |
275 | ? length : (start + block_size)); | |
e683ee2a JS |
276 | } |
277 | } //parallel | |
c2ba9709 | 278 | |
59b567fb JS |
279 | omp_destroy_lock(&result_lock); |
280 | ||
c2ba9709 | 281 | // Return iterator on found element. |
5817ff8e PC |
282 | return |
283 | std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, | |
284 | begin2 + result); | |
c2ba9709 JS |
285 | } |
286 | ||
287 | #endif | |
288 | ||
289 | #if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS | |
290 | ||
e683ee2a JS |
291 | /** |
292 | * @brief Parallel std::find, constant block size variant. | |
293 | * @param begin1 Begin iterator of first sequence. | |
294 | * @param end1 End iterator of first sequence. | |
295 | * @param begin2 Begin iterator of second sequence. Second sequence | |
296 | * must have same length as first sequence. | |
297 | * @param pred Find predicate. | |
298 | * @param selector Functionality (e. g. std::find_if (), std::equal(),...) | |
299 | * @return Place of finding in both sequences. | |
ee1b5fc5 BK |
300 | * @see __gnu_parallel::_Settings::find_sequential_search_size |
301 | * @see __gnu_parallel::_Settings::find_block_size | |
e683ee2a JS |
302 | * There are two main differences between the growing blocks and the |
303 | * constant-size blocks variants. | |
304 | * 1. For GB, the block size grows; for CSB, the block size is fixed. | |
305 | * 2. For GB, the blocks are allocated dynamically; for CSB, the | |
306 | * blocks are allocated in a predetermined manner, namely spacial | |
307 | * round-robin. | |
308 | */ | |
531898c3 PC |
309 | template<typename RandomAccessIterator1, |
310 | typename RandomAccessIterator2, | |
311 | typename Pred, | |
312 | typename Selector> | |
c2ba9709 JS |
313 | std::pair<RandomAccessIterator1, RandomAccessIterator2> |
314 | find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, | |
e683ee2a JS |
315 | RandomAccessIterator2 begin2, Pred pred, Selector selector, |
316 | constant_size_blocks_tag) | |
c2ba9709 JS |
317 | { |
318 | _GLIBCXX_CALL(end1 - begin1) | |
319 | typedef std::iterator_traits<RandomAccessIterator1> traits_type; | |
320 | typedef typename traits_type::difference_type difference_type; | |
321 | typedef typename traits_type::value_type value_type; | |
322 | ||
ee1b5fc5 BK |
323 | const _Settings& __s = _Settings::get(); |
324 | ||
c2ba9709 JS |
325 | difference_type length = end1 - begin1; |
326 | ||
e683ee2a | 327 | difference_type sequential_search_size = std::min<difference_type>( |
ee1b5fc5 | 328 | length, __s.find_sequential_search_size); |
c2ba9709 JS |
329 | |
330 | // Try it sequentially first. | |
331 | std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = | |
e683ee2a JS |
332 | selector.sequential_algorithm(begin1, begin1 + sequential_search_size, |
333 | begin2, pred); | |
c2ba9709 JS |
334 | |
335 | if (find_seq_result.first != (begin1 + sequential_search_size)) | |
336 | return find_seq_result; | |
337 | ||
338 | difference_type result = length; | |
59b567fb JS |
339 | omp_lock_t result_lock; |
340 | omp_init_lock(&result_lock); | |
341 | ||
c2ba9709 | 342 | // Not within first sequential_search_size elements -> start parallel. |
e683ee2a JS |
343 | |
344 | thread_index_t num_threads = get_max_threads(); | |
345 | # pragma omp parallel shared(result) num_threads(num_threads) | |
346 | { | |
347 | # pragma omp single | |
348 | num_threads = omp_get_num_threads(); | |
349 | ||
350 | thread_index_t iam = omp_get_thread_num(); | |
ee1b5fc5 | 351 | difference_type block_size = __s.find_initial_block_size; |
e683ee2a JS |
352 | |
353 | // First element of thread's current iteration. | |
354 | difference_type iteration_start = sequential_search_size; | |
355 | ||
356 | // Where to work (initialization). | |
357 | difference_type start = iteration_start + iam * block_size; | |
358 | difference_type stop = | |
359 | std::min<difference_type>(length, start + block_size); | |
360 | ||
361 | std::pair<RandomAccessIterator1, RandomAccessIterator2> local_result; | |
362 | ||
363 | while (start < length) | |
364 | { | |
365 | // Get new value of result. | |
366 | # pragma omp flush(result) | |
367 | // No chance to find first element. | |
368 | if (result < start) | |
369 | break; | |
370 | local_result = selector.sequential_algorithm( | |
371 | begin1 + start, begin1 + stop, | |
372 | begin2 + start, pred); | |
373 | if (local_result.first != (begin1 + stop)) | |
374 | { | |
375 | omp_set_lock(&result_lock); | |
376 | if ((local_result.first - begin1) < result) | |
377 | result = local_result.first - begin1; | |
378 | omp_unset_lock(&result_lock); | |
379 | // Will not find better value in its interval. | |
380 | break; | |
381 | } | |
382 | ||
383 | iteration_start += num_threads * block_size; | |
384 | ||
385 | // Where to work. | |
386 | start = iteration_start + iam * block_size; | |
387 | stop = std::min<difference_type>(length, start + block_size); | |
388 | } | |
389 | } //parallel | |
c2ba9709 | 390 | |
59b567fb JS |
391 | omp_destroy_lock(&result_lock); |
392 | ||
c2ba9709 | 393 | // Return iterator on found element. |
5817ff8e PC |
394 | return |
395 | std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result, | |
396 | begin2 + result); | |
c2ba9709 JS |
397 | } |
398 | #endif | |
399 | } // end namespace | |
400 | ||
cbcd1e45 | 401 | #endif /* _GLIBCXX_PARALLEL_FIND_H */ |