]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
748086b7 | 3 | // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/partial_sum.h | |
1acba85b JS |
26 | * @brief Parallel implementation of std::partial_sum(), i.e. prefix |
27 | * sums. | |
c2ba9709 JS |
28 | * This file is a GNU parallel extension to the Standard C++ Library. |
29 | */ | |
30 | ||
31 | // Written by Johannes Singler. | |
32 | ||
33 | #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H | |
34 | #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 | |
35 | ||
c2ba9709 | 36 | #include <omp.h> |
58a6ef4b | 37 | #include <new> |
c2ba9709 JS |
38 | #include <bits/stl_algobase.h> |
39 | #include <parallel/parallel.h> | |
40 | #include <parallel/numericfwd.h> | |
41 | ||
42 | namespace __gnu_parallel | |
43 | { | |
44 | // Problem: there is no 0-element given. | |
45 | ||
338311e5 PC |
46 | /** @brief Base case prefix sum routine. |
47 | * @param __begin Begin iterator of input sequence. | |
48 | * @param __end End iterator of input sequence. | |
49 | * @param __result Begin iterator of output sequence. | |
50 | * @param __bin_op Associative binary function. | |
51 | * @param __value Start value. Must be passed since the neutral | |
52 | * element is unknown in general. | |
53 | * @return End iterator of output sequence. */ | |
54 | template<typename _IIter, | |
55 | typename _OutputIterator, | |
56 | typename _BinaryOperation> | |
57 | _OutputIterator | |
58 | __parallel_partial_sum_basecase(_IIter __begin, _IIter __end, | |
59 | _OutputIterator __result, | |
60 | _BinaryOperation __bin_op, | |
61 | typename std::iterator_traits <_IIter>::value_type __value) | |
62 | { | |
63 | if (__begin == __end) | |
64 | return __result; | |
65 | ||
66 | while (__begin != __end) | |
67 | { | |
68 | __value = __bin_op(__value, *__begin); | |
69 | *__result = __value; | |
70 | ++__result; | |
71 | ++__begin; | |
72 | } | |
1acba85b | 73 | return __result; |
338311e5 PC |
74 | } |
75 | ||
76 | /** @brief Parallel partial sum implementation, two-phase approach, | |
77 | no recursion. | |
78 | * @param __begin Begin iterator of input sequence. | |
79 | * @param __end End iterator of input sequence. | |
80 | * @param __result Begin iterator of output sequence. | |
81 | * @param __bin_op Associative binary function. | |
82 | * @param __n Length of sequence. | |
83 | * @param __num_threads Number of threads to use. | |
84 | * @return End iterator of output sequence. | |
85 | */ | |
86 | template<typename _IIter, | |
87 | typename _OutputIterator, | |
88 | typename _BinaryOperation> | |
89 | _OutputIterator | |
90 | __parallel_partial_sum_linear(_IIter __begin, _IIter __end, | |
91 | _OutputIterator __result, | |
92 | _BinaryOperation __bin_op, | |
93 | typename std::iterator_traits<_IIter>::difference_type __n) | |
94 | { | |
95 | typedef std::iterator_traits<_IIter> _TraitsType; | |
96 | typedef typename _TraitsType::value_type _ValueType; | |
97 | typedef typename _TraitsType::difference_type _DifferenceType; | |
98 | ||
99 | if (__begin == __end) | |
100 | return __result; | |
101 | ||
102 | _ThreadIndex __num_threads = | |
1acba85b | 103 | std::min<_DifferenceType>(__get_max_threads(), __n - 1); |
e683ee2a | 104 | |
338311e5 PC |
105 | if (__num_threads < 2) |
106 | { | |
107 | *__result = *__begin; | |
108 | return __parallel_partial_sum_basecase(__begin + 1, __end, | |
109 | __result + 1, __bin_op, | |
110 | *__begin); | |
111 | } | |
c2ba9709 | 112 | |
338311e5 PC |
113 | _DifferenceType* __borders; |
114 | _ValueType* __sums; | |
c2ba9709 | 115 | |
338311e5 | 116 | const _Settings& __s = _Settings::get(); |
ee1b5fc5 | 117 | |
338311e5 | 118 | # pragma omp parallel num_threads(__num_threads) |
c2ba9709 | 119 | { |
e683ee2a | 120 | # pragma omp single |
338311e5 PC |
121 | { |
122 | __num_threads = omp_get_num_threads(); | |
123 | ||
124 | __borders = new _DifferenceType[__num_threads + 2]; | |
125 | ||
126 | if (__s.partial_sum_dilation == 1.0f) | |
127 | equally_split(__n, __num_threads + 1, __borders); | |
128 | else | |
129 | { | |
130 | _DifferenceType __chunk_length = | |
131 | ((double)__n | |
132 | / ((double)__num_threads + __s.partial_sum_dilation)), | |
133 | __borderstart = __n - __num_threads * __chunk_length; | |
134 | __borders[0] = 0; | |
8b0c13a8 | 135 | for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i) |
338311e5 PC |
136 | { |
137 | __borders[__i] = __borderstart; | |
138 | __borderstart += __chunk_length; | |
139 | } | |
140 | __borders[__num_threads + 1] = __n; | |
141 | } | |
142 | ||
143 | __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) | |
15ac3c72 | 144 | * __num_threads)); |
338311e5 PC |
145 | _OutputIterator __target_end; |
146 | } //single | |
e683ee2a | 147 | |
1acba85b JS |
148 | _ThreadIndex __iam = omp_get_thread_num(); |
149 | if (__iam == 0) | |
e683ee2a | 150 | { |
1acba85b | 151 | *__result = *__begin; |
77d16198 PC |
152 | __parallel_partial_sum_basecase(__begin + 1, |
153 | __begin + __borders[1], | |
154 | __result + 1, | |
155 | __bin_op, *__begin); | |
1acba85b | 156 | ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1)); |
e683ee2a JS |
157 | } |
158 | else | |
159 | { | |
1acba85b | 160 | ::new(&(__sums[__iam])) |
15ac3c72 JS |
161 | _ValueType(std::accumulate(__begin + __borders[__iam] + 1, |
162 | __begin + __borders[__iam + 1], | |
163 | *(__begin + __borders[__iam]), | |
164 | __bin_op, | |
165 | __gnu_parallel::sequential_tag())); | |
e683ee2a JS |
166 | } |
167 | ||
168 | # pragma omp barrier | |
169 | ||
170 | # pragma omp single | |
338311e5 | 171 | __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads, |
77d16198 | 172 | __sums + 1, __bin_op, __sums[0]); |
e683ee2a JS |
173 | |
174 | # pragma omp barrier | |
175 | ||
338311e5 PC |
176 | // Still same team. |
177 | __parallel_partial_sum_basecase(__begin + __borders[__iam + 1], | |
178 | __begin + __borders[__iam + 2], | |
179 | __result + __borders[__iam + 1], | |
180 | __bin_op, __sums[__iam]); | |
e683ee2a JS |
181 | } //parallel |
182 | ||
338311e5 PC |
183 | ::operator delete(__sums); |
184 | delete[] __borders; | |
185 | ||
186 | return __result + __n; | |
187 | } | |
188 | ||
189 | /** @brief Parallel partial sum front-__end. | |
190 | * @param __begin Begin iterator of input sequence. | |
191 | * @param __end End iterator of input sequence. | |
192 | * @param __result Begin iterator of output sequence. | |
193 | * @param __bin_op Associative binary function. | |
194 | * @return End iterator of output sequence. */ | |
195 | template<typename _IIter, | |
196 | typename _OutputIterator, | |
197 | typename _BinaryOperation> | |
198 | _OutputIterator | |
199 | __parallel_partial_sum(_IIter __begin, _IIter __end, | |
200 | _OutputIterator __result, _BinaryOperation __bin_op) | |
201 | { | |
202 | _GLIBCXX_CALL(__begin - __end) | |
203 | ||
204 | typedef std::iterator_traits<_IIter> _TraitsType; | |
205 | typedef typename _TraitsType::value_type _ValueType; | |
206 | typedef typename _TraitsType::difference_type _DifferenceType; | |
207 | ||
208 | _DifferenceType __n = __end - __begin; | |
209 | ||
210 | switch (_Settings::get().partial_sum_algorithm) | |
211 | { | |
212 | case LINEAR: | |
213 | // Need an initial offset. | |
214 | return __parallel_partial_sum_linear(__begin, __end, __result, | |
215 | __bin_op, __n); | |
216 | default: | |
217 | // Partial_sum algorithm not implemented. | |
218 | _GLIBCXX_PARALLEL_ASSERT(0); | |
219 | return __result + __n; | |
220 | } | |
221 | } | |
c2ba9709 JS |
222 | } |
223 | ||
cbcd1e45 | 224 | #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ |