]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
748086b7 | 3 | // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
748086b7 | 8 | // Foundation; either version 3, or (at your option) any later |
c2ba9709 JS |
9 | // version. |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
748086b7 JJ |
16 | // Under Section 7 of GPL version 3, you are granted additional |
17 | // permissions described in the GCC Runtime Library Exception, version | |
18 | // 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | // You should have received a copy of the GNU General Public License and | |
21 | // a copy of the GCC Runtime Library Exception along with this program; | |
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | // <http://www.gnu.org/licenses/>. | |
c2ba9709 JS |
24 | |
25 | /** @file parallel/partial_sum.h | |
26 | * @brief Parallel implementation of std::partial_sum(), i. e. prefix | |
27 | * sums. | |
28 | * This file is a GNU parallel extension to the Standard C++ Library. | |
29 | */ | |
30 | ||
31 | // Written by Johannes Singler. | |
32 | ||
33 | #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H | |
34 | #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 | |
35 | ||
c2ba9709 | 36 | #include <omp.h> |
58a6ef4b | 37 | #include <new> |
c2ba9709 JS |
38 | #include <bits/stl_algobase.h> |
39 | #include <parallel/parallel.h> | |
40 | #include <parallel/numericfwd.h> | |
41 | ||
42 | namespace __gnu_parallel | |
43 | { | |
44 | // Problem: there is no 0-element given. | |
45 | ||
e683ee2a JS |
46 | /** @brief Base case prefix sum routine. |
47 | * @param begin Begin iterator of input sequence. | |
48 | * @param end End iterator of input sequence. | |
49 | * @param result Begin iterator of output sequence. | |
50 | * @param bin_op Associative binary function. | |
51 | * @param value Start value. Must be passed since the neutral | |
52 | * element is unknown in general. | |
53 | * @return End iterator of output sequence. */ | |
5817ff8e PC |
54 | template<typename InputIterator, |
55 | typename OutputIterator, | |
56 | typename BinaryOperation> | |
57 | OutputIterator | |
58 | parallel_partial_sum_basecase(InputIterator begin, InputIterator end, | |
59 | OutputIterator result, BinaryOperation bin_op, | |
60 | typename std::iterator_traits | |
61 | <InputIterator>::value_type value) | |
c2ba9709 JS |
62 | { |
63 | if (begin == end) | |
64 | return result; | |
65 | ||
66 | while (begin != end) | |
67 | { | |
e683ee2a JS |
68 | value = bin_op(value, *begin); |
69 | *result = value; | |
1661473b JS |
70 | ++result; |
71 | ++begin; | |
c2ba9709 JS |
72 | } |
73 | return result; | |
74 | } | |
75 | ||
e683ee2a JS |
76 | /** @brief Parallel partial sum implementation, two-phase approach, |
77 | no recursion. | |
78 | * @param begin Begin iterator of input sequence. | |
79 | * @param end End iterator of input sequence. | |
80 | * @param result Begin iterator of output sequence. | |
81 | * @param bin_op Associative binary function. | |
82 | * @param n Length of sequence. | |
83 | * @param num_threads Number of threads to use. | |
84 | * @return End iterator of output sequence. | |
85 | */ | |
5817ff8e PC |
86 | template<typename InputIterator, |
87 | typename OutputIterator, | |
88 | typename BinaryOperation> | |
c2ba9709 | 89 | OutputIterator |
5817ff8e PC |
90 | parallel_partial_sum_linear(InputIterator begin, InputIterator end, |
91 | OutputIterator result, BinaryOperation bin_op, | |
92 | typename std::iterator_traits | |
93 | <InputIterator>::difference_type n) | |
c2ba9709 JS |
94 | { |
95 | typedef std::iterator_traits<InputIterator> traits_type; | |
96 | typedef typename traits_type::value_type value_type; | |
97 | typedef typename traits_type::difference_type difference_type; | |
98 | ||
1661473b JS |
99 | if (begin == end) |
100 | return result; | |
101 | ||
e683ee2a JS |
102 | thread_index_t num_threads = |
103 | std::min<difference_type>(get_max_threads(), n - 1); | |
104 | ||
c2ba9709 JS |
105 | if (num_threads < 2) |
106 | { | |
e683ee2a JS |
107 | *result = *begin; |
108 | return parallel_partial_sum_basecase( | |
109 | begin + 1, end, result + 1, bin_op, *begin); | |
c2ba9709 JS |
110 | } |
111 | ||
e683ee2a JS |
112 | difference_type* borders; |
113 | value_type* sums; | |
c2ba9709 | 114 | |
ee1b5fc5 BK |
115 | const _Settings& __s = _Settings::get(); |
116 | ||
e683ee2a | 117 | # pragma omp parallel num_threads(num_threads) |
c2ba9709 | 118 | { |
e683ee2a JS |
119 | # pragma omp single |
120 | { | |
121 | num_threads = omp_get_num_threads(); | |
122 | ||
123 | borders = new difference_type[num_threads + 2]; | |
124 | ||
ee1b5fc5 | 125 | if (__s.partial_sum_dilation == 1.0f) |
e683ee2a JS |
126 | equally_split(n, num_threads + 1, borders); |
127 | else | |
128 | { | |
129 | difference_type chunk_length = | |
5817ff8e | 130 | ((double)n |
ee1b5fc5 | 131 | / ((double)num_threads + __s.partial_sum_dilation)), |
5817ff8e | 132 | borderstart = n - num_threads * chunk_length; |
e683ee2a | 133 | borders[0] = 0; |
1661473b | 134 | for (int i = 1; i < (num_threads + 1); ++i) |
e683ee2a JS |
135 | { |
136 | borders[i] = borderstart; | |
137 | borderstart += chunk_length; | |
138 | } | |
139 | borders[num_threads + 1] = n; | |
140 | } | |
141 | ||
5817ff8e PC |
142 | sums = static_cast<value_type*>(::operator new(sizeof(value_type) |
143 | * num_threads)); | |
e683ee2a JS |
144 | OutputIterator target_end; |
145 | } //single | |
146 | ||
1661473b | 147 | thread_index_t iam = omp_get_thread_num(); |
e683ee2a JS |
148 | if (iam == 0) |
149 | { | |
150 | *result = *begin; | |
151 | parallel_partial_sum_basecase(begin + 1, begin + borders[1], | |
5817ff8e | 152 | result + 1, bin_op, *begin); |
58a6ef4b | 153 | ::new(&(sums[iam])) value_type(*(result + borders[1] - 1)); |
e683ee2a JS |
154 | } |
155 | else | |
156 | { | |
5817ff8e PC |
157 | ::new(&(sums[iam])) |
158 | value_type(std::accumulate(begin + borders[iam] + 1, | |
159 | begin + borders[iam + 1], | |
160 | *(begin + borders[iam]), | |
161 | bin_op, | |
162 | __gnu_parallel::sequential_tag())); | |
e683ee2a JS |
163 | } |
164 | ||
165 | # pragma omp barrier | |
166 | ||
167 | # pragma omp single | |
168 | parallel_partial_sum_basecase( | |
169 | sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]); | |
170 | ||
171 | # pragma omp barrier | |
172 | ||
173 | // Still same team. | |
174 | parallel_partial_sum_basecase(begin + borders[iam + 1], | |
5817ff8e PC |
175 | begin + borders[iam + 2], |
176 | result + borders[iam + 1], bin_op, | |
177 | sums[iam]); | |
e683ee2a JS |
178 | } //parallel |
179 | ||
fac9044d | 180 | ::operator delete(sums); |
e683ee2a | 181 | delete[] borders; |
c2ba9709 JS |
182 | |
183 | return result + n; | |
184 | } | |
185 | ||
e683ee2a JS |
186 | /** @brief Parallel partial sum front-end. |
187 | * @param begin Begin iterator of input sequence. | |
188 | * @param end End iterator of input sequence. | |
189 | * @param result Begin iterator of output sequence. | |
190 | * @param bin_op Associative binary function. | |
191 | * @return End iterator of output sequence. */ | |
5817ff8e PC |
192 | template<typename InputIterator, |
193 | typename OutputIterator, | |
194 | typename BinaryOperation> | |
c2ba9709 JS |
195 | OutputIterator |
196 | parallel_partial_sum(InputIterator begin, InputIterator end, | |
e683ee2a | 197 | OutputIterator result, BinaryOperation bin_op) |
c2ba9709 | 198 | { |
e683ee2a | 199 | _GLIBCXX_CALL(begin - end) |
c2ba9709 JS |
200 | |
201 | typedef std::iterator_traits<InputIterator> traits_type; | |
202 | typedef typename traits_type::value_type value_type; | |
203 | typedef typename traits_type::difference_type difference_type; | |
204 | ||
205 | difference_type n = end - begin; | |
206 | ||
ee1b5fc5 | 207 | switch (_Settings::get().partial_sum_algorithm) |
c2ba9709 | 208 | { |
ee1b5fc5 | 209 | case LINEAR: |
e683ee2a JS |
210 | // Need an initial offset. |
211 | return parallel_partial_sum_linear(begin, end, result, bin_op, n); | |
c2ba9709 | 212 | default: |
e683ee2a JS |
213 | // Partial_sum algorithm not implemented. |
214 | _GLIBCXX_PARALLEL_ASSERT(0); | |
215 | return result + n; | |
c2ba9709 JS |
216 | } |
217 | } | |
218 | } | |
219 | ||
cbcd1e45 | 220 | #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ |