]>
Commit | Line | Data |
---|---|---|
c2ba9709 JS |
1 | // -*- C++ -*- |
2 | ||
fac9044d | 3 | // Copyright (C) 2007, 2008 Free Software Foundation, Inc. |
c2ba9709 JS |
4 | // |
5 | // This file is part of the GNU ISO C++ Library. This library is free | |
6 | // software; you can redistribute it and/or modify it under the terms | |
7 | // of the GNU General Public License as published by the Free Software | |
8 | // Foundation; either version 2, or (at your option) any later | |
9 | // version. | |
10 | ||
11 | // This library is distributed in the hope that it will be useful, but | |
12 | // WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | // General Public License for more details. | |
15 | ||
16 | // You should have received a copy of the GNU General Public License | |
17 | // along with this library; see the file COPYING. If not, write to | |
18 | // the Free Software Foundation, 59 Temple Place - Suite 330, Boston, | |
19 | // MA 02111-1307, USA. | |
20 | ||
21 | // As a special exception, you may use this file as part of a free | |
22 | // software library without restriction. Specifically, if other files | |
23 | // instantiate templates or use macros or inline functions from this | |
24 | // file, or you compile this file and link it with other files to | |
25 | // produce an executable, this file does not by itself cause the | |
26 | // resulting executable to be covered by the GNU General Public | |
27 | // License. This exception does not however invalidate any other | |
28 | // reasons why the executable file might be covered by the GNU General | |
29 | // Public License. | |
30 | ||
31 | /** @file parallel/partial_sum.h | |
32 | * @brief Parallel implementation of std::partial_sum(), i. e. prefix | |
33 | * sums. | |
34 | * This file is a GNU parallel extension to the Standard C++ Library. | |
35 | */ | |
36 | ||
37 | // Written by Johannes Singler. | |
38 | ||
39 | #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H | |
40 | #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 | |
41 | ||
c2ba9709 | 42 | #include <omp.h> |
58a6ef4b | 43 | #include <new> |
c2ba9709 JS |
44 | #include <bits/stl_algobase.h> |
45 | #include <parallel/parallel.h> | |
46 | #include <parallel/numericfwd.h> | |
47 | ||
48 | namespace __gnu_parallel | |
49 | { | |
50 | // Problem: there is no 0-element given. | |
51 | ||
e683ee2a JS |
52 | /** @brief Base case prefix sum routine. |
53 | * @param begin Begin iterator of input sequence. | |
54 | * @param end End iterator of input sequence. | |
55 | * @param result Begin iterator of output sequence. | |
56 | * @param bin_op Associative binary function. | |
57 | * @param value Start value. Must be passed since the neutral | |
58 | * element is unknown in general. | |
59 | * @return End iterator of output sequence. */ | |
5817ff8e PC |
60 | template<typename InputIterator, |
61 | typename OutputIterator, | |
62 | typename BinaryOperation> | |
63 | OutputIterator | |
64 | parallel_partial_sum_basecase(InputIterator begin, InputIterator end, | |
65 | OutputIterator result, BinaryOperation bin_op, | |
66 | typename std::iterator_traits | |
67 | <InputIterator>::value_type value) | |
c2ba9709 JS |
68 | { |
69 | if (begin == end) | |
70 | return result; | |
71 | ||
72 | while (begin != end) | |
73 | { | |
e683ee2a JS |
74 | value = bin_op(value, *begin); |
75 | *result = value; | |
1661473b JS |
76 | ++result; |
77 | ++begin; | |
c2ba9709 JS |
78 | } |
79 | return result; | |
80 | } | |
81 | ||
e683ee2a JS |
82 | /** @brief Parallel partial sum implementation, two-phase approach, |
83 | no recursion. | |
84 | * @param begin Begin iterator of input sequence. | |
85 | * @param end End iterator of input sequence. | |
86 | * @param result Begin iterator of output sequence. | |
87 | * @param bin_op Associative binary function. | |
88 | * @param n Length of sequence. | |
89 | * @param num_threads Number of threads to use. | |
90 | * @return End iterator of output sequence. | |
91 | */ | |
5817ff8e PC |
92 | template<typename InputIterator, |
93 | typename OutputIterator, | |
94 | typename BinaryOperation> | |
c2ba9709 | 95 | OutputIterator |
5817ff8e PC |
96 | parallel_partial_sum_linear(InputIterator begin, InputIterator end, |
97 | OutputIterator result, BinaryOperation bin_op, | |
98 | typename std::iterator_traits | |
99 | <InputIterator>::difference_type n) | |
c2ba9709 JS |
100 | { |
101 | typedef std::iterator_traits<InputIterator> traits_type; | |
102 | typedef typename traits_type::value_type value_type; | |
103 | typedef typename traits_type::difference_type difference_type; | |
104 | ||
1661473b JS |
105 | if (begin == end) |
106 | return result; | |
107 | ||
e683ee2a JS |
108 | thread_index_t num_threads = |
109 | std::min<difference_type>(get_max_threads(), n - 1); | |
110 | ||
c2ba9709 JS |
111 | if (num_threads < 2) |
112 | { | |
e683ee2a JS |
113 | *result = *begin; |
114 | return parallel_partial_sum_basecase( | |
115 | begin + 1, end, result + 1, bin_op, *begin); | |
c2ba9709 JS |
116 | } |
117 | ||
e683ee2a JS |
118 | difference_type* borders; |
119 | value_type* sums; | |
c2ba9709 | 120 | |
ee1b5fc5 BK |
121 | const _Settings& __s = _Settings::get(); |
122 | ||
e683ee2a | 123 | # pragma omp parallel num_threads(num_threads) |
c2ba9709 | 124 | { |
e683ee2a JS |
125 | # pragma omp single |
126 | { | |
127 | num_threads = omp_get_num_threads(); | |
128 | ||
129 | borders = new difference_type[num_threads + 2]; | |
130 | ||
ee1b5fc5 | 131 | if (__s.partial_sum_dilation == 1.0f) |
e683ee2a JS |
132 | equally_split(n, num_threads + 1, borders); |
133 | else | |
134 | { | |
135 | difference_type chunk_length = | |
5817ff8e | 136 | ((double)n |
ee1b5fc5 | 137 | / ((double)num_threads + __s.partial_sum_dilation)), |
5817ff8e | 138 | borderstart = n - num_threads * chunk_length; |
e683ee2a | 139 | borders[0] = 0; |
1661473b | 140 | for (int i = 1; i < (num_threads + 1); ++i) |
e683ee2a JS |
141 | { |
142 | borders[i] = borderstart; | |
143 | borderstart += chunk_length; | |
144 | } | |
145 | borders[num_threads + 1] = n; | |
146 | } | |
147 | ||
5817ff8e PC |
148 | sums = static_cast<value_type*>(::operator new(sizeof(value_type) |
149 | * num_threads)); | |
e683ee2a JS |
150 | OutputIterator target_end; |
151 | } //single | |
152 | ||
1661473b | 153 | thread_index_t iam = omp_get_thread_num(); |
e683ee2a JS |
154 | if (iam == 0) |
155 | { | |
156 | *result = *begin; | |
157 | parallel_partial_sum_basecase(begin + 1, begin + borders[1], | |
5817ff8e | 158 | result + 1, bin_op, *begin); |
58a6ef4b | 159 | ::new(&(sums[iam])) value_type(*(result + borders[1] - 1)); |
e683ee2a JS |
160 | } |
161 | else | |
162 | { | |
5817ff8e PC |
163 | ::new(&(sums[iam])) |
164 | value_type(std::accumulate(begin + borders[iam] + 1, | |
165 | begin + borders[iam + 1], | |
166 | *(begin + borders[iam]), | |
167 | bin_op, | |
168 | __gnu_parallel::sequential_tag())); | |
e683ee2a JS |
169 | } |
170 | ||
171 | # pragma omp barrier | |
172 | ||
173 | # pragma omp single | |
174 | parallel_partial_sum_basecase( | |
175 | sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]); | |
176 | ||
177 | # pragma omp barrier | |
178 | ||
179 | // Still same team. | |
180 | parallel_partial_sum_basecase(begin + borders[iam + 1], | |
5817ff8e PC |
181 | begin + borders[iam + 2], |
182 | result + borders[iam + 1], bin_op, | |
183 | sums[iam]); | |
e683ee2a JS |
184 | } //parallel |
185 | ||
fac9044d | 186 | ::operator delete(sums); |
e683ee2a | 187 | delete[] borders; |
c2ba9709 JS |
188 | |
189 | return result + n; | |
190 | } | |
191 | ||
e683ee2a JS |
192 | /** @brief Parallel partial sum front-end. |
193 | * @param begin Begin iterator of input sequence. | |
194 | * @param end End iterator of input sequence. | |
195 | * @param result Begin iterator of output sequence. | |
196 | * @param bin_op Associative binary function. | |
197 | * @return End iterator of output sequence. */ | |
5817ff8e PC |
198 | template<typename InputIterator, |
199 | typename OutputIterator, | |
200 | typename BinaryOperation> | |
c2ba9709 JS |
201 | OutputIterator |
202 | parallel_partial_sum(InputIterator begin, InputIterator end, | |
e683ee2a | 203 | OutputIterator result, BinaryOperation bin_op) |
c2ba9709 | 204 | { |
e683ee2a | 205 | _GLIBCXX_CALL(begin - end) |
c2ba9709 JS |
206 | |
207 | typedef std::iterator_traits<InputIterator> traits_type; | |
208 | typedef typename traits_type::value_type value_type; | |
209 | typedef typename traits_type::difference_type difference_type; | |
210 | ||
211 | difference_type n = end - begin; | |
212 | ||
ee1b5fc5 | 213 | switch (_Settings::get().partial_sum_algorithm) |
c2ba9709 | 214 | { |
ee1b5fc5 | 215 | case LINEAR: |
e683ee2a JS |
216 | // Need an initial offset. |
217 | return parallel_partial_sum_linear(begin, end, result, bin_op, n); | |
c2ba9709 | 218 | default: |
e683ee2a JS |
219 | // Partial_sum algorithm not implemented. |
220 | _GLIBCXX_PARALLEL_ASSERT(0); | |
221 | return result + n; | |
c2ba9709 JS |
222 | } |
223 | } | |
224 | } | |
225 | ||
cbcd1e45 | 226 | #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ |