]> git.ipfire.org Git - thirdparty/gcc.git/blame - libstdc++-v3/include/parallel/settings.h
Add parallel mode.
[thirdparty/gcc.git] / libstdc++-v3 / include / parallel / settings.h
CommitLineData
c2ba9709
JS
1// -*- C++ -*-
2
3// Copyright (C) 2007 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the terms
7// of the GNU General Public License as published by the Free Software
8// Foundation; either version 2, or (at your option) any later
9// version.
10
11// This library is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15
16// You should have received a copy of the GNU General Public License
17// along with this library; see the file COPYING. If not, write to
18// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
19// MA 02111-1307, USA.
20
21// As a special exception, you may use this file as part of a free
22// software library without restriction. Specifically, if other files
23// instantiate templates or use macros or inline functions from this
24// file, or you compile this file and link it with other files to
25// produce an executable, this file does not by itself cause the
26// resulting executable to be covered by the GNU General Public
27// License. This exception does not however invalidate any other
28// reasons why the executable file might be covered by the GNU General
29// Public License.
30
31/** @file parallel/settings.h
32 * @brief Settings and tuning parameters, heuristics to decide
33 * whether to use parallelized algorithms.
34 * This file is a GNU parallel extension to the Standard C++ Library.
35 *
36 * @section parallelization_decision The decision whether to run an algorithm in parallel.
37 *
38 * There are several ways the user can switch on and off the
39 * parallel execution of an algorithm, both at compile- and
40 * run-time.
41 *
42 * Only sequential execution can be forced at compile-time.
43 * This reduces code size and protects code parts that have
44 * non-thread-safe side effects.
45 *
46 * Ultimately forcing parallel execution at compile-time does
47 * make much sense.
48 * Often, the sequential algorithm implementation is used as
49 * a subroutine, so no reduction in code size can be achieved.
50 * Also, the machine the program is run on might have only one
51 * processor core, so to avoid overhead, the algorithm is
52 * executed sequentially.
53 *
54 * To force sequential execution of an algorithm ultimately
55 * at compile-time, the user must add the tag
56 * __gnu_parallel::sequential_tag() to the end of the
57 * parameter list, e. g.
58 *
59 * \code
60 * std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
61 * \endcode
62 *
63 * This is compatible with all overloaded algorithm variants.
64 * No additional code will be instantiated, at all.
65 * The same holds for most algorithm calls with iterators
66 * not providing random access.
67 *
68 * If the algorithm call is not forced to be executed sequentially
69 * at compile-time, the decision is made at run-time, for each call.
70 * First, the two (conceptually) global variables
71 * __gnu_parallel::Settings::force_sequential and
72 * __gnu_parallel::Settings::force_parallel are executed.
73 * If the former one is true, the sequential algorithm is executed.
74 * If the latter one is true and the former one is false,
75 * the algorithm is executed in parallel.
76 *
77 * If none of these conditions has fired so far, a heuristic is used.
78 * The parallel algorithm implementation is called only if the
79 * input size is sufficiently large.
80 * For most algorithms, the input size is the (combined) length of
81 * the input sequence(s).
82 * The threshold can be set by the user, individually for each
83 * algorithm.
84 * The according variables are called
85 * __gnu_parallel::Settings::[algorithm]_minimal_n .
86 *
87 * For some of the algorithms, there are even more tuning options,
88 * e. g. the ability to choose from multiple algorithm variants.
89 * See the __gnu_parallel::Settings class for details.
90 */
91
92// Written by Johannes Singler and Felix Putze.
93
94#ifndef _GLIBCXX_PARALLEL_SETTINGS_H
95#define _GLIBCXX_PARALLEL_SETTINGS_H 1
96
97#include <omp.h>
98#include <parallel/types.h>
99
100/**
101 * @brief The extensible condition on whether the parallel variant of
102 * an algorithm sould be called.
103 * @param c A condition that is overruled by
104 * __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on
105 * the input size.
106 */
107#define _GLIBCXX_PARALLEL_CONDITION(c) (!(__gnu_parallel::Settings::force_sequential) && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::Settings::force_parallel))
108
109namespace __gnu_parallel
110{
111 // NB: Including this file cannot produce (unresolved) symbols from
112 // the OpenMP runtime unless the parallel mode is actually invoked
113 // and active, which imples that the OpenMP runtime is actually
114 // going to be linked in.
115 inline int
116 get_max_threads()
117 { return omp_get_max_threads() > 1 ? omp_get_max_threads() : 1; }
118
119namespace
120{
121 // XXX look at _Tune in mt_allocator.h
122 /** @brief Run-time settings for the parallel mode. */
123 struct Settings
124 {
125 /** @brief Different parallel sorting algorithms to choose
126 from: multi-way mergesort, quicksort, load-balanced
127 quicksort. */
128 enum SortAlgorithm
129 { MWMS, QS, QS_BALANCED };
130
131 /** @brief Different merging algorithms: bubblesort-alike,
132 loser-tree variants, enum sentinel */
133 enum MultiwayMergeAlgorithm
134 { BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED, LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST };
135
136 /** @brief Different splitting strategies for sorting/merging:
137 by sampling, exact */
138 enum Splitting
139 { SAMPLING, EXACT };
140
141 /** @brief Different partial sum algorithms: recursive, linear */
142 enum PartialSumAlgorithm
143 { RECURSIVE, LINEAR };
144
145 /** @brief Different find distribution strategies: growing
146 blocks, equal-sized blocks, equal splitting. */
147 enum FindDistribution
148 { GROWING_BLOCKS, CONSTANT_SIZE_BLOCKS, EQUAL_SPLIT };
149
150 /** @brief Force all algorithms to be executed sequentially.
151 * This setting cannot be overwritten. */
152 static volatile bool force_sequential;
153
154 /** @brief Force all algorithms to be executed in parallel.
155 * This setting can be overriden by __gnu_parallel::sequential_tag
156 * (compile-time), and force_sequential (run-time). */
157 static volatile bool force_parallel;
158
159 /** @brief Algorithm to use for sorting. */
160 static volatile SortAlgorithm sort_algorithm;
161
162 /** @brief Strategy to use for splitting the input when
163 sorting (MWMS). */
164 static volatile Splitting sort_splitting;
165
166 /** @brief Minimal input size for parallel sorting. */
167 static volatile sequence_index_t sort_minimal_n;
168 /** @brief Oversampling factor for parallel std::sort (MWMS). */
169 static volatile unsigned int sort_mwms_oversampling;
170 /** @brief Such many samples to take to find a good pivot
171 (quicksort). */
172 static volatile unsigned int sort_qs_num_samples_preset;
173
174 /** @brief Maximal subsequence length to swtich to unbalanced
175 * base case. Applies to std::sort with dynamically
176 * load-balanced quicksort. */
177 static volatile sequence_index_t sort_qsb_base_case_maximal_n;
178
179 /** @brief Minimal input size for parallel std::partition. */
180 static volatile sequence_index_t partition_minimal_n;
181
182 /** @brief Chunk size for parallel std::partition. */
183 static volatile sequence_index_t partition_chunk_size;
184
185 /** @brief Chunk size for parallel std::partition, relative to
186 * input size. If >0.0, this value overrides
187 * partition_chunk_size. */
188 static volatile double partition_chunk_share;
189
190 /** @brief Minimal input size for parallel std::nth_element. */
191 static volatile sequence_index_t nth_element_minimal_n;
192
193 /** @brief Minimal input size for parallel std::partial_sort. */
194 static volatile sequence_index_t partial_sort_minimal_n;
195
196 /** @brief Minimal input size for parallel std::adjacent_difference. */
197 static volatile unsigned int adjacent_difference_minimal_n;
198
199 /** @brief Minimal input size for parallel std::partial_sum. */
200 static volatile unsigned int partial_sum_minimal_n;
201
202 /** @brief Algorithm to use for std::partial_sum. */
203 static volatile PartialSumAlgorithm partial_sum_algorithm;
204
205 /** @brief Assume "sum and write result" to be that factor
206 * slower than just "sum". This value is used for
207 * std::partial_sum. */
208 static volatile float partial_sum_dilatation;
209
210 /** @brief Minimal input size for parallel std::random_shuffle. */
211 static volatile unsigned int random_shuffle_minimal_n;
212
213 /** @brief Minimal input size for parallel std::merge. */
214 static volatile sequence_index_t merge_minimal_n;
215
216 /** @brief Splitting strategy for parallel std::merge. */
217 static volatile Splitting merge_splitting;
218
219 /** @brief Oversampling factor for parallel std::merge.
220 * Such many samples per thread are collected. */
221 static volatile unsigned int merge_oversampling;
222
223 /** @brief Algorithm to use for parallel
224 __gnu_parallel::multiway_merge. */
225 static volatile MultiwayMergeAlgorithm multiway_merge_algorithm;
226
227 /** @brief Splitting strategy to use for parallel
228 __gnu_parallel::multiway_merge. */
229 static volatile Splitting multiway_merge_splitting;
230
231 //// Oversampling factor for parallel __gnu_parallel::multiway_merge.
232 static volatile unsigned int multiway_merge_oversampling;
233
234 /// Minimal input size for parallel __gnu_parallel::multiway_merge.
235 static volatile sequence_index_t multiway_merge_minimal_n;
236
237 /// Oversampling factor for parallel __gnu_parallel::multiway_merge.
238 static volatile int multiway_merge_minimal_k;
239
240 /** @brief Minimal input size for parallel std::unique_copy. */
241 static volatile sequence_index_t unique_copy_minimal_n;
242
243 static volatile sequence_index_t workstealing_chunk_size;
244
245 /** @brief Minimal input size for parallel std::for_each. */
246 static volatile sequence_index_t for_each_minimal_n;
247
248 /** @brief Minimal input size for parallel std::count and
249 std::count_if. */
250 static volatile sequence_index_t count_minimal_n;
251
252 /** @brief Minimal input size for parallel std::transform. */
253 static volatile sequence_index_t transform_minimal_n;
254
255 /** @brief Minimal input size for parallel std::replace and
256 std::replace_if. */
257 static volatile sequence_index_t replace_minimal_n;
258
259 /** @brief Minimal input size for parallel std::generate. */
260 static volatile sequence_index_t generate_minimal_n;
261
262 /** @brief Minimal input size for parallel std::fill. */
263 static volatile sequence_index_t fill_minimal_n;
264
265 /** @brief Minimal input size for parallel std::min_element. */
266 static volatile sequence_index_t min_element_minimal_n;
267
268 /** @brief Minimal input size for parallel std::max_element. */
269 static volatile sequence_index_t max_element_minimal_n;
270
271 /** @brief Minimal input size for parallel std::accumulate. */
272 static volatile sequence_index_t accumulate_minimal_n;
273
274 /** @brief Distribution strategy for parallel std::find. */
275 static volatile FindDistribution find_distribution;
276
277 /** @brief Start with looking for that many elements
278 sequentially, for std::find. */
279 static volatile sequence_index_t find_sequential_search_size;
280
281 /** @brief Initial block size for parallel std::find. */
282 static volatile sequence_index_t find_initial_block_size;
283
284 /** @brief Maximal block size for parallel std::find. */
285 static volatile sequence_index_t find_maximum_block_size;
286
287 /** @brief Block size increase factor for parallel std::find. */
288 static volatile double find_increasing_factor;
289
290 //set operations
291 /** @brief Minimal input size for parallel std::set_union. */
292 static volatile sequence_index_t set_union_minimal_n;
293
294 /** @brief Minimal input size for parallel
295 std::set_symmetric_difference. */
296 static volatile sequence_index_t set_symmetric_difference_minimal_n;
297
298 /** @brief Minimal input size for parallel std::set_difference. */
299 static volatile sequence_index_t set_difference_minimal_n;
300
301 /** @brief Minimal input size for parallel std::set_intersection. */
302 static volatile sequence_index_t set_intersection_minimal_n;
303
304 //hardware dependent tuning parameters
305 /** @brief Size of the L1 cache in bytes (underestimation). */
306 static volatile unsigned long long L1_cache_size;
307
308 /** @brief Size of the L2 cache in bytes (underestimation). */
309 static volatile unsigned long long L2_cache_size;
310
311 /** @brief Size of the Translation Lookaside Buffer
312 (underestimation). */
313 static volatile unsigned int TLB_size;
314
315 /** @brief Overestimation of cache line size. Used to avoid
316 * false sharing, i. e. elements of different threads are at
317 * least this amount apart. */
318 static unsigned int cache_line_size;
319
320 //statistics
321 /** @brief Statistic on the number of stolen ranges in
322 load-balanced quicksort.*/
323 static volatile sequence_index_t qsb_steals;
324 };
325
326 volatile bool Settings::force_parallel = false;
327 volatile bool Settings::force_sequential = false;
328 volatile Settings::SortAlgorithm Settings::sort_algorithm = Settings::MWMS;
329 volatile Settings::Splitting Settings::sort_splitting = Settings::EXACT;
330 volatile sequence_index_t Settings::sort_minimal_n = 1000;
331
332 volatile unsigned int Settings::sort_mwms_oversampling = 10;
333 volatile unsigned int Settings::sort_qs_num_samples_preset = 100;
334 volatile sequence_index_t Settings::sort_qsb_base_case_maximal_n = 100;
335 volatile sequence_index_t Settings::partition_minimal_n = 1000;
336 volatile sequence_index_t Settings::nth_element_minimal_n = 1000;
337 volatile sequence_index_t Settings::partial_sort_minimal_n = 1000;
338 volatile sequence_index_t Settings::partition_chunk_size = 1000;
339 volatile double Settings::partition_chunk_share = 0.0;
340 volatile unsigned int Settings::adjacent_difference_minimal_n = 1000;
341 volatile Settings::PartialSumAlgorithm Settings::partial_sum_algorithm = Settings::LINEAR;
342 volatile unsigned int Settings::partial_sum_minimal_n = 1000;
343 volatile float Settings::partial_sum_dilatation = 1.0f;
344 volatile unsigned int Settings::random_shuffle_minimal_n = 1000;
345 volatile Settings::Splitting Settings::merge_splitting = Settings::EXACT;
346 volatile sequence_index_t Settings::merge_minimal_n = 1000;
347 volatile unsigned int Settings::merge_oversampling = 10;
348 volatile sequence_index_t Settings::multiway_merge_minimal_n = 1000;
349 volatile int Settings::multiway_merge_minimal_k = 2;
350
351 // unique copy
352 volatile sequence_index_t Settings::unique_copy_minimal_n = 10000;
353 volatile Settings::MultiwayMergeAlgorithm Settings::multiway_merge_algorithm = Settings::LOSER_TREE;
354 volatile Settings::Splitting Settings::multiway_merge_splitting = Settings::EXACT;
355 volatile unsigned int Settings::multiway_merge_oversampling = 10;
356 volatile Settings::FindDistribution Settings::find_distribution = Settings::CONSTANT_SIZE_BLOCKS;
357 volatile sequence_index_t Settings::find_sequential_search_size = 256;
358 volatile sequence_index_t Settings::find_initial_block_size = 256;
359 volatile sequence_index_t Settings::find_maximum_block_size = 8192;
360 volatile double Settings::find_increasing_factor = 2.0;
361 volatile sequence_index_t Settings::workstealing_chunk_size = 100;
362 volatile sequence_index_t Settings::for_each_minimal_n = 1000;
363 volatile sequence_index_t Settings::count_minimal_n = 1000;
364 volatile sequence_index_t Settings::transform_minimal_n = 1000;
365 volatile sequence_index_t Settings::replace_minimal_n = 1000;
366 volatile sequence_index_t Settings::generate_minimal_n = 1000;
367 volatile sequence_index_t Settings::fill_minimal_n = 1000;
368 volatile sequence_index_t Settings::min_element_minimal_n = 1000;
369 volatile sequence_index_t Settings::max_element_minimal_n = 1000;
370 volatile sequence_index_t Settings::accumulate_minimal_n = 1000;
371
372 //set operations
373 volatile sequence_index_t Settings::set_union_minimal_n = 1000;
374 volatile sequence_index_t Settings::set_intersection_minimal_n = 1000;
375 volatile sequence_index_t Settings::set_difference_minimal_n = 1000;
376 volatile sequence_index_t Settings::set_symmetric_difference_minimal_n = 1000;
377 volatile unsigned long long Settings::L1_cache_size = 16 << 10;
378 volatile unsigned long long Settings::L2_cache_size = 256 << 10;
379 volatile unsigned int Settings::TLB_size = 128;
380 unsigned int Settings::cache_line_size = 64;
381
382 //statistics
383 volatile sequence_index_t Settings::qsb_steals = 0;
384} // end anonymous namespace
385
386}
387
388#endif /* _GLIBCXX_SETTINGS_H */