[thirdparty/gcc.git] / libstdc++-v3 / include / parallel / settings.h

// -*- C++ -*-

// Copyright (C) 2007 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 2, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this library; see the file COPYING.  If not, write to
// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
// MA 02111-1307, USA.

// As a special exception, you may use this file as part of a free
// software library without restriction.  Specifically, if other files
// instantiate templates or use macros or inline functions from this
// file, or you compile this file and link it with other files to
// produce an executable, this file does not by itself cause the
// resulting executable to be covered by the GNU General Public
// License.  This exception does not however invalidate any other
// reasons why the executable file might be covered by the GNU General
// Public License.

/** @file parallel/settings.h
 *  @brief Settings and tuning parameters, heuristics to decide
 *  whether to use parallelized algorithms.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 *
 *  @section parallelization_decision  The decision whether to run an algorithm in parallel.
 *
 *  There are several ways the user can switch on and off the 
 *  parallel execution of an algorithm, both at compile- and 
 *  run-time.
 *
 *  Only sequential execution can be forced at compile-time.
 *  This reduces code size and protects code parts that have 
 *  non-thread-safe side effects.
 *
 *  Ultimately forcing parallel execution at compile-time does 
 *  make much sense.
 *  Often, the sequential algorithm implementation is used as
 *  a subroutine, so no reduction in code size can be achieved.
 *  Also, the machine the program is run on might have only one
 *  processor core, so to avoid overhead, the algorithm is
 *  executed sequentially.
 *
 *  To force sequential execution of an algorithm ultimately
 *  at compile-time, the user must add the tag 
 *  __gnu_parallel::sequential_tag() to the end of the
 *  parameter list, e. g.
 *
 *  \code
 *  std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
 *  \endcode
 *
 *  This is compatible with all overloaded algorithm variants.
 *  No additional code will be instantiated, at all.
 *  The same holds for most algorithm calls with iterators 
 *  not providing random access.
 *
 *  If the algorithm call is not forced to be executed sequentially
 *  at compile-time, the decision is made at run-time, for each call.
 *  First, the two (conceptually) global variables 
 *  __gnu_parallel::Settings::force_sequential and 
 *  __gnu_parallel::Settings::force_parallel are executed.
 *  If the former one is true, the sequential algorithm is executed.
 *  If the latter one is true and the former one is false, 
 *  the algorithm is executed in parallel.
 *
 *  If none of these conditions has fired so far, a heuristic is used.
 *  The parallel algorithm implementation is called only if the
 *  input size is sufficiently large.
 *  For most algorithms, the input size is the (combined) length of 
 *  the input sequence(s).
 *  The threshold can be set by the user, individually for each
 *  algorithm.
 *  The according variables are called 
 *  __gnu_parallel::Settings::[algorithm]_minimal_n .
 *
 *  For some of the algorithms, there are even more tuning options,
 *  e. g. the ability to choose from multiple algorithm variants.
 *  See the __gnu_parallel::Settings class for details.
 */

// Written by Johannes Singler and Felix Putze.

#ifndef _GLIBCXX_PARALLEL_SETTINGS_H
#define _GLIBCXX_PARALLEL_SETTINGS_H 1

#include <omp.h>
#include <parallel/types.h>

/** 
  * @brief The extensible condition on whether the parallel variant of
  * an algorithm sould be called.
  * @param c A condition that is overruled by
  * __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on
  * the input size.
  */
#define _GLIBCXX_PARALLEL_CONDITION(c) (!(__gnu_parallel::Settings::force_sequential) && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::Settings::force_parallel))

namespace __gnu_parallel
{
  // NB: Including this file cannot produce (unresolved) symbols from
  // the OpenMP runtime unless the parallel mode is actually invoked
  // and active, which imples that the OpenMP runtime is actually
  // going to be linked in.
  inline int
  get_max_threads() 
  { return omp_get_max_threads() > 1 ? omp_get_max_threads() : 1; }

namespace 
{
  // XXX look at _Tune in mt_allocator.h
  /** @brief Run-time settings for the parallel mode. */
  struct Settings
  {
    /** @brief Different parallel sorting algorithms to choose
	from: multi-way mergesort, quicksort, load-balanced
	quicksort. */
    enum SortAlgorithm 
    { MWMS, QS, QS_BALANCED };

    /** @brief Different merging algorithms: bubblesort-alike,
	loser-tree variants, enum sentinel */
    enum MultiwayMergeAlgorithm
    { BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED, LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST };

    /** @brief Different splitting strategies for sorting/merging:
	by sampling, exact */
    enum Splitting 
    { SAMPLING, EXACT };

    /** @brief Different partial sum algorithms: recursive, linear */
    enum PartialSumAlgorithm 
    { RECURSIVE, LINEAR };

    /** @brief Different find distribution strategies: growing
	blocks, equal-sized blocks, equal splitting. */
    enum FindDistribution 
    { GROWING_BLOCKS, CONSTANT_SIZE_BLOCKS, EQUAL_SPLIT };

    /** @brief Force all algorithms to be executed sequentially.
     * This setting cannot be overwritten. */
    static volatile bool force_sequential;

    /** @brief Force all algorithms to be executed in parallel.
     * This setting can be overriden by __gnu_parallel::sequential_tag
     * (compile-time), and force_sequential (run-time). */
    static volatile bool force_parallel;

    /** @brief Algorithm to use for sorting. */
    static volatile SortAlgorithm sort_algorithm;

    /** @brief Strategy to use for splitting the input when
	sorting (MWMS). */
    static volatile Splitting sort_splitting;

    /** @brief Minimal input size for parallel sorting. */
    static volatile sequence_index_t sort_minimal_n;
    /** @brief Oversampling factor for parallel std::sort (MWMS). */
    static volatile unsigned int sort_mwms_oversampling;
    /** @brief Such many samples to take to find a good pivot
	(quicksort). */
    static volatile unsigned int sort_qs_num_samples_preset;

    /** @brief Maximal subsequence length to swtich to unbalanced
     * base case.  Applies to std::sort with dynamically
     * load-balanced quicksort. */
    static volatile sequence_index_t sort_qsb_base_case_maximal_n;

    /** @brief Minimal input size for parallel std::partition. */
    static volatile sequence_index_t partition_minimal_n;

    /** @brief Chunk size for parallel std::partition. */
    static volatile sequence_index_t partition_chunk_size;

    /** @brief Chunk size for parallel std::partition, relative to
     * input size.  If >0.0, this value overrides
     * partition_chunk_size. */
    static volatile double partition_chunk_share;

    /** @brief Minimal input size for parallel std::nth_element. */
    static volatile sequence_index_t nth_element_minimal_n;

    /** @brief Minimal input size for parallel std::partial_sort. */
    static volatile sequence_index_t partial_sort_minimal_n;

    /** @brief Minimal input size for parallel std::adjacent_difference. */
    static volatile unsigned int adjacent_difference_minimal_n;

    /** @brief Minimal input size for parallel std::partial_sum. */
    static volatile unsigned int partial_sum_minimal_n;

    /** @brief Algorithm to use for std::partial_sum. */
    static volatile PartialSumAlgorithm partial_sum_algorithm;

    /** @brief Assume "sum and write result" to be that factor
     *  slower than just "sum".  This value is used for
     *  std::partial_sum. */
    static volatile float partial_sum_dilatation;

    /** @brief Minimal input size for parallel std::random_shuffle. */
    static volatile unsigned int random_shuffle_minimal_n;

    /** @brief Minimal input size for parallel std::merge. */
    static volatile sequence_index_t merge_minimal_n;

    /** @brief Splitting strategy for parallel std::merge. */
    static volatile Splitting merge_splitting;

    /** @brief Oversampling factor for parallel std::merge.
     * Such many samples per thread are collected. */
    static volatile unsigned int merge_oversampling;

    /** @brief Algorithm to use for parallel
	__gnu_parallel::multiway_merge. */
    static volatile MultiwayMergeAlgorithm multiway_merge_algorithm;

    /** @brief Splitting strategy to use for parallel
	__gnu_parallel::multiway_merge. */
    static volatile Splitting multiway_merge_splitting;

    //// Oversampling factor for parallel __gnu_parallel::multiway_merge.
    static volatile unsigned int multiway_merge_oversampling;

    /// Minimal input size for parallel __gnu_parallel::multiway_merge.
    static volatile sequence_index_t multiway_merge_minimal_n;

    /// Oversampling factor for parallel __gnu_parallel::multiway_merge.
    static volatile int multiway_merge_minimal_k;

    /** @brief Minimal input size for parallel std::unique_copy. */
    static volatile sequence_index_t unique_copy_minimal_n;

    static volatile sequence_index_t workstealing_chunk_size;

    /** @brief Minimal input size for parallel std::for_each. */
    static volatile sequence_index_t for_each_minimal_n;

    /** @brief Minimal input size for parallel std::count and
	std::count_if. */
    static volatile sequence_index_t count_minimal_n;

    /** @brief Minimal input size for parallel std::transform. */
    static volatile sequence_index_t transform_minimal_n;

    /** @brief Minimal input size for parallel std::replace and
	std::replace_if. */
    static volatile sequence_index_t replace_minimal_n;

    /** @brief Minimal input size for parallel std::generate. */
    static volatile sequence_index_t generate_minimal_n;

    /** @brief Minimal input size for parallel std::fill. */
    static volatile sequence_index_t fill_minimal_n;

    /** @brief Minimal input size for parallel std::min_element. */
    static volatile sequence_index_t min_element_minimal_n;

    /** @brief Minimal input size for parallel std::max_element. */
    static volatile sequence_index_t max_element_minimal_n;

    /** @brief Minimal input size for parallel std::accumulate. */
    static volatile sequence_index_t accumulate_minimal_n;

    /** @brief Distribution strategy for parallel std::find. */
    static volatile FindDistribution find_distribution;

    /** @brief Start with looking for that many elements
	sequentially, for std::find. */
    static volatile sequence_index_t find_sequential_search_size;

    /** @brief Initial block size for parallel std::find. */
    static volatile sequence_index_t find_initial_block_size;

    /** @brief Maximal block size for parallel std::find. */
    static volatile sequence_index_t find_maximum_block_size;

    /** @brief Block size increase factor for parallel std::find. */
    static volatile double find_increasing_factor;

    //set operations
    /** @brief Minimal input size for parallel std::set_union. */
    static volatile sequence_index_t set_union_minimal_n;

    /** @brief Minimal input size for parallel
	std::set_symmetric_difference. */
    static volatile sequence_index_t set_symmetric_difference_minimal_n;

    /** @brief Minimal input size for parallel std::set_difference. */
    static volatile sequence_index_t set_difference_minimal_n;

    /** @brief Minimal input size for parallel std::set_intersection. */
    static volatile sequence_index_t set_intersection_minimal_n;

    //hardware dependent tuning parameters
    /** @brief Size of the L1 cache in bytes (underestimation). */
    static volatile unsigned long long L1_cache_size;

    /** @brief Size of the L2 cache in bytes (underestimation). */
    static volatile unsigned long long L2_cache_size;

    /** @brief Size of the Translation Lookaside Buffer
	(underestimation). */
    static volatile unsigned int TLB_size;

    /** @brief Overestimation of cache line size.  Used to avoid
     * false sharing, i. e. elements of different threads are at
     * least this amount apart. */
    static unsigned int cache_line_size;

    //statistics
    /** @brief Statistic on the number of stolen ranges in
	load-balanced quicksort.*/
    static volatile sequence_index_t qsb_steals;
  };

  volatile bool Settings::force_parallel = false;
  volatile bool Settings::force_sequential = false;
  volatile  Settings::SortAlgorithm Settings::sort_algorithm = Settings::MWMS;
  volatile  Settings::Splitting Settings::sort_splitting = Settings::EXACT;
  volatile sequence_index_t Settings::sort_minimal_n = 1000;

  volatile unsigned int Settings::sort_mwms_oversampling = 10;
  volatile unsigned int Settings::sort_qs_num_samples_preset = 100;
  volatile sequence_index_t Settings::sort_qsb_base_case_maximal_n = 100;
  volatile sequence_index_t Settings::partition_minimal_n = 1000;
  volatile sequence_index_t Settings::nth_element_minimal_n = 1000;
  volatile sequence_index_t Settings::partial_sort_minimal_n = 1000;
  volatile sequence_index_t Settings::partition_chunk_size = 1000;
  volatile double Settings::partition_chunk_share = 0.0;
  volatile unsigned int Settings::adjacent_difference_minimal_n = 1000;
  volatile  Settings::PartialSumAlgorithm Settings::partial_sum_algorithm = Settings::LINEAR;
  volatile unsigned int Settings::partial_sum_minimal_n = 1000;
  volatile float Settings::partial_sum_dilatation = 1.0f;
  volatile unsigned int Settings::random_shuffle_minimal_n = 1000;
  volatile  Settings::Splitting Settings::merge_splitting = Settings::EXACT;
  volatile sequence_index_t Settings::merge_minimal_n = 1000;
  volatile unsigned int Settings::merge_oversampling = 10;
  volatile sequence_index_t Settings::multiway_merge_minimal_n = 1000;
  volatile int Settings::multiway_merge_minimal_k = 2;

  // unique copy
  volatile sequence_index_t Settings::unique_copy_minimal_n = 10000;
  volatile  Settings::MultiwayMergeAlgorithm Settings::multiway_merge_algorithm = Settings::LOSER_TREE;
  volatile  Settings::Splitting Settings::multiway_merge_splitting = Settings::EXACT;
  volatile unsigned int Settings::multiway_merge_oversampling = 10;
  volatile  Settings::FindDistribution Settings::find_distribution = Settings::CONSTANT_SIZE_BLOCKS;
  volatile sequence_index_t Settings::find_sequential_search_size = 256;
  volatile sequence_index_t Settings::find_initial_block_size = 256;
  volatile sequence_index_t Settings::find_maximum_block_size = 8192;
  volatile double Settings::find_increasing_factor = 2.0;
  volatile sequence_index_t Settings::workstealing_chunk_size = 100;
  volatile sequence_index_t Settings::for_each_minimal_n = 1000;
  volatile sequence_index_t Settings::count_minimal_n = 1000;
  volatile sequence_index_t Settings::transform_minimal_n = 1000;
  volatile sequence_index_t Settings::replace_minimal_n = 1000;
  volatile sequence_index_t Settings::generate_minimal_n = 1000;
  volatile sequence_index_t Settings::fill_minimal_n = 1000;
  volatile sequence_index_t Settings::min_element_minimal_n = 1000;
  volatile sequence_index_t Settings::max_element_minimal_n = 1000;
  volatile sequence_index_t Settings::accumulate_minimal_n = 1000;

  //set operations
  volatile sequence_index_t Settings::set_union_minimal_n = 1000;
  volatile sequence_index_t Settings::set_intersection_minimal_n = 1000;
  volatile sequence_index_t Settings::set_difference_minimal_n = 1000;
  volatile sequence_index_t Settings::set_symmetric_difference_minimal_n = 1000;
  volatile unsigned long long Settings::L1_cache_size = 16 << 10;
  volatile unsigned long long Settings::L2_cache_size = 256 << 10;
  volatile unsigned int Settings::TLB_size = 128;
  unsigned int Settings::cache_line_size = 64;

  //statistics
  volatile sequence_index_t Settings::qsb_steals = 0;
} // end anonymous namespace

}

#endif /* _GLIBCXX_SETTINGS_H */
Commit	Line	Data
c2ba9709 JS	1	// -- C++ --
	2
	3	// Copyright (C) 2007 Free Software Foundation, Inc.
	4	//
	5	// This file is part of the GNU ISO C++ Library. This library is free
	6	// software; you can redistribute it and/or modify it under the terms
	7	// of the GNU General Public License as published by the Free Software
	8	// Foundation; either version 2, or (at your option) any later
	9	// version.
	10
	11	// This library is distributed in the hope that it will be useful, but
	12	// WITHOUT ANY WARRANTY; without even the implied warranty of
	13	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	// General Public License for more details.
	15
	16	// You should have received a copy of the GNU General Public License
	17	// along with this library; see the file COPYING. If not, write to
	18	// the Free Software Foundation, 59 Temple Place - Suite 330, Boston,
	19	// MA 02111-1307, USA.
	20
	21	// As a special exception, you may use this file as part of a free
	22	// software library without restriction. Specifically, if other files
	23	// instantiate templates or use macros or inline functions from this
	24	// file, or you compile this file and link it with other files to
	25	// produce an executable, this file does not by itself cause the
	26	// resulting executable to be covered by the GNU General Public
	27	// License. This exception does not however invalidate any other
	28	// reasons why the executable file might be covered by the GNU General
	29	// Public License.
	30
	31	/** @file parallel/settings.h
	32	* @brief Settings and tuning parameters, heuristics to decide
	33	* whether to use parallelized algorithms.
	34	* This file is a GNU parallel extension to the Standard C++ Library.
	35	*
	36	* @section parallelization_decision The decision whether to run an algorithm in parallel.
	37	*
	38	* There are several ways the user can switch on and off the
	39	* parallel execution of an algorithm, both at compile- and
	40	* run-time.
	41	*
	42	* Only sequential execution can be forced at compile-time.
	43	* This reduces code size and protects code parts that have
	44	* non-thread-safe side effects.
	45	*
	46	* Ultimately forcing parallel execution at compile-time does
	47	* make much sense.
	48	* Often, the sequential algorithm implementation is used as
	49	* a subroutine, so no reduction in code size can be achieved.
	50	* Also, the machine the program is run on might have only one
	51	* processor core, so to avoid overhead, the algorithm is
	52	* executed sequentially.
	53	*
	54	* To force sequential execution of an algorithm ultimately
	55	* at compile-time, the user must add the tag
	56	* __gnu_parallel::sequential_tag() to the end of the
	57	* parameter list, e. g.
	58	*
	59	* \code
	60	* std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
	61	* \endcode
	62	*
	63	* This is compatible with all overloaded algorithm variants.
	64	* No additional code will be instantiated, at all.
65	* The same holds for most algorithm calls with iterators
66	* not providing random access.
67	*
68	* If the algorithm call is not forced to be executed sequentially
69	* at compile-time, the decision is made at run-time, for each call.
70	* First, the two (conceptually) global variables
71	* __gnu_parallel::Settings::force_sequential and
72	* __gnu_parallel::Settings::force_parallel are executed.
73	* If the former one is true, the sequential algorithm is executed.
74	* If the latter one is true and the former one is false,
75	* the algorithm is executed in parallel.
76	*
77	* If none of these conditions has fired so far, a heuristic is used.
78	* The parallel algorithm implementation is called only if the
79	* input size is sufficiently large.
80	* For most algorithms, the input size is the (combined) length of
81	* the input sequence(s).
82	* The threshold can be set by the user, individually for each
83	* algorithm.
84	* The according variables are called
85	* __gnu_parallel::Settings::[algorithm]_minimal_n .
86	*
87	* For some of the algorithms, there are even more tuning options,
88	* e. g. the ability to choose from multiple algorithm variants.
89	* See the __gnu_parallel::Settings class for details.
90	*/
91
92	// Written by Johannes Singler and Felix Putze.
93
94	#ifndef _GLIBCXX_PARALLEL_SETTINGS_H
95	#define _GLIBCXX_PARALLEL_SETTINGS_H 1
96
97	#include <omp.h>
98	#include <parallel/types.h>
99
100	/**
101	* @brief The extensible condition on whether the parallel variant of
102	* an algorithm sould be called.
103	* @param c A condition that is overruled by
104	* __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on
105	* the input size.
106	*/
107	#define _GLIBCXX_PARALLEL_CONDITION(c) (!(__gnu_parallel::Settings::force_sequential) && ((__gnu_parallel::get_max_threads() > 1 && (c)) \|\| __gnu_parallel::Settings::force_parallel))
108
109	namespace __gnu_parallel
110	{
111	// NB: Including this file cannot produce (unresolved) symbols from
112	// the OpenMP runtime unless the parallel mode is actually invoked
113	// and active, which imples that the OpenMP runtime is actually
114	// going to be linked in.
115	inline int
116	get_max_threads()
117	{ return omp_get_max_threads() > 1 ? omp_get_max_threads() : 1; }
118
119	namespace
120	{
121	// XXX look at _Tune in mt_allocator.h
122	/** @brief Run-time settings for the parallel mode. */
123	struct Settings
124	{
125	/** @brief Different parallel sorting algorithms to choose
126	from: multi-way mergesort, quicksort, load-balanced
127	quicksort. */
128	enum SortAlgorithm
129	{ MWMS, QS, QS_BALANCED };
130
131	/** @brief Different merging algorithms: bubblesort-alike,
132	loser-tree variants, enum sentinel */
133	enum MultiwayMergeAlgorithm
134	{ BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED, LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST };
135
136	/** @brief Different splitting strategies for sorting/merging:
137	by sampling, exact */
138	enum Splitting
139	{ SAMPLING, EXACT };
140
141	/** @brief Different partial sum algorithms: recursive, linear */
142	enum PartialSumAlgorithm
143	{ RECURSIVE, LINEAR };
144
145	/** @brief Different find distribution strategies: growing
146	blocks, equal-sized blocks, equal splitting. */
147	enum FindDistribution
148	{ GROWING_BLOCKS, CONSTANT_SIZE_BLOCKS, EQUAL_SPLIT };
149
150	/** @brief Force all algorithms to be executed sequentially.
151	* This setting cannot be overwritten. */
152	static volatile bool force_sequential;
153
154	/** @brief Force all algorithms to be executed in parallel.
155	* This setting can be overriden by __gnu_parallel::sequential_tag
156	* (compile-time), and force_sequential (run-time). */
157	static volatile bool force_parallel;
158
159	/** @brief Algorithm to use for sorting. */
160	static volatile SortAlgorithm sort_algorithm;
161
162	/** @brief Strategy to use for splitting the input when
163	sorting (MWMS). */
164	static volatile Splitting sort_splitting;
165
166	/** @brief Minimal input size for parallel sorting. */
167	static volatile sequence_index_t sort_minimal_n;
168	/** @brief Oversampling factor for parallel std::sort (MWMS). */
169	static volatile unsigned int sort_mwms_oversampling;
170	/** @brief Such many samples to take to find a good pivot
171	(quicksort). */
172	static volatile unsigned int sort_qs_num_samples_preset;
173
174	/** @brief Maximal subsequence length to swtich to unbalanced
175	* base case. Applies to std::sort with dynamically
176	* load-balanced quicksort. */
177	static volatile sequence_index_t sort_qsb_base_case_maximal_n;
178
179	/** @brief Minimal input size for parallel std::partition. */
180	static volatile sequence_index_t partition_minimal_n;
181
182	/** @brief Chunk size for parallel std::partition. */
183	static volatile sequence_index_t partition_chunk_size;
184
185	/** @brief Chunk size for parallel std::partition, relative to
186	* input size. If >0.0, this value overrides
187	* partition_chunk_size. */
188	static volatile double partition_chunk_share;
189
190	/** @brief Minimal input size for parallel std::nth_element. */
191	static volatile sequence_index_t nth_element_minimal_n;
192
193	/** @brief Minimal input size for parallel std::partial_sort. */
194	static volatile sequence_index_t partial_sort_minimal_n;
195
196	/** @brief Minimal input size for parallel std::adjacent_difference. */
197	static volatile unsigned int adjacent_difference_minimal_n;
198
199	/** @brief Minimal input size for parallel std::partial_sum. */
200	static volatile unsigned int partial_sum_minimal_n;
201
202	/** @brief Algorithm to use for std::partial_sum. */
203	static volatile PartialSumAlgorithm partial_sum_algorithm;
204
205	/** @brief Assume "sum and write result" to be that factor
206	* slower than just "sum". This value is used for
207	* std::partial_sum. */
208	static volatile float partial_sum_dilatation;
209
210	/** @brief Minimal input size for parallel std::random_shuffle. */
211	static volatile unsigned int random_shuffle_minimal_n;
212
213	/** @brief Minimal input size for parallel std::merge. */
214	static volatile sequence_index_t merge_minimal_n;
215
216	/** @brief Splitting strategy for parallel std::merge. */
217	static volatile Splitting merge_splitting;
218
219	/** @brief Oversampling factor for parallel std::merge.
220	* Such many samples per thread are collected. */
221	static volatile unsigned int merge_oversampling;
222
223	/** @brief Algorithm to use for parallel
224	__gnu_parallel::multiway_merge. */
225	static volatile MultiwayMergeAlgorithm multiway_merge_algorithm;
226
227	/** @brief Splitting strategy to use for parallel
228	__gnu_parallel::multiway_merge. */
229	static volatile Splitting multiway_merge_splitting;
230
231	//// Oversampling factor for parallel __gnu_parallel::multiway_merge.
232	static volatile unsigned int multiway_merge_oversampling;
233
234	/// Minimal input size for parallel __gnu_parallel::multiway_merge.
235	static volatile sequence_index_t multiway_merge_minimal_n;
236
237	/// Oversampling factor for parallel __gnu_parallel::multiway_merge.
238	static volatile int multiway_merge_minimal_k;
239
240	/** @brief Minimal input size for parallel std::unique_copy. */
241	static volatile sequence_index_t unique_copy_minimal_n;
242
243	static volatile sequence_index_t workstealing_chunk_size;
244
245	/** @brief Minimal input size for parallel std::for_each. */
246	static volatile sequence_index_t for_each_minimal_n;
247
248	/** @brief Minimal input size for parallel std::count and
249	std::count_if. */
250	static volatile sequence_index_t count_minimal_n;
251
252	/** @brief Minimal input size for parallel std::transform. */
253	static volatile sequence_index_t transform_minimal_n;
254
255	/** @brief Minimal input size for parallel std::replace and
256	std::replace_if. */
257	static volatile sequence_index_t replace_minimal_n;
258
259	/** @brief Minimal input size for parallel std::generate. */
260	static volatile sequence_index_t generate_minimal_n;
261
262	/** @brief Minimal input size for parallel std::fill. */
263	static volatile sequence_index_t fill_minimal_n;
264
265	/** @brief Minimal input size for parallel std::min_element. */
266	static volatile sequence_index_t min_element_minimal_n;
267
268	/** @brief Minimal input size for parallel std::max_element. */
269	static volatile sequence_index_t max_element_minimal_n;
270
271	/** @brief Minimal input size for parallel std::accumulate. */
272	static volatile sequence_index_t accumulate_minimal_n;
273
274	/** @brief Distribution strategy for parallel std::find. */
275	static volatile FindDistribution find_distribution;
276
277	/** @brief Start with looking for that many elements
278	sequentially, for std::find. */
279	static volatile sequence_index_t find_sequential_search_size;
280
281	/** @brief Initial block size for parallel std::find. */
282	static volatile sequence_index_t find_initial_block_size;
283
284	/** @brief Maximal block size for parallel std::find. */
285	static volatile sequence_index_t find_maximum_block_size;
286
287	/** @brief Block size increase factor for parallel std::find. */
288	static volatile double find_increasing_factor;
289
290	//set operations
291	/** @brief Minimal input size for parallel std::set_union. */
292	static volatile sequence_index_t set_union_minimal_n;
293
294	/** @brief Minimal input size for parallel
295	std::set_symmetric_difference. */
296	static volatile sequence_index_t set_symmetric_difference_minimal_n;
297
298	/** @brief Minimal input size for parallel std::set_difference. */
299	static volatile sequence_index_t set_difference_minimal_n;
300
301	/** @brief Minimal input size for parallel std::set_intersection. */
302	static volatile sequence_index_t set_intersection_minimal_n;
303
304	//hardware dependent tuning parameters
305	/** @brief Size of the L1 cache in bytes (underestimation). */
306	static volatile unsigned long long L1_cache_size;
307
308	/** @brief Size of the L2 cache in bytes (underestimation). */
309	static volatile unsigned long long L2_cache_size;
310
311	/** @brief Size of the Translation Lookaside Buffer
312	(underestimation). */
313	static volatile unsigned int TLB_size;
314
315	/** @brief Overestimation of cache line size. Used to avoid
316	* false sharing, i. e. elements of different threads are at
317	* least this amount apart. */
318	static unsigned int cache_line_size;
319
320	//statistics
321	/** @brief Statistic on the number of stolen ranges in
322	load-balanced quicksort.*/
323	static volatile sequence_index_t qsb_steals;
324	};
325
326	volatile bool Settings::force_parallel = false;
327	volatile bool Settings::force_sequential = false;
328	volatile Settings::SortAlgorithm Settings::sort_algorithm = Settings::MWMS;
329	volatile Settings::Splitting Settings::sort_splitting = Settings::EXACT;
330	volatile sequence_index_t Settings::sort_minimal_n = 1000;
331
332	volatile unsigned int Settings::sort_mwms_oversampling = 10;
333	volatile unsigned int Settings::sort_qs_num_samples_preset = 100;
334	volatile sequence_index_t Settings::sort_qsb_base_case_maximal_n = 100;
335	volatile sequence_index_t Settings::partition_minimal_n = 1000;
336	volatile sequence_index_t Settings::nth_element_minimal_n = 1000;
337	volatile sequence_index_t Settings::partial_sort_minimal_n = 1000;
338	volatile sequence_index_t Settings::partition_chunk_size = 1000;
339	volatile double Settings::partition_chunk_share = 0.0;
340	volatile unsigned int Settings::adjacent_difference_minimal_n = 1000;
341	volatile Settings::PartialSumAlgorithm Settings::partial_sum_algorithm = Settings::LINEAR;
342	volatile unsigned int Settings::partial_sum_minimal_n = 1000;
343	volatile float Settings::partial_sum_dilatation = 1.0f;
344	volatile unsigned int Settings::random_shuffle_minimal_n = 1000;
345	volatile Settings::Splitting Settings::merge_splitting = Settings::EXACT;
346	volatile sequence_index_t Settings::merge_minimal_n = 1000;
347	volatile unsigned int Settings::merge_oversampling = 10;
348	volatile sequence_index_t Settings::multiway_merge_minimal_n = 1000;
349	volatile int Settings::multiway_merge_minimal_k = 2;
350
351	// unique copy
352	volatile sequence_index_t Settings::unique_copy_minimal_n = 10000;
353	volatile Settings::MultiwayMergeAlgorithm Settings::multiway_merge_algorithm = Settings::LOSER_TREE;
354	volatile Settings::Splitting Settings::multiway_merge_splitting = Settings::EXACT;
355	volatile unsigned int Settings::multiway_merge_oversampling = 10;
356	volatile Settings::FindDistribution Settings::find_distribution = Settings::CONSTANT_SIZE_BLOCKS;
357	volatile sequence_index_t Settings::find_sequential_search_size = 256;
358	volatile sequence_index_t Settings::find_initial_block_size = 256;
359	volatile sequence_index_t Settings::find_maximum_block_size = 8192;
360	volatile double Settings::find_increasing_factor = 2.0;
361	volatile sequence_index_t Settings::workstealing_chunk_size = 100;
362	volatile sequence_index_t Settings::for_each_minimal_n = 1000;
363	volatile sequence_index_t Settings::count_minimal_n = 1000;
364	volatile sequence_index_t Settings::transform_minimal_n = 1000;
365	volatile sequence_index_t Settings::replace_minimal_n = 1000;
366	volatile sequence_index_t Settings::generate_minimal_n = 1000;
367	volatile sequence_index_t Settings::fill_minimal_n = 1000;
368	volatile sequence_index_t Settings::min_element_minimal_n = 1000;
369	volatile sequence_index_t Settings::max_element_minimal_n = 1000;
370	volatile sequence_index_t Settings::accumulate_minimal_n = 1000;
371
372	//set operations
373	volatile sequence_index_t Settings::set_union_minimal_n = 1000;
374	volatile sequence_index_t Settings::set_intersection_minimal_n = 1000;
375	volatile sequence_index_t Settings::set_difference_minimal_n = 1000;
376	volatile sequence_index_t Settings::set_symmetric_difference_minimal_n = 1000;
377	volatile unsigned long long Settings::L1_cache_size = 16 << 10;
378	volatile unsigned long long Settings::L2_cache_size = 256 << 10;
379	volatile unsigned int Settings::TLB_size = 128;
380	unsigned int Settings::cache_line_size = 64;
381
382	//statistics
383	volatile sequence_index_t Settings::qsb_steals = 0;
384	} // end anonymous namespace
385
386	}
387
388	#endif /* _GLIBCXX_SETTINGS_H */