* params.c (set_param_value): Initialize the "set" field.
* params.h (struct param_info): Add "set" field.
(PARAM_SET_P): New macro.
(PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES, L1_CACHE_SIZE,
L1_CACHE_LINE_SIZE): New macros.
* toplev.c (DEFPARAM): Initialize the "set" field.
* tree-ssa-loop-prefetch.c (PREFETCH_LATENCY,
SIMULTANEOUS_PREFETCHES): Removed.
(PREFETCH_BLOCK): Use L1_CACHE_LINE_SIZE.
(tree_ssa_prefetch_arrays): Dump the values of the parameters.
* config/sparc/sparc.c: Include params.h.
(sparc_override_options): Set SIMULTANEOUS_PREFETCHES and
L1_CACHE_LINE_SIZE parameters.
* config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
Removed.
* config/i386/i386.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
Removed.
* config/i386/i386.c: Include params.h.
(k8_cost): Change default value for SIMULTANEOUS_PREFETCHES.
(override_options): Set SIMULTANEOUS_PREFETCHES and
L1_CACHE_LINE_SIZE parameters.
* config/sh/sh.h (SIMULTANEOUS_PREFETCHES): Removed.
(OPTIMIZATION_OPTIONS): Set SIMULTANEOUS_PREFETCHES and
L1_CACHE_LINE_SIZE parameters.
* config/ia64/ia64.c (ia64_optimization_options): Set
SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters.
* config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK):
Removed.
* params.def (PARAM_PREFETCH_LATENCY, PARAM_SIMULTANEOUS_PREFETCHES,
PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE): New params.
* doc/invoke.texi: Document new params.
From-SVN: r118728
+2006-11-12 Zdenek Dvorak <dvorakz@suse.cz>
+
+ * params.c (set_param_value): Initialize the "set" field.
+ * params.h (struct param_info): Add "set" field.
+ (PARAM_SET_P): New macro.
+ (PREFETCH_LATENCY, SIMULTANEOUS_PREFETCHES, L1_CACHE_SIZE,
+ L1_CACHE_LINE_SIZE): New macros.
+ * toplev.c (DEFPARAM): Initialize the "set" field.
+ * tree-ssa-loop-prefetch.c (PREFETCH_LATENCY,
+ SIMULTANEOUS_PREFETCHES): Removed.
+ (PREFETCH_BLOCK): Use L1_CACHE_LINE_SIZE.
+ (tree_ssa_prefetch_arrays): Dump the values of the parameters.
+ * config/sparc/sparc.c: Include params.h.
+ (sparc_override_options): Set SIMULTANEOUS_PREFETCHES and
+ L1_CACHE_LINE_SIZE parameters.
+ * config/sparc/sparc.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
+ Removed.
+ * config/i386/i386.h (PREFETCH_BLOCK, SIMULTANEOUS_PREFETCHES):
+ Removed.
+ * config/i386/i386.c: Include params.h.
+ (k8_cost): Change default value for SIMULTANEOUS_PREFETCHES.
+ (override_options): Set SIMULTANEOUS_PREFETCHES and
+ L1_CACHE_LINE_SIZE parameters.
+ * config/sh/sh.h (SIMULTANEOUS_PREFETCHES): Removed.
+ (OPTIMIZATION_OPTIONS): Set SIMULTANEOUS_PREFETCHES and
+ L1_CACHE_LINE_SIZE parameters.
+ * config/ia64/ia64.c (ia64_optimization_options): Set
+ SIMULTANEOUS_PREFETCHES and L1_CACHE_LINE_SIZE parameters.
+ * config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK):
+ Removed.
+ * params.def (PARAM_PREFETCH_LATENCY, PARAM_SIMULTANEOUS_PREFETCHES,
+ PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE): New params.
+ * doc/invoke.texi: Document new params.
+
2006-11-12 Roger Sayle <roger@eyesopen.com>
PR tree-optimization/13827
#include "tree-gimple.h"
#include "dwarf2.h"
#include "tm-constrs.h"
+#include "params.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
in SImode, DImode and TImode */
5, /* MMX or SSE register to integer */
64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably not a good idea to
+ leave the number of prefetches completely unlimited, as their execution
+ also takes some time). */
+ 100, /* number of parallel prefetches */
5, /* Branch cost */
COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (4), /* cost of FMUL instruction. */
so it won't slow down the compilation and make x87 code slower. */
if (!TARGET_SCHEDULE)
flag_schedule_insns_after_reload = flag_schedule_insns = 0;
+
+ if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
+ set_param_value ("simultaneous-prefetches",
+ ix86_cost->simultaneous_prefetches);
+ if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
+ set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
}
\f
/* switch to the appropriate section for output of DECL.
/* Define this as 1 if `char' should by default be signed; else as 0. */
#define DEFAULT_SIGNED_CHAR 1
-/* Number of bytes moved into a data cache for a single prefetch operation. */
-#define PREFETCH_BLOCK ix86_cost->prefetch_block
-
-/* Number of prefetch operations that can be done in parallel. */
-#define SIMULTANEOUS_PREFETCHES ix86_cost->simultaneous_prefetches
-
/* Max number of bytes we can move from memory to memory
in one reasonably fast instruction. */
#define MOVE_MAX 16
{
/* Let the scheduler form additional regions. */
set_param_value ("max-sched-extend-regions-iters", 2);
+
+ /* Set the default values for cache-related parameters. */
+ set_param_value ("simultaneous-prefetches", 6);
+ set_param_value ("l1-cache-line-size", 32);
+
}
#include "gt-ia64.h"
#pragma weak. Note, #pragma weak will only be supported if SUPPORT_WEAK is
defined. */
-/* If this architecture supports prefetch, define this to be the number of
- prefetch commands that can be executed in parallel.
-
- ??? This number is bogus and needs to be replaced before the value is
- actually used in optimizations. */
-
-#define SIMULTANEOUS_PREFETCHES 6
-
-/* If this architecture supports prefetch, define this to be the size of
- the cache line that is prefetched. */
-
-#define PREFETCH_BLOCK 32
-
#define HANDLE_SYSV_PRAGMA 1
/* A C expression for the maximum number of instructions to execute via
the user explicitly requested this to be on or off. */ \
if (flag_schedule_insns > 0) \
flag_schedule_insns = 2; \
+ \
+ set_param_value ("simultaneous-prefetches", 2); \
} while (0)
#define ASSEMBLER_DIALECT assembler_dialect
2:\n" TEXT_SECTION_ASM_OP);
#endif /* (defined CRT_BEGIN || defined CRT_END) && ! __SHMEDIA__ */
-#define SIMULTANEOUS_PREFETCHES 2
-
/* FIXME: middle-end support for highpart optimizations is missing. */
#define high_life_started reload_in_progress
#include "cfglayout.h"
#include "tree-gimple.h"
#include "langhooks.h"
+#include "params.h"
/* Processor costs */
static const
if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
target_flags |= MASK_LONG_DOUBLE_128;
#endif
+
+ if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
+ set_param_value ("simultaneous-prefetches",
+ ((sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_NIAGARA)
+ ? 2
+ : (sparc_cpu == PROCESSOR_ULTRASPARC3
+ ? 8 : 3)));
+ if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
+ set_param_value ("l1-cache-line-size",
+ ((sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_ULTRASPARC3
+ || sparc_cpu == PROCESSOR_NIAGARA)
+ ? 64 : 32));
}
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
: (sparc_cpu == PROCESSOR_NIAGARA \
? 4 \
: 3)))
-
-#define PREFETCH_BLOCK \
- ((sparc_cpu == PROCESSOR_ULTRASPARC \
- || sparc_cpu == PROCESSOR_ULTRASPARC3 \
- || sparc_cpu == PROCESSOR_NIAGARA) \
- ? 64 : 32)
-
-#define SIMULTANEOUS_PREFETCHES \
- ((sparc_cpu == PROCESSOR_ULTRASPARC \
- || sparc_cpu == PROCESSOR_NIAGARA) \
- ? 2 \
- : (sparc_cpu == PROCESSOR_ULTRASPARC3 \
- ? 8 : 3))
\f
/* Control the assembler format that we output. */
Maximum number of fields in a structure we will treat in
a field sensitive manner during pointer analysis.
+@item prefetch-latency
+Estimate of the average number of instructions that are executed before
+a prefetch finishes. The distance we prefetch ahead is proportional
+to this constant. Increasing this number may also lead to fewer
+streams being prefetched (see @option{simultaneous-prefetches}).
+
+@item simultaneous-prefetches
+Maximum number of prefetches that can run at the same time.
+
+@item l1-cache-line-size
+The size of a cache line in the L1 cache, in bytes.
+
+@item l1-cache-size
+The number of cache lines in L1 cache.
+
@end table
@end table
compiler_params[i].option,
compiler_params[i].max_value);
else
- compiler_params[i].value = value;
+ {
+ compiler_params[i].value = value;
+ compiler_params[i].set = true;
+ }
return;
}
"The maximum number of instructions ready to be issued to be considered by the scheduler during the first scheduling pass",
100, 0, 0)
+/* Prefetching and cache-optimizations related parameters. Default values are
+ usually set by machine description. */
+
+/* The number of insns executed before prefetch is completed. */
+
+DEFPARAM (PARAM_PREFETCH_LATENCY,
+ "prefetch-latency",
+ "The number of insns executed before prefetch is completed",
+ 200, 0, 0)
+
+/* The number of prefetches that can run at the same time. */
+
+DEFPARAM (PARAM_SIMULTANEOUS_PREFETCHES,
+ "simultaneous-prefetches",
+ "The number of prefetches that can run at the same time",
+ 3, 0, 0)
+
+/* The size of L1 cache in number of cache lines. */
+
+DEFPARAM (PARAM_L1_CACHE_SIZE,
+ "l1-cache-size",
+ "The size of L1 cache",
+ 1024, 0, 0)
+
+/* The size of L1 cache line in bytes. */
+
+DEFPARAM (PARAM_L1_CACHE_LINE_SIZE,
+ "l1-cache-line-size",
+ "The size of L1 cache line",
+ 32, 0, 0)
+
/*
Local variables:
mode:c
/* The associated value. */
int value;
+ /* True if the parameter was explicitly set. */
+ bool set;
+
/* Minimum acceptable value. */
int min_value;
#define PARAM_VALUE(ENUM) \
(compiler_params[(int) ENUM].value)
+/* True if the value of the parameter was explicitly changed. */
+#define PARAM_SET_P(ENUM) \
+ (compiler_params[(int) ENUM].set)
+
/* Macros for the various parameters. */
#define SALIAS_MAX_IMPLICIT_FIELDS \
PARAM_VALUE (PARAM_SALIAS_MAX_IMPLICIT_FIELDS)
((size_t) PARAM_VALUE (PARAM_MAX_FIELDS_FOR_FIELD_SENSITIVE))
#define MAX_SCHED_READY_INSNS \
PARAM_VALUE (PARAM_MAX_SCHED_READY_INSNS)
+#define PREFETCH_LATENCY \
+ PARAM_VALUE (PARAM_PREFETCH_LATENCY)
+#define SIMULTANEOUS_PREFETCHES \
+ PARAM_VALUE (PARAM_SIMULTANEOUS_PREFETCHES)
+#define L1_CACHE_SIZE \
+ PARAM_VALUE (PARAM_L1_CACHE_SIZE)
+#define L1_CACHE_LINE_SIZE \
+ PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
#endif /* ! GCC_PARAMS_H */
static const param_info lang_independent_params[] = {
#define DEFPARAM(ENUM, OPTION, HELP, DEFAULT, MIN, MAX) \
- { OPTION, DEFAULT, MIN, MAX, HELP },
+ { OPTION, DEFAULT, false, MIN, MAX, HELP },
#include "params.def"
#undef DEFPARAM
- { NULL, 0, 0, 0, NULL }
+ { NULL, 0, false, 0, 0, NULL }
};
/* Output files for assembler code (real compiler output)
/* Magic constants follow. These should be replaced by machine specific
numbers. */
-/* A number that should roughly correspond to the number of instructions
- executed before the prefetch is completed. */
-
-#ifndef PREFETCH_LATENCY
-#define PREFETCH_LATENCY 200
-#endif
-
-/* Number of prefetches that can run at the same time. */
-
-#ifndef SIMULTANEOUS_PREFETCHES
-#define SIMULTANEOUS_PREFETCHES 3
-#endif
-
/* True if write can be prefetched by a read prefetch. */
#ifndef WRITE_CAN_USE_READ_PREFETCH
#define READ_CAN_USE_WRITE_PREFETCH 0
#endif
-/* Cache line size. Assumed to be a power of two. */
+/* The size of the block loaded by a single prefetch. Usually, this is
+ the same as cache line size (at the moment, we only consider one level
+ of cache hierarchy). */
#ifndef PREFETCH_BLOCK
-#define PREFETCH_BLOCK 32
+#define PREFETCH_BLOCK L1_CACHE_LINE_SIZE
#endif
/* Do we have a forward hardware sequential prefetching? */
|| PREFETCH_BLOCK == 0)
return 0;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Prefetching parameters:\n");
+ fprintf (dump_file, " simultaneous prefetches: %d\n",
+ SIMULTANEOUS_PREFETCHES);
+ fprintf (dump_file, " prefetch latency: %d\n", PREFETCH_LATENCY);
+ fprintf (dump_file, " L1 cache size: %d (%d bytes)\n",
+ L1_CACHE_SIZE, L1_CACHE_SIZE * L1_CACHE_LINE_SIZE);
+ fprintf (dump_file, " L1 cache line size: %d\n", L1_CACHE_LINE_SIZE);
+ fprintf (dump_file, " prefetch block size: %d\n", PREFETCH_BLOCK);
+ fprintf (dump_file, "\n");
+ }
+
initialize_original_copy_tables ();
if (!built_in_decls[BUILT_IN_PREFETCH])