1. Allow reassociation on FP additions.
2. Avoid generating loop-dependant FMA chains. Added a tuning
option 'AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA' for this.
gcc/ChangeLog:
* config/aarch64/aarch64-tuning-flags.def
(AARCH64_EXTRA_TUNING_OPTION): New tuning option to avoid
cross-loop FMA.
* config/aarch64/aarch64.cc
(aarch64_override_options_internal): Set
param_avoid_fma_max_bits according to tuning option.
* config/aarch64/tuning_models/ampere1.h (ampere1_tunings):
Modify tunings related with FMA.
* config/aarch64/tuning_models/ampere1a.h (ampere1a_tunings):
Likewise.
* config/aarch64/tuning_models/ampere1b.h (ampere1b_tunings):
Likewise.
AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGHPUT)
+AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
+
#undef AARCH64_EXTRA_TUNING_OPTION
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
opts->x_flag_prefetch_loop_arrays = 1;
+ /* Avoid loop-dependant FMA chains. */
+ if (aarch64_tune_params.extra_tuning_flags
+ & AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA)
+ SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
+ 512);
+
aarch64_override_options_after_change_1 (opts);
}
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
&ere1_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1, /* fma_reassoc_width. */
+ 4, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
&ere1_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */
"32:16", /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
- 1, /* fma_reassoc_width. */
+ 4, /* fma_reassoc_width. */
2, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
0, /* max_case_values. */
tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
+ AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA), /* tune_flags. */
&ere1b_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALIGNED, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALIGNED /* stp_policy_model. */