git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: add 'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA'
author: Di Zhao <dizhao@os.amperecomputing.com>
Tue, 2 Jan 2024 04:35:03 +0000 (12:35 +0800)
committer: Di Zhao <dizhao@os.amperecomputing.com>
Tue, 2 Jan 2024 04:35:03 +0000 (12:35 +0800)
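This patch adds a new tuning option,
'AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA'. When a tuning model sets
this flag, param_fully_pipelined_fma is set to 1 (unless the user has
set it explicitly), so that reassociation takes fully pipelined FMAs
into account. The patch also enables this option by default for the
Ampere CPUs (ampere1, ampere1a and ampere1b).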

gcc/ChangeLog:

* config/aarch64/aarch64-tuning-flags.def
(AARCH64_EXTRA_TUNING_OPTION): New tuning option
AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA.
* config/aarch64/aarch64.cc
(aarch64_override_options_internal): Set
param_fully_pipelined_fma according to tuning option.
* config/aarch64/tuning_models/ampere1.h: Add
AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA to tune_flags.
* config/aarch64/tuning_models/ampere1a.h: Likewise.
* config/aarch64/tuning_models/ampere1b.h: Likewise.

gcc/config/aarch64/aarch64-tuning-flags.def
gcc/config/aarch64/aarch64.cc
gcc/config/aarch64/tuning_models/ampere1.h
gcc/config/aarch64/tuning_models/ampere1a.h
gcc/config/aarch64/tuning_models/ampere1b.h

index f28a73839a63540e4ca7186c956fe7fb61d765ae..1488a8448869db9bcd391edeedafb3ff50724346 100644 (file)
@@ -49,4 +49,6 @@ AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGH
 
 AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
 
+AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
index 9858de6b171cc320301092a41e33910de3366ecc..298477d88bbc6f9bcd4192d25803c7b642336175 100644 (file)
@@ -18321,6 +18321,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
     SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
                         512);
 
+  /* Consider fully pipelined FMA in reassociation.  */
+  if (aarch64_tune_params.extra_tuning_flags
+      & AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA)
+    SET_OPTION_IF_UNSET (opts, &global_options_set, param_fully_pipelined_fma,
+                        1);
+
   aarch64_override_options_after_change_1 (opts);
 }
 
index a144e8f94b305c2e2e5682dab5b7e8789ad8b6ce..ac215d3aaf94989af281b355db4e60642bb21b4c 100644 (file)
@@ -104,7 +104,8 @@ static const struct tune_params ampere1_tunings =
   2,   /* min_div_recip_mul_df.  */
   0,   /* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),   /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags.  */
   &ampere1_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
index f688ed08a792d1e6c18730bc0fe67c33fc20f3a7..00249600d22be97cf921aee368494a0014e00684 100644 (file)
@@ -56,7 +56,8 @@ static const struct tune_params ampere1a_tunings =
   2,   /* min_div_recip_mul_df.  */
   0,   /* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),   /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags.  */
   &ampere1_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
index a98b6a980f70b7f40964733ae531452ca4ff9626..15cc896143fbfa3215910b402a918d1989069f6b 100644 (file)
@@ -105,8 +105,9 @@ static const struct tune_params ampere1b_tunings =
   2,   /* min_div_recip_mul_df.  */
   0,   /* max_case_values.  */
   tune_params::AUTOPREFETCHER_STRONG,  /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
-   AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),   /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+   | AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA
+   | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags.  */
   &ampere1b_prefetch_tune,
   AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
   AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */