git.ipfire.org Git - thirdparty/gcc.git/commitdiff
aarch64: Add internal tune flag to minimise VL-based scalar ops
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 8 Mar 2021 09:35:14 +0000 (09:35 +0000)
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 8 Mar 2021 09:35:14 +0000 (09:35 +0000)
This is a backport of the cse_sve_vl_constants tuning param to GCC 10.

Bootstrapped and tested on the branch on aarch64-none-linux-gnu.

gcc/ChangeLog:

* config/aarch64/aarch64-tuning-flags.def (cse_sve_vl_constants):
Define.
* config/aarch64/aarch64.md (add<mode>3): Force CONST_POLY_INT immediates
into a register when the above is enabled.
* config/aarch64/aarch64.c (neoversev1_tunings): Add
AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS.
(aarch64_rtx_costs): Use AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS.

gcc/testsuite/

* gcc.target/aarch64/sve/cse_sve_vl_constants_1.c: New test.

gcc/config/aarch64/aarch64-tuning-flags.def
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/aarch64.md
gcc/testsuite/gcc.target/aarch64/sve/cse_sve_vl_constants_1.c [new file with mode: 0644]

index 52c7f4763f58119f6cee1bd75410558ee733e1a8..7677ec0b0b4022a7f3c3cf7c2a4ab1c7664ff523 100644 (file)
@@ -49,4 +49,6 @@ AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
 /* Prefer Advanced SIMD over SVE for auto-vectorization.  */
 AARCH64_EXTRA_TUNING_OPTION ("prefer_advsimd_autovec", PREFER_ADVSIMD_AUTOVEC)
 
+AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
index 46fe683550683dbb7c9cccbe05f9441a49cb837d..775f79d88ff8e07a32b9ce44b4afcc5cbee5a1db 100644 (file)
@@ -1403,7 +1403,8 @@ static const struct tune_params neoversev1_tunings =
   2,   /* min_div_recip_mul_df.  */
   0,   /* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC
+   | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS), /* tune_flags.  */
   &generic_prefetch_tune
 };
 
@@ -12532,8 +12533,18 @@ cost_plus:
            *cost += rtx_cost (op0, mode, PLUS, 0, speed);
 
            if (speed)
-             /* ADD (immediate).  */
-             *cost += extra_cost->alu.arith;
+             {
+               /* ADD (immediate).  */
+               *cost += extra_cost->alu.arith;
+
+               /* Some tunings prefer to not use the VL-based scalar ops.
+                  Increase the cost of the poly immediate to prevent their
+                  formation.  */
+               if (GET_CODE (op1) == CONST_POLY_INT
+                   && (aarch64_tune_params.extra_tuning_flags
+                       & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS))
+                 *cost += COSTS_N_INSNS (1);
+             }
            return true;
          }
 
index 7f262e067ce90918c6c6ae0336cd102cfbc459da..8f6bbcde904219708f7cf24c3dcb781d856a52f7 100644 (file)
       && (!REG_P (op1)
         || !REGNO_PTR_FRAME_P (REGNO (op1))))
     operands[2] = force_reg (<MODE>mode, operands[2]);
+  /* Some tunings prefer to avoid VL-based operations.
+     Split off the poly immediate here.  The rtx costs hook will reject attempts
+     to combine them back.  */
+  else if (GET_CODE (operands[2]) == CONST_POLY_INT
+          && can_create_pseudo_p ()
+          && (aarch64_tune_params.extra_tuning_flags
+              & AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS))
+    operands[2] = force_reg (<MODE>mode, operands[2]);
   /* Expand polynomial additions now if the destination is the stack
      pointer, since we don't want to use that as a temporary.  */
   else if (operands[0] == stack_pointer_rtx
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cse_sve_vl_constants_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cse_sve_vl_constants_1.c
new file mode 100644 (file)
index 0000000..dd04b66
--- /dev/null
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -moverride=tune=cse_sve_vl_constants" } */
+
+void __attribute__((noinline, noclone))
+vadd (int *dst, int *op1, int *op2, int count)
+{
+  for (int i = 0; i < count; ++i)
+    dst[i] = op1[i] + op2[i];
+}
+
+/* { dg-final { scan-assembler-not {\tincw\tx[0-9]+} } } */
+