AArch64: Update costing for vector conversions [PR110625]

author Tamar Christina <tamar.christina@arm.com>

Fri, 29 Dec 2023 15:58:29 +0000 (15:58 +0000)

committer Tamar Christina <tamar.christina@arm.com>

Fri, 29 Dec 2023 15:58:29 +0000 (15:58 +0000)
author Tamar Christina <tamar.christina@arm.com>
Fri, 29 Dec 2023 15:58:29 +0000 (15:58 +0000)
committer Tamar Christina <tamar.christina@arm.com>
Fri, 29 Dec 2023 15:58:29 +0000 (15:58 +0000)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index f9850320f61c5ddccf47e6583d304e5f405a484f..9858de6b171cc320301092a41e33910de3366ecc 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16077,6 +16077,15 @@ private:
       leaving a vectorization of { elts }.  */
    bool m_stores_to_vector_load_decl = false;
  
+  /* Non-zero if the last operation we costed is a vector promotion or demotion.
+     In this case the value is the number of insns in the last operation.
+
+     On AArch64 vector promotions and demotions require us to first widen or
+     narrow the input and only after that emit conversion instructions.  For
+     costing this means we need to emit the cost of the final conversions as
+     well.  */
+  unsigned int m_num_last_promote_demote = 0;
+
    /* - If M_VEC_FLAGS is zero then we're costing the original scalar code.
       - If M_VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced
         SIMD code.
@@ -17132,6 +17141,29 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
      stmt_cost = aarch64_sve_adjust_stmt_cost (m_vinfo, kind, stmt_info,
                                               vectype, stmt_cost);
  
+  /*  Vector promotion and demotion require us to widen or narrow the input
+      first and only after that perform the conversion.  Unfortunately the
+      mid-end expects this to be doable as a single operation and doesn't pass
+      on enough context here for us to tell which operation is happening.  To
+      account for this we count every promote-demote operation twice and if
+      the previously costed operation was also a promote-demote we reduce
+      the cost of the operation currently being costed to simulate the final
+      conversion cost.  Note that for SVE we can do better here if the converted
+      value comes from a load since the widening load would consume the widening
+      operations.  However since we're in stage 3 we can't change the helper
+      vect_is_extending_load and duplicating the code seems not useful.  */
+  gassign *assign = NULL;
+  if (kind == vec_promote_demote
+      && (assign = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_info)))
+      && gimple_assign_rhs_code (assign) == FLOAT_EXPR)
+    {
+      auto new_count = count * 2 - m_num_last_promote_demote;
+      m_num_last_promote_demote = count;
+      count = new_count;
+    }
+  else
+    m_num_last_promote_demote = 0;
+
    if (stmt_info && aarch64_use_new_vector_costs_p ())
      {
        /* Account for any extra "embedded" costs that apply additively
diff --git a/gcc/testsuite/gcc.target/aarch64/pr110625_4.c b/gcc/testsuite/gcc.target/aarch64/pr110625_4.c

new file mode 100644 (file)

index 0000000..34dac19
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr110625_4.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=neoverse-n2 -fdump-tree-vect-details" } */
+
+typedef struct {
+  short blue, green, red, opacity; /* foo never reads opacity.  */
+} Pixel;
+
+double foo (long n, double *k, Pixel *k_pixels) { /* Three sum reductions.  */
+  double result_2, result_1, result_0; /* Never initialized.  */
+  for (; n; n++, k--) { /* Runs while n != 0; n steps up, k steps down.  */
+    result_0 += *k * k_pixels[n].red; /* short field times double.  */
+    result_1 += *k * k_pixels[n].green;
+    result_2 += *k * k_pixels[n].blue;
+  }
+  return result_0 + result_1 + result_2;
+}
+
+/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c

index 0f96dc2ff007340541c2ba7d51e1ccfa0f3f2d39..4c5e88657408f61156035012212ed542fac45efb 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */
+/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-sve-compare-costs=0" } */
  
  #include <stdint.h>
  
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c

index 70465f91eba4f80140b2059481eb8f06bbc9ace7..3ff2bd127756b2ff08095513b09325db4779ba02 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
  
  #include <stdint.h>
author	Tamar Christina <tamar.christina@arm.com>
	Fri, 29 Dec 2023 15:58:29 +0000 (15:58 +0000)
committer	Tamar Christina <tamar.christina@arm.com>
	Fri, 29 Dec 2023 15:58:29 +0000 (15:58 +0000)
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/pr110625_4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c		patch \| blob \| blame \| history