RISC-V: Apply vla vs. vls mode heuristic vector COST model

author Juzhe-Zhong <juzhe.zhong@rivai.ai>

Tue, 12 Dec 2023 14:25:52 +0000 (22:25 +0800)

committer Pan Li <pan2.li@intel.com>

Tue, 12 Dec 2023 23:19:26 +0000 (07:19 +0800)
author Juzhe-Zhong <juzhe.zhong@rivai.ai>
Tue, 12 Dec 2023 14:25:52 +0000 (22:25 +0800)
committer Pan Li <pan2.li@intel.com>
Tue, 12 Dec 2023 23:19:26 +0000 (07:19 +0800)
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc

index 3fcb5f3176f76d6b22e3caa3bbd1d83c385ed396..7888cef58fe9f71449f926dcdc3ed2828abb78a5 100644 (file)
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "ssa.h"
  #include "backend.h"
  #include "tree-data-ref.h"
+#include "tree-ssa-loop-niter.h"
  
  /* This file should be included last.  */
  #include "riscv-vector-costs.h"
@@ -601,7 +602,101 @@ preferred_new_lmul_p (loop_vec_info other_loop_vinfo)
  
  costs::costs (vec_info *vinfo, bool costing_for_scalar)
    : vector_costs (vinfo, costing_for_scalar)
-{}
+{
+  if (costing_for_scalar)
+    m_cost_type = SCALAR_COST;
+  else if (riscv_v_ext_vector_mode_p (vinfo->vector_mode))
+    m_cost_type = VLA_VECTOR_COST;
+  else
+    m_cost_type = VLS_VECTOR_COST;
+}
+
+/* Do one-time initialization of the costs given that we're
+   costing the loop vectorization described by LOOP_VINFO.  */
+void
+costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
+{
+  /* Record the number of times that the vector loop would execute,
+     if known.  */
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  auto scalar_niters = max_stmt_executions_int (loop);
+  if (scalar_niters >= 0)
+    {
+      unsigned int vf = vect_vf_for_cost (loop_vinfo);
+      if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
+       m_num_vector_iterations = scalar_niters / vf;
+      else
+       m_num_vector_iterations = CEIL (scalar_niters, vf);
+    }
+
+  /* Detect whether we're vectorizing for VLA and should apply the unrolling
+     heuristic described above m_unrolled_vls_niters.  */
+  record_potential_vls_unrolling (loop_vinfo);
+}
+
+/* Decide whether to use the unrolling heuristic described above
+   m_unrolled_vls_niters, updating that field if so.  LOOP_VINFO
+   describes the loop that we're vectorizing.  */
+void
+costs::record_potential_vls_unrolling (loop_vec_info loop_vinfo)
+{
+  /* We only want to apply the heuristic if LOOP_VINFO is being
+     vectorized for VLA.  */
+  if (m_cost_type != VLA_VECTOR_COST)
+    return;
+
+  /* We don't want to apply the heuristic to outer loops, since it's
+     harder to track two levels of unrolling.  */
+  if (LOOP_VINFO_LOOP (loop_vinfo)->inner)
+    return;
+
+  /* Only handle cases in which the number of VLS iterations
+     would be known at compile time but the number of VLA iterations
+     would not.  */
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      || BYTES_PER_RISCV_VECTOR.is_constant ())
+    return;
+
+  /* Guess how many times the VLS loop would iterate and make
+     sure that it is within the complete unrolling limit.  Even if the
+     number of iterations is small enough, the number of statements might
+     not be, which is why we need to estimate the number of statements too.  */
+  unsigned int vls_vf = vect_vf_for_cost (loop_vinfo);
+  unsigned HOST_WIDE_INT unrolled_vls_niters
+    = LOOP_VINFO_INT_NITERS (loop_vinfo) / vls_vf;
+  if (unrolled_vls_niters > (unsigned int) param_max_completely_peel_times)
+    return;
+
+  /* Record that we're applying the heuristic and should try to estimate
+     the number of statements in the VLS loop.  */
+  m_unrolled_vls_niters = unrolled_vls_niters;
+}
+
+/* Return true if (a) we're applying the VLS vs. VLA unrolling
+   heuristic described above m_unrolled_vls_niters and (b) the heuristic
+   says that we should prefer the VLS loop.  */
+bool
+costs::prefer_unrolled_loop () const
+{
+  if (!m_unrolled_vls_stmts)
+    return false;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "Number of insns in"
+                    " unrolled VLS loop = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
+                    m_unrolled_vls_stmts);
+
+  /* The balance here is tricky.  On the one hand, we can't be sure whether
+     the code is vectorizable with VLS or not.  However, even if
+     it isn't vectorizable with VLS, there's a possibility that
+     the scalar code could also be unrolled.  Some of the code might then
+     benefit from SLP.  We therefore apply the heuristic regardless of
+     whether VLS vectorization is known to be possible.  */
+  return (m_unrolled_vls_stmts
+         && (m_unrolled_vls_stmts
+             <= (unsigned int) param_max_completely_peeled_insns));
+}
  
  bool
  costs::better_main_loop_than_p (const vector_costs *uncast_other) const
@@ -618,6 +713,21 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
                      GET_MODE_NAME (other_loop_vinfo->vector_mode),
                      vect_vf_for_cost (other_loop_vinfo));
  
+  /* Apply the unrolling heuristic described above m_unrolled_vls_niters.  */
+  if (bool (m_unrolled_vls_stmts) != bool (other->m_unrolled_vls_stmts))
+    {
+      bool this_prefer_unrolled = this->prefer_unrolled_loop ();
+      bool other_prefer_unrolled = other->prefer_unrolled_loop ();
+      if (this_prefer_unrolled != other_prefer_unrolled)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "Preferring VLS loop because"
+                            " it can be unrolled\n");
+         return other_prefer_unrolled;
+       }
+    }
+
    if (!LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
        && riscv_autovec_lmul == RVV_DYNAMIC)
      {
@@ -643,6 +753,28 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
    /* TODO: Use default STMT cost model.
            We will support more accurate STMT cost model later.  */
    int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign);
+
+  /* Do one-time initialization based on the vinfo.  */
+  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
+  if (!m_analyzed_vinfo)
+    {
+      if (loop_vinfo)
+       analyze_loop_vinfo (loop_vinfo);
+
+      m_analyzed_vinfo = true;
+    }
+
+  if (stmt_info)
+    {
+      /* If we're applying the VLA vs. VLS unrolling heuristic,
+        estimate the number of statements in the unrolled VLS
+        loop.  For simplicity, we assume that one iteration of the
+        VLS loop would need the same number of statements
+        as one iteration of the VLA loop.  */
+      if (where == vect_body && m_unrolled_vls_niters)
+       m_unrolled_vls_stmts += count * m_unrolled_vls_niters;
+    }
+
    return record_stmt_cost (stmt_info, where, count * stmt_cost);
  }
  
diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h

index e18775e230b11cb2a9e976c546af155cf24a7c13..ff294a60aaf8a1f25fc728ce292ea815ddf5c0e7 100644 (file)
--- a/gcc/config/riscv/riscv-vector-costs.h
+++ b/gcc/config/riscv/riscv-vector-costs.h
@@ -30,6 +30,13 @@ struct stmt_point
    gimple *stmt;
  };
  
+enum cost_type_enum
+{
+  SCALAR_COST,
+  VLA_VECTOR_COST,
+  VLS_VECTOR_COST
+};
+
  /* Pair typedef used by live range: <start, end>.  */
  typedef std::pair<unsigned int, unsigned int> pair;
  
@@ -49,6 +56,42 @@ private:
                               tree vectype, int misalign,
                               vect_cost_model_location where) override;
    void finish_cost (const vector_costs *) override;
+
+  /* True if we have performed one-time initialization based on the
+     vec_info.  */
+  bool m_analyzed_vinfo = false;
+
+  /* - If M_COST_TYPE = SCALAR_COST then we're costing the original scalar code.
+     - If M_COST_TYPE = VLA_VECTOR_COST then we're costing VLA
+       partial vectorization codes.
+     - If M_COST_TYPE = VLS_VECTOR_COST then we're costing VLS
+       minimum length vector codes.  */
+  enum cost_type_enum m_cost_type;
+
+  /* On some CPUs, VLA and VLS provide the same theoretical vector
+     throughput, such as 4x128 VLS vs. 2x256 VLA.  In those
+     situations, we try to predict whether a VLS implementation
+     of the loop could be completely unrolled and become straight-line code.
+     If so, it is generally better to use the VLS version rather
+     than length-agnostic VLA, since the VLA loop would execute an unknown
+     number of times and so could not be completely unrolled in the same way.
+
+     If we're applying this heuristic, M_UNROLLED_VLS_NITERS is the
+     number of VLS loop iterations that would be unrolled and
+     M_UNROLLED_VLS_STMTS estimates the total number of statements
+     in the unrolled loop.  Both values are zero if we're not applying
+     the heuristic.  */
+  unsigned HOST_WIDE_INT m_unrolled_vls_niters = 0;
+  unsigned HOST_WIDE_INT m_unrolled_vls_stmts = 0;
+
+  /* If we're vectorizing a loop that executes a constant number of times,
+     this variable gives the number of times that the vector loop would
+     iterate, otherwise it is zero.  */
+  uint64_t m_num_vector_iterations = 0;
+
+  void analyze_loop_vinfo (loop_vec_info);
+  void record_potential_vls_unrolling (loop_vec_info);
+  bool prefer_unrolled_loop () const;
  };
  
  } // namespace riscv_vector
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv

index 1aac8b58bb526d9ef831ffa452bd8661a8c9733e..2b2ec40952542ac3ef1046e0494ebf09477cf3e2 100644 (file)
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -74,7 +74,7 @@ riscv-vector-costs.o: $(srcdir)/config/riscv/riscv-vector-costs.cc \
    $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H) $(FUNCTION_H) \
    $(TREE_H) basic-block.h $(RTL_H) gimple.h targhooks.h cfgloop.h \
    fold-const.h $(TM_P_H) tree-vectorizer.h gimple-iterator.h bitmap.h \
-  ssa.h backend.h \
+  ssa.h backend.h tree-data-ref.h tree-ssa-loop-niter.h \
    $(srcdir)/config/riscv/riscv-vector-costs.h
         $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
                 $(srcdir)/config/riscv/riscv-vector-costs.cc
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c

new file mode 100644 (file)

index 0000000..1ef4215
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c

new file mode 100644 (file)

index 0000000..3ddffa3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m4 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c

new file mode 100644 (file)

index 0000000..7625ec5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c

new file mode 100644 (file)

index 0000000..7625ec5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c

new file mode 100644 (file)

index 0000000..ca203f5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m2" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c

new file mode 100644 (file)

index 0000000..f8e5335
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m4" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c

new file mode 100644 (file)

index 0000000..4859d57
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c

new file mode 100644 (file)

index 0000000..8a56802
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=dynamic" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c

new file mode 100644 (file)

index 0000000..46ebd5f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 32; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c

new file mode 100644 (file)

index 0000000..f5aceca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=dynamic" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 32; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c

new file mode 100644 (file)

index 0000000..ea6a7cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c

new file mode 100644 (file)

index 0000000..7f03cb9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m2" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c

index 1e01cfefd477f6d8ae19b4c9edc2c2804fcb2211..a4f8c37f95d9e94df64e0b997fd2861cccf994df 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c
@@ -1,5 +1,5 @@
  /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2 -fno-vect-cost-model" } */
  
  #define K 32
  short in[2*K][K];
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c

index e25e7b59c3ebd0f7b696b055c229f8cad0b4f9c6..8de1b9c0c41da27ca1f4daea7cdb2edf046b50ca 100644 (file)
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c
@@ -53,5 +53,5 @@ DEF_OP_VV (shift, 128, int64_t, <<)
  DEF_OP_VV (shift, 256, int64_t, <<)
  DEF_OP_VV (shift, 512, int64_t, <<)
  
-/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 43 } } */
+/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 46 } } */
  /* { dg-final { scan-assembler-not {csrr} } } */
author	Juzhe-Zhong <juzhe.zhong@rivai.ai>
	Tue, 12 Dec 2023 14:25:52 +0000 (22:25 +0800)
committer	Pan Li <pan2.li@intel.com>
	Tue, 12 Dec 2023 23:19:26 +0000 (07:19 +0800)
gcc/config/riscv/riscv-vector-costs.cc		patch \| blob \| blame \| history
gcc/config/riscv/riscv-vector-costs.h		patch \| blob \| blame \| history
gcc/config/riscv/t-riscv		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c		patch \| blob \| blame \| history