]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Apply vla vs. vls mode heuristic vector COST model
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>
Tue, 12 Dec 2023 14:25:52 +0000 (22:25 +0800)
committerPan Li <pan2.li@intel.com>
Tue, 12 Dec 2023 23:19:26 +0000 (07:19 +0800)
This patch applies the VLA vs. VLS mode heuristic, which fixes the following FAILs:
FAIL: gcc.target/riscv/rvv/autovec/pr111751.c -O3 -ftree-vectorize
scan-assembler-not vset
FAIL: gcc.target/riscv/rvv/autovec/pr111751.c -O3 -ftree-vectorize
scan-assembler-times li\\s+[a-x0-9]+,0\\s+ret 2

The root cause of these FAILs is that we failed to pick a VLS mode for the vectorization.

Before this patch:

foo2:
        addi    sp,sp,-208
        addi    a2,sp,64
        addi    a5,sp,128
        lui     a6,%hi(.LANCHOR0)
        sd      ra,200(sp)
        addi    a6,a6,%lo(.LANCHOR0)
        mv      a0,a2
        mv      a1,a5
        li      a3,16
        mv      a4,sp
        vsetivli        zero,8,e64,m8,ta,ma
        vle64.v v8,0(a6)
        vse64.v v8,0(a2)
        vse64.v v8,0(a5)
.L4:
        vsetvli a5,a3,e32,m1,ta,ma
        slli    a2,a5,2
        vle32.v v2,0(a1)
        vle32.v v1,0(a0)
        sub     a3,a3,a5
        vadd.vv v1,v1,v2
        vse32.v v1,0(a4)
        add     a1,a1,a2
        add     a0,a0,a2
        add     a4,a4,a2
        bne     a3,zero,.L4
        lw      a4,128(sp)
        lw      a5,64(sp)
        addw    a5,a5,a4
        lw      a4,0(sp)
        bne     a4,a5,.L5
        lw      a4,132(sp)
        lw      a5,68(sp)
        addw    a5,a5,a4
        lw      a4,4(sp)
        bne     a4,a5,.L5
        lw      a4,136(sp)
        lw      a5,72(sp)
        addw    a5,a5,a4
        lw      a4,8(sp)
        bne     a4,a5,.L5
        lw      a4,140(sp)
        lw      a5,76(sp)
        addw    a5,a5,a4
        lw      a4,12(sp)
        bne     a4,a5,.L5
        lw      a4,144(sp)
        lw      a5,80(sp)
        addw    a5,a5,a4
        lw      a4,16(sp)
        bne     a4,a5,.L5
        lw      a4,148(sp)
        lw      a5,84(sp)
        addw    a5,a5,a4
        lw      a4,20(sp)
        bne     a4,a5,.L5
        lw      a4,152(sp)
        lw      a5,88(sp)
        addw    a5,a5,a4
        lw      a4,24(sp)
        bne     a4,a5,.L5
        lw      a4,156(sp)
        lw      a5,92(sp)
        addw    a5,a5,a4
        lw      a4,28(sp)
        bne     a4,a5,.L5
        lw      a4,160(sp)
        lw      a5,96(sp)
        addw    a5,a5,a4
        lw      a4,32(sp)
        bne     a4,a5,.L5
        lw      a4,164(sp)
        lw      a5,100(sp)
        addw    a5,a5,a4
        lw      a4,36(sp)
        bne     a4,a5,.L5
        lw      a4,168(sp)
        lw      a5,104(sp)
        addw    a5,a5,a4
        lw      a4,40(sp)
        bne     a4,a5,.L5
        lw      a4,172(sp)
        lw      a5,108(sp)
        addw    a5,a5,a4
        lw      a4,44(sp)
        bne     a4,a5,.L5
        lw      a4,176(sp)
        lw      a5,112(sp)
        addw    a5,a5,a4
        lw      a4,48(sp)
        bne     a4,a5,.L5
        lw      a4,180(sp)
        lw      a5,116(sp)
        addw    a5,a5,a4
        lw      a4,52(sp)
        bne     a4,a5,.L5
        lw      a4,184(sp)
        lw      a5,120(sp)
        addw    a5,a5,a4
        lw      a4,56(sp)
        bne     a4,a5,.L5
        lw      a4,188(sp)
        lw      a5,124(sp)
        addw    a5,a5,a4
        lw      a4,60(sp)
        bne     a4,a5,.L5
        ld      ra,200(sp)
        li      a0,0
        addi    sp,sp,208
        jr      ra
.L5:
        call    abort

After this patch:

        li      a0,0
        ret

The heuristic leverages the approach used for ARM SVE.  It is fully tested, and
we confirmed that we have the same behavior as ARM SVE GCC and RVV Clang.

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (costs::analyze_loop_vinfo): New function.
(costs::record_potential_vls_unrolling): Ditto.
(costs::prefer_unrolled_loop): Ditto.
(costs::better_main_loop_than_p): Ditto.
(costs::add_stmt_cost): Ditto.
* config/riscv/riscv-vector-costs.h (enum cost_type_enum): New enum.
* config/riscv/t-riscv: Add new include files.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr111313.c: Adapt test.
* gcc.target/riscv/rvv/autovec/vls/shift-3.c: Ditto.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c: New test.

17 files changed:
gcc/config/riscv/riscv-vector-costs.cc
gcc/config/riscv/riscv-vector-costs.h
gcc/config/riscv/t-riscv
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr111313.c
gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c

index 3fcb5f3176f76d6b22e3caa3bbd1d83c385ed396..7888cef58fe9f71449f926dcdc3ed2828abb78a5 100644 (file)
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ssa.h"
 #include "backend.h"
 #include "tree-data-ref.h"
+#include "tree-ssa-loop-niter.h"
 
 /* This file should be included last.  */
 #include "riscv-vector-costs.h"
@@ -601,7 +602,101 @@ preferred_new_lmul_p (loop_vec_info other_loop_vinfo)
 
 costs::costs (vec_info *vinfo, bool costing_for_scalar)
   : vector_costs (vinfo, costing_for_scalar)
-{}
+{
+  if (costing_for_scalar)
+    m_cost_type = SCALAR_COST;
+  else if (riscv_v_ext_vector_mode_p (vinfo->vector_mode))
+    m_cost_type = VLA_VECTOR_COST;
+  else
+    m_cost_type = VLS_VECTOR_COST;
+}
+
+/* Do one-time initialization of the costs given that we're
+   costing the loop vectorization described by LOOP_VINFO.  */
+void
+costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
+{
+  /* Record the number of vector loop iterations, if known: rounded down
+     when LOOP_VINFO_LENS is empty, rounded up otherwise.  */
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  auto scalar_niters = max_stmt_executions_int (loop);
+  if (scalar_niters >= 0)
+    {
+      unsigned int vf = vect_vf_for_cost (loop_vinfo);
+      if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
+       m_num_vector_iterations = scalar_niters / vf;
+      else
+       m_num_vector_iterations = CEIL (scalar_niters, vf);
+    }
+
+  /* Detect whether we're vectorizing for VLA and should apply the unrolling
+     heuristic described above m_unrolled_vls_niters.  */
+  record_potential_vls_unrolling (loop_vinfo);
+}
+
+/* Decide whether to use the unrolling heuristic described above
+   m_unrolled_vls_niters, updating that field if so.  LOOP_VINFO
+   describes the loop that we're vectorizing.  */
+void
+costs::record_potential_vls_unrolling (loop_vec_info loop_vinfo)
+{
+  /* We only want to apply the heuristic if LOOP_VINFO is being
+     vectorized for VLA.  */
+  if (m_cost_type != VLA_VECTOR_COST)
+    return;
+
+  /* We don't want to apply the heuristic to outer loops, since it's
+     harder to track two levels of unrolling.  */
+  if (LOOP_VINFO_LOOP (loop_vinfo)->inner)
+    return;
+
+  /* Only handle cases in which the number of VLS iterations
+     would be known at compile time but the number of VLA iterations
+     would not.  */
+  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+      || BYTES_PER_RISCV_VECTOR.is_constant ())
+    return;
+
+  /* Guess how many times the VLS loop would iterate and make
+     sure that it is within the complete unrolling limit.  Even if the
+     number of iterations is small enough, the number of statements might
+     not be, which is why we need to estimate the number of statements too.  */
+  unsigned int vls_vf = vect_vf_for_cost (loop_vinfo);
+  unsigned HOST_WIDE_INT unrolled_vls_niters
+    = LOOP_VINFO_INT_NITERS (loop_vinfo) / vls_vf;
+  if (unrolled_vls_niters > (unsigned int) param_max_completely_peel_times)
+    return;
+
+  /* Record that we're applying the heuristic and should try to estimate
+     the number of statements in the VLS loop.  */
+  m_unrolled_vls_niters = unrolled_vls_niters;
+}
+
+/* Return true if (a) we're applying the VLS vs. VLA unrolling
+   heuristic described above m_unrolled_vls_niters and (b) the heuristic
+   says that we should prefer the VLS loop.  */
+bool
+costs::prefer_unrolled_loop () const
+{
+  if (!m_unrolled_vls_stmts)
+    return false;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "Number of insns in"
+                    " unrolled VLS loop = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
+                    m_unrolled_vls_stmts);
+
+  /* The balance here is tricky.  On the one hand, we can't be sure whether
+     the code is vectorizable with VLS or not.  However, even if
+     it isn't vectorizable with VLS, there's a possibility that
+     the scalar code could also be unrolled.  Some of the code might then
+     benefit from SLP.  We therefore apply the heuristic without checking
+     whether the loop can actually be vectorized with VLS.  */
+  return (m_unrolled_vls_stmts
+         && (m_unrolled_vls_stmts
+             <= (unsigned int) param_max_completely_peeled_insns));
+}
 
 bool
 costs::better_main_loop_than_p (const vector_costs *uncast_other) const
@@ -618,6 +713,21 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
                     GET_MODE_NAME (other_loop_vinfo->vector_mode),
                     vect_vf_for_cost (other_loop_vinfo));
 
+  /* Apply the unrolling heuristic described above m_unrolled_vls_niters.  */
+  if (bool (m_unrolled_vls_stmts) != bool (other->m_unrolled_vls_stmts))
+    {
+      bool this_prefer_unrolled = this->prefer_unrolled_loop ();
+      bool other_prefer_unrolled = other->prefer_unrolled_loop ();
+      if (this_prefer_unrolled != other_prefer_unrolled)
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "Preferring VLS loop because"
+                            " it can be unrolled\n");
+         return other_prefer_unrolled;
+       }
+    }
+
   if (!LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
       && riscv_autovec_lmul == RVV_DYNAMIC)
     {
@@ -643,6 +753,28 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   /* TODO: Use default STMT cost model.
           We will support more accurate STMT cost model later.  */
   int stmt_cost = default_builtin_vectorization_cost (kind, vectype, misalign);
+
+  /* Do one-time initialization based on the vinfo.  */
+  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
+  if (!m_analyzed_vinfo)
+    {
+      if (loop_vinfo)
+       analyze_loop_vinfo (loop_vinfo);
+
+      m_analyzed_vinfo = true;
+    }
+
+  if (stmt_info)
+    {
+      /* If we're applying the VLA vs. VLS unrolling heuristic,
+        estimate the number of statements in the unrolled VLS
+        loop.  For simplicity, we assume that one iteration of the
+        VLS loop would need the same number of statements
+        as one iteration of the VLA loop.  */
+      if (where == vect_body && m_unrolled_vls_niters)
+       m_unrolled_vls_stmts += count * m_unrolled_vls_niters;
+    }
+
   return record_stmt_cost (stmt_info, where, count * stmt_cost);
 }
 
index e18775e230b11cb2a9e976c546af155cf24a7c13..ff294a60aaf8a1f25fc728ce292ea815ddf5c0e7 100644 (file)
@@ -30,6 +30,13 @@ struct stmt_point
   gimple *stmt;
 };
 
+enum cost_type_enum
+{
+  SCALAR_COST,     /* Costing the original scalar code.  */
+  VLA_VECTOR_COST, /* Costing VLA partial vectorization code.  */
+  VLS_VECTOR_COST  /* Costing VLS minimum length vector code.  */
+};
+
 /* Pair typedef used by live range: <start, end>.  */
 typedef std::pair<unsigned int, unsigned int> pair;
 
@@ -49,6 +56,42 @@ private:
                              tree vectype, int misalign,
                              vect_cost_model_location where) override;
   void finish_cost (const vector_costs *) override;
+
+  /* True if we have performed one-time initialization based on the
+     vec_info.  */
+  bool m_analyzed_vinfo = false;
+
+  /* - If M_COST_TYPE = SCALAR_COST then we're costing the original scalar code.
+     - If M_COST_TYPE = VLA_VECTOR_COST then we're costing VLA
+       partial vectorization codes.
+     - If M_COST_TYPE = VLS_VECTOR_COST then we're costing VLS
+       minimum length vector codes.  */
+  enum cost_type_enum m_cost_type;
+
+  /* On some CPUs, VLA and VLS provide the same theoretical vector
+     throughput, such as 4x128 VLS vs. 2x256 VLA.  In those
+     situations, we try to predict whether a VLS implementation
+     of the loop could be completely unrolled and become straight-line code.
+     If so, it is generally better to use the VLS version rather
+     than length-agnostic VLA, since the VLA loop would execute an unknown
+     number of times and so could not be completely unrolled in the same way.
+
+     If we're applying this heuristic, M_UNROLLED_VLS_NITERS is the
+     number of VLS loop iterations that would be unrolled and
+     M_UNROLLED_VLS_STMTS estimates the total number of statements
+     in the unrolled loop.  Both values are zero if we're not applying
+     the heuristic.  */
+  unsigned HOST_WIDE_INT m_unrolled_vls_niters = 0;
+  unsigned HOST_WIDE_INT m_unrolled_vls_stmts = 0;
+
+  /* If we're vectorizing a loop that executes a constant number of times,
+     this variable gives the number of times that the vector loop would
+     iterate, otherwise it is zero.  */
+  uint64_t m_num_vector_iterations = 0;
+
+  void analyze_loop_vinfo (loop_vec_info);
+  void record_potential_vls_unrolling (loop_vec_info);
+  bool prefer_unrolled_loop () const;
 };
 
 } // namespace riscv_vector
index 1aac8b58bb526d9ef831ffa452bd8661a8c9733e..2b2ec40952542ac3ef1046e0494ebf09477cf3e2 100644 (file)
@@ -74,7 +74,7 @@ riscv-vector-costs.o: $(srcdir)/config/riscv/riscv-vector-costs.cc \
   $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H) $(FUNCTION_H) \
   $(TREE_H) basic-block.h $(RTL_H) gimple.h targhooks.h cfgloop.h \
   fold-const.h $(TM_P_H) tree-vectorizer.h gimple-iterator.h bitmap.h \
-  ssa.h backend.h \
+  ssa.h backend.h tree-data-ref.h tree-ssa-loop-niter.h \
   $(srcdir)/config/riscv/riscv-vector-costs.h
        $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
                $(srcdir)/config/riscv/riscv-vector-costs.cc
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-1.c
new file mode 100644 (file)
index 0000000..1ef4215
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-10.c
new file mode 100644 (file)
index 0000000..3ddffa3
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m4 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 2 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-11.c
new file mode 100644 (file)
index 0000000..7625ec5
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-12.c
new file mode 100644 (file)
index 0000000..7625ec5
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8 -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-2.c
new file mode 100644 (file)
index 0000000..ca203f5
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m2" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-3.c
new file mode 100644 (file)
index 0000000..f8e5335
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m4" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-4.c
new file mode 100644 (file)
index 0000000..4859d57
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-5.c
new file mode 100644 (file)
index 0000000..8a56802
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=dynamic" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 16; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*16,\s*e32,\s*m4,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-6.c
new file mode 100644 (file)
index 0000000..46ebd5f
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m8" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 32; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-7.c
new file mode 100644 (file)
index 0000000..f5aceca
--- /dev/null
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=dynamic" } */
+
+void
+foo (int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < 32; i++)
+    a[i] = b[i] + c[i];
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e32,\s*m8,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetvli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetivli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-8.c
new file mode 100644 (file)
index 0000000..ea6a7cb
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*4,\s*e32,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/vla_vs_vls-9.c
new file mode 100644 (file)
index 0000000..7f03cb9
--- /dev/null
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 --param=riscv-autovec-lmul=m2" } */
+
+#include <stdint-gcc.h>
+
+#define N 40
+
+int a[N];
+
+__attribute__ ((noinline)) int
+foo (){
+  int i,j;
+  int sum,x;
+
+  for (i = 0; i < N; i++) {
+    sum = 0;
+    for (j = 0; j < N; j++) {
+      sum += (i + j);
+    }
+    a[i] = sum;
+  }
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*8,\s*e32,\s*m2,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times {vsetivli} 1 } } */
+/* { dg-final { scan-assembler-not {vsetvli} } } */
index 1e01cfefd477f6d8ae19b4c9edc2c2804fcb2211..a4f8c37f95d9e94df64e0b997fd2861cccf994df 100644 (file)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -O3 -fno-schedule-insns -fno-schedule-insns2 -fno-vect-cost-model" } */
 
 #define K 32
 short in[2*K][K];
index e25e7b59c3ebd0f7b696b055c229f8cad0b4f9c6..8de1b9c0c41da27ca1f4daea7cdb2edf046b50ca 100644 (file)
@@ -53,5 +53,5 @@ DEF_OP_VV (shift, 128, int64_t, <<)
 DEF_OP_VV (shift, 256, int64_t, <<)
 DEF_OP_VV (shift, 512, int64_t, <<)
 
-/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 43 } } */
+/* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 46 } } */
 /* { dg-final { scan-assembler-not {csrr} } } */