]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
RISC-V: Adjust loop len by costing 1 when NITER < VF
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>
Mon, 15 Jan 2024 01:22:40 +0000 (09:22 +0800)
committerLehua Ding <lehua.ding@rivai.ai>
Mon, 15 Jan 2024 11:45:44 +0000 (19:45 +0800)
Rebase in v3: Rebase to the trunk and commit it as it's approved by Robin.
Update in v2: Add dynamic lmul test.

This patch fixes the regression between GCC 13.2.0 and trunk GCC (GCC-14)

GCC 13.2.0:

lui a5,%hi(a)
li a4,19
sb a4,%lo(a)(a5)
li a0,0
ret

Trunk GCC:

        vsetvli a5,zero,e8,mf2,ta,ma
        li      a4,-32768
        vid.v   v1
        vsetvli zero,zero,e16,m1,ta,ma
        addiw   a4,a4,104
        vmv.v.i v3,15
        lui     a1,%hi(a)
        li      a0,19
        vsetvli zero,zero,e8,mf2,ta,ma
        vadd.vi v1,v1,1
        sb      a0,%lo(a)(a1)
        vsetvli zero,zero,e16,m1,ta,ma
        vzext.vf2       v2,v1
        vmv.v.x v1,a4
        vminu.vv        v2,v2,v3
        vsrl.vv v1,v1,v2
        vslidedown.vi   v1,v1,17
        vmv.x.s a0,v1
        snez    a0,a0
        ret

The root cause is that we vectorize the code inefficiently because we don't cost len when NITERS < VF.
Costing the loop control the same way mask targets or rs6000 do fixes the regression.

Tested no regression. Ok for trunk ?

PR target/113281

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc (costs::adjust_vect_cost_per_loop): New function.
(costs::finish_cost): Adjust cost for LOOP LEN with NITERS < VF.
* config/riscv/riscv-vector-costs.h: New function.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c: New test.
* gcc.dg/vect/costmodel/riscv/rvv/pr113281-5.c: New test.

gcc/config/riscv/riscv-vector-costs.cc
gcc/config/riscv/riscv-vector-costs.h
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-5.c [new file with mode: 0644]

index 090275c7efeae5cafb1441b6d051e8ea6c5ebd36..90ab93b7506a119994067929f1612f6fbea753ba 100644 (file)
@@ -1097,9 +1097,66 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   return record_stmt_cost (stmt_info, where, count * stmt_cost);
 }
 
+/* For some target specific vectorization cost which can't be handled per stmt,
+   we check the requisite conditions and adjust the vectorization cost
+   accordingly if satisfied.  One typical example is to model and adjust
+   loop_len cost for known_lt (NITERS, VF).  */
+
+void
+costs::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
+{
+  if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
+      && !LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+    {
+      /* In the middle-end loop vectorizer, we don't count the loop_len cost
+        in vect_estimate_min_profitable_iters when NITERS < VF, that is, we
+        only count the cost of len when the loop iterates more than once with
+        VF.  That is correct for most of the cases:
+
+        E.g. VF = [4, 4]
+          for (int i = 0; i < 3; i ++)
+            a[i] += b[i];
+
+        We don't need to cost MIN_EXPR or SELECT_VL for the case above.
+
+        However, for some inefficiently vectorized cases, it does use MIN_EXPR
+        to generate len.
+
+        E.g. VF = [256, 256]
+
+        Loop body:
+          # loop_len_110 = PHI <18(2), _119(11)>
+          ...
+          _117 = MIN_EXPR <ivtmp_114, 18>;
+          _118 = 18 - _117;
+          _119 = MIN_EXPR <_118, POLY_INT_CST [256, 256]>;
+          ...
+
+        Epilogue:
+          ...
+          _112 = .VEC_EXTRACT (vect_patt_27.14_109, _111);
+
+        We cost 1 unconditionally for this situation, like other targets which
+        apply masks as the loop control.  */
+      rgroup_controls *rgc;
+      unsigned int num_vectors_m1;
+      unsigned int body_stmts = 0;
+      FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
+       if (rgc->type)
+         body_stmts += num_vectors_m1 + 1;
+
+      add_stmt_cost (body_stmts, scalar_stmt, NULL, NULL, NULL_TREE, 0,
+                    vect_body);
+    }
+}
+
 void
 costs::finish_cost (const vector_costs *scalar_costs)
 {
+  if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
+    {
+      adjust_vect_cost_per_loop (loop_vinfo);
+    }
   vector_costs::finish_cost (scalar_costs);
 }
 
index dc0d61f5d4a4b76af5ebafe718d9cc593fe7c396..4e2bbfd5ca989a9e2fd7644feb79697fcd6689c5 100644 (file)
@@ -96,6 +96,8 @@ private:
      V_REGS spills according to the analysis.  */
   bool m_has_unexpected_spills_p = false;
   void record_potential_unexpected_spills (loop_vec_info);
+
+  void adjust_vect_cost_per_loop (loop_vec_info);
 };
 
 } // namespace riscv_vector
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-3.c
new file mode 100644 (file)
index 0000000..706e191
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m8" } */
+
+unsigned char a;
+
+int main() {
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-4.c
new file mode 100644 (file)
index 0000000..b0305db
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=m8 --param=riscv-autovec-preference=fixed-vlmax" } */
+
+unsigned char a;
+
+int main() {
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr113281-5.c
new file mode 100644 (file)
index 0000000..d3f5717
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl4096b -mabi=lp64d -O3 -ftree-vectorize --param=riscv-autovec-lmul=dynamic" } */
+
+unsigned char a;
+
+int main() {
+  short b = a = 0;
+  for (; a != 19; a++)
+    if (a)
+      b = 32872 >> a;
+
+  if (b == 0)
+    return 0;
+  else
+    return 1;
+}
+
+/* { dg-final { scan-assembler-not {vset} } } */