git.ipfire.org Git - thirdparty/gcc.git/commitdiff
[PR target/118945][PATCH v3] RISC-V: Add 'prefer_agnostic' tune parameter for vector...
author: Zhongyao Chen <chenzhongyao.hit@gmail.com>
Sat, 4 Oct 2025 14:29:32 +0000 (08:29 -0600)
committer: Jeff Law <jlaw@ventanamicro.com>
Sat, 4 Oct 2025 14:29:32 +0000 (08:29 -0600)
Improve RISC-V vector code generation by preferring tail-agnostic (ta) and
mask-agnostic (ma) policies for vector instructions when merge operands
are undefined. This optimization, controlled by a uarch-specific `prefer_agnostic`
tuning parameter, reduces `vsetvl` instructions and avoids conservative
undisturbed policy selections, addressing PR target/118945.

Changes from v2:
        - Add more detailed comments.
        - Refine the tests to explicitly check the number of vsetvli ta/tu instructions.

PR target/118945
gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_prefer_agnostic_p): New function.
(riscv_tune_param): Add prefer_agnostic member.
(various tune info structures): Initialize prefer_agnostic.
* config/riscv/riscv-protos.h (riscv_prefer_agnostic_p): Add
prototype.
* config/riscv/riscv-v.cc (get_prefer_tail_policy,
get_prefer_mask_policy): Use riscv_prefer_agnostic_p.
* config/riscv/riscv-vsetvl.cc (vsetvl_info::get_demand_flags):
Demand the agnostic policy when prefer_agnostic is true.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr118945-1.c: New file.
* gcc.target/riscv/rvv/autovec/pr118945-2.c: New file.

gcc/config/riscv/riscv-protos.h
gcc/config/riscv/riscv-v.cc
gcc/config/riscv/riscv-vsetvl.cc
gcc/config/riscv/riscv.cc
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c [new file with mode: 0644]

index e4473f45d0e44c528dc78cd4b5891a208e722eb5..346d7a812fbd41986638b8b7d85acb28fedab9cf 100644 (file)
@@ -832,6 +832,7 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx);
 #endif
 
 extern bool strided_load_broadcast_p (void);
+extern bool riscv_prefer_agnostic_p (void);
 extern bool riscv_use_divmod_expander (void);
 void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int);
 extern bool
index 8021bc14e7c00788e363fab8b4da4364d344cb2e..1d7d8a61b051d3630ed38390af611a2b8c79daf4 100644 (file)
@@ -2140,10 +2140,8 @@ get_ma (rtx ma)
 enum tail_policy
 get_prefer_tail_policy ()
 {
-  /* TODO: By default, we choose to use TAIL_ANY which allows
-     compiler pick up either agnostic or undisturbed. Maybe we
-     will have a compile option like -mprefer=agnostic to set
-     this value???.  */
+  if (riscv_prefer_agnostic_p ())
+    return TAIL_AGNOSTIC;
   return TAIL_ANY;
 }
 
@@ -2151,10 +2149,8 @@ get_prefer_tail_policy ()
 enum mask_policy
 get_prefer_mask_policy ()
 {
-  /* TODO: By default, we choose to use MASK_ANY which allows
-     compiler pick up either agnostic or undisturbed. Maybe we
-     will have a compile option like -mprefer=agnostic to set
-     this value???.  */
+  if (riscv_prefer_agnostic_p ())
+    return MASK_AGNOSTIC;
   return MASK_ANY;
 }
 
index 4fe0ae6d97b70c3d476373489caa86e6deb8e455..3586d0cdcc249e047be212d6d4c057889ce12ee3 100644 (file)
@@ -1144,9 +1144,10 @@ public:
              dflags |= demand_flags::DEMAND_LMUL_P;
          }
 
-       if (!m_ta)
+       /* Demand policy for agnostic if the uarch has a preference.  */
+       if (!m_ta || riscv_prefer_agnostic_p ())
          dflags |= demand_flags::DEMAND_TAIL_POLICY_P;
-       if (!m_ma)
+       if (!m_ma || riscv_prefer_agnostic_p ())
          dflags |= demand_flags::DEMAND_MASK_POLICY_P;
       }
 
index 41ee4014c0dd83b3baaccd65c8b4b252c29254de..bf3bcad4d73b6da35d05e6e9bbe2d320330d2edd 100644 (file)
@@ -317,6 +317,7 @@ struct riscv_tune_param
   const char *function_align;
   const char *jump_align;
   const char *loop_align;
+  bool prefer_agnostic;
 };
 
 
@@ -481,6 +482,7 @@ static const struct riscv_tune_param generic_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for rocket.  */
@@ -505,6 +507,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -529,6 +532,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive p400 Series.  */
@@ -553,6 +557,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Sifive p600 Series.  */
@@ -577,6 +582,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -601,6 +607,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for xiangshan nanhu.  */
@@ -625,6 +632,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for a generic ooo profile.  */
@@ -649,6 +657,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for Tenstorrent Ascalon 8 wide.  */
@@ -673,6 +682,7 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  true,                                                /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for size.  */
@@ -697,6 +707,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   NULL,                                                /* function_align */
   NULL,                                                /* jump_align */
   NULL,                                                /* loop_align */
+  false,                                       /* prefer-agnostic.  */
 };
 
 /* Costs to use when optimizing for MIPS P8700 */
@@ -720,7 +731,8 @@ static const struct riscv_tune_param mips_p8700_tune_info = {
   NULL,         /* vector cost */
   NULL,         /* function_align */
   NULL,         /* jump_align */
-  NULL,         /* loop_align */
+  NULL,                /* loop_align.  */
+  true,                /* prefer-agnostic.  */
 };
 
 static bool riscv_avoid_shrink_wrapping_separate ();
@@ -12842,6 +12854,15 @@ strided_load_broadcast_p ()
   return tune_param->use_zero_stride_load;
 }
 
+/* Return TRUE if we should use the tail agnostic and mask agnostic policies for
+   vector code, false otherwise.  */
+
+bool
+riscv_prefer_agnostic_p ()
+{
+  return tune_param->prefer_agnostic;
+}
+
 /* Return TRUE if we should use the divmod expander, FALSE otherwise.  This
    allows the behavior to be tuned for specific implementations as well as
    when optimizing for size.  */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c
new file mode 100644 (file)
index 0000000..fc37bef
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mtune=generic-ooo -O3 -march=rv64gcv_zvl256b_zba -mabi=lp64d -mrvv-max-lmul=m2 -mrvv-vector-bits=scalable" } */
+
+int test(int* in, int n)
+{
+  int accum = 0;
+  for (int i = 0; i < n; i++)
+        accum += in[i];
+
+  return accum;
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*ta,\s*ma} 3 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*tu,\s*ma} 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c
new file mode 100644 (file)
index 0000000..9565740
--- /dev/null
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rva23u64 -mtune=generic-ooo -Ofast -S" } */
+
+void vmult(
+    double* dst,
+    const double* src,
+    const unsigned int* rowstart,
+    const unsigned int* colnums,
+    const double* val,
+    const unsigned int n_rows
+) {
+    const double* val_ptr = &val[rowstart[0]];
+    const unsigned int* colnum_ptr = &colnums[rowstart[0]];
+    double* dst_ptr = dst;
+
+    for (unsigned int row = 0; row < n_rows; ++row) {
+        double s = 0.;
+        const double* const val_end_of_row = &val[rowstart[row + 1]];
+        while (val_ptr != val_end_of_row) {
+            s += *val_ptr++ * src[*colnum_ptr++];
+        }
+        *dst_ptr++ = s;
+    }
+}
+
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*ta,\s*ma} 4 } } */
+/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*tu,\s*ma} 1 } } */
+