From: Zhongyao Chen Date: Sat, 4 Oct 2025 14:29:32 +0000 (-0600) Subject: [PR target/118945][PATCH v3] RISC-V: Add 'prefer_agnostic' tune parameter for vector... X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4b4d5fc649a2678d539f6ed119ee2a1bb4db9a2e;p=thirdparty%2Fgcc.git [PR target/118945][PATCH v3] RISC-V: Add 'prefer_agnostic' tune parameter for vector policies Improve RISC-V vector code generation by preferring tail-agnostic (ta) and mask-agnostic (ma) policies for vector instructions when merge operands are undefined. This optimization, controlled by a uarch-specific `prefer_agnostic` tuning parameter, reduces `vsetvl` instructions and avoids conservative undisturbed policy selections, addressing PR target/118945. Changes from v2: - more detailed comment. - refine the tests to explicitly check the number of ta and tu vsetvli instructions. PR target/118945 gcc/ChangeLog: * config/riscv/riscv.cc (riscv_prefer_agnostic_p): New function. (riscv_tune_param): Add prefer_agnostic member. (various tune info structures): Initialize prefer_agnostic. * config/riscv/riscv-protos.h (riscv_prefer_agnostic_p): Add prototype. * config/riscv/riscv-v.cc (get_prefer_tail_policy, get_prefer_mask_policy): Use riscv_prefer_agnostic_p. * config/riscv/riscv-vsetvl.cc (vsetvl_info::get_demand_flags): Demand policy for agnostic when prefer_agnostic is true. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr118945-1.c: New file. * gcc.target/riscv/rvv/autovec/pr118945-2.c: New file. 
--- diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index e4473f45d0e..346d7a812fb 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -832,6 +832,7 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx); #endif extern bool strided_load_broadcast_p (void); +extern bool riscv_prefer_agnostic_p (void); extern bool riscv_use_divmod_expander (void); void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int); extern bool diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 8021bc14e7c..1d7d8a61b05 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2140,10 +2140,8 @@ get_ma (rtx ma) enum tail_policy get_prefer_tail_policy () { - /* TODO: By default, we choose to use TAIL_ANY which allows - compiler pick up either agnostic or undisturbed. Maybe we - will have a compile option like -mprefer=agnostic to set - this value???. */ + if (riscv_prefer_agnostic_p ()) + return TAIL_AGNOSTIC; return TAIL_ANY; } @@ -2151,10 +2149,8 @@ get_prefer_tail_policy () enum mask_policy get_prefer_mask_policy () { - /* TODO: By default, we choose to use MASK_ANY which allows - compiler pick up either agnostic or undisturbed. Maybe we - will have a compile option like -mprefer=agnostic to set - this value???. */ + if (riscv_prefer_agnostic_p ()) + return MASK_AGNOSTIC; return MASK_ANY; } diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 4fe0ae6d97b..3586d0cdcc2 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1144,9 +1144,10 @@ public: dflags |= demand_flags::DEMAND_LMUL_P; } - if (!m_ta) + /* Demand policy for agnostic if the uarch has a preference. 
*/ + if (!m_ta || riscv_prefer_agnostic_p ()) dflags |= demand_flags::DEMAND_TAIL_POLICY_P; - if (!m_ma) + if (!m_ma || riscv_prefer_agnostic_p ()) dflags |= demand_flags::DEMAND_MASK_POLICY_P; } diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 41ee4014c0d..bf3bcad4d73 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -317,6 +317,7 @@ struct riscv_tune_param const char *function_align; const char *jump_align; const char *loop_align; + bool prefer_agnostic; }; @@ -481,6 +482,7 @@ static const struct riscv_tune_param generic_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + false, /* prefer-agnostic. */ }; /* Costs to use when optimizing for rocket. */ @@ -505,6 +507,7 @@ static const struct riscv_tune_param rocket_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + false, /* prefer-agnostic. */ }; /* Costs to use when optimizing for Sifive 7 Series. */ @@ -529,6 +532,7 @@ static const struct riscv_tune_param sifive_7_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + false, /* prefer-agnostic. */ }; /* Costs to use when optimizing for Sifive p400 Series. */ @@ -553,6 +557,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + true, /* prefer-agnostic. */ }; /* Costs to use when optimizing for Sifive p600 Series. */ @@ -577,6 +582,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + true, /* prefer-agnostic. */ }; /* Costs to use when optimizing for T-HEAD c906. */ @@ -601,6 +607,7 @@ static const struct riscv_tune_param thead_c906_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + false, /* prefer-agnostic. */ }; /* Costs to use when optimizing for xiangshan nanhu. 
*/ @@ -625,6 +632,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + true, /* prefer-agnostic. */ }; /* Costs to use when optimizing for a generic ooo profile. */ @@ -649,6 +657,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + true, /* prefer-agnostic. */ }; /* Costs to use when optimizing for Tenstorrent Ascalon 8 wide. */ @@ -673,6 +682,7 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + true, /* prefer-agnostic. */ }; /* Costs to use when optimizing for size. */ @@ -697,6 +707,7 @@ static const struct riscv_tune_param optimize_size_tune_info = { NULL, /* function_align */ NULL, /* jump_align */ NULL, /* loop_align */ + false, /* prefer-agnostic. */ }; /* Costs to use when optimizing for MIPS P8700 */ @@ -720,7 +731,8 @@ static const struct riscv_tune_param mips_p8700_tune_info = { NULL, /* vector cost */ NULL, /* function_align */ NULL, /* jump_align */ - NULL, /* loop_align */ + NULL, /* loop_align. */ + true, /* prefer-agnostic. */ }; static bool riscv_avoid_shrink_wrapping_separate (); @@ -12842,6 +12854,15 @@ strided_load_broadcast_p () return tune_param->use_zero_stride_load; } +/* Return TRUE if we should use the tail agnostic and mask agnostic policies for + vector code, false otherwise. */ + +bool +riscv_prefer_agnostic_p () +{ + return tune_param->prefer_agnostic; +} + /* Return TRUE if we should use the divmod expander, FALSE otherwise. This allows the behavior to be tuned for specific implementations as well as when optimizing for size. 
*/ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c new file mode 100644 index 00000000000..fc37bef1258 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mtune=generic-ooo -O3 -march=rv64gcv_zvl256b_zba -mabi=lp64d -mrvv-max-lmul=m2 -mrvv-vector-bits=scalable" } */ + +int test(int* in, int n) +{ + int accum = 0; + for (int i = 0; i < n; i++) + accum += in[i]; + + return accum; +} + +/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*ta,\s*ma} 3 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*tu,\s*ma} 1 } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c new file mode 100644 index 00000000000..956574067ce --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr118945-2.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rva23u64 -mtune=generic-ooo -Ofast -S" } */ + +void vmult( + double* dst, + const double* src, + const unsigned int* rowstart, + const unsigned int* colnums, + const double* val, + const unsigned int n_rows +) { + const double* val_ptr = &val[rowstart[0]]; + const unsigned int* colnum_ptr = &colnums[rowstart[0]]; + double* dst_ptr = dst; + + for (unsigned int row = 0; row < n_rows; ++row) { + double s = 0.; + const double* const val_end_of_row = &val[rowstart[row + 1]]; + while (val_ptr != val_end_of_row) { + s += *val_ptr++ * src[*colnum_ptr++]; + } + *dst_ptr++ = s; + } +} + +/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*ta,\s*ma} 4 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+[a-z0-9]+,\s*[a-z0-9]+,\s*e[0-9]+,\s*m[f0-9]+,\s*tu,\s*ma} 1 } } */ +