Currently select_vl is a direct optab whose mode is always Xmode/Pmode.
This does not give us sufficient freedom to enable or disable vsetvl
(= SELECT_VL) depending on the vector mode.
This patch makes select_vl a convert optab and adjusts the associated IFN
functions as well as the query/emit code in the vectorizer.
On its own, this patch does not exercise any new behavior yet.  That will
happen in a separate riscv patch that enables "VLS" select_vl.
gcc/ChangeLog:
* config/riscv/autovec.md (select_vl<mode>): Rename to...
(select_vl<V:mode><P:mode>): ...this.
* doc/md.texi: Document new behavior.
* internal-fn.cc (select_vl_direct): New define.
(expand_select_vl_optab_fn): Adjust for convert optab.
(direct_select_vl_optab_supported_p): Ditto.
* internal-fn.def (SELECT_VL): Ditto.
* optabs.def (OPTAB_CD): Add select_vl.
(OPTAB_D): Remove select_vl.
* tree-vect-loop-manip.cc (vect_set_loop_controls_directly):
Adjust for convert select_vl optab.
* tree-vect-loop.cc: Ditto.
;; == SELECT_VL
;; =========================================================================
-(define_expand "select_vl<mode>"
+(define_expand "select_vl<V:mode><P:mode>"
[(match_operand:P 0 "register_operand")
(match_operand:P 1 "vector_length_operand")
- (match_operand:P 2 "immediate_operand")]
+ (match_operand:P 2 "immediate_operand")
+ (match_operand:V 3)]
"TARGET_VECTOR"
{
riscv_vector::expand_select_vl (operands);
operand0[i] = operand0[i - 1] && (operand1 + i < operand2);
@end smallexample
-@cindex @code{select_vl@var{m}} instruction pattern
-@item @code{select_vl@var{m}}
-Set operand 0 to the number of scalar iterations that should be handled
-by one iteration of a vector loop. Operand 1 is the total number of
-scalar iterations that the loop needs to process and operand 2 is a
-maximum bound on the result (also known as the maximum ``vectorization
-factor'').
+@cindex @code{select_vl@var{m}@var{n}} instruction pattern
+@item @code{select_vl@var{m}@var{n}}
+Set operand 0 (of mode @var{n}) to the number of scalar iterations that
+should be handled by one iteration of a vector loop. Operand 1 is the
+total number of scalar iterations that the loop needs to process and
+operand 2 is a maximum bound on the result (also known as the
+maximum ``vectorization factor''). Operand 3 (of mode @var{m}) is
+a dummy parameter to pass the vector mode to be used.
The maximum value of operand 0 is given by:
@smallexample
always calculate the maximum value.
This optab is only useful on targets that implement @samp{len_load_@var{m}}
-and/or @samp{len_store_@var{m}}.
+and/or @samp{len_store_@var{m}} or the associated @samp{_len} variants.
@cindex @code{check_raw_ptrs@var{m}} instruction pattern
@item @samp{check_raw_ptrs@var{m}}
#define check_ptrs_direct { 0, 0, false }
#define crc_direct { 1, -1, true }
#define reduc_sbool_direct { 0, 0, true }
+#define select_vl_direct { 2, 0, false }
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
#define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
expand_direct_optab_fn (FN, STMT, OPTAB, 4)
+#define expand_select_vl_optab_fn(FN, STMT, OPTAB) \
+ expand_convert_optab_fn (FN, STMT, OPTAB, 3)
+
/* Expanders for optabs that can use expand_convert_optab_fn. */
#define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \
#define direct_vec_set_optab_supported_p direct_optab_supported_p
#define direct_vec_extract_optab_supported_p convert_optab_supported_p
#define direct_reduc_sbool_optab_supported_p direct_optab_supported_p
+#define direct_select_vl_optab_supported_p convert_optab_supported_p
/* Return the optab used by internal function FN. */
DEF_INTERNAL_OPTAB_FN (MASK_LEN_STORE, 0, mask_len_store, mask_len_store)
DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
-DEF_INTERNAL_OPTAB_FN (SELECT_VL, ECF_CONST | ECF_NOTHROW, select_vl, binary)
+DEF_INTERNAL_OPTAB_FN (SELECT_VL, ECF_CONST | ECF_NOTHROW, select_vl, select_vl)
DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
check_raw_ptrs, check_ptrs)
DEF_INTERNAL_OPTAB_FN (CHECK_WAR_PTRS, ECF_CONST | ECF_NOTHROW,
OPTAB_CD (usdot_prod_optab, "usdot_prod$I$a$b")
OPTAB_CD (while_ult_optab, "while_ult$a$b")
+OPTAB_CD (select_vl_optab, "select_vl$a$b")
OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
OPTAB_NX(add_optab, "add$F$a3")
OPTAB_D (len_store_optab, "len_store_$a")
OPTAB_D (mask_len_strided_load_optab, "mask_len_strided_load_$a")
OPTAB_D (mask_len_strided_store_optab, "mask_len_strided_store_$a")
-OPTAB_D (select_vl_optab, "select_vl$a")
OPTAB_D (andn_optab, "andn$a3")
OPTAB_D (iorn_optab, "iorn$a3")
{
create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
insert_after, &index_before_incr, &index_after_incr);
+ tree vectype = build_zero_cst (rgc->type);
tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
- index_before_incr, nitems_step);
+ index_before_incr, nitems_step,
+ vectype);
gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
}
else
if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
{
tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
- if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type,
- OPTIMIZE_FOR_SPEED)
- && LOOP_VINFO_LENS (loop_vinfo).length () == 1
+ if (LOOP_VINFO_LENS (loop_vinfo).length () == 1
&& LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1
&& (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()))
LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true;
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ for (auto rgc : LOOP_VINFO_LENS (loop_vinfo))
+ if (rgc.type
+ && !direct_internal_fn_supported_p (IFN_SELECT_VL,
+ rgc.type, iv_type,
+ OPTIMIZE_FOR_SPEED))
+ {
+ LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = false;
+ break;
+ }
+
/* If any of the SLP instances cover more than a single lane
we cannot use .SELECT_VL at the moment, even if the number
of lanes is uniform throughout the SLP graph. */