"&& 1"
[(const_int 0)]
{
- riscv_vector::expand_reduction (<WREDUC_UNSPEC>, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (<WREDUC_UNSPEC>, <WREDUC_UNSPEC_VL0_SAFE>,
+ riscv_vector::REDUCE_OP,
operands,
CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
DONE;
[(const_int 0)]
{
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+ UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE,
riscv_vector::REDUCE_OP_FRM_DYN,
operands,
CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
[(const_int 0)]
{
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED,
+ UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE,
riscv_vector::REDUCE_OP_FRM_DYN,
operands, operands[2]);
DONE;
{
rtx ops[] = {operands[0], operands[2], operands[3], operands[4]};
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_ORDERED,
+ UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE,
riscv_vector::REDUCE_OP_M_FRM_DYN,
ops, operands[1]);
}
rtx ops[] = {operands[0], operands[2], operands[1],
gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+ <WREDUC_UNSPEC_VL0_SAFE>,
riscv_vector::REDUCE_OP_M,
ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
DONE;
{
rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+ <WREDUC_UNSPEC_VL0_SAFE>,
riscv_vector::REDUCE_OP_M,
ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
DONE;
rtx ops[] = {operands[0], operands[2], operands[1],
gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+ UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE,
riscv_vector::REDUCE_OP_M_FRM_DYN,
ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
DONE;
{
rtx ops[] = {operands[0], operands[3], operands[1], operands[2]};
riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+ UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE,
riscv_vector::REDUCE_OP_M_FRM_DYN,
ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
DONE;
"&& 1"
[(const_int 0)]
{
- riscv_vector::expand_reduction (UNSPEC_REDUC_SUM, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_SUM,
+ UNSPEC_REDUC_SUM_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, CONST0_RTX (<VEL>mode));
DONE;
}
{
int prec = GET_MODE_PRECISION (<VEL>mode);
rtx min = immed_wide_int_const (wi::min_value (prec, SIGNED), <VEL>mode);
- riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_MAX,
+ UNSPEC_REDUC_MAX_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, min);
DONE;
})
(match_operand:V_VLSI 1 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_reduction (UNSPEC_REDUC_MAXU, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_MAXU,
+ UNSPEC_REDUC_MAXU_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, CONST0_RTX (<VEL>mode));
DONE;
})
{
int prec = GET_MODE_PRECISION (<VEL>mode);
rtx max = immed_wide_int_const (wi::max_value (prec, SIGNED), <VEL>mode);
- riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_MIN,
+ UNSPEC_REDUC_MIN_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, max);
DONE;
})
{
int prec = GET_MODE_PRECISION (<VEL>mode);
rtx max = immed_wide_int_const (wi::max_value (prec, UNSIGNED), <VEL>mode);
- riscv_vector::expand_reduction (UNSPEC_REDUC_MINU, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_MINU,
+ UNSPEC_REDUC_MINU_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, max);
DONE;
})
(match_operand:V_VLSI 1 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_reduction (UNSPEC_REDUC_AND, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_AND,
+ UNSPEC_REDUC_AND_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, CONSTM1_RTX (<VEL>mode));
DONE;
})
(match_operand:V_VLSI 1 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_reduction (UNSPEC_REDUC_OR, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_OR,
+ UNSPEC_REDUC_OR_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, CONST0_RTX (<VEL>mode));
DONE;
})
(match_operand:V_VLSI 1 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_reduction (UNSPEC_REDUC_XOR, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_XOR,
+ UNSPEC_REDUC_XOR_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, CONST0_RTX (<VEL>mode));
DONE;
})
[(const_int 0)]
{
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_UNORDERED,
+ UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE,
riscv_vector::REDUCE_OP_FRM_DYN,
operands, CONST0_RTX (<VEL>mode));
DONE;
REAL_VALUE_TYPE rv;
real_inf (&rv, true);
rtx f = const_double_from_real_value (rv, <VEL>mode);
- riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_MAX,
+ UNSPEC_REDUC_MAX_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, f);
DONE;
})
REAL_VALUE_TYPE rv;
real_inf (&rv, false);
rtx f = const_double_from_real_value (rv, <VEL>mode);
- riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_MIN,
+ UNSPEC_REDUC_MIN_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, f);
DONE;
})
REAL_VALUE_TYPE rv;
real_inf (&rv, true);
rtx f = const_double_from_real_value (rv, <VEL>mode);
- riscv_vector::expand_reduction (UNSPEC_REDUC_MAX, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_MAX,
+ UNSPEC_REDUC_MAX_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, f);
DONE;
})
REAL_VALUE_TYPE rv;
real_inf (&rv, false);
rtx f = const_double_from_real_value (rv, <VEL>mode);
- riscv_vector::expand_reduction (UNSPEC_REDUC_MIN, riscv_vector::REDUCE_OP,
+ riscv_vector::expand_reduction (UNSPEC_REDUC_MIN,
+ UNSPEC_REDUC_MIN_VL0_SAFE,
+ riscv_vector::REDUCE_OP,
operands, f);
DONE;
})
{
rtx ops[] = {operands[0], operands[2]};
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED,
+ UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE,
riscv_vector::REDUCE_OP_FRM_DYN,
ops, operands[1]);
DONE;
{
rtx ops[] = {operands[0], operands[2], operands[3], operands[4]};
riscv_vector::expand_reduction (UNSPEC_REDUC_SUM_ORDERED,
+ UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE,
riscv_vector::REDUCE_OP_M_FRM_DYN,
ops, operands[1]);
}
bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
void expand_cond_len_unop (unsigned, rtx *);
void expand_cond_len_binop (unsigned, rtx *);
-void expand_reduction (unsigned, unsigned, rtx *, rtx);
+void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx);
void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_floor (rtx, rtx, machine_mode, machine_mode);
void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode);
Case 2: ops = {scalar_dest, vector_src, mask, vl}
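+
+   UNSPEC_FOR_VL0_SAFE is the VL0-safe counterpart of UNSPEC and is used
+   when the run-time VL may be 0 (see the VL0-safe patterns in vector.md).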
*/
void
-expand_reduction (unsigned unspec, unsigned insn_flags, rtx *ops, rtx init)
+expand_reduction (unsigned unspec, unsigned unspec_for_vl0_safe,
+ unsigned insn_flags, rtx *ops, rtx init)
{
rtx scalar_dest = ops[0];
rtx vector_src = ops[1];
machine_mode vmode = GET_MODE (vector_src);
machine_mode vel_mode = GET_MODE (scalar_dest);
machine_mode m1_mode = get_m1_mode (vel_mode).require ();
+ rtx vl_op = NULL_RTX;
+ bool need_vl0_safe = false;
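+  /* A compile-time constant VL is known here (and in practice non-zero),
+     so only a run-time VL can be 0 and needs the VL0-safe patterns.  */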
+ if (need_mask_operand_p (insn_flags))
+ {
+ vl_op = ops[3];
+ need_vl0_safe = !CONST_INT_P (vl_op) && !CONST_POLY_INT_P (vl_op);
+ }
rtx m1_tmp = gen_reg_rtx (m1_mode);
rtx scalar_move_ops[] = {m1_tmp, init};
insn_code icode = code_for_pred_broadcast (m1_mode);
if (need_mask_operand_p (insn_flags))
- emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, ops[3]);
+ {
+ if (need_vl0_safe)
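+	/* Broadcast the start value with VL=1 so it is written to m1_tmp
+	   even if the reduction's run-time VL turns out to be 0.  */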
+ emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, const1_rtx);
+ else
+ emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, vl_op);
+ }
else
emit_vlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops);
rtx m1_tmp2 = gen_reg_rtx (m1_mode);
rtx reduc_ops[] = {m1_tmp2, vector_src, m1_tmp};
- icode = code_for_pred (unspec, vmode);
+
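+  /* Per the RVV spec an instruction executed with vl=0 does not update its
+     destination, so the VL0-safe patterns tie vd to vs1: vd then already
+     holds the start value when nothing is written.  */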
+ if (need_vl0_safe)
+ icode = code_for_pred (unspec_for_vl0_safe, vmode);
+ else
+ icode = code_for_pred (unspec, vmode);
if (need_mask_operand_p (insn_flags))
{
rtx mask_len_reduc_ops[] = {m1_tmp2, ops[2], vector_src, m1_tmp};
- emit_nonvlmax_insn (icode, insn_flags, mask_len_reduc_ops, ops[3]);
+ emit_nonvlmax_insn (icode, insn_flags, mask_len_reduc_ops, vl_op);
}
else
emit_vlmax_insn (icode, insn_flags, reduc_ops);
;; Integer and Float Reduction
UNSPEC_REDUC
UNSPEC_REDUC_SUM
+ UNSPEC_REDUC_SUM_VL0_SAFE
UNSPEC_REDUC_SUM_ORDERED
UNSPEC_REDUC_SUM_UNORDERED
+ UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE
+ UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE
UNSPEC_REDUC_MAXU
UNSPEC_REDUC_MAX
UNSPEC_REDUC_MINU
UNSPEC_REDUC_AND
UNSPEC_REDUC_OR
UNSPEC_REDUC_XOR
+ UNSPEC_REDUC_MAXU_VL0_SAFE
+ UNSPEC_REDUC_MAX_VL0_SAFE
+ UNSPEC_REDUC_MINU_VL0_SAFE
+ UNSPEC_REDUC_MIN_VL0_SAFE
+ UNSPEC_REDUC_AND_VL0_SAFE
+ UNSPEC_REDUC_OR_VL0_SAFE
+ UNSPEC_REDUC_XOR_VL0_SAFE
UNSPEC_WREDUC_SUM
UNSPEC_WREDUC_SUMU
+ UNSPEC_WREDUC_SUM_VL0_SAFE
+ UNSPEC_WREDUC_SUMU_VL0_SAFE
UNSPEC_WREDUC_SUM_ORDERED
UNSPEC_WREDUC_SUM_UNORDERED
+ UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE
+ UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE
])
(define_c_enum "unspecv" [
UNSPEC_REDUC_MIN UNSPEC_REDUC_AND UNSPEC_REDUC_OR UNSPEC_REDUC_XOR
])
+(define_int_iterator ANY_REDUC_VL0_SAFE [
+ UNSPEC_REDUC_SUM_VL0_SAFE UNSPEC_REDUC_MAXU_VL0_SAFE UNSPEC_REDUC_MAX_VL0_SAFE UNSPEC_REDUC_MINU_VL0_SAFE
+ UNSPEC_REDUC_MIN_VL0_SAFE UNSPEC_REDUC_AND_VL0_SAFE UNSPEC_REDUC_OR_VL0_SAFE UNSPEC_REDUC_XOR_VL0_SAFE
+])
+
(define_int_iterator ANY_WREDUC [
UNSPEC_WREDUC_SUM UNSPEC_WREDUC_SUMU
])
+(define_int_iterator ANY_WREDUC_VL0_SAFE [
+ UNSPEC_WREDUC_SUM_VL0_SAFE UNSPEC_WREDUC_SUMU_VL0_SAFE
+])
+
(define_int_iterator ANY_FREDUC [
UNSPEC_REDUC_MAX UNSPEC_REDUC_MIN
])
+(define_int_iterator ANY_FREDUC_VL0_SAFE [
+ UNSPEC_REDUC_MAX_VL0_SAFE UNSPEC_REDUC_MIN_VL0_SAFE
+])
+
(define_int_iterator ANY_FREDUC_SUM [
UNSPEC_REDUC_SUM_ORDERED UNSPEC_REDUC_SUM_UNORDERED
])
+(define_int_iterator ANY_FREDUC_SUM_VL0_SAFE [
+ UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE
+])
+
(define_int_iterator ANY_FWREDUC_SUM [
UNSPEC_WREDUC_SUM_ORDERED UNSPEC_WREDUC_SUM_UNORDERED
])
+(define_int_iterator ANY_FWREDUC_SUM_VL0_SAFE [
+ UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE
+])
+
+(define_int_attr reduc_op_pat_name [
+ (UNSPEC_REDUC_SUM "redsum")
+ (UNSPEC_REDUC_SUM_VL0_SAFE "redsum_vl0s")
+ (UNSPEC_REDUC_SUM_ORDERED "redosum") (UNSPEC_REDUC_SUM_UNORDERED "redusum")
+ (UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "redosum_vl0s") (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "redusum_vl0s")
+ (UNSPEC_REDUC_MAXU "redmaxu") (UNSPEC_REDUC_MAX "redmax") (UNSPEC_REDUC_MINU "redminu") (UNSPEC_REDUC_MIN "redmin")
+ (UNSPEC_REDUC_MAXU_VL0_SAFE "redmaxu_vl0s") (UNSPEC_REDUC_MAX_VL0_SAFE "redmax_vl0s") (UNSPEC_REDUC_MINU_VL0_SAFE "redminu_vl0s") (UNSPEC_REDUC_MIN_VL0_SAFE "redmin_vl0s")
+ (UNSPEC_REDUC_AND "redand") (UNSPEC_REDUC_OR "redor") (UNSPEC_REDUC_XOR "redxor")
+ (UNSPEC_REDUC_AND_VL0_SAFE "redand_vl0s") (UNSPEC_REDUC_OR_VL0_SAFE "redor_vl0s") (UNSPEC_REDUC_XOR_VL0_SAFE "redxor_vl0s")
+ (UNSPEC_WREDUC_SUM "wredsum") (UNSPEC_WREDUC_SUMU "wredsumu")
+ (UNSPEC_WREDUC_SUM_VL0_SAFE "wredsum_vl0s") (UNSPEC_WREDUC_SUMU_VL0_SAFE "wredsumu_vl0s")
+ (UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED "wredusum")
+ (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "wredosum_vl0s") (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "wredusum_vl0s")
+])
+
(define_int_attr reduc_op [
(UNSPEC_REDUC_SUM "redsum")
+ (UNSPEC_REDUC_SUM_VL0_SAFE "redsum")
(UNSPEC_REDUC_SUM_ORDERED "redosum") (UNSPEC_REDUC_SUM_UNORDERED "redusum")
+ (UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "redosum") (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "redusum")
(UNSPEC_REDUC_MAXU "redmaxu") (UNSPEC_REDUC_MAX "redmax") (UNSPEC_REDUC_MINU "redminu") (UNSPEC_REDUC_MIN "redmin")
+ (UNSPEC_REDUC_MAXU_VL0_SAFE "redmaxu") (UNSPEC_REDUC_MAX_VL0_SAFE "redmax") (UNSPEC_REDUC_MINU_VL0_SAFE "redminu") (UNSPEC_REDUC_MIN_VL0_SAFE "redmin")
(UNSPEC_REDUC_AND "redand") (UNSPEC_REDUC_OR "redor") (UNSPEC_REDUC_XOR "redxor")
+ (UNSPEC_REDUC_AND_VL0_SAFE "redand") (UNSPEC_REDUC_OR_VL0_SAFE "redor") (UNSPEC_REDUC_XOR_VL0_SAFE "redxor")
(UNSPEC_WREDUC_SUM "wredsum") (UNSPEC_WREDUC_SUMU "wredsumu")
+ (UNSPEC_WREDUC_SUM_VL0_SAFE "wredsum") (UNSPEC_WREDUC_SUMU_VL0_SAFE "wredsumu")
(UNSPEC_WREDUC_SUM_ORDERED "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED "wredusum")
+ (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "wredosum") (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "wredusum")
])
(define_code_attr WREDUC_UNSPEC [(zero_extend "UNSPEC_WREDUC_SUMU") (sign_extend "UNSPEC_WREDUC_SUM")])
+(define_code_attr WREDUC_UNSPEC_VL0_SAFE [(zero_extend "UNSPEC_WREDUC_SUMU_VL0_SAFE") (sign_extend "UNSPEC_WREDUC_SUM_VL0_SAFE")])
(define_mode_attr VINDEX [
(RVVM8QI "RVVM8QI") (RVVM4QI "RVVM4QI") (RVVM2QI "RVVM2QI") (RVVM1QI "RVVM1QI")
(UNSPEC_ORDERED "o") (UNSPEC_UNORDERED "u")
(UNSPEC_REDUC_SUM_ORDERED "o") (UNSPEC_REDUC_SUM_UNORDERED "u")
(UNSPEC_WREDUC_SUM_ORDERED "o") (UNSPEC_WREDUC_SUM_UNORDERED "u")
+ (UNSPEC_REDUC_SUM_ORDERED_VL0_SAFE "o") (UNSPEC_REDUC_SUM_UNORDERED_VL0_SAFE "u")
+ (UNSPEC_WREDUC_SUM_ORDERED_VL0_SAFE "o") (UNSPEC_WREDUC_SUM_UNORDERED_VL0_SAFE "u")
])
(define_int_attr v_su [(UNSPEC_VMULHS "") (UNSPEC_VMULHU "u") (UNSPEC_VMULHSU "su")
;; - 14.3 Vector Single-Width Floating-Point Reduction Instructions
;; - 14.4 Vector Widening Floating-Point Reduction Instructions
;; -------------------------------------------------------------------------------
+;;
+;; NOTE for VL0-safe reduction:
+;; The VL0-safe variants are used only by the auto-vectorizer, because the
+;; auto-vectorizer expects a reduction to propagate the start value to the
+;; destination even when VL=0; the only way to guarantee that is to force
+;; vd=vs1 via the register constraint.
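+;;
+;; A minimal sketch of the loop shape this serves (illustrative only):
+;;
+;;   float sum = start;       /* start value must survive VL == 0 */
+;;   for (int i = 0; i < n; i++)
+;;     sum += a[i];           /* vectorized as vfred[ou]sum.vs */
+;;
+;; Per the RVV spec, an instruction executed with vl=0 does not update its
+;; destination register, so with vd tied to vs1 the destination already
+;; contains the start value.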
;; Integer Reduction (vred(sum|maxu|max|minu|min|and|or|xor).vs)
-(define_insn "@pred_<reduc_op><mode>"
+(define_insn "@pred_<reduc_op_pat_name><mode>"
[(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr, vr")
(unspec:<V_LMUL1>
[(unspec:<VM>
[(set_attr "type" "vired")
(set_attr "mode" "<MODE>")])
+;; Integer Reduction (vred(sum|maxu|max|minu|min|and|or|xor).vs)
+;; but for the auto-vectorizer (see "NOTE for VL0-safe reduction" above for details)
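+;; Operand 4 (vs1, the start value) uses the "0" constraint so the register
+;; allocator assigns it the same register as operand 0 (vd).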
+(define_insn "@pred_<reduc_op_pat_name><mode>"
+ [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr")
+ (unspec:<V_LMUL1>
+ [(unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<V_LMUL1> [
+ (match_operand:V_VLSI 3 "register_operand" " vr")
+ (match_operand:<V_LMUL1> 4 "register_operand" " 0")
+ ] ANY_REDUC_VL0_SAFE)
+ (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))]
+ "TARGET_VECTOR"
+ "v<reduc_op>.vs\t%0,%3,%4%p1"
+ [(set_attr "type" "vired")
+ (set_attr "mode" "<MODE>")])
+
;; Integer Widen Reduction Sum (vwredsum[u].vs)
-(define_insn "@pred_<reduc_op><mode>"
+(define_insn "@pred_<reduc_op_pat_name><mode>"
[(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr, vr")
(unspec:<V_EXT_LMUL1>
[(unspec:<VM>
[(set_attr "type" "viwred")
(set_attr "mode" "<MODE>")])
+;; Integer Widen Reduction Sum (vwredsum[u].vs)
+;; but for the auto-vectorizer (see "NOTE for VL0-safe reduction" above for details)
+(define_insn "@pred_<reduc_op_pat_name><mode>"
+ [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr")
+ (unspec:<V_EXT_LMUL1>
+ [(unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<V_EXT_LMUL1> [
+ (match_operand:VI_QHS 3 "register_operand" " vr")
+ (match_operand:<V_EXT_LMUL1> 4 "register_operand" " 0")
+ ] ANY_WREDUC_VL0_SAFE)
+ (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))]
+ "TARGET_VECTOR"
+ "v<reduc_op>.vs\t%0,%3,%4%p1"
+ [(set_attr "type" "viwred")
+ (set_attr "mode" "<MODE>")])
+
;; Float Reduction (vfred(max|min).vs)
-(define_insn "@pred_<reduc_op><mode>"
+(define_insn "@pred_<reduc_op_pat_name><mode>"
[(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr, vr")
(unspec:<V_LMUL1>
[(unspec:<VM>
[(set_attr "type" "vfredu")
(set_attr "mode" "<MODE>")])
+;; Float Reduction (vfred(max|min).vs)
+;; but for the auto-vectorizer (see "NOTE for VL0-safe reduction" above for details)
+(define_insn "@pred_<reduc_op_pat_name><mode>"
+ [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr")
+ (unspec:<V_LMUL1>
+ [(unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<V_LMUL1> [
+ (match_operand:V_VLSF 3 "register_operand" " vr")
+ (match_operand:<V_LMUL1> 4 "register_operand" " 0")
+ ] ANY_FREDUC_VL0_SAFE)
+ (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))]
+ "TARGET_VECTOR"
+ "vf<reduc_op>.vs\t%0,%3,%4%p1"
+ [(set_attr "type" "vfredu")
+ (set_attr "mode" "<MODE>")])
+
;; Float Reduction Sum (vfred[ou]sum.vs)
-(define_insn "@pred_<reduc_op><mode>"
+(define_insn "@pred_<reduc_op_pat_name><mode>"
[(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr,vr")
(unspec:<V_LMUL1>
[(unspec:<VM>
(set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+;; Float Reduction Sum (vfred[ou]sum.vs)
+;; but for the auto-vectorizer (see "NOTE for VL0-safe reduction" above for details)
+(define_insn "@pred_<reduc_op_pat_name><mode>"
+ [(set (match_operand:<V_LMUL1> 0 "register_operand" "=vr")
+ (unspec:<V_LMUL1>
+ [(unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<V_LMUL1> [
+ (match_operand:V_VLSF 3 "register_operand" " vr")
+ (match_operand:<V_LMUL1> 4 "register_operand" " 0")
+ ] ANY_FREDUC_SUM_VL0_SAFE)
+ (match_operand:<V_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))]
+ "TARGET_VECTOR"
+ "vf<reduc_op>.vs\t%0,%3,%4%p1"
+ [(set_attr "type" "vfred<order>")
+ (set_attr "mode" "<MODE>")
+ (set (attr "frm_mode")
+ (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+
;; Float Widen Reduction Sum (vfwred[ou]sum.vs)
-(define_insn "@pred_<reduc_op><mode>"
+(define_insn "@pred_<reduc_op_pat_name><mode>"
[(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr, vr")
(unspec:<V_EXT_LMUL1>
[(unspec:<VM>
(set (attr "frm_mode")
(symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+;; Float Widen Reduction Sum (vfwred[ou]sum.vs)
+;; but for the auto-vectorizer (see "NOTE for VL0-safe reduction" above for details)
+(define_insn "@pred_<reduc_op_pat_name><mode>"
+ [(set (match_operand:<V_EXT_LMUL1> 0 "register_operand" "=vr")
+ (unspec:<V_EXT_LMUL1>
+ [(unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<V_EXT_LMUL1> [
+ (match_operand:VF_HS 3 "register_operand" " vr")
+ (match_operand:<V_EXT_LMUL1> 4 "register_operand" " 0")
+ ] ANY_FWREDUC_SUM_VL0_SAFE)
+ (match_operand:<V_EXT_LMUL1> 2 "vector_merge_operand" " vu")] UNSPEC_REDUC))]
+ "TARGET_VECTOR"
+ "vf<reduc_op>.vs\t%0,%3,%4%p1"
+ [(set_attr "type" "vfwred<order>")
+ (set_attr "mode" "<MODE>")
+ (set (attr "frm_mode")
+ (symbol_ref "riscv_vector::get_frm_mode (operands[8])"))])
+
;; -------------------------------------------------------------------------------
;; ---- Predicated permutation operations
;; -------------------------------------------------------------------------------
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -fno-vect-cost-model -O3 -mabi=lp64d" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+** ...
+** vsetivli zero,1,.*
+** ...
+** vfmv.s.f .*
+** ...
+** vsetvli zero,.*
+** ...
+** vfredosum.vs .*
+** ...
+** vfmv.f.s .*
+** ...
+*/
+
+float f1(float *arr, int n)
+{
+ float sum = 0;
+ for (int i = 0; i < n; i++)
+ sum += arr[i];
+ return sum;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -fno-vect-cost-model -O3 -mabi=lp64d -ffast-math" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+** ...
+** vsetvli [ast][0-9]+,zero,.*
+** ...
+** vmv.s.x .*
+** ...
+** vfredusum.vs .*
+** ...
+** vfmv.f.s .*
+** ...
+*/
+
+float f1(float *arr, int n)
+{
+ float sum = 0;
+ for (int i = 0; i < n; i++)
+ sum += arr[i];
+ return sum;
+}
+/* { dg-final { scan-assembler-not {\tvsetivli\tzero,1,.*} } } */
--- /dev/null
+! { dg-do run }
+! { dg-options "-fno-vect-cost-model" }
+
+ program dqnorm_calculator
+ implicit none
+
+ ! Declare variables
+      integer, parameter :: nx = 33, ny = 33, nz = 16
+ real(8) :: dq(5, nx, ny, nz)
+ real(8) :: result, expected_result, tolerance
+ integer :: i, j, k, l
+
+ ! Initialize the dq array with values calculated as k + j + i + 5
+ do k = 1, nz
+ do j = 1, ny
+ do i = 1, nx
+ do l = 1, 5
+ dq(l, i, j, k) = k + j + i + 5
+ end do
+ end do
+ end do
+ end do
+
+ ! Call the subroutine to calculate the norm
+ call redsum(dq, nx, ny, nz, result)
+
+ ! Check the result
+ expected_result = 214213560.0d0
+ tolerance = 0.0001d0
+ if (abs(result - expected_result) > tolerance) then
+ print *, "Result is incorrect: ", result
+        stop 1
+ end if
+ end
+
+ subroutine redsum(dq, nx, ny, nz, result)
+ implicit none
+
+ ! Declare arguments and local variables
+ integer, intent(in) :: nx, ny, nz
+ real(8), intent(in) :: dq(5, nx, ny, nz)
+ real(8), intent(out) :: result
+ real(8) :: dqnorm
+ integer :: i, j, k, l
+
+ ! Initialize dqnorm
+ dqnorm = 0.0d0
+
+ ! Compute the sum of squares of dq elements
+ do k = 1, nz
+ do j = 1, ny
+ do i = 1, nx
+ do l = 1, 5
+ dqnorm = dqnorm + dq(l, i, j, k) * dq(l, i, j, k)
+ end do
+ end do
+ end do
+ end do
+
+ result = dqnorm
+
+ end subroutine redsum
+