;; op[0] = (narrow) (((wide) op[1] + (wide) op[2] + 1) >> 1);
;; -------------------------------------------------------------------------
+;; Signed floor average.  Both narrow inputs are sign-extended and added in
+;; the wide mode (widening add), then the sum is arithmetic-shifted right by
+;; one and narrowed back (narrowing shift).  The unsigned variant is handled
+;; by the separate uavg<mode>3_floor expander.
-(define_expand "<u>avg<v_double_trunc>3_floor"
+(define_expand "avg<v_double_trunc>3_floor"
[(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
(truncate:<V_DOUBLE_TRUNC>
- (<ext_to_rshift>:VWEXTI
+ (ashiftrt:VWEXTI
(plus:VWEXTI
- (any_extend:VWEXTI
+ (sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
- (any_extend:VWEXTI
+ (sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))]
"TARGET_VECTOR"
{
/* First emit a widening addition. */
rtx tmp1 = gen_reg_rtx (<MODE>mode);
rtx ops1[] = {tmp1, operands[1], operands[2]};
- insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode);
+ insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
/* Then a narrowing shift. */
rtx ops2[] = {operands[0], tmp1, const1_rtx};
- icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode);
+ icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
DONE;
})
+;; Signed ceil average.  Same scheme as the signed floor form, except that
+;; 1 is added to the wide sum before the arithmetic right shift by one.
+;; The unsigned variant is handled by the separate uavg<mode>3_ceil expander.
-(define_expand "<u>avg<v_double_trunc>3_ceil"
+(define_expand "avg<v_double_trunc>3_ceil"
[(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
(truncate:<V_DOUBLE_TRUNC>
- (<ext_to_rshift>:VWEXTI
+ (ashiftrt:VWEXTI
(plus:VWEXTI
(plus:VWEXTI
- (any_extend:VWEXTI
+ (sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
- (any_extend:VWEXTI
+ (sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
(const_int 1)))))]
"TARGET_VECTOR"
/* First emit a widening addition. */
rtx tmp1 = gen_reg_rtx (<MODE>mode);
rtx ops1[] = {tmp1, operands[1], operands[2]};
- insn_code icode = code_for_pred_dual_widen (PLUS, <CODE>, <MODE>mode);
+ insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
/* Then add 1. */
/* Finally, a narrowing shift. */
rtx ops3[] = {operands[0], tmp2, const1_rtx};
- icode = code_for_pred_narrow_scalar (<EXT_TO_RSHIFT>, <MODE>mode);
+ icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode);
riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
DONE;
})
+;; Unsigned floor average for V_VLSI modes, expanded to a single
+;; UNSPEC_VAADDU predicated insn instead of widen-add plus narrowing shift.
+;; The BINARY_OP_VXRM_RDN insn type supplies the VXRM_RDN rounding-mode
+;; operand, so the expected code is:
+;; csrwi vxrm, 2
+;; vaaddu.vv vd, vs2, vs1
+(define_expand "uavg<mode>3_floor"
+ [(match_operand:V_VLSI 0 "register_operand")
+ (match_operand:V_VLSI 1 "register_operand")
+ (match_operand:V_VLSI 2 "register_operand")]
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands);
+ DONE;
+})
+
+;; Unsigned ceil average for V_VLSI modes, expanded to a single
+;; UNSPEC_VAADDU predicated insn instead of widen-add, add 1 and narrowing
+;; shift.  The BINARY_OP_VXRM_RNU insn type supplies the VXRM_RNU
+;; rounding-mode operand, so the expected code is:
+;; csrwi vxrm, 0
+;; vaaddu.vv vd, vs2, vs1
+(define_expand "uavg<mode>3_ceil"
+ [(match_operand:V_VLSI 0 "register_operand")
+ (match_operand:V_VLSI 1 "register_operand")
+ (match_operand:V_VLSI 2 "register_operand")]
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, operands);
+ DONE;
+})
+
;; -------------------------------------------------------------------------
;; ---- [FP] Rounding.
;; -------------------------------------------------------------------------
/* Means INSN has FRM operand and the value is FRM_RNE. */
FRM_RNE_P = 1 << 19,
+
+ /* Means INSN has VXRM operand and the value is VXRM_RNU. */
+ VXRM_RNU_P = 1 << 20,
+
+ /* Means INSN has VXRM operand and the value is VXRM_RDN. */
+ VXRM_RDN_P = 1 << 21,
};
enum insn_type : unsigned int
BINARY_OP_TAMU = __MASK_OP_TAMU | BINARY_OP_P,
BINARY_OP_TUMA = __MASK_OP_TUMA | BINARY_OP_P,
BINARY_OP_FRM_DYN = BINARY_OP | FRM_DYN_P,
+ BINARY_OP_VXRM_RNU = BINARY_OP | VXRM_RNU_P,
+ BINARY_OP_VXRM_RDN = BINARY_OP | VXRM_RDN_P,
/* Ternary operator. Always have real merge operand. */
TERNARY_OP = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P | HAS_MERGE_P
add_input_operand (frm_rtx, Pmode);
}
+ /* Append the fixed-point rounding mode ROUNDING_MODE (a VXRM value) to
+ the insn operands as a Pmode integer constant input. */
+ void
+ add_rounding_mode_operand (enum fixed_point_rounding_mode rounding_mode)
+ {
+ /* This is a VXRM value, not a floating-point FRM value, so name it
+ accordingly. */
+ rtx vxrm_rtx = gen_int_mode (rounding_mode, Pmode);
+ add_input_operand (vxrm_rtx, Pmode);
+ }
+
/* Return the vtype mode based on insn_flags.
vtype mode mean the mode vsetvl insn set. */
machine_mode
add_rounding_mode_operand (FRM_RMM);
else if (m_insn_flags & FRM_RNE_P)
add_rounding_mode_operand (FRM_RNE);
+ else if (m_insn_flags & VXRM_RNU_P)
+ add_rounding_mode_operand (VXRM_RNU);
+ else if (m_insn_flags & VXRM_RDN_P)
+ add_rounding_mode_operand (VXRM_RDN);
gcc_assert (insn_data[(int) icode].n_operands == m_opno);
expand (icode, any_mem_p);
(define_code_attr nmsub_nmadd [(plus "nmsub") (minus "nmadd")])
(define_code_attr nmsac_nmacc [(plus "nmsac") (minus "nmacc")])
-(define_code_attr ext_to_rshift [(sign_extend "ashiftrt")
- (zero_extend "lshiftrt")])
-(define_code_attr EXT_TO_RSHIFT [(sign_extend "ASHIFTRT")
- (zero_extend "LSHIFTRT")])
-
(define_code_iterator and_ior [and ior])
(define_code_iterator any_float_binop [plus mult minus div])
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<sat_op><mode>"
- [(set (match_operand:VI 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VI
+ [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSI
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" " rK, rK, rK, rK")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VI
- [(match_operand:VI 3 "register_operand" " vr, vr, vr, vr")
- (match_operand:VI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP)
- (match_operand:VI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (unspec:V_VLSI
+ [(match_operand:V_VLSI 3 "register_operand" " vr, vr, vr, vr")
+ (match_operand:V_VLSI 4 "register_operand" " vr, vr, vr, vr")] VSAT_OP)
+ (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"v<sat_op>.vv\t%0,%3,%4%p1"
[(set_attr "type" "<sat_insn_type>")
DEF_AVG_FLOOR (uint8_t, uint16_t, 2048)
/* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */
-/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 10 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */
-/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */
+/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
DEF_AVG_FLOOR (uint16_t, uint32_t, 1024)
/* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */
-/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 9 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */
-/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */
+/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
DEF_AVG_FLOOR (uint32_t, uint64_t, 512)
/* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */
-/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 8 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */
-/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */
+/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
DEF_AVG_CEIL (uint8_t, uint16_t, 2048)
/* { dg-final { scan-assembler-times {vwadd\.vv} 10 } } */
-/* { dg-final { scan-assembler-times {vwaddu\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 10 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 10 } } */
-/* { dg-final { scan-assembler-times {vnsrl\.wi} 10 } } */
-/* { dg-final { scan-assembler-times {vadd\.vi} 20 } } */
+/* { dg-final { scan-assembler-times {vaaddu\.vv} 10 } } */
+/* { dg-final { scan-assembler-times {vadd\.vi} 10 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
DEF_AVG_CEIL (uint16_t, uint32_t, 1024)
/* { dg-final { scan-assembler-times {vwadd\.vv} 9 } } */
-/* { dg-final { scan-assembler-times {vwaddu\.vv} 9 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 9 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 9 } } */
-/* { dg-final { scan-assembler-times {vnsrl\.wi} 9 } } */
-/* { dg-final { scan-assembler-times {vadd\.vi} 18 } } */
+/* { dg-final { scan-assembler-times {vaaddu\.vv} 9 } } */
+/* { dg-final { scan-assembler-times {vadd\.vi} 9 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
DEF_AVG_CEIL (uint16_t, uint32_t, 512)
/* { dg-final { scan-assembler-times {vwadd\.vv} 8 } } */
-/* { dg-final { scan-assembler-times {vwaddu\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 8 } } */
/* { dg-final { scan-assembler-times {vnsra\.wi} 8 } } */
-/* { dg-final { scan-assembler-times {vnsrl\.wi} 8 } } */
-/* { dg-final { scan-assembler-times {vadd\.vi} 16 } } */
+/* { dg-final { scan-assembler-times {vaaddu\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {vadd\.vi} 8 } } */
/* { dg-final { scan-assembler-not {csrr} } } */
/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
#include "vec-avg-template.h"
/* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */
-/* { dg-final { scan-assembler-times {\tvnsrl.wi} 6 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */
/* { dg-final { scan-assembler-times {\tvnsra.wi} 6 } } */
+/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */
#include "vec-avg-template.h"
/* { dg-final { scan-assembler-times {\tvwadd\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvwaddu\.vv} 6 } } */
-/* { dg-final { scan-assembler-times {\tvadd\.vi} 6 } } */
-/* { dg-final { scan-assembler-times {\tvnsrl\.wi} 6 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*0} 3 } } */
+/* { dg-final { scan-assembler-times {csrwi\s*vxrm,\s*2} 3 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vi} 3 } } */
/* { dg-final { scan-assembler-times {\tvnsra\.wi} 6 } } */
+/* { dg-final { scan-assembler-times {vaaddu\.vv} 6 } } */