This patch adds support for mask operations (comparison and logical) on VLS modes.
It fixes the following FAILs in the "vect" testsuite:
FAIL: gcc.dg/vect/vect-bic-bitmask-12.c -flto -ffat-lto-objects scan-tree-dump dce7 "<=\\s*.+{ 255,.+}"
FAIL: gcc.dg/vect/vect-bic-bitmask-12.c scan-tree-dump dce7 "<=\\s*.+{ 255,.+}"
FAIL: gcc.dg/vect/vect-bic-bitmask-23.c -flto -ffat-lto-objects scan-tree-dump dce7 "<=\\s*.+{ 255, 15, 1, 65535 }"
FAIL: gcc.dg/vect/vect-bic-bitmask-23.c scan-tree-dump dce7 "<=\\s*.+{ 255, 15, 1, 65535 }"
Full regression testing passed (with the 4 FAILs above fixed).
gcc/ChangeLog:
* config/riscv/autovec-opt.md: Add VLS mask modes.
* config/riscv/autovec.md (@vcond_mask_<mode><vm>): Remove @.
(vcond_mask_<mode><vm>): Add VLS mask modes.
* config/riscv/vector.md: Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/vls/def.h: Add VLS tests.
* gcc.target/riscv/rvv/autovec/vls/cmp-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/cmp-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/cmp-3.c: New test.
* gcc.target/riscv/rvv/autovec/vls/cmp-4.c: New test.
* gcc.target/riscv/rvv/autovec/vls/cmp-5.c: New test.
* gcc.target/riscv/rvv/autovec/vls/cmp-6.c: New test.
* gcc.target/riscv/rvv/autovec/vls/mask-1.c: New test.
* gcc.target/riscv/rvv/autovec/vls/mask-2.c: New test.
* gcc.target/riscv/rvv/autovec/vls/mask-3.c: New test.
;; -------------------------------------------------------------------------
(define_insn_and_split "*<optab>not<mode>"
- [(set (match_operand:VB 0 "register_operand" "=vr")
- (bitmanip_bitwise:VB
- (not:VB (match_operand:VB 2 "register_operand" " vr"))
- (match_operand:VB 1 "register_operand" " vr")))]
+ [(set (match_operand:VB_VLS 0 "register_operand" "=vr")
+ (bitmanip_bitwise:VB_VLS
+ (not:VB_VLS (match_operand:VB_VLS 2 "register_operand" " vr"))
+ (match_operand:VB_VLS 1 "register_operand" " vr")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
;; -------------------------------------------------------------------------
(define_insn_and_split "*n<optab><mode>"
- [(set (match_operand:VB 0 "register_operand" "=vr")
- (not:VB
- (any_bitwise:VB
- (match_operand:VB 1 "register_operand" " vr")
- (match_operand:VB 2 "register_operand" " vr"))))]
+ [(set (match_operand:VB_VLS 0 "register_operand" "=vr")
+ (not:VB_VLS
+ (any_bitwise:VB_VLS
+ (match_operand:VB_VLS 1 "register_operand" " vr")
+ (match_operand:VB_VLS 2 "register_operand" " vr"))))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
;; -------------------------------------------------------------------------
(define_insn_and_split "<optab><mode>3"
- [(set (match_operand:VB 0 "register_operand" "=vr")
- (any_bitwise:VB (match_operand:VB 1 "register_operand" " vr")
- (match_operand:VB 2 "register_operand" " vr")))]
+ [(set (match_operand:VB_VLS 0 "register_operand" "=vr")
+ (any_bitwise:VB_VLS (match_operand:VB_VLS 1 "register_operand" " vr")
+ (match_operand:VB_VLS 2 "register_operand" " vr")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
;; -------------------------------------------------------------------------
(define_insn_and_split "one_cmpl<mode>2"
- [(set (match_operand:VB 0 "register_operand" "=vr")
- (not:VB (match_operand:VB 1 "register_operand" " vr")))]
+ [(set (match_operand:VB_VLS 0 "register_operand" "=vr")
+ (not:VB_VLS (match_operand:VB_VLS 1 "register_operand" " vr")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
;; - vfmerge.vf
;; -------------------------------------------------------------------------
-(define_insn_and_split "@vcond_mask_<mode><vm>"
- [(set (match_operand:V 0 "register_operand")
- (if_then_else:V
+(define_insn_and_split "vcond_mask_<mode><vm>"
+ [(set (match_operand:V_VLS 0 "register_operand")
+ (if_then_else:V_VLS
(match_operand:<VM> 3 "register_operand")
- (match_operand:V 1 "nonmemory_operand")
- (match_operand:V 2 "register_operand")))]
+ (match_operand:V_VLS 1 "nonmemory_operand")
+ (match_operand:V_VLS 2 "register_operand")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
(define_expand "vec_cmp<mode><vm>"
[(set (match_operand:<VM> 0 "register_operand")
(match_operator:<VM> 1 "comparison_operator"
- [(match_operand:VI 2 "register_operand")
- (match_operand:VI 3 "register_operand")]))]
+ [(match_operand:V_VLSI 2 "register_operand")
+ (match_operand:V_VLSI 3 "register_operand")]))]
"TARGET_VECTOR"
{
riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
(define_expand "vec_cmpu<mode><vm>"
[(set (match_operand:<VM> 0 "register_operand")
(match_operator:<VM> 1 "comparison_operator"
- [(match_operand:VI 2 "register_operand")
- (match_operand:VI 3 "register_operand")]))]
+ [(match_operand:V_VLSI 2 "register_operand")
+ (match_operand:V_VLSI 3 "register_operand")]))]
"TARGET_VECTOR"
{
riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
(define_expand "vec_cmp<mode><vm>"
[(set (match_operand:<VM> 0 "register_operand")
(match_operator:<VM> 1 "comparison_operator"
- [(match_operand:VF 2 "register_operand")
- (match_operand:VF 3 "register_operand")]))]
+ [(match_operand:V_VLSF 2 "register_operand")
+ (match_operand:V_VLSF 3 "register_operand")]))]
"TARGET_VECTOR"
{
riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand")
- (match_operand:VF 5 "register_operand")])
+ [(match_operand:V_VLSF 4 "register_operand")
+ (match_operand:V_VLSF 5 "register_operand")])
(match_operand:<VM> 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{})
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand" " vr, vr")
- (match_operand:VF 5 "register_operand" " vr, vr")])
+ [(match_operand:V_VLSF 4 "register_operand" " vr, vr")
+ (match_operand:V_VLSF 5 "register_operand" " vr, vr")])
(match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
- "TARGET_VECTOR && known_le (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
+ "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
"vmf%B3.vv\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
(set_attr "mode" "<MODE>")])
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 2 "signed_order_operator"
- [(match_operand:VF 3 "register_operand" " vr")
- (match_operand:VF 4 "register_operand" " vr")])
+ [(match_operand:V_VLSF 3 "register_operand" " vr")
+ (match_operand:V_VLSF 4 "register_operand" " vr")])
(match_dup 1)))]
"TARGET_VECTOR"
"vmf%B2.vv\t%0,%3,%4,v0.t"
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand" " vr, 0, vr, 0, 0, vr, 0, vr, vr")
- (match_operand:VF 5 "register_operand" " vr, vr, 0, 0, vr, 0, 0, vr, vr")])
+ [(match_operand:V_VLSF 4 "register_operand" " vr, 0, vr, 0, 0, vr, 0, vr, vr")
+ (match_operand:V_VLSF 5 "register_operand" " vr, vr, 0, 0, vr, 0, 0, vr, vr")])
(match_operand:<VM> 2 "vector_merge_operand" " vu, vu, vu, vu, 0, 0, 0, vu, 0")))]
- "TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
+ "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
"vmf%B3.vv\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
(set_attr "mode" "<MODE>")])
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand")
- (vec_duplicate:VF
+ [(match_operand:V_VLSF 4 "register_operand")
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 5 "register_operand"))])
(match_operand:<VM> 2 "vector_merge_operand")))]
"TARGET_VECTOR"
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 2 "signed_order_operator"
- [(match_operand:VF 3 "register_operand" " vr")
- (vec_duplicate:VF
+ [(match_operand:V_VLSF 3 "register_operand" " vr")
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f"))])
(match_dup 1)))]
"TARGET_VECTOR"
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand" " vr, vr")
- (vec_duplicate:VF
+ [(match_operand:V_VLSF 4 "register_operand" " vr, vr")
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 5 "register_operand" " f, f"))])
(match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
- "TARGET_VECTOR && known_le (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
+ "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
"vmf%B3.vf\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
(set_attr "mode" "<MODE>")])
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "signed_order_operator"
- [(match_operand:VF 4 "register_operand" " vr, 0, 0, vr, vr")
- (vec_duplicate:VF
+ [(match_operand:V_VLSF 4 "register_operand" " vr, 0, 0, vr, vr")
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 5 "register_operand" " f, f, f, f, f"))])
(match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
- "TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
+ "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
"vmf%B3.vf\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
(set_attr "mode" "<MODE>")])
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
- [(vec_duplicate:VF
+ [(vec_duplicate:V_VLSF
(match_operand:<VEL> 5 "register_operand"))
- (match_operand:VF 4 "register_operand")])
+ (match_operand:V_VLSF 4 "register_operand")])
(match_operand:<VM> 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{})
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 2 "equality_operator"
- [(vec_duplicate:VF
+ [(vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f"))
- (match_operand:VF 3 "register_operand" " vr")])
+ (match_operand:V_VLSF 3 "register_operand" " vr")])
(match_dup 1)))]
"TARGET_VECTOR"
"vmf%B2.vf\t%0,%3,%4,v0.t"
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
- [(vec_duplicate:VF
+ [(vec_duplicate:V_VLSF
(match_operand:<VEL> 5 "register_operand" " f, f"))
- (match_operand:VF 4 "register_operand" " vr, vr")])
+ (match_operand:V_VLSF 4 "register_operand" " vr, vr")])
(match_operand:<VM> 2 "vector_merge_operand" " vu, 0")))]
- "TARGET_VECTOR && known_le (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
+ "TARGET_VECTOR && riscv_vector::cmp_lmul_le_one (<MODE>mode)"
"vmf%B3.vf\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
(set_attr "mode" "<MODE>")])
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(match_operator:<VM> 3 "equality_operator"
- [(vec_duplicate:VF
+ [(vec_duplicate:V_VLSF
(match_operand:<VEL> 5 "register_operand" " f, f, f, f, f"))
- (match_operand:VF 4 "register_operand" " vr, 0, 0, vr, vr")])
+ (match_operand:V_VLSF 4 "register_operand" " vr, 0, 0, vr, vr")])
(match_operand:<VM> 2 "vector_merge_operand" " vu, vu, 0, vu, 0")))]
- "TARGET_VECTOR && known_gt (GET_MODE_SIZE (<MODE>mode), BYTES_PER_RISCV_VECTOR)"
+ "TARGET_VECTOR && riscv_vector::cmp_lmul_gt_one (<MODE>mode)"
"vmf%B3.vf\t%0,%4,%5%p1"
[(set_attr "type" "vfcmp")
(set_attr "mode" "<MODE>")])
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+/* Instantiates DEF_OP_VV (expected to come from def.h) with == on int8_t..int64_t, _Float16, float and double; results are checked by the dg-final scans at the end.  */
+#include "def.h"
+
+DEF_OP_VV (eq, 1, int8_t, ==)
+DEF_OP_VV (eq, 2, int8_t, ==)
+DEF_OP_VV (eq, 4, int8_t, ==)
+DEF_OP_VV (eq, 8, int8_t, ==)
+DEF_OP_VV (eq, 16, int8_t, ==)
+DEF_OP_VV (eq, 32, int8_t, ==)
+DEF_OP_VV (eq, 64, int8_t, ==)
+DEF_OP_VV (eq, 128, int8_t, ==)
+DEF_OP_VV (eq, 256, int8_t, ==)
+DEF_OP_VV (eq, 512, int8_t, ==)
+DEF_OP_VV (eq, 1024, int8_t, ==)
+DEF_OP_VV (eq, 2048, int8_t, ==)
+DEF_OP_VV (eq, 4096, int8_t, ==)
+
+DEF_OP_VV (eq, 1, int16_t, ==)
+DEF_OP_VV (eq, 2, int16_t, ==)
+DEF_OP_VV (eq, 4, int16_t, ==)
+DEF_OP_VV (eq, 8, int16_t, ==)
+DEF_OP_VV (eq, 16, int16_t, ==)
+DEF_OP_VV (eq, 32, int16_t, ==)
+DEF_OP_VV (eq, 64, int16_t, ==)
+DEF_OP_VV (eq, 128, int16_t, ==)
+DEF_OP_VV (eq, 256, int16_t, ==)
+DEF_OP_VV (eq, 512, int16_t, ==)
+DEF_OP_VV (eq, 1024, int16_t, ==)
+DEF_OP_VV (eq, 2048, int16_t, ==)
+
+DEF_OP_VV (eq, 1, int32_t, ==)
+DEF_OP_VV (eq, 2, int32_t, ==)
+DEF_OP_VV (eq, 4, int32_t, ==)
+DEF_OP_VV (eq, 8, int32_t, ==)
+DEF_OP_VV (eq, 16, int32_t, ==)
+DEF_OP_VV (eq, 32, int32_t, ==)
+DEF_OP_VV (eq, 64, int32_t, ==)
+DEF_OP_VV (eq, 128, int32_t, ==)
+DEF_OP_VV (eq, 256, int32_t, ==)
+DEF_OP_VV (eq, 512, int32_t, ==)
+DEF_OP_VV (eq, 1024, int32_t, ==)
+
+DEF_OP_VV (eq, 1, int64_t, ==)
+DEF_OP_VV (eq, 2, int64_t, ==)
+DEF_OP_VV (eq, 4, int64_t, ==)
+DEF_OP_VV (eq, 8, int64_t, ==)
+DEF_OP_VV (eq, 16, int64_t, ==)
+DEF_OP_VV (eq, 32, int64_t, ==)
+DEF_OP_VV (eq, 64, int64_t, ==)
+DEF_OP_VV (eq, 128, int64_t, ==)
+DEF_OP_VV (eq, 256, int64_t, ==)
+DEF_OP_VV (eq, 512, int64_t, ==)
+
+DEF_OP_VV (eq, 1, _Float16, ==)
+DEF_OP_VV (eq, 2, _Float16, ==)
+DEF_OP_VV (eq, 4, _Float16, ==)
+DEF_OP_VV (eq, 8, _Float16, ==)
+DEF_OP_VV (eq, 16, _Float16, ==)
+DEF_OP_VV (eq, 32, _Float16, ==)
+DEF_OP_VV (eq, 64, _Float16, ==)
+DEF_OP_VV (eq, 128, _Float16, ==)
+DEF_OP_VV (eq, 256, _Float16, ==)
+DEF_OP_VV (eq, 512, _Float16, ==)
+DEF_OP_VV (eq, 1024, _Float16, ==)
+DEF_OP_VV (eq, 2048, _Float16, ==)
+
+DEF_OP_VV (eq, 1, float, ==)
+DEF_OP_VV (eq, 2, float, ==)
+DEF_OP_VV (eq, 4, float, ==)
+DEF_OP_VV (eq, 8, float, ==)
+DEF_OP_VV (eq, 16, float, ==)
+DEF_OP_VV (eq, 32, float, ==)
+DEF_OP_VV (eq, 64, float, ==)
+DEF_OP_VV (eq, 128, float, ==)
+DEF_OP_VV (eq, 256, float, ==)
+DEF_OP_VV (eq, 512, float, ==)
+DEF_OP_VV (eq, 1024, float, ==)
+
+DEF_OP_VV (eq, 1, double, ==)
+DEF_OP_VV (eq, 2, double, ==)
+DEF_OP_VV (eq, 4, double, ==)
+DEF_OP_VV (eq, 8, double, ==)
+DEF_OP_VV (eq, 16, double, ==)
+DEF_OP_VV (eq, 32, double, ==)
+DEF_OP_VV (eq, 64, double, ==)
+DEF_OP_VV (eq, 128, double, ==)
+DEF_OP_VV (eq, 256, double, ==)
+DEF_OP_VV (eq, 512, double, ==)
+
+/* { dg-final { scan-assembler-times {vmseq\.vv} 42 } } */
+/* { dg-final { scan-assembler-times {vmfeq\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+/* Instantiates DEF_OP_VV (expected to come from def.h) with != on int8_t..int64_t, _Float16, float and double; results are checked by the dg-final scans at the end.  */
+#include "def.h"
+
+DEF_OP_VV (ne, 1, int8_t, !=)
+DEF_OP_VV (ne, 2, int8_t, !=)
+DEF_OP_VV (ne, 4, int8_t, !=)
+DEF_OP_VV (ne, 8, int8_t, !=)
+DEF_OP_VV (ne, 16, int8_t, !=)
+DEF_OP_VV (ne, 32, int8_t, !=)
+DEF_OP_VV (ne, 64, int8_t, !=)
+DEF_OP_VV (ne, 128, int8_t, !=)
+DEF_OP_VV (ne, 256, int8_t, !=)
+DEF_OP_VV (ne, 512, int8_t, !=)
+DEF_OP_VV (ne, 1024, int8_t, !=)
+DEF_OP_VV (ne, 2048, int8_t, !=)
+DEF_OP_VV (ne, 4096, int8_t, !=)
+
+DEF_OP_VV (ne, 1, int16_t, !=)
+DEF_OP_VV (ne, 2, int16_t, !=)
+DEF_OP_VV (ne, 4, int16_t, !=)
+DEF_OP_VV (ne, 8, int16_t, !=)
+DEF_OP_VV (ne, 16, int16_t, !=)
+DEF_OP_VV (ne, 32, int16_t, !=)
+DEF_OP_VV (ne, 64, int16_t, !=)
+DEF_OP_VV (ne, 128, int16_t, !=)
+DEF_OP_VV (ne, 256, int16_t, !=)
+DEF_OP_VV (ne, 512, int16_t, !=)
+DEF_OP_VV (ne, 1024, int16_t, !=)
+DEF_OP_VV (ne, 2048, int16_t, !=)
+
+DEF_OP_VV (ne, 1, int32_t, !=)
+DEF_OP_VV (ne, 2, int32_t, !=)
+DEF_OP_VV (ne, 4, int32_t, !=)
+DEF_OP_VV (ne, 8, int32_t, !=)
+DEF_OP_VV (ne, 16, int32_t, !=)
+DEF_OP_VV (ne, 32, int32_t, !=)
+DEF_OP_VV (ne, 64, int32_t, !=)
+DEF_OP_VV (ne, 128, int32_t, !=)
+DEF_OP_VV (ne, 256, int32_t, !=)
+DEF_OP_VV (ne, 512, int32_t, !=)
+DEF_OP_VV (ne, 1024, int32_t, !=)
+
+DEF_OP_VV (ne, 1, int64_t, !=)
+DEF_OP_VV (ne, 2, int64_t, !=)
+DEF_OP_VV (ne, 4, int64_t, !=)
+DEF_OP_VV (ne, 8, int64_t, !=)
+DEF_OP_VV (ne, 16, int64_t, !=)
+DEF_OP_VV (ne, 32, int64_t, !=)
+DEF_OP_VV (ne, 64, int64_t, !=)
+DEF_OP_VV (ne, 128, int64_t, !=)
+DEF_OP_VV (ne, 256, int64_t, !=)
+DEF_OP_VV (ne, 512, int64_t, !=)
+
+DEF_OP_VV (ne, 1, _Float16, !=)
+DEF_OP_VV (ne, 2, _Float16, !=)
+DEF_OP_VV (ne, 4, _Float16, !=)
+DEF_OP_VV (ne, 8, _Float16, !=)
+DEF_OP_VV (ne, 16, _Float16, !=)
+DEF_OP_VV (ne, 32, _Float16, !=)
+DEF_OP_VV (ne, 64, _Float16, !=)
+DEF_OP_VV (ne, 128, _Float16, !=)
+DEF_OP_VV (ne, 256, _Float16, !=)
+DEF_OP_VV (ne, 512, _Float16, !=)
+DEF_OP_VV (ne, 1024, _Float16, !=)
+DEF_OP_VV (ne, 2048, _Float16, !=)
+
+DEF_OP_VV (ne, 1, float, !=)
+DEF_OP_VV (ne, 2, float, !=)
+DEF_OP_VV (ne, 4, float, !=)
+DEF_OP_VV (ne, 8, float, !=)
+DEF_OP_VV (ne, 16, float, !=)
+DEF_OP_VV (ne, 32, float, !=)
+DEF_OP_VV (ne, 64, float, !=)
+DEF_OP_VV (ne, 128, float, !=)
+DEF_OP_VV (ne, 256, float, !=)
+DEF_OP_VV (ne, 512, float, !=)
+DEF_OP_VV (ne, 1024, float, !=)
+
+DEF_OP_VV (ne, 1, double, !=)
+DEF_OP_VV (ne, 2, double, !=)
+DEF_OP_VV (ne, 4, double, !=)
+DEF_OP_VV (ne, 8, double, !=)
+DEF_OP_VV (ne, 16, double, !=)
+DEF_OP_VV (ne, 32, double, !=)
+DEF_OP_VV (ne, 64, double, !=)
+DEF_OP_VV (ne, 128, double, !=)
+DEF_OP_VV (ne, 256, double, !=)
+DEF_OP_VV (ne, 512, double, !=)
+
+/* { dg-final { scan-assembler-times {vmsne\.vv} 42 } } */
+/* { dg-final { scan-assembler-times {vmfne\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+/* Instantiates DEF_OP_VV (expected to come from def.h) with > on int8_t..int64_t, _Float16, float and double; results are checked by the dg-final scans at the end.  */
+#include "def.h"
+
+DEF_OP_VV (gt, 1, int8_t, >)
+DEF_OP_VV (gt, 2, int8_t, >)
+DEF_OP_VV (gt, 4, int8_t, >)
+DEF_OP_VV (gt, 8, int8_t, >)
+DEF_OP_VV (gt, 16, int8_t, >)
+DEF_OP_VV (gt, 32, int8_t, >)
+DEF_OP_VV (gt, 64, int8_t, >)
+DEF_OP_VV (gt, 128, int8_t, >)
+DEF_OP_VV (gt, 256, int8_t, >)
+DEF_OP_VV (gt, 512, int8_t, >)
+DEF_OP_VV (gt, 1024, int8_t, >)
+DEF_OP_VV (gt, 2048, int8_t, >)
+DEF_OP_VV (gt, 4096, int8_t, >)
+
+DEF_OP_VV (gt, 1, int16_t, >)
+DEF_OP_VV (gt, 2, int16_t, >)
+DEF_OP_VV (gt, 4, int16_t, >)
+DEF_OP_VV (gt, 8, int16_t, >)
+DEF_OP_VV (gt, 16, int16_t, >)
+DEF_OP_VV (gt, 32, int16_t, >)
+DEF_OP_VV (gt, 64, int16_t, >)
+DEF_OP_VV (gt, 128, int16_t, >)
+DEF_OP_VV (gt, 256, int16_t, >)
+DEF_OP_VV (gt, 512, int16_t, >)
+DEF_OP_VV (gt, 1024, int16_t, >)
+DEF_OP_VV (gt, 2048, int16_t, >)
+
+DEF_OP_VV (gt, 1, int32_t, >)
+DEF_OP_VV (gt, 2, int32_t, >)
+DEF_OP_VV (gt, 4, int32_t, >)
+DEF_OP_VV (gt, 8, int32_t, >)
+DEF_OP_VV (gt, 16, int32_t, >)
+DEF_OP_VV (gt, 32, int32_t, >)
+DEF_OP_VV (gt, 64, int32_t, >)
+DEF_OP_VV (gt, 128, int32_t, >)
+DEF_OP_VV (gt, 256, int32_t, >)
+DEF_OP_VV (gt, 512, int32_t, >)
+DEF_OP_VV (gt, 1024, int32_t, >)
+
+DEF_OP_VV (gt, 1, int64_t, >)
+DEF_OP_VV (gt, 2, int64_t, >)
+DEF_OP_VV (gt, 4, int64_t, >)
+DEF_OP_VV (gt, 8, int64_t, >)
+DEF_OP_VV (gt, 16, int64_t, >)
+DEF_OP_VV (gt, 32, int64_t, >)
+DEF_OP_VV (gt, 64, int64_t, >)
+DEF_OP_VV (gt, 128, int64_t, >)
+DEF_OP_VV (gt, 256, int64_t, >)
+DEF_OP_VV (gt, 512, int64_t, >)
+
+DEF_OP_VV (gt, 1, _Float16, >)
+DEF_OP_VV (gt, 2, _Float16, >)
+DEF_OP_VV (gt, 4, _Float16, >)
+DEF_OP_VV (gt, 8, _Float16, >)
+DEF_OP_VV (gt, 16, _Float16, >)
+DEF_OP_VV (gt, 32, _Float16, >)
+DEF_OP_VV (gt, 64, _Float16, >)
+DEF_OP_VV (gt, 128, _Float16, >)
+DEF_OP_VV (gt, 256, _Float16, >)
+DEF_OP_VV (gt, 512, _Float16, >)
+DEF_OP_VV (gt, 1024, _Float16, >)
+DEF_OP_VV (gt, 2048, _Float16, >)
+
+DEF_OP_VV (gt, 1, float, >)
+DEF_OP_VV (gt, 2, float, >)
+DEF_OP_VV (gt, 4, float, >)
+DEF_OP_VV (gt, 8, float, >)
+DEF_OP_VV (gt, 16, float, >)
+DEF_OP_VV (gt, 32, float, >)
+DEF_OP_VV (gt, 64, float, >)
+DEF_OP_VV (gt, 128, float, >)
+DEF_OP_VV (gt, 256, float, >)
+DEF_OP_VV (gt, 512, float, >)
+DEF_OP_VV (gt, 1024, float, >)
+
+DEF_OP_VV (gt, 1, double, >)
+DEF_OP_VV (gt, 2, double, >)
+DEF_OP_VV (gt, 4, double, >)
+DEF_OP_VV (gt, 8, double, >)
+DEF_OP_VV (gt, 16, double, >)
+DEF_OP_VV (gt, 32, double, >)
+DEF_OP_VV (gt, 64, double, >)
+DEF_OP_VV (gt, 128, double, >)
+DEF_OP_VV (gt, 256, double, >)
+DEF_OP_VV (gt, 512, double, >)
+
+/* { dg-final { scan-assembler-times {vmsgt\.vv} 40 } } */
+/* { dg-final { scan-assembler-times {vmfgt\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+/* Instantiates DEF_OP_VV (expected to come from def.h) with >= on int8_t..int64_t, _Float16, float and double; results are checked by the dg-final scans at the end.  */
+#include "def.h"
+
+DEF_OP_VV (ge, 1, int8_t, >=)
+DEF_OP_VV (ge, 2, int8_t, >=)
+DEF_OP_VV (ge, 4, int8_t, >=)
+DEF_OP_VV (ge, 8, int8_t, >=)
+DEF_OP_VV (ge, 16, int8_t, >=)
+DEF_OP_VV (ge, 32, int8_t, >=)
+DEF_OP_VV (ge, 64, int8_t, >=)
+DEF_OP_VV (ge, 128, int8_t, >=)
+DEF_OP_VV (ge, 256, int8_t, >=)
+DEF_OP_VV (ge, 512, int8_t, >=)
+DEF_OP_VV (ge, 1024, int8_t, >=)
+DEF_OP_VV (ge, 2048, int8_t, >=)
+DEF_OP_VV (ge, 4096, int8_t, >=)
+
+DEF_OP_VV (ge, 1, int16_t, >=)
+DEF_OP_VV (ge, 2, int16_t, >=)
+DEF_OP_VV (ge, 4, int16_t, >=)
+DEF_OP_VV (ge, 8, int16_t, >=)
+DEF_OP_VV (ge, 16, int16_t, >=)
+DEF_OP_VV (ge, 32, int16_t, >=)
+DEF_OP_VV (ge, 64, int16_t, >=)
+DEF_OP_VV (ge, 128, int16_t, >=)
+DEF_OP_VV (ge, 256, int16_t, >=)
+DEF_OP_VV (ge, 512, int16_t, >=)
+DEF_OP_VV (ge, 1024, int16_t, >=)
+DEF_OP_VV (ge, 2048, int16_t, >=)
+
+DEF_OP_VV (ge, 1, int32_t, >=)
+DEF_OP_VV (ge, 2, int32_t, >=)
+DEF_OP_VV (ge, 4, int32_t, >=)
+DEF_OP_VV (ge, 8, int32_t, >=)
+DEF_OP_VV (ge, 16, int32_t, >=)
+DEF_OP_VV (ge, 32, int32_t, >=)
+DEF_OP_VV (ge, 64, int32_t, >=)
+DEF_OP_VV (ge, 128, int32_t, >=)
+DEF_OP_VV (ge, 256, int32_t, >=)
+DEF_OP_VV (ge, 512, int32_t, >=)
+DEF_OP_VV (ge, 1024, int32_t, >=)
+
+DEF_OP_VV (ge, 1, int64_t, >=)
+DEF_OP_VV (ge, 2, int64_t, >=)
+DEF_OP_VV (ge, 4, int64_t, >=)
+DEF_OP_VV (ge, 8, int64_t, >=)
+DEF_OP_VV (ge, 16, int64_t, >=)
+DEF_OP_VV (ge, 32, int64_t, >=)
+DEF_OP_VV (ge, 64, int64_t, >=)
+DEF_OP_VV (ge, 128, int64_t, >=)
+DEF_OP_VV (ge, 256, int64_t, >=)
+DEF_OP_VV (ge, 512, int64_t, >=)
+
+DEF_OP_VV (ge, 1, _Float16, >=)
+DEF_OP_VV (ge, 2, _Float16, >=)
+DEF_OP_VV (ge, 4, _Float16, >=)
+DEF_OP_VV (ge, 8, _Float16, >=)
+DEF_OP_VV (ge, 16, _Float16, >=)
+DEF_OP_VV (ge, 32, _Float16, >=)
+DEF_OP_VV (ge, 64, _Float16, >=)
+DEF_OP_VV (ge, 128, _Float16, >=)
+DEF_OP_VV (ge, 256, _Float16, >=)
+DEF_OP_VV (ge, 512, _Float16, >=)
+DEF_OP_VV (ge, 1024, _Float16, >=)
+DEF_OP_VV (ge, 2048, _Float16, >=)
+
+DEF_OP_VV (ge, 1, float, >=)
+DEF_OP_VV (ge, 2, float, >=)
+DEF_OP_VV (ge, 4, float, >=)
+DEF_OP_VV (ge, 8, float, >=)
+DEF_OP_VV (ge, 16, float, >=)
+DEF_OP_VV (ge, 32, float, >=)
+DEF_OP_VV (ge, 64, float, >=)
+DEF_OP_VV (ge, 128, float, >=)
+DEF_OP_VV (ge, 256, float, >=)
+DEF_OP_VV (ge, 512, float, >=)
+DEF_OP_VV (ge, 1024, float, >=)
+
+DEF_OP_VV (ge, 1, double, >=)
+DEF_OP_VV (ge, 2, double, >=)
+DEF_OP_VV (ge, 4, double, >=)
+DEF_OP_VV (ge, 8, double, >=)
+DEF_OP_VV (ge, 16, double, >=)
+DEF_OP_VV (ge, 32, double, >=)
+DEF_OP_VV (ge, 64, double, >=)
+DEF_OP_VV (ge, 128, double, >=)
+DEF_OP_VV (ge, 256, double, >=)
+DEF_OP_VV (ge, 512, double, >=)
+
+/* { dg-final { scan-assembler-times {vmsge\.vv} 40 } } */
+/* { dg-final { scan-assembler-times {vmfge\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+/* Instantiates DEF_OP_VV (expected to come from def.h) with < on int8_t..int64_t, _Float16, float and double; results are checked by the dg-final scans at the end.  */
+#include "def.h"
+
+DEF_OP_VV (lt, 1, int8_t, <)
+DEF_OP_VV (lt, 2, int8_t, <)
+DEF_OP_VV (lt, 4, int8_t, <)
+DEF_OP_VV (lt, 8, int8_t, <)
+DEF_OP_VV (lt, 16, int8_t, <)
+DEF_OP_VV (lt, 32, int8_t, <)
+DEF_OP_VV (lt, 64, int8_t, <)
+DEF_OP_VV (lt, 128, int8_t, <)
+DEF_OP_VV (lt, 256, int8_t, <)
+DEF_OP_VV (lt, 512, int8_t, <)
+DEF_OP_VV (lt, 1024, int8_t, <)
+DEF_OP_VV (lt, 2048, int8_t, <)
+DEF_OP_VV (lt, 4096, int8_t, <)
+
+DEF_OP_VV (lt, 1, int16_t, <)
+DEF_OP_VV (lt, 2, int16_t, <)
+DEF_OP_VV (lt, 4, int16_t, <)
+DEF_OP_VV (lt, 8, int16_t, <)
+DEF_OP_VV (lt, 16, int16_t, <)
+DEF_OP_VV (lt, 32, int16_t, <)
+DEF_OP_VV (lt, 64, int16_t, <)
+DEF_OP_VV (lt, 128, int16_t, <)
+DEF_OP_VV (lt, 256, int16_t, <)
+DEF_OP_VV (lt, 512, int16_t, <)
+DEF_OP_VV (lt, 1024, int16_t, <)
+DEF_OP_VV (lt, 2048, int16_t, <)
+
+DEF_OP_VV (lt, 1, int32_t, <)
+DEF_OP_VV (lt, 2, int32_t, <)
+DEF_OP_VV (lt, 4, int32_t, <)
+DEF_OP_VV (lt, 8, int32_t, <)
+DEF_OP_VV (lt, 16, int32_t, <)
+DEF_OP_VV (lt, 32, int32_t, <)
+DEF_OP_VV (lt, 64, int32_t, <)
+DEF_OP_VV (lt, 128, int32_t, <)
+DEF_OP_VV (lt, 256, int32_t, <)
+DEF_OP_VV (lt, 512, int32_t, <)
+DEF_OP_VV (lt, 1024, int32_t, <)
+
+DEF_OP_VV (lt, 1, int64_t, <)
+DEF_OP_VV (lt, 2, int64_t, <)
+DEF_OP_VV (lt, 4, int64_t, <)
+DEF_OP_VV (lt, 8, int64_t, <)
+DEF_OP_VV (lt, 16, int64_t, <)
+DEF_OP_VV (lt, 32, int64_t, <)
+DEF_OP_VV (lt, 64, int64_t, <)
+DEF_OP_VV (lt, 128, int64_t, <)
+DEF_OP_VV (lt, 256, int64_t, <)
+DEF_OP_VV (lt, 512, int64_t, <)
+
+DEF_OP_VV (lt, 1, _Float16, <)
+DEF_OP_VV (lt, 2, _Float16, <)
+DEF_OP_VV (lt, 4, _Float16, <)
+DEF_OP_VV (lt, 8, _Float16, <)
+DEF_OP_VV (lt, 16, _Float16, <)
+DEF_OP_VV (lt, 32, _Float16, <)
+DEF_OP_VV (lt, 64, _Float16, <)
+DEF_OP_VV (lt, 128, _Float16, <)
+DEF_OP_VV (lt, 256, _Float16, <)
+DEF_OP_VV (lt, 512, _Float16, <)
+DEF_OP_VV (lt, 1024, _Float16, <)
+DEF_OP_VV (lt, 2048, _Float16, <)
+
+DEF_OP_VV (lt, 1, float, <)
+DEF_OP_VV (lt, 2, float, <)
+DEF_OP_VV (lt, 4, float, <)
+DEF_OP_VV (lt, 8, float, <)
+DEF_OP_VV (lt, 16, float, <)
+DEF_OP_VV (lt, 32, float, <)
+DEF_OP_VV (lt, 64, float, <)
+DEF_OP_VV (lt, 128, float, <)
+DEF_OP_VV (lt, 256, float, <)
+DEF_OP_VV (lt, 512, float, <)
+DEF_OP_VV (lt, 1024, float, <)
+
+DEF_OP_VV (lt, 1, double, <)
+DEF_OP_VV (lt, 2, double, <)
+DEF_OP_VV (lt, 4, double, <)
+DEF_OP_VV (lt, 8, double, <)
+DEF_OP_VV (lt, 16, double, <)
+DEF_OP_VV (lt, 32, double, <)
+DEF_OP_VV (lt, 64, double, <)
+DEF_OP_VV (lt, 128, double, <)
+DEF_OP_VV (lt, 256, double, <)
+DEF_OP_VV (lt, 512, double, <)
+
+/* { dg-final { scan-assembler-times {vmslt\.vv} 40 } } */
+/* { dg-final { scan-assembler-times {vmflt\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+/* Instantiates DEF_OP_VV (expected to come from def.h) with <= on int8_t..int64_t, _Float16, float and double; results are checked by the dg-final scans at the end.  */
+#include "def.h"
+
+DEF_OP_VV (le, 1, int8_t, <=)
+DEF_OP_VV (le, 2, int8_t, <=)
+DEF_OP_VV (le, 4, int8_t, <=)
+DEF_OP_VV (le, 8, int8_t, <=)
+DEF_OP_VV (le, 16, int8_t, <=)
+DEF_OP_VV (le, 32, int8_t, <=)
+DEF_OP_VV (le, 64, int8_t, <=)
+DEF_OP_VV (le, 128, int8_t, <=)
+DEF_OP_VV (le, 256, int8_t, <=)
+DEF_OP_VV (le, 512, int8_t, <=)
+DEF_OP_VV (le, 1024, int8_t, <=)
+DEF_OP_VV (le, 2048, int8_t, <=)
+DEF_OP_VV (le, 4096, int8_t, <=)
+
+DEF_OP_VV (le, 1, int16_t, <=)
+DEF_OP_VV (le, 2, int16_t, <=)
+DEF_OP_VV (le, 4, int16_t, <=)
+DEF_OP_VV (le, 8, int16_t, <=)
+DEF_OP_VV (le, 16, int16_t, <=)
+DEF_OP_VV (le, 32, int16_t, <=)
+DEF_OP_VV (le, 64, int16_t, <=)
+DEF_OP_VV (le, 128, int16_t, <=)
+DEF_OP_VV (le, 256, int16_t, <=)
+DEF_OP_VV (le, 512, int16_t, <=)
+DEF_OP_VV (le, 1024, int16_t, <=)
+DEF_OP_VV (le, 2048, int16_t, <=)
+
+DEF_OP_VV (le, 1, int32_t, <=)
+DEF_OP_VV (le, 2, int32_t, <=)
+DEF_OP_VV (le, 4, int32_t, <=)
+DEF_OP_VV (le, 8, int32_t, <=)
+DEF_OP_VV (le, 16, int32_t, <=)
+DEF_OP_VV (le, 32, int32_t, <=)
+DEF_OP_VV (le, 64, int32_t, <=)
+DEF_OP_VV (le, 128, int32_t, <=)
+DEF_OP_VV (le, 256, int32_t, <=)
+DEF_OP_VV (le, 512, int32_t, <=)
+DEF_OP_VV (le, 1024, int32_t, <=)
+
+DEF_OP_VV (le, 1, int64_t, <=)
+DEF_OP_VV (le, 2, int64_t, <=)
+DEF_OP_VV (le, 4, int64_t, <=)
+DEF_OP_VV (le, 8, int64_t, <=)
+DEF_OP_VV (le, 16, int64_t, <=)
+DEF_OP_VV (le, 32, int64_t, <=)
+DEF_OP_VV (le, 64, int64_t, <=)
+DEF_OP_VV (le, 128, int64_t, <=)
+DEF_OP_VV (le, 256, int64_t, <=)
+DEF_OP_VV (le, 512, int64_t, <=)
+
+DEF_OP_VV (le, 1, _Float16, <=)
+DEF_OP_VV (le, 2, _Float16, <=)
+DEF_OP_VV (le, 4, _Float16, <=)
+DEF_OP_VV (le, 8, _Float16, <=)
+DEF_OP_VV (le, 16, _Float16, <=)
+DEF_OP_VV (le, 32, _Float16, <=)
+DEF_OP_VV (le, 64, _Float16, <=)
+DEF_OP_VV (le, 128, _Float16, <=)
+DEF_OP_VV (le, 256, _Float16, <=)
+DEF_OP_VV (le, 512, _Float16, <=)
+DEF_OP_VV (le, 1024, _Float16, <=)
+DEF_OP_VV (le, 2048, _Float16, <=)
+
+DEF_OP_VV (le, 1, float, <=)
+DEF_OP_VV (le, 2, float, <=)
+DEF_OP_VV (le, 4, float, <=)
+DEF_OP_VV (le, 8, float, <=)
+DEF_OP_VV (le, 16, float, <=)
+DEF_OP_VV (le, 32, float, <=)
+DEF_OP_VV (le, 64, float, <=)
+DEF_OP_VV (le, 128, float, <=)
+DEF_OP_VV (le, 256, float, <=)
+DEF_OP_VV (le, 512, float, <=)
+DEF_OP_VV (le, 1024, float, <=)
+
+DEF_OP_VV (le, 1, double, <=)
+DEF_OP_VV (le, 2, double, <=)
+DEF_OP_VV (le, 4, double, <=)
+DEF_OP_VV (le, 8, double, <=)
+DEF_OP_VV (le, 16, double, <=)
+DEF_OP_VV (le, 32, double, <=)
+DEF_OP_VV (le, 64, double, <=)
+DEF_OP_VV (le, 128, double, <=)
+DEF_OP_VV (le, 256, double, <=)
+DEF_OP_VV (le, 512, double, <=)
+
+/* { dg-final { scan-assembler-times {vmsle\.vv} 40 } } */
+/* { dg-final { scan-assembler-times {vmfle\.vv} 27 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
{ \
return v[INDEX]; \
}
+
+#define DEF_MASK_LOGIC(PREFIX, NUM, TYPE, OP) /* Defines function PREFIX_<TYPE><NUM>.  */ \
+ void __attribute__ ((noinline, noclone)) /* Not inlined or cloned into other functions.  */ \
+ PREFIX##_##TYPE##NUM (TYPE *restrict a, TYPE *restrict b, TYPE *restrict c, \
+ TYPE *restrict d, TYPE *restrict e) \
+ { \
+ for (int i = 0; i < NUM; ++i) /* NUM is the constant trip count.  */ \
+ a[i] = (b[i] > c[i]) OP (d[i] < e[i]); /* OP combines the two comparison results.  */ \
+ }
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+/* Each line below defines and_<type><num>: a[i] = (b[i] > c[i]) & (d[i] < e[i]) over <num> elements.  */
+DEF_MASK_LOGIC (and, 1, int8_t, &)
+DEF_MASK_LOGIC (and, 2, int8_t, &)
+DEF_MASK_LOGIC (and, 4, int8_t, &)
+DEF_MASK_LOGIC (and, 8, int8_t, &)
+DEF_MASK_LOGIC (and, 16, int8_t, &)
+DEF_MASK_LOGIC (and, 32, int8_t, &)
+DEF_MASK_LOGIC (and, 64, int8_t, &)
+DEF_MASK_LOGIC (and, 128, int8_t, &)
+DEF_MASK_LOGIC (and, 256, int8_t, &)
+DEF_MASK_LOGIC (and, 512, int8_t, &)
+DEF_MASK_LOGIC (and, 1024, int8_t, &)
+DEF_MASK_LOGIC (and, 2048, int8_t, &)
+DEF_MASK_LOGIC (and, 4096, int8_t, &)
+
+DEF_MASK_LOGIC (and, 1, int16_t, &)
+DEF_MASK_LOGIC (and, 2, int16_t, &)
+DEF_MASK_LOGIC (and, 4, int16_t, &)
+DEF_MASK_LOGIC (and, 8, int16_t, &)
+DEF_MASK_LOGIC (and, 16, int16_t, &)
+DEF_MASK_LOGIC (and, 32, int16_t, &)
+DEF_MASK_LOGIC (and, 64, int16_t, &)
+DEF_MASK_LOGIC (and, 128, int16_t, &)
+DEF_MASK_LOGIC (and, 256, int16_t, &)
+DEF_MASK_LOGIC (and, 512, int16_t, &)
+DEF_MASK_LOGIC (and, 1024, int16_t, &)
+DEF_MASK_LOGIC (and, 2048, int16_t, &)
+
+DEF_MASK_LOGIC (and, 1, int32_t, &)
+DEF_MASK_LOGIC (and, 2, int32_t, &)
+DEF_MASK_LOGIC (and, 4, int32_t, &)
+DEF_MASK_LOGIC (and, 8, int32_t, &)
+DEF_MASK_LOGIC (and, 16, int32_t, &)
+DEF_MASK_LOGIC (and, 32, int32_t, &)
+DEF_MASK_LOGIC (and, 64, int32_t, &)
+DEF_MASK_LOGIC (and, 128, int32_t, &)
+DEF_MASK_LOGIC (and, 256, int32_t, &)
+DEF_MASK_LOGIC (and, 512, int32_t, &)
+DEF_MASK_LOGIC (and, 1024, int32_t, &)
+
+DEF_MASK_LOGIC (and, 1, int64_t, &)
+DEF_MASK_LOGIC (and, 2, int64_t, &)
+DEF_MASK_LOGIC (and, 4, int64_t, &)
+DEF_MASK_LOGIC (and, 8, int64_t, &)
+DEF_MASK_LOGIC (and, 16, int64_t, &)
+DEF_MASK_LOGIC (and, 32, int64_t, &)
+DEF_MASK_LOGIC (and, 64, int64_t, &)
+DEF_MASK_LOGIC (and, 128, int64_t, &)
+DEF_MASK_LOGIC (and, 256, int64_t, &)
+DEF_MASK_LOGIC (and, 512, int64_t, &)
+/* NOTE(review): 46 instantiations above, 42 vmand.mm expected below; presumably the four NUM == 1 loops are not vectorized -- confirm.  */
+/* { dg-final { scan-assembler-times {vmand\.mm} 42 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+/* Each line below defines ior_<type><num>: a[i] = (b[i] > c[i]) | (d[i] < e[i]) over <num> elements.  */
+DEF_MASK_LOGIC (ior, 1, int8_t, |)
+DEF_MASK_LOGIC (ior, 2, int8_t, |)
+DEF_MASK_LOGIC (ior, 4, int8_t, |)
+DEF_MASK_LOGIC (ior, 8, int8_t, |)
+DEF_MASK_LOGIC (ior, 16, int8_t, |)
+DEF_MASK_LOGIC (ior, 32, int8_t, |)
+DEF_MASK_LOGIC (ior, 64, int8_t, |)
+DEF_MASK_LOGIC (ior, 128, int8_t, |)
+DEF_MASK_LOGIC (ior, 256, int8_t, |)
+DEF_MASK_LOGIC (ior, 512, int8_t, |)
+DEF_MASK_LOGIC (ior, 1024, int8_t, |)
+DEF_MASK_LOGIC (ior, 2048, int8_t, |)
+DEF_MASK_LOGIC (ior, 4096, int8_t, |)
+
+DEF_MASK_LOGIC (ior, 1, int16_t, |)
+DEF_MASK_LOGIC (ior, 2, int16_t, |)
+DEF_MASK_LOGIC (ior, 4, int16_t, |)
+DEF_MASK_LOGIC (ior, 8, int16_t, |)
+DEF_MASK_LOGIC (ior, 16, int16_t, |)
+DEF_MASK_LOGIC (ior, 32, int16_t, |)
+DEF_MASK_LOGIC (ior, 64, int16_t, |)
+DEF_MASK_LOGIC (ior, 128, int16_t, |)
+DEF_MASK_LOGIC (ior, 256, int16_t, |)
+DEF_MASK_LOGIC (ior, 512, int16_t, |)
+DEF_MASK_LOGIC (ior, 1024, int16_t, |)
+DEF_MASK_LOGIC (ior, 2048, int16_t, |)
+
+DEF_MASK_LOGIC (ior, 1, int32_t, |)
+DEF_MASK_LOGIC (ior, 2, int32_t, |)
+DEF_MASK_LOGIC (ior, 4, int32_t, |)
+DEF_MASK_LOGIC (ior, 8, int32_t, |)
+DEF_MASK_LOGIC (ior, 16, int32_t, |)
+DEF_MASK_LOGIC (ior, 32, int32_t, |)
+DEF_MASK_LOGIC (ior, 64, int32_t, |)
+DEF_MASK_LOGIC (ior, 128, int32_t, |)
+DEF_MASK_LOGIC (ior, 256, int32_t, |)
+DEF_MASK_LOGIC (ior, 512, int32_t, |)
+DEF_MASK_LOGIC (ior, 1024, int32_t, |)
+
+DEF_MASK_LOGIC (ior, 1, int64_t, |)
+DEF_MASK_LOGIC (ior, 2, int64_t, |)
+DEF_MASK_LOGIC (ior, 4, int64_t, |)
+DEF_MASK_LOGIC (ior, 8, int64_t, |)
+DEF_MASK_LOGIC (ior, 16, int64_t, |)
+DEF_MASK_LOGIC (ior, 32, int64_t, |)
+DEF_MASK_LOGIC (ior, 64, int64_t, |)
+DEF_MASK_LOGIC (ior, 128, int64_t, |)
+DEF_MASK_LOGIC (ior, 256, int64_t, |)
+DEF_MASK_LOGIC (ior, 512, int64_t, |)
+/* NOTE(review): 46 instantiations above, 42 vmor.mm expected below; presumably the four NUM == 1 loops are not vectorized -- confirm.  */
+/* { dg-final { scan-assembler-times {vmor\.mm} 42 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh_zvl4096b -mabi=lp64d -O3 -fno-schedule-insns -fno-schedule-insns2 --param=riscv-autovec-lmul=m8 -fdump-tree-optimized" } */
+
+#include "def.h"
+/* Each line below defines xor_<type><num>: a[i] = (b[i] > c[i]) ^ (d[i] < e[i]) over <num> elements.  */
+DEF_MASK_LOGIC (xor, 1, int8_t, ^)
+DEF_MASK_LOGIC (xor, 2, int8_t, ^)
+DEF_MASK_LOGIC (xor, 4, int8_t, ^)
+DEF_MASK_LOGIC (xor, 8, int8_t, ^)
+DEF_MASK_LOGIC (xor, 16, int8_t, ^)
+DEF_MASK_LOGIC (xor, 32, int8_t, ^)
+DEF_MASK_LOGIC (xor, 64, int8_t, ^)
+DEF_MASK_LOGIC (xor, 128, int8_t, ^)
+DEF_MASK_LOGIC (xor, 256, int8_t, ^)
+DEF_MASK_LOGIC (xor, 512, int8_t, ^)
+DEF_MASK_LOGIC (xor, 1024, int8_t, ^)
+DEF_MASK_LOGIC (xor, 2048, int8_t, ^)
+DEF_MASK_LOGIC (xor, 4096, int8_t, ^)
+
+DEF_MASK_LOGIC (xor, 1, int16_t, ^)
+DEF_MASK_LOGIC (xor, 2, int16_t, ^)
+DEF_MASK_LOGIC (xor, 4, int16_t, ^)
+DEF_MASK_LOGIC (xor, 8, int16_t, ^)
+DEF_MASK_LOGIC (xor, 16, int16_t, ^)
+DEF_MASK_LOGIC (xor, 32, int16_t, ^)
+DEF_MASK_LOGIC (xor, 64, int16_t, ^)
+DEF_MASK_LOGIC (xor, 128, int16_t, ^)
+DEF_MASK_LOGIC (xor, 256, int16_t, ^)
+DEF_MASK_LOGIC (xor, 512, int16_t, ^)
+DEF_MASK_LOGIC (xor, 1024, int16_t, ^)
+DEF_MASK_LOGIC (xor, 2048, int16_t, ^)
+
+DEF_MASK_LOGIC (xor, 1, int32_t, ^)
+DEF_MASK_LOGIC (xor, 2, int32_t, ^)
+DEF_MASK_LOGIC (xor, 4, int32_t, ^)
+DEF_MASK_LOGIC (xor, 8, int32_t, ^)
+DEF_MASK_LOGIC (xor, 16, int32_t, ^)
+DEF_MASK_LOGIC (xor, 32, int32_t, ^)
+DEF_MASK_LOGIC (xor, 64, int32_t, ^)
+DEF_MASK_LOGIC (xor, 128, int32_t, ^)
+DEF_MASK_LOGIC (xor, 256, int32_t, ^)
+DEF_MASK_LOGIC (xor, 512, int32_t, ^)
+DEF_MASK_LOGIC (xor, 1024, int32_t, ^)
+
+DEF_MASK_LOGIC (xor, 1, int64_t, ^)
+DEF_MASK_LOGIC (xor, 2, int64_t, ^)
+DEF_MASK_LOGIC (xor, 4, int64_t, ^)
+DEF_MASK_LOGIC (xor, 8, int64_t, ^)
+DEF_MASK_LOGIC (xor, 16, int64_t, ^)
+DEF_MASK_LOGIC (xor, 32, int64_t, ^)
+DEF_MASK_LOGIC (xor, 64, int64_t, ^)
+DEF_MASK_LOGIC (xor, 128, int64_t, ^)
+DEF_MASK_LOGIC (xor, 256, int64_t, ^)
+DEF_MASK_LOGIC (xor, 512, int64_t, ^)
+/* NOTE(review): 46 instantiations above, 42 vmxor.mm expected below; presumably the four NUM == 1 loops are not vectorized -- confirm.  */
+/* { dg-final { scan-assembler-times {vmxor\.mm} 42 } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "1,1" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2,2" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4,4" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "16,16" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "32,32" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "64,64" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "128,128" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "256,256" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "512,512" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "1024,1024" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "2048,2048" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */